2cf27b861b878a69f35205c57be963333004278c
[oota-llvm.git] / lib / Target / AArch64 / AArch64InstrNEON.td
1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file describes the AArch64 NEON instruction set.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
17
// Custom SelectionDAG nodes produced by AArch64 ISel lowering for NEON.
// The SDTypeProfile of each node states (NumResults, NumOperands) plus
// per-operand type constraints; operands left unconstrained are checked
// by the patterns that use the node instead.
18 // (outs Result), (ins Imm, OpCmode)
19 def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
20
// Vector immediate moves: operand 1 is the raw 8-bit immediate, operand 2
// the cmode field selecting how it is expanded/replicated (see Neon_AllZero
// and Neon_AllOne below for cmode 14 = plain byte replication).
21 def Neon_movi     : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
22
23 def Neon_mvni     : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
24
25 // (outs Result), (ins Imm)
26 def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
27                         [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
28
29 // (outs Result), (ins LHS, RHS, CondCode)
30 def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
31                  [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;
32
33 // (outs Result), (ins LHS, 0/0.0 constant, CondCode)
34 def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
35                  [SDTCisVec<0>,  SDTCisVec<1>]>>;
36
37 // (outs Result), (ins LHS, RHS)
38 def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
39                  [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;
40
// Saturating shift with an i32 immediate shift amount; result and shifted
// operand share a type.
41 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
42                                      SDTCisVT<2, i32>]>;
43 def Neon_sqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
44 def Neon_uqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
45
// Two-register permutes (UZP/ZIP/TRN): both sources and the result all have
// the same vector type.
46 def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
47                                SDTCisSameAs<0, 2>]>;
48 def Neon_uzp1    : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>;
49 def Neon_uzp2    : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>;
50 def Neon_zip1    : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>;
51 def Neon_zip2    : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>;
52 def Neon_trn1    : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>;
53 def Neon_trn2    : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>;
54
// One-register shuffles (element reversal within containers).
55 def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
56 def Neon_rev64    : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
57 def Neon_rev32    : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
58 def Neon_rev16    : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
// Duplicate a scalar (VDUP) or a selected lane (VDUPLANE, lane index is an
// i64 operand) across all elements of the result vector.
59 def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
60                        [SDTCisVec<0>]>>;
61 def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
62                            [SDTCisVec<0>, SDTCisVT<2, i64>]>>;
63 def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
64                            [SDTCisVec<0>,  SDTCisSameAs<0, 1>,
65                            SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
66
// Generic DAG assertion nodes (value is known sign/zero-extended); declared
// here so NEON patterns can match through them.
67 def SDT_assertext : SDTypeProfile<1, 1,
68   [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>;
69 def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>;
70 def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
71
72 //===----------------------------------------------------------------------===//
73 // Addressing-mode instantiations
74 //===----------------------------------------------------------------------===//
75
// Instantiate load/store selection patterns for NEON registers.
// ls_neutral_pats (defined elsewhere — presumably in the scalar load/store
// file; confirm against AArch64InstrInfo.td) takes a load/store instruction
// pair plus address templates; here we substitute the placeholder OFFSET
// with the appropriately-scaled uimm12 operand and ALIGN with the minimum
// alignment the access needs.

// 64-bit (D-register) accesses: doubleword-scaled offset, >= 8-byte align.
76 multiclass ls_64_pats<dag address, dag Base, dag Offset, ValueType Ty> {
77 defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
78                       !foreach(decls.pattern, Offset,
79                                !subst(OFFSET, dword_uimm12, decls.pattern)),
80                       !foreach(decls.pattern, address,
81                                !subst(OFFSET, dword_uimm12,
82                                !subst(ALIGN, min_align8, decls.pattern))),
83                       Ty>;
84 }
85
// 128-bit (Q-register) accesses: quadword-scaled offset, >= 16-byte align.
86 multiclass ls_128_pats<dag address, dag Base, dag Offset, ValueType Ty> {
87 defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
88                        !foreach(decls.pattern, Offset,
89                                 !subst(OFFSET, qword_uimm12, decls.pattern)),
90                        !foreach(decls.pattern, address,
91                                 !subst(OFFSET, qword_uimm12,
92                                 !subst(ALIGN, min_align16, decls.pattern))),
93                       Ty>;
94 }
95
// Instantiate the above for every legal 64-bit and 128-bit vector type.
96 multiclass uimm12_neon_pats<dag address, dag Base, dag Offset> {
97   defm : ls_64_pats<address, Base, Offset, v8i8>;
98   defm : ls_64_pats<address, Base, Offset, v4i16>;
99   defm : ls_64_pats<address, Base, Offset, v2i32>;
100   defm : ls_64_pats<address, Base, Offset, v1i64>;
101   defm : ls_64_pats<address, Base, Offset, v2f32>;
102   defm : ls_64_pats<address, Base, Offset, v1f64>;
103
104   defm : ls_128_pats<address, Base, Offset, v16i8>;
105   defm : ls_128_pats<address, Base, Offset, v8i16>;
106   defm : ls_128_pats<address, Base, Offset, v4i32>;
107   defm : ls_128_pats<address, Base, Offset, v2i64>;
108   defm : ls_128_pats<address, Base, Offset, v4f32>;
109   defm : ls_128_pats<address, Base, Offset, v2f64>;
110 }
111
// Vector load/store from the constant pool: materialise the page address
// with ADRP and fold the low 12 bits of the symbol into the uimm12 offset.
112 defm : uimm12_neon_pats<(A64WrapperSmall
113                           tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
114                         (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
115
116 //===----------------------------------------------------------------------===//
117 // Multiclasses
118 //===----------------------------------------------------------------------===//
119
// Three-same-operand instructions that exist only in byte arrangements
// (8B/16B), e.g. the logical ops and PMUL. The leading 0b0/0b1 bit is Q
// (64- vs 128-bit vector); `size` is passed through verbatim because the
// logical ops encode their variant (AND/ORR/ORN/BIC) in the size field.
// Separate opnode8B/opnode16B allow width-specific PatFrags (Neon_orn8B
// vs Neon_orn16B below).
120 multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size,  bits<5> opcode,
121                                 string asmop, SDPatternOperator opnode8B,
122                                 SDPatternOperator opnode16B,
123                                 bit Commutable = 0> {
124   let isCommutable = Commutable in {
125     def _8B :  NeonI_3VSame<0b0, u, size, opcode,
126                (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
127                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
128                [(set (v8i8 VPR64:$Rd),
129                   (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
130                NoItinerary>;
131
132     def _16B : NeonI_3VSame<0b1, u, size, opcode,
133                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
134                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
135                [(set (v16i8 VPR128:$Rd),
136                   (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
137                NoItinerary>;
138   }
139
140 }
141
// Three-same-operand instructions in halfword and word arrangements
// (4H/8H/2S/4S). The size field is fixed per element width (0b01 = 16-bit,
// 0b10 = 32-bit); only the Q bit varies between the 64- and 128-bit forms.
142 multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
143                                   string asmop, SDPatternOperator opnode,
144                                   bit Commutable = 0> {
145   let isCommutable = Commutable in {
146     def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
147               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
148               asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
149               [(set (v4i16 VPR64:$Rd),
150                  (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
151               NoItinerary>;
152
153     def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
154               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
155               asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
156               [(set (v8i16 VPR128:$Rd),
157                  (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
158               NoItinerary>;
159
160     def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
161               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
162               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
163               [(set (v2i32 VPR64:$Rd),
164                  (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
165               NoItinerary>;
166
167     def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
168               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
169               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
170               [(set (v4i32 VPR128:$Rd),
171                  (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
172               NoItinerary>;
173   }
174 }
// Extends the HS multiclass with the byte arrangements (8B/16B, size 0b00),
// giving the full B/H/S set used by e.g. MUL and the absolute-difference ops.
175 multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
176                                   string asmop, SDPatternOperator opnode,
177                                   bit Commutable = 0>
178    : NeonI_3VSame_HS_sizes<u, opcode,  asmop, opnode, Commutable> {
179   let isCommutable = Commutable in {
180     def _8B :  NeonI_3VSame<0b0, u, 0b00, opcode,
181                (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
182                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
183                [(set (v8i8 VPR64:$Rd),
184                   (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
185                NoItinerary>;
186
187     def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
188                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
189                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
190                [(set (v16i8 VPR128:$Rd),
191                   (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
192                NoItinerary>;
193   }
194 }
195
// Extends the BHS multiclass with the doubleword arrangement. Only the
// 128-bit 2D form exists (Q = 1, size = 0b11); there is no 1D variant of
// the three-same instructions.
196 multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
197                                    string asmop, SDPatternOperator opnode,
198                                    bit Commutable = 0>
199    : NeonI_3VSame_BHS_sizes<u, opcode,  asmop, opnode, Commutable> {
200   let isCommutable = Commutable in {
201     def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
202               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
203               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
204               [(set (v2i64 VPR128:$Rd),
205                  (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
206               NoItinerary>;
207   }
208 }
209
210 // Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
211 // but Result types can be integer or floating point types.
// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
// but Result types can be integer or floating point types (hence the
// separate ResTy* parameters — float compares produce integer vectors).
// The `size` bit parameter is the high bit of the 2-bit size field; the low
// bit distinguishes single-precision ({size,0}) from the 2D double-precision
// form ({size,1}). Separate opnode2S/4S/2D allow per-width operators.
212 multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
213                                  string asmop, SDPatternOperator opnode2S,
214                                  SDPatternOperator opnode4S,
215                                  SDPatternOperator opnode2D,
216                                  ValueType ResTy2S, ValueType ResTy4S,
217                                  ValueType ResTy2D, bit Commutable = 0> {
218   let isCommutable = Commutable in {
219     def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
220               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
221               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
222               [(set (ResTy2S VPR64:$Rd),
223                  (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
224               NoItinerary>;
225
226     def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
227               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
228               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
229               [(set (ResTy4S VPR128:$Rd),
230                  (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
231               NoItinerary>;
232
233     def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
234               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
235               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
236               [(set (ResTy2D VPR128:$Rd),
237                  (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
238                NoItinerary>;
239   }
240 }
241
242 //===----------------------------------------------------------------------===//
243 // Instruction Definitions
244 //===----------------------------------------------------------------------===//
245
246 // Vector Arithmetic Instructions
247
248 // Vector Add (Integer and Floating-Point)
249
// Trailing 1/0 argument is Commutable: add/mul and their FP forms commute,
// sub does not. FP variants take the SD multiclass with per-width result
// types; the final flag again marks commutativity.
250 defm ADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
251 defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
252                                      v2f32, v4f32, v2f64, 1>;
253
254 // Vector Sub (Integer and Floating-Point)
255
256 defm SUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
257 defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
258                                      v2f32, v4f32, v2f64, 0>;
259
260 // Vector Multiply (Integer and Floating-Point)
261
// Integer MUL has no 2D form, hence BHS rather than BHSD.
262 defm MULvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
263 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
264                                      v2f32, v4f32, v2f64, 1>;
265
266 // Vector Multiply (Polynomial)
267
// Polynomial multiply only exists for byte elements (GF(2) carry-less mul).
268 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
269                                     int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
270
271 // Vector Multiply-accumulate and Multiply-subtract (Integer)
272
273 // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
274 // two operands constraints.
// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
// two operands constraints. Base class for read-modify-write three-same
// instructions (MLA/MLS, FMLA/FMLS, BSL/BIT/BIF, UABA/SABA): the
// accumulator input $src is tied to the destination $Rd, so opnode is a
// ternary operator taking the old destination value first.
275 class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
276   RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
277   bits<5> opcode, SDPatternOperator opnode>
278   : NeonI_3VSame<q, u, size, opcode,
279     (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
280     asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
281     [(set (OpTy VPRC:$Rd),
282        (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
283     NoItinerary> {
284   let Constraints = "$src = $Rd";
285 }
286
// MLA/MLS as ternary DAG fragments: Ra +/- (Rn * Rm).
287 def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
288                        (add node:$Ra, (mul node:$Rn, node:$Rm))>;
289
290 def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
291                        (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
292
293
// Integer multiply-accumulate/subtract, all B/H/S arrangements (no 2D form,
// matching MUL). U = 0 selects MLA, U = 1 selects MLS; opcode is 0b10010
// throughout, size encodes the element width.
294 def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
295                                              0b0, 0b0, 0b00, 0b10010, Neon_mla>;
296 def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
297                                              0b1, 0b0, 0b00, 0b10010, Neon_mla>;
298 def MLAvvv_4H:  NeonI_3VSame_Constraint_impl<"mla", ".4h",  VPR64,  v4i16,
299                                              0b0, 0b0, 0b01, 0b10010, Neon_mla>;
300 def MLAvvv_8H:  NeonI_3VSame_Constraint_impl<"mla", ".8h",  VPR128, v8i16,
301                                              0b1, 0b0, 0b01, 0b10010, Neon_mla>;
302 def MLAvvv_2S:  NeonI_3VSame_Constraint_impl<"mla", ".2s",  VPR64,  v2i32,
303                                              0b0, 0b0, 0b10, 0b10010, Neon_mla>;
304 def MLAvvv_4S:  NeonI_3VSame_Constraint_impl<"mla", ".4s",  VPR128, v4i32,
305                                              0b1, 0b0, 0b10, 0b10010, Neon_mla>;
306
307 def MLSvvv_8B:  NeonI_3VSame_Constraint_impl<"mls", ".8b",  VPR64,  v8i8,
308                                              0b0, 0b1, 0b00, 0b10010, Neon_mls>;
309 def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
310                                              0b1, 0b1, 0b00, 0b10010, Neon_mls>;
311 def MLSvvv_4H:  NeonI_3VSame_Constraint_impl<"mls", ".4h",  VPR64,  v4i16,
312                                              0b0, 0b1, 0b01, 0b10010, Neon_mls>;
313 def MLSvvv_8H:  NeonI_3VSame_Constraint_impl<"mls", ".8h",  VPR128, v8i16,
314                                              0b1, 0b1, 0b01, 0b10010, Neon_mls>;
315 def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
316                                              0b0, 0b1, 0b10, 0b10010, Neon_mls>;
317 def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
318                                              0b1, 0b1, 0b10, 0b10010, Neon_mls>;
319
320 // Vector Multiply-accumulate and Multiply-subtract (Floating Point)
321
// Separate fadd/fmul fragments, fused to FMLA/FMLS only when fusion is
// permitted. fmul_su is defined elsewhere — presumably an fmul restricted
// so the contraction is legal (TODO: confirm its predicate). The
// UseFusedMAC predicate gates matching these separate-operation patterns;
// explicit ISD::FMA nodes are matched unconditionally below.
322 def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
323                         (fadd node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
324
325 def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
326                         (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
327
328 let Predicates = [HasNEON, UseFusedMAC] in {
// FMLA/FMLS share opcode 0b11001 with U = 0; the size field's high bit
// selects FMLS (0b1x) vs FMLA (0b0x), low bit selects double precision.
329 def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s",  VPR64,  v2f32,
330                                              0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
331 def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s",  VPR128, v4f32,
332                                              0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
333 def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d",  VPR128, v2f64,
334                                              0b1, 0b0, 0b01, 0b11001, Neon_fmla>;
335
336 def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s",  VPR64,  v2f32,
337                                               0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
338 def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s",  VPR128, v4f32,
339                                              0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
340 def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d",  VPR128, v2f64,
341                                              0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
342 }
343
344 // We're also allowed to match the fma instruction regardless of compile
345 // options.
// Explicit ISD::FMA (and fma-with-negated-multiplicand for FMLS) always maps
// to the fused instructions — note the operand rotation: fma's addend (Ra)
// becomes the tied accumulator, i.e. the first instruction operand.
346 def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
347           (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
348 def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
349           (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
350 def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
351           (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
352
353 def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
354           (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
355 def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
356           (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
357 def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
358           (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
359
360 // Vector Divide (Floating-Point)
361
// Division is never commutative.
362 defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
363                                      v2f32, v4f32, v2f64, 0>;
364
365 // Vector Bitwise Operations
366
367 // Vector Bitwise AND
368
// The logical ops all share opcode 0b00011 and use the (u, size) fields to
// pick the operation: AND (0,00), BIC (0,01), ORR (0,10), ORN (0,11),
// EOR (1,00).
369 defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;
370
371 // Vector Bitwise Exclusive OR
372
373 defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;
374
375 // Vector Bitwise OR
376
377 defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
378
379 // ORR disassembled as MOV if Vn==Vm
380
381 // Vector Move - register
382 // Alias for ORR if Vn=Vm.
383 // FIXME: This is actually the preferred syntax but TableGen can't deal with
384 // custom printing of aliases.
// Trailing 0 = EmitPriority: accept "mov" in the assembler but keep printing
// the orr form.
385 def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
386                     (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
387 def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
388                     (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
389
390 // The MOVI instruction takes two immediate operands.  The first is the
391 // immediate encoding, while the second is the cmode.  A cmode of 14, or
392 // 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC.
// All-zeros / all-ones vector constants expressed via the MOVI node: cmode
// 14 (0b1110) replicates the 8-bit immediate into every byte, so imm 0
// gives all-zeros and imm 255 gives all-ones.
393 def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
394 def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;
395
// Bitwise NOT as xor-with-all-ones; the bitconvert lets these match inputs
// of any 64-/128-bit element type.
396 def Neon_not8B  : PatFrag<(ops node:$in),
397                           (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>;
398 def Neon_not16B : PatFrag<(ops node:$in),
399                           (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>;
400
// ORN = Rn | ~Rm, BIC = Rn & ~Rm, built from the NOT fragments above.
401 def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
402                          (or node:$Rn, (Neon_not8B node:$Rm))>;
403
404 def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
405                           (or node:$Rn, (Neon_not16B node:$Rm))>;
406
407 def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
408                          (and node:$Rn, (Neon_not8B node:$Rm))>;
409
410 def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
411                           (and node:$Rn, (Neon_not16B node:$Rm))>;
412
413
414 // Vector Bitwise OR NOT - register
415
416 defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
417                                    Neon_orn8B, Neon_orn16B, 0>;
418
419 // Vector Bitwise Bit Clear (AND NOT) - register
420
421 defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
422                                    Neon_bic8B, Neon_bic16B, 0>;
423
// The bitwise instructions are defined on v8i8/v16i8 only; these extra
// patterns retype the same instructions for every other integer vector
// type so a bitwise op never needs a bitcast.
424 multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
425                                    SDPatternOperator opnode16B,
426                                    Instruction INST8B,
427                                    Instruction INST16B> {
428   def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
429             (INST8B VPR64:$Rn, VPR64:$Rm)>;
430   def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
431             (INST8B VPR64:$Rn, VPR64:$Rm)>;
432   def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
433             (INST8B VPR64:$Rn, VPR64:$Rm)>;
434   def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
435             (INST16B VPR128:$Rn, VPR128:$Rm)>;
436   def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
437             (INST16B VPR128:$Rn, VPR128:$Rm)>;
438   def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
439             (INST16B VPR128:$Rn, VPR128:$Rm)>;
440 }
441
442 // Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
443 defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
444 defm : Neon_bitwise2V_patterns<or,  or,  ORRvvv_8B, ORRvvv_16B>;
445 defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
446 defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
447 defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
448
449 //   Vector Bitwise Select
//   Vector Bitwise Select: Rd = (Rd & Rn) | (~Rd & Rm), i.e. the tied
// destination acts as the bit mask. Matched directly from vselect.
450 def BSLvvv_8B  : NeonI_3VSame_Constraint_impl<"bsl", ".8b",  VPR64, v8i8,
451                                               0b0, 0b1, 0b01, 0b00011, vselect>;
452
453 def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
454                                               0b1, 0b1, 0b01, 0b00011, vselect>;
455
// Retype the three-operand select instructions (BSL) for every vector type,
// in three groups: (1) vselect with any element type, (2) the expanded
// or/and/not form a DAG combine may have produced, (3) the llvm.arm.neon.vbsl
// intrinsics carried over from the 32-bit ARM frontend.
456 multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
457                                    Instruction INST8B,
458                                    Instruction INST16B> {
459   // Disassociate type from instruction definition
460   def : Pat<(v8i8 (opnode (v8i8 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
461             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
462   def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
463             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
// Float results select on an integer mask of matching element count.
464   def : Pat<(v2f32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
465             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
466   def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
467             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
468   def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
469             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
470   def : Pat<(v1f64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
471             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
472   def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
473             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
474   def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
475             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
476   def : Pat<(v8i16 (opnode (v8i16 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
477             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
478   def : Pat<(v2i64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
479             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
480   def : Pat<(v2f64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
481             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
482   def : Pat<(v4f32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
483             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
484
485   // Allow to match BSL instruction pattern with non-constant operand
// i.e. (Rn & mask) | (Rm & ~mask) with the mask in a register ($Rd).
486   def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
487                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
488           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
489   def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
490                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
491           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
492   def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
493                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
494           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
495   def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
496                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
497           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
498   def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
499                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
500           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
501   def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
502                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
503           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
504   def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
505                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
506           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
507   def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
508                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
509           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
510
511   // Allow to match llvm.arm.* intrinsics.
512   def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
513                     (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
514             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
515   def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
516                     (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
517             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
518   def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
519                     (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
520             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
521   def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
522                     (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
523             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
524   def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
525                     (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
526             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
527   def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src),
528                     (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))),
529             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
530   def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
531                     (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
532             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
533   def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
534                     (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
535             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
536   def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
537                     (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
538             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
539   def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
540                     (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
541             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
542   def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
543                     (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
544             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
545   def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
546                     (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
547             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
548 }
549
550 // Additional patterns for bitwise instruction BSL
551 defm: Neon_bitwise3V_patterns<vselect, BSLvvv_8B, BSLvvv_16B>;
552
// A vselect fragment whose predicate always returns false: BIT/BIF are
// never selected from the DAG (BSL covers all three forms), but the
// Constraint_impl class requires a pattern operator, so this provides a
// well-typed placeholder. The instructions exist for the assembler and
// disassembler.
553 def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
554                            (vselect node:$src, node:$Rn, node:$Rm),
555                            [{ (void)N; return false; }]>;
556
557 // Vector Bitwise Insert if True
558
559 def BITvvv_8B  : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64,   v8i8,
560                    0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
561 def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
562                    0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
563
564 // Vector Bitwise Insert if False
565
566 def BIFvvv_8B  : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64,  v8i8,
567                                 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
568 def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
569                                 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
570
571 // Vector Absolute Difference and Accumulate (Signed, Unsigned)
572
// UABA/SABA: accumulate the (unsigned/signed) absolute difference into the
// tied destination — Ra + |Rn - Rm|, expressed via the ARM vabd intrinsics.
573 def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
574                        (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
575 def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
576                        (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;
577
578 // Vector Absolute Difference and Accumulate (Unsigned)
// U bit distinguishes unsigned (1) from signed (0); opcode 0b01111, B/H/S
// element sizes only.
579 def UABAvvv_8B :  NeonI_3VSame_Constraint_impl<"uaba", ".8b",  VPR64,  v8i8,
580                     0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
581 def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
582                     0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
583 def UABAvvv_4H :  NeonI_3VSame_Constraint_impl<"uaba", ".4h",  VPR64,  v4i16,
584                     0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
585 def UABAvvv_8H :  NeonI_3VSame_Constraint_impl<"uaba", ".8h",  VPR128, v8i16,
586                     0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
587 def UABAvvv_2S :  NeonI_3VSame_Constraint_impl<"uaba", ".2s",  VPR64,  v2i32,
588                     0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
589 def UABAvvv_4S :  NeonI_3VSame_Constraint_impl<"uaba", ".4s",  VPR128, v4i32,
590                     0b1, 0b1, 0b10, 0b01111, Neon_uaba>;
591
592 // Vector Absolute Difference and Accumulate (Signed)
593 def SABAvvv_8B :  NeonI_3VSame_Constraint_impl<"saba", ".8b",  VPR64,  v8i8,
594                     0b0, 0b0, 0b00, 0b01111, Neon_saba>;
595 def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
596                     0b1, 0b0, 0b00, 0b01111, Neon_saba>;
597 def SABAvvv_4H :  NeonI_3VSame_Constraint_impl<"saba", ".4h",  VPR64,  v4i16,
598                     0b0, 0b0, 0b01, 0b01111, Neon_saba>;
599 def SABAvvv_8H :  NeonI_3VSame_Constraint_impl<"saba", ".8h",  VPR128, v8i16,
600                     0b1, 0b0, 0b01, 0b01111, Neon_saba>;
601 def SABAvvv_2S :  NeonI_3VSame_Constraint_impl<"saba", ".2s",  VPR64,  v2i32,
602                     0b0, 0b0, 0b10, 0b01111, Neon_saba>;
603 def SABAvvv_4S :  NeonI_3VSame_Constraint_impl<"saba", ".4s",  VPR128, v4i32,
604                     0b1, 0b0, 0b10, 0b01111, Neon_saba>;
605
606
// Vector Absolute Difference (Signed, Unsigned)
// Non-accumulating |Rn - Rm|; selected from the ARM-shared vabd intrinsics.
// Final 0 argument: not commutable (abs-diff is, but these reuse the shared
// multiclass's commutable flag conservatively).
defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;

// Vector Absolute Difference (Floating Point)
// One intrinsic per element type (2s, 4s, 2d); all map to int_arm_neon_vabds.
defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
                                    int_arm_neon_vabds, int_arm_neon_vabds,
                                    int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
615
// Vector Reciprocal Step (Floating Point)
// FRECPS/FRSQRTS are the Newton-Raphson refinement steps used with the
// reciprocal (estimate) instructions; selected from the ARM-shared intrinsics.
defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
                                       int_arm_neon_vrecps, int_arm_neon_vrecps,
                                       int_arm_neon_vrecps,
                                       v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Square Root Step (Floating Point)
defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        v2f32, v4f32, v2f64, 0>;
628
// Vector Comparisons

// PatFrags that bind a fixed condition code onto the generic Neon_cmp node,
// so each compare instruction below can match on a single operator.
// Naming follows the instruction mnemonics: hs/hi are the unsigned forms.
def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
                         (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
641
// NeonI_compare_aliases class: swaps register operands to implement
// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
// Note the result operand order: $Rm and $Rn are swapped in the instantiated
// instruction. The trailing 0b0 marks the alias as non-preferred for printing.
class NeonI_compare_aliases<string asmop, string asmlane,
                            Instruction inst, RegisterOperand VPRC>
  : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
                    ", $Rm" # asmlane,
                  (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
649
// Vector Comparisons (Integer)
// Each produces an all-ones/all-zeros lane mask via the PatFrags above.

// Vector Compare Mask Equal (Integer)
let isCommutable =1 in {
defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
}

// Vector Compare Mask Higher or Same (Unsigned Integer)
defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;

// Vector Compare Mask Greater Than or Equal (Integer)
defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;

// Vector Compare Mask Higher (Unsigned Integer)
defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;

// Vector Compare Mask Greater Than (Integer)
defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;

// Vector Compare Mask Bitwise Test (Integer)
// CMTST sets a lane's mask when (Rn AND Rm) is non-zero; shares opcode 0b10001
// with CMEQ, distinguished by the U bit (0b0 here vs 0b1 for CMEQ).
defm CMTSTvvv:  NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
671
// Vector Compare Mask Less or Same (Unsigned Integer)
// CMLS is alias for CMHS with operands reversed.
// These alias defs exist for all seven lane arrangements (b/h/s x 64/128 + 2d);
// no new encodings are introduced, only assembler spellings.
def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b",  CMHSvvv_8B,  VPR64>;
def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h",  CMHSvvv_4H,  VPR64>;
def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h",  CMHSvvv_8H,  VPR128>;
def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s",  CMHSvvv_2S,  VPR64>;
def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s",  CMHSvvv_4S,  VPR128>;
def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d",  CMHSvvv_2D,  VPR128>;

// Vector Compare Mask Less Than or Equal (Integer)
// CMLE is alias for CMGE with operands reversed.
def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b",  CMGEvvv_8B,  VPR64>;
def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h",  CMGEvvv_4H,  VPR64>;
def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h",  CMGEvvv_8H,  VPR128>;
def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s",  CMGEvvv_2S,  VPR64>;
def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s",  CMGEvvv_4S,  VPR128>;
def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d",  CMGEvvv_2D,  VPR128>;

// Vector Compare Mask Lower (Unsigned Integer)
// CMLO is alias for CMHI with operands reversed.
def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b",  CMHIvvv_8B,  VPR64>;
def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h",  CMHIvvv_4H,  VPR64>;
def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h",  CMHIvvv_8H,  VPR128>;
def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s",  CMHIvvv_2S,  VPR64>;
def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s",  CMHIvvv_4S,  VPR128>;
def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d",  CMHIvvv_2D,  VPR128>;

// Vector Compare Mask Less Than (Integer)
// CMLT is alias for CMGT with operands reversed.
def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b",  CMGTvvv_8B,  VPR64>;
def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h",  CMGTvvv_4H,  VPR64>;
def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h",  CMGTvvv_8H,  VPR128>;
def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s",  CMGTvvv_2S,  VPR64>;
def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s",  CMGTvvv_4S,  VPR128>;
def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d",  CMGTvvv_2D,  VPR128>;
711
712
// Assembler operand class accepting only the literal immediate #0,
// used by the compare-against-zero instructions below.
def neon_uimm0_asmoperand : AsmOperandClass
{
  let Name = "UImm0";
  let PredicateMethod = "isUImm<0>";
  let RenderMethod = "addImmOperands";
}

// i32 operand whose only legal value is 0 (ImmLeaf predicate enforces it in
// ISel; the asmoperand class enforces it in the assembler).
def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
  let ParserMatchClass = neon_uimm0_asmoperand;
  let PrintMethod = "printNeonUImm0Operand";

}
725
// Integer compare-against-zero: "<asmop> Vd.<T>, Vn.<T>, #0".
// One def per lane arrangement (8b/16b/4h/8h/2s/4s/2d). The immediate operand
// (neon_uimm0) only accepts 0; the condition code CC is carried by the
// Neon_cmpz selection node. Note there is no 64-bit _1D variant — only _2D.
multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
{
  def _8B :  NeonI_2VMisc<0b0, u, 0b00, opcode,
             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.8b, $Rn.8b, $Imm",
             [(set (v8i8 VPR64:$Rd),
                (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
             NoItinerary>;

  def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.16b, $Rn.16b, $Imm",
             [(set (v16i8 VPR128:$Rd),
                (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
             NoItinerary>;

  def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4h, $Rn.4h, $Imm",
            [(set (v4i16 VPR64:$Rd),
               (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.8h, $Rn.8h, $Imm",
            [(set (v8i16 VPR128:$Rd),
               (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2s, $Rn.2s, $Imm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4s, $Rn.4s, $Imm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2d, $Rn.2d, $Imm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;
}
777
// Compare-against-zero instantiations. Note the u/opcode pairs: CMEQ/CMLE
// share opcode 0b01001 and CMGE/CMGT share 0b01000, distinguished by the U bit;
// CMLT has its own opcode and no U=1 counterpart.

// Vector Compare Mask Equal to Zero (Integer)
defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;

// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;

// Vector Compare Mask Greater Than Zero (Signed Integer)
defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;

// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;

// Vector Compare Mask Less Than Zero (Signed Integer)
defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
792
// Vector Comparisons (Floating Point)
// Results are integer lane masks, hence the v2i32/v4i32/v2i64 result types
// even though the inputs are FP vectors.

// Vector Compare Mask Equal (Floating Point)
let isCommutable =1 in {
defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
                                      Neon_cmeq, Neon_cmeq,
                                      v2i32, v4i32, v2i64, 0>;
}

// Vector Compare Mask Greater Than Or Equal (Floating Point)
defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
                                      Neon_cmge, Neon_cmge,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Greater Than (Floating Point)
defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
                                      Neon_cmgt, Neon_cmgt,
                                      v2i32, v4i32, v2i64, 0>;
811
// Vector Compare Mask Less Than Or Equal (Floating Point)
// FCMLE is alias for FCMGE with operands reversed.
// Only FP lane arrangements (2s/4s/2d) exist for these aliases.
def FCMLEvvv_2S  : NeonI_compare_aliases<"fcmle", ".2s",  FCMGEvvv_2S,  VPR64>;
def FCMLEvvv_4S  : NeonI_compare_aliases<"fcmle", ".4s",  FCMGEvvv_4S,  VPR128>;
def FCMLEvvv_2D  : NeonI_compare_aliases<"fcmle", ".2d",  FCMGEvvv_2D,  VPR128>;

// Vector Compare Mask Less Than (Floating Point)
// FCMLT is alias for FCMGT with operands reversed.
def FCMLTvvv_2S  : NeonI_compare_aliases<"fcmlt", ".2s",  FCMGTvvv_2S,  VPR64>;
def FCMLTvvv_4S  : NeonI_compare_aliases<"fcmlt", ".4s",  FCMGTvvv_4S,  VPR128>;
def FCMLTvvv_2D  : NeonI_compare_aliases<"fcmlt", ".2d",  FCMGTvvv_2D,  VPR128>;
823
// Assembler operand class that accepts the FP zero immediate written either
// as "#0.0" or as the integer "#0" (per the parser method name).
def fpzero_izero_asmoperand : AsmOperandClass {
  let Name = "FPZeroIZero";
  let ParserMethod = "ParseFPImm0AndImm0Operand";
  let DiagnosticType = "FPZero";
}

// f32 operand matching only FP zero in patterns (SelectFPZeroOperand),
// used by the FP compare-against-zero instructions below.
def fpzz32 : Operand<f32>,
             ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
  let ParserMatchClass = fpzero_izero_asmoperand;
  let PrintMethod = "printFPZeroOperand";
  let DecoderMethod = "DecodeFPZeroOperand";
}
836
// FP compare-against-zero: "<asmop> Vd.<T>, Vn.<T>, #0.0".
// The size bit combines with a fixed low bit into the 2-bit size field
// ({size,0b0} for 2s/4s, {size,0b1} for 2d). Results are integer lane masks
// (v2i32/v4i32/v2i64). The same f32 zero operand (fpzz32) is reused for the
// 2d variant since only the literal zero is representable.
multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
                              string asmop, CondCode CC>
{
  def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, fpzz32:$FPImm),
            asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpzz32:$FPImm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm),
            asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))],
            NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm),
            asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))],
            NoItinerary>;
}
861
// FP compare-against-zero instantiations. As with the integer forms,
// FCMEQ/FCMLE and FCMGE/FCMGT pair up on shared opcodes via the U bit.

// Vector Compare Mask Equal to Zero (Floating Point)
defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;

// Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;

// Vector Compare Mask Greater Than Zero (Floating Point)
defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;

// Vector Compare Mask Less Than or Equal To Zero (Floating Point)
defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;

// Vector Compare Mask Less Than Zero (Floating Point)
defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
876
// Vector Absolute Comparisons (Floating Point)
// Compare |Rn| against |Rm|; selected from the ARM vacge/vacgt intrinsics for
// the 64/128-bit f32 forms and an AArch64-specific intrinsic for the f64 form.

// Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
                                      int_arm_neon_vacged, int_arm_neon_vacgeq,
                                      int_aarch64_neon_vacgeq,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Absolute Compare Mask Greater Than (Floating Point)
defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
                                      int_arm_neon_vacgtd, int_arm_neon_vacgtq,
                                      int_aarch64_neon_vacgtq,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
// FACLE is alias for FACGE with operands reversed.
def FACLEvvv_2S  : NeonI_compare_aliases<"facle", ".2s",  FACGEvvv_2S,  VPR64>;
def FACLEvvv_4S  : NeonI_compare_aliases<"facle", ".4s",  FACGEvvv_4S,  VPR128>;
def FACLEvvv_2D  : NeonI_compare_aliases<"facle", ".2d",  FACGEvvv_2D,  VPR128>;

// Vector Absolute Compare Mask Less Than (Floating Point)
// FACLT is alias for FACGT with operands reversed.
def FACLTvvv_2S  : NeonI_compare_aliases<"faclt", ".2s",  FACGTvvv_2S,  VPR64>;
def FACLTvvv_4S  : NeonI_compare_aliases<"faclt", ".4s",  FACGTvvv_4S,  VPR128>;
def FACLTvvv_2D  : NeonI_compare_aliases<"faclt", ".2d",  FACGTvvv_2D,  VPR128>;
902
// Vector halving add (Integer Signed, Unsigned)
// Trailing 1/0 flag: commutable (add forms) vs not (sub forms).
defm SHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
                                        int_arm_neon_vhadds, 1>;
defm UHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
                                        int_arm_neon_vhaddu, 1>;

// Vector halving sub (Integer Signed, Unsigned)
defm SHSUBvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
                                        int_arm_neon_vhsubs, 0>;
defm UHSUBvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
                                        int_arm_neon_vhsubu, 0>;

// Vector rounding halving add (Integer Signed, Unsigned)
defm SRHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
                                         int_arm_neon_vrhadds, 1>;
defm URHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
                                         int_arm_neon_vrhaddu, 1>;

// Vector Saturating add (Integer Signed, Unsigned)
// BHSD sizes: unlike the halving forms, these also exist for 64-bit lanes.
defm SQADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
                   int_arm_neon_vqadds, 1>;
defm UQADDvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
                   int_arm_neon_vqaddu, 1>;

// Vector Saturating sub (Integer Signed, Unsigned)
defm SQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
                   int_arm_neon_vqsubs, 1>;
defm UQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
                   int_arm_neon_vqsubu, 1>;
932
// Vector Shift Left (Signed and Unsigned Integer)
// Register-shift forms: the shift amount comes from the second vector operand.
// All map to the ARM-shared vshift intrinsics.
defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
                 int_arm_neon_vshifts, 1>;
defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
                 int_arm_neon_vshiftu, 1>;

// Vector Saturating Shift Left (Signed and Unsigned Integer)
defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
                  int_arm_neon_vqshifts, 1>;
defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
                  int_arm_neon_vqshiftu, 1>;

// Vector Rounding Shift Left (Signed and Unsigned Integer)
defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
                  int_arm_neon_vrshifts, 1>;
defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
                  int_arm_neon_vrshiftu, 1>;

// Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
                   int_arm_neon_vqrshifts, 1>;
defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
                   int_arm_neon_vqrshiftu, 1>;
956
// Vector Maximum (Signed and Unsigned Integer)
defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;

// Vector Minimum (Signed and Unsigned Integer)
defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;

// Vector Maximum (Floating Point)
// FP max/min reuse the ARM vmaxs/vmins intrinsic names for all element types.
defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
                                     int_arm_neon_vmaxs, int_arm_neon_vmaxs,
                                     int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;

// Vector Minimum (Floating Point)
defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
                                     int_arm_neon_vmins, int_arm_neon_vmins,
                                     int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;

// Vector maxNum (Floating Point) - prefers a number over a quiet NaN
defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
                                       int_aarch64_neon_vmaxnm,
                                       int_aarch64_neon_vmaxnm,
                                       int_aarch64_neon_vmaxnm,
                                       v2f32, v4f32, v2f64, 1>;

// Vector minNum (Floating Point) - prefers a number over a quiet NaN
defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
                                       int_aarch64_neon_vminnm,
                                       int_aarch64_neon_vminnm,
                                       int_aarch64_neon_vminnm,
                                       v2f32, v4f32, v2f64, 1>;
988
// Pairwise forms: operate on adjacent element pairs drawn from the
// concatenation of the two source vectors.

// Vector Maximum Pairwise (Signed and Unsigned Integer)
defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;

// Vector Minimum Pairwise (Signed and Unsigned Integer)
defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;

// Vector Maximum Pairwise (Floating Point)
defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
                                     int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
                                     int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;

// Vector Minimum Pairwise (Floating Point)
defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
                                     int_arm_neon_vpmins, int_arm_neon_vpmins,
                                     int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;

// Vector maxNum Pairwise (Floating Point) - prefers a number over a quiet NaN
defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
                                       int_aarch64_neon_vpmaxnm,
                                       int_aarch64_neon_vpmaxnm,
                                       int_aarch64_neon_vpmaxnm,
                                       v2f32, v4f32, v2f64, 1>;

// Vector minNum Pairwise (Floating Point) - prefers a number over a quiet NaN
defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
                                       int_aarch64_neon_vpminnm,
                                       int_aarch64_neon_vpminnm,
                                       int_aarch64_neon_vpminnm,
                                       v2f32, v4f32, v2f64, 1>;
1020
// Vector Addition Pairwise (Integer)
defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;

// Vector Addition Pairwise (Floating Point)
// Reuses the same vpadd intrinsic for all three FP element types.
defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
                                       int_arm_neon_vpadd,
                                       int_arm_neon_vpadd,
                                       int_arm_neon_vpadd,
                                       v2f32, v4f32, v2f64, 1>;
1030
// Vector Saturating Doubling Multiply High
// HS sizes only: these exist for 16- and 32-bit lanes.
defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
                    int_arm_neon_vqdmulh, 1>;

// Vector Saturating Rounding Doubling Multiply High
defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
                     int_arm_neon_vqrdmulh, 1>;

// Vector Multiply Extended (Floating Point)
defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
                                      int_aarch64_neon_vmulx,
                                      int_aarch64_neon_vmulx,
                                      int_aarch64_neon_vmulx,
                                      v2f32, v4f32, v2f64, 1>;
1045
// Patterns to match llvm.aarch64.* intrinsics for
// ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output.
// A v1i32 horizontal reduction of a v2i32 vector is implemented by running the
// pairwise instruction with the input duplicated as both operands, then
// extracting the low 32-bit subregister of the result.
class Neon_VectorPair_v2i32_pattern<SDPatternOperator opnode, Instruction INST>
  : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))),
        (EXTRACT_SUBREG
             (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))),
             sub_32)>;

def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_sminv, SMINPvvv_2S>;
def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_uminv, UMINPvvv_2S>;
def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_smaxv, SMAXPvvv_2S>;
def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_umaxv, UMAXPvvv_2S>;
def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_vaddv, ADDP_2S>;
1059
1060 // Vector Immediate Instructions
1061
// Generates one AsmOperandClass per shift-kind PREFIX (LSL, MSL, LSLH);
// the per-class parser/render/predicate method names are derived from PREFIX
// and must match handlers in the AsmParser.
multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
{
  def _asmoperand : AsmOperandClass
    {
      let Name = "NeonMovImmShift" # PREFIX;
      let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
      let PredicateMethod = "isNeonMovImmShift" # PREFIX;
    }
}
1071
// Definition of vector immediates shift operands

// The selectable use-cases extract the shift operation
// information from the OpCmode fields encoded in the immediate.
// XForm that decodes an OpCmode immediate into its shift amount; bails out
// (returns an empty SDValue) when the cmode encodes no shift.
def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
  uint64_t OpCmode = N->getZExtValue();
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  unsigned HasShift =
    A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
  if (!HasShift) return SDValue();
  return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
}]>;
1085
// Vector immediates shift operands which accept LSL and MSL
// shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
// or 0, 8 (LSLH) or 8, 16 (MSL).
defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
// LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24
defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
1093
// Generates the i32 shift operand for a given shift kind.
// PREFIX selects the A64SE shift operator; ISHALF ("true"/"false") is passed
// to the print/decode template methods; HALF ("" or "H") selects which
// asmoperand class (e.g. LSL vs LSLH) is matched; pred is the ImmLeaf
// predicate deciding whether a given cmode immediate belongs to this kind.
multiclass neon_mov_imm_shift_operands<string PREFIX,
                                       string HALF, string ISHALF, code pred>
{
   def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
    {
      let PrintMethod =
        "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
      let DecoderMethod =
        "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
      let ParserMatchClass =
        !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
    }
}
1107
// LSL: cmode encodes a shift and ShiftOnesIn is clear (zeros shifted in).
defm neon_mov_imm_LSL  : neon_mov_imm_shift_operands<"LSL", "", "false", [{
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  unsigned HasShift =
    A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
  return (HasShift && !ShiftOnesIn);
}]>;

// MSL: cmode encodes a shift with ShiftOnesIn set (ones shifted in).
defm neon_mov_imm_MSL  : neon_mov_imm_shift_operands<"MSL", "", "false", [{
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  unsigned HasShift =
    A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
  return (HasShift && ShiftOnesIn);
}]>;

// LSLH: same ImmLeaf predicate as LSL; the restriction to shifts of 0 or 8 is
// enforced by the LSLH asmoperand class (selected via HALF="H") rather than
// by this predicate.
defm neon_mov_imm_LSLH  : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  unsigned HasShift =
    A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
  return (HasShift && !ShiftOnesIn);
}]>;
1131
// Small unsigned-immediate assembler operand classes (1-, 2- and 8-bit).
def neon_uimm1_asmoperand : AsmOperandClass
{
  let Name = "UImm1";
  let PredicateMethod = "isUImm<1>";
  let RenderMethod = "addImmOperands";
}

def neon_uimm2_asmoperand : AsmOperandClass
{
  let Name = "UImm2";
  let PredicateMethod = "isUImm<2>";
  let RenderMethod = "addImmOperands";
}

def neon_uimm8_asmoperand : AsmOperandClass
{
  let Name = "UImm8";
  let PredicateMethod = "isUImm<8>";
  let RenderMethod = "addImmOperands";
}

// 8-bit immediate operand. The ImmLeaf predicate accepts any value in ISel
// (range checking happens in the assembler via the class above); printed hex.
def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
  let ParserMatchClass = neon_uimm8_asmoperand;
  let PrintMethod = "printUImmHexOperand";
}
1157
def neon_uimm64_mask_asmoperand : AsmOperandClass
{
  let Name = "NeonUImm64Mask";
  let PredicateMethod = "isNeonUImm64Mask";
  let RenderMethod = "addNeonUImm64MaskOperands";
}

// MCOperand for 64-bit bytemask with each byte having only the
// value 0x00 or 0xff, encoded as an unsigned 8-bit value (one bit per byte).
// As with neon_uimm8, the ImmLeaf accepts everything; validation is the
// asmoperand class's job.
def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
  let ParserMatchClass = neon_uimm64_mask_asmoperand;
  let PrintMethod = "printNeonUImm64MaskOperand";
}
1171
// Modified-immediate move (MOVI/MVNI-style) with LSL shift, shifting in zeros.
// Word forms (_2S/_4S) take a 2-bit shift selector (LSL #0/8/16/24) folded
// into cmode bits; halfword forms (_4H/_8H) take a 1-bit selector (LSL #0/8)
// via the LSLH operand. The cmode low bit 0b0 distinguishes these from the
// ORR/BIC-style constrained forms below (which use 0b1).
multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
                                   SDPatternOperator opnode>
{
    // shift zeros, per word
    def _2S  : NeonI_1VModImm<0b0, op,
                              (outs VPR64:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_LSL_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
                              [(set (v2i32 VPR64:$Rd),
                                 (v2i32 (opnode (timm:$Imm),
                                   (neon_mov_imm_LSL_operand:$Simm))))],
                              NoItinerary> {
       bits<2> Simm;
       let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
     }

    def _4S  : NeonI_1VModImm<0b1, op,
                              (outs VPR128:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_LSL_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
                              [(set (v4i32 VPR128:$Rd),
                                 (v4i32 (opnode (timm:$Imm),
                                   (neon_mov_imm_LSL_operand:$Simm))))],
                              NoItinerary> {
      bits<2> Simm;
      let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
    }

    // shift zeros, per halfword
    def _4H  : NeonI_1VModImm<0b0, op,
                              (outs VPR64:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_LSLH_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
                              [(set (v4i16 VPR64:$Rd),
                                 (v4i16 (opnode (timm:$Imm),
                                   (neon_mov_imm_LSLH_operand:$Simm))))],
                              NoItinerary> {
      bit  Simm;
      let cmode = {0b1, 0b0, Simm, 0b0};
    }

    def _8H  : NeonI_1VModImm<0b1, op,
                              (outs VPR128:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_LSLH_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
                              [(set (v8i16 VPR128:$Rd),
                                 (v8i16 (opnode (timm:$Imm),
                                   (neon_mov_imm_LSLH_operand:$Simm))))],
                              NoItinerary> {
      bit Simm;
      let cmode = {0b1, 0b0, Simm, 0b0};
     }
}
1229
// Modified-immediate forms that read-modify-write the destination
// (ORR/BIC-style): $src is tied to $Rd via the Constraints string, and the
// pattern combines the old register value with the expanded immediate through
// `opnode`. cmode low bit is 0b1, distinguishing these from the plain move
// forms above.
multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
                                                   SDPatternOperator opnode,
                                                   SDPatternOperator neonopnode>
{
  let Constraints = "$src = $Rd" in {
    // shift zeros, per word
    def _2S  : NeonI_1VModImm<0b0, op,
                 (outs VPR64:$Rd),
                 (ins VPR64:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSL_operand:$Simm),
                 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
                 [(set (v2i32 VPR64:$Rd),
                    (v2i32 (opnode (v2i32 VPR64:$src),
                      (v2i32 (neonopnode timm:$Imm,
                        neon_mov_imm_LSL_operand:$Simm)))))],
                 NoItinerary> {
      bits<2> Simm;
      let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
    }

    def _4S  : NeonI_1VModImm<0b1, op,
                 (outs VPR128:$Rd),
                 (ins VPR128:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSL_operand:$Simm),
                 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
                 [(set (v4i32 VPR128:$Rd),
                    (v4i32 (opnode (v4i32 VPR128:$src),
                      (v4i32 (neonopnode timm:$Imm,
                        neon_mov_imm_LSL_operand:$Simm)))))],
                 NoItinerary> {
      bits<2> Simm;
      let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
    }

    // shift zeros, per halfword
    // NOTE(review): the ins list uses neon_mov_imm_LSLH_operand but the
    // patterns below reference neon_mov_imm_LSL_operand — inconsistent with
    // the _4H/_8H defs in NeonI_mov_imm_lsl_sizes, which use the LSLH operand
    // in both places. Confirm whether this is intentional.
    def _4H  : NeonI_1VModImm<0b0, op,
                 (outs VPR64:$Rd),
                 (ins VPR64:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSLH_operand:$Simm),
                 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
                 [(set (v4i16 VPR64:$Rd),
                    (v4i16 (opnode (v4i16 VPR64:$src),
                       (v4i16 (neonopnode timm:$Imm,
                          neon_mov_imm_LSL_operand:$Simm)))))],
                 NoItinerary> {
      bit  Simm;
      let cmode = {0b1, 0b0, Simm, 0b1};
    }

    def _8H  : NeonI_1VModImm<0b1, op,
                 (outs VPR128:$Rd),
                 (ins VPR128:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSLH_operand:$Simm),
                 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
                 [(set (v8i16 VPR128:$Rd),
                    (v8i16 (opnode (v8i16 VPR128:$src),
                      (v8i16 (neonopnode timm:$Imm,
                        neon_mov_imm_LSL_operand:$Simm)))))],
                 NoItinerary> {
      bit Simm;
      let cmode = {0b1, 0b0, Simm, 0b1};
    }
  }
}
1294
// Vector move with modified immediate, MSL ("masked shift-left") forms:
// the immediate is shifted left and the vacated bits are filled with ones.
// Only per-word (2S/4S) arrangements exist for MSL.
multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
                                   SDPatternOperator opnode>
{
    // shift ones, per word
    def _2S  : NeonI_1VModImm<0b0, op,
                             (outs VPR64:$Rd),
                             (ins neon_uimm8:$Imm,
                               neon_mov_imm_MSL_operand:$Simm),
                             !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
                              [(set (v2i32 VPR64:$Rd),
                                 (v2i32 (opnode (timm:$Imm),
                                   (neon_mov_imm_MSL_operand:$Simm))))],
                             NoItinerary> {
       bit Simm;
       // cmode = 110x: MSL #8 (Simm = 0) or MSL #16 (Simm = 1).
       let cmode = {0b1, 0b1, 0b0, Simm};
     }

   def _4S  : NeonI_1VModImm<0b1, op,
                              (outs VPR128:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_MSL_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
                              [(set (v4i32 VPR128:$Rd),
                                 (v4i32 (opnode (timm:$Imm),
                                   (neon_mov_imm_MSL_operand:$Simm))))],
                              NoItinerary> {
     bit Simm;
     let cmode = {0b1, 0b1, 0b0, Simm};
   }
}
1325
// Vector Move Immediate Shifted
// Rematerializable: the result depends only on the encoded immediate.
let isReMaterializable = 1 in {
defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
}

// Vector Move Inverted Immediate Shifted
let isReMaterializable = 1 in {
defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
}

// Vector Bitwise Bit Clear (AND NOT) - immediate
// BIC clears the bits of the inverted immediate, hence Neon_mvni here.
let isReMaterializable = 1 in {
defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
                                                         and, Neon_mvni>;
}

// Vector Bitwise OR - immediate

let isReMaterializable = 1 in {
defm ORRvi_lsl   : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
                                                           or, Neon_movi>;
}
1348
// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// LowerBUILD_VECTOR favors lowering MOVI over MVNI.
// BIC immediate instructions selection requires additional patterns to
// transform Neon_movi operands into BIC immediate operands

// XForm flipping the encoded LSLH shift amount of a matched MOVI operand so
// it can be reused as the BIC shift operand.
def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
  uint64_t OpCmode = N->getZExtValue();
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
  // LSLH restricts the shift amount to 0 or 8, encoded as 0 and 1.
  // Transform encoded shift amount 0 to 1 and 1 to 0.
  return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
}]>;

// Matches an LSLH-style modified-immediate shift operand (shift zeros, i.e.
// ShiftOnesIn clear) and applies the XForm above.
def neon_mov_imm_LSLH_transform_operand
  : ImmLeaf<i32, [{
    unsigned ShiftImm;
    unsigned ShiftOnesIn;
    unsigned HasShift =
      A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
    return (HasShift && !ShiftOnesIn); }],
  neon_mov_imm_LSLH_transform_XFORM>;
1372
// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0xff, LSL 8)
// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0xff)
def : Pat<(v4i16 (and VPR64:$src,
            (v4i16 (Neon_movi 255,
              neon_mov_imm_LSLH_transform_operand:$Simm)))),
          (BICvi_lsl_4H VPR64:$src, 255,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;

// Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0xff, LSL 8)
// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0xff)
def : Pat<(v8i16 (and VPR128:$src,
            (v8i16 (Neon_movi 255,
              neon_mov_imm_LSLH_transform_operand:$Simm)))),
          (BICvi_lsl_8H VPR128:$src, 255,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;

// Same transforms when the AND's vector type differs from the MOVI's and the
// mask arrives through a bitconvert (64-bit register forms).
def : Pat<(v8i8 (and VPR64:$src,
                  (bitconvert(v4i16 (Neon_movi 255,
                    neon_mov_imm_LSLH_transform_operand:$Simm))))),
          (BICvi_lsl_4H VPR64:$src, 255,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;
def : Pat<(v2i32 (and VPR64:$src,
                 (bitconvert(v4i16 (Neon_movi 255,
                   neon_mov_imm_LSLH_transform_operand:$Simm))))),
          (BICvi_lsl_4H VPR64:$src, 255,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;
def : Pat<(v1i64 (and VPR64:$src,
                (bitconvert(v4i16 (Neon_movi 255,
                  neon_mov_imm_LSLH_transform_operand:$Simm))))),
        (BICvi_lsl_4H VPR64:$src, 255,
          neon_mov_imm_LSLH_transform_operand:$Simm)>;

// 128-bit register forms of the bitconverted mask patterns.
def : Pat<(v16i8 (and VPR128:$src,
                 (bitconvert(v8i16 (Neon_movi 255,
                   neon_mov_imm_LSLH_transform_operand:$Simm))))),
        (BICvi_lsl_8H VPR128:$src, 255,
          neon_mov_imm_LSLH_transform_operand:$Simm)>;
def : Pat<(v4i32 (and VPR128:$src,
                 (bitconvert(v8i16 (Neon_movi 255,
                   neon_mov_imm_LSLH_transform_operand:$Simm))))),
        (BICvi_lsl_8H VPR128:$src, 255,
          neon_mov_imm_LSLH_transform_operand:$Simm)>;
def : Pat<(v2i64 (and VPR128:$src,
                 (bitconvert(v8i16 (Neon_movi 255,
                   neon_mov_imm_LSLH_transform_operand:$Simm))))),
        (BICvi_lsl_8H VPR128:$src, 255,
          neon_mov_imm_LSLH_transform_operand:$Simm)>;
1420
// Selects a halfword/word-element bitwise-immediate instruction (BIC/ORR)
// when the operand types of the AND/OR differ from the MOVI/MVNI mask's type
// and the mask reaches the operation through a bitconvert.  INST4H/INST8H
// cover v4i16/v8i16 masks; INST2S/INST4S cover v2i32/v4i32 masks.
multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
                                   SDPatternOperator neonopnode,
                                   Instruction INST4H,
                                   Instruction INST8H,
                                   Instruction INST2S,
                                   Instruction INST4S> {
  // 64-bit registers, halfword-element mask.
  def : Pat<(v8i8 (opnode VPR64:$src,
                    (bitconvert(v4i16 (neonopnode timm:$Imm,
                      neon_mov_imm_LSLH_operand:$Simm))))),
            (INST4H VPR64:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v2i32 (opnode VPR64:$src,
                   (bitconvert(v4i16 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
            (INST4H VPR64:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v1i64 (opnode VPR64:$src,
                  (bitconvert(v4i16 (neonopnode timm:$Imm,
                    neon_mov_imm_LSLH_operand:$Simm))))),
          (INST4H VPR64:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;

  // 128-bit registers, halfword-element mask.
  def : Pat<(v16i8 (opnode VPR128:$src,
                   (bitconvert(v8i16 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST8H VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v4i32 (opnode VPR128:$src,
                   (bitconvert(v8i16 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST8H VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v2i64 (opnode VPR128:$src,
                   (bitconvert(v8i16 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST8H VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;

  // 64-bit registers, word-element mask.
  def : Pat<(v8i8 (opnode VPR64:$src,
                    (bitconvert(v2i32 (neonopnode timm:$Imm,
                      neon_mov_imm_LSLH_operand:$Simm))))),
            (INST2S VPR64:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v4i16 (opnode VPR64:$src,
                   (bitconvert(v2i32 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
            (INST2S VPR64:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v1i64 (opnode VPR64:$src,
                  (bitconvert(v2i32 (neonopnode timm:$Imm,
                    neon_mov_imm_LSLH_operand:$Simm))))),
          (INST2S VPR64:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;

  // 128-bit registers, word-element mask.
  def : Pat<(v16i8 (opnode VPR128:$src,
                   (bitconvert(v4i32 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST4S VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v8i16 (opnode VPR128:$src,
                   (bitconvert(v4i32 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST4S VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v2i64 (opnode VPR128:$src,
                   (bitconvert(v4i32 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST4S VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;
}
1491
// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H,
                               BICvi_lsl_2S, BICvi_lsl_4S>;

// Additional patterns for Vector Bitwise OR - immediate
defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H,
                               ORRvi_lsl_2S, ORRvi_lsl_4S>;
1499
1500
// Vector Move Immediate Masked (MSL shift: vacated low bits filled with ones)
let isReMaterializable = 1 in {
defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
}

// Vector Move Inverted Immediate Masked
let isReMaterializable = 1 in {
defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
}
1510
// Assembly alias accepting the shifted-immediate mnemonics without an
// explicit shift (e.g. "movi v0.2s, #42" == "movi v0.2s, #42, lsl #0").
// Emission of the alias form is disabled (final 0b0 argument).
class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
                                Instruction inst, RegisterOperand VPRC>
  : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
                        (inst VPRC:$Rd, neon_uimm8:$Imm,  0), 0b0>;
1515
// Aliases for Vector Move Immediate Shifted
def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;

// Aliases for Vector Move Inverted Immediate Shifted
def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;

// Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;

// Aliases for Vector Bitwise OR - immediate
def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1539
//  Vector Move Immediate - per byte
//  cmode = 1110: the 8-bit immediate is replicated into every byte lane.
let isReMaterializable = 1 in {
def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
                               (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
                               "movi\t$Rd.8b, $Imm",
                               [(set (v8i8 VPR64:$Rd),
                                  (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
                                NoItinerary> {
  let cmode = 0b1110;
}

def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
                                (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
                                "movi\t$Rd.16b, $Imm",
                                [(set (v16i8 VPR128:$Rd),
                                   (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
                                 NoItinerary> {
  let cmode = 0b1110;
}
}
1560
// Vector Move Immediate - bytemask, per double word
// op = 1, cmode = 1110: each bit of the 8-bit immediate expands to a full
// byte (0x00 or 0xff) in the corresponding lane of each doubleword.
// Fixed: removed the stray space after "\t" in the asm string, which was
// inconsistent with the other MOVI definitions and double-spaced the output.
let isReMaterializable = 1 in {
def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
                               (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
                               "movi\t$Rd.2d, $Imm",
                               [(set (v2i64 VPR128:$Rd),
                                  (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
                               NoItinerary> {
  let cmode = 0b1110;
}
}
1572
// Vector Move Immediate - bytemask, one doubleword
// Scalar (FPR64) variant of the bytemask MOVI; same op/cmode encoding as
// MOVIvi_2D but with q = 0.
// Fixed: removed the stray space after "\t" in the asm string for
// consistency with the other MOVI definitions.

let isReMaterializable = 1 in {
def MOVIdi : NeonI_1VModImm<0b0, 0b1,
                           (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
                           "movi\t$Rd, $Imm",
                           [(set (v1i64 FPR64:$Rd),
                             (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))],
                           NoItinerary> {
  let cmode = 0b1110;
}
}
1585
// Vector Floating Point Move Immediate
// cmode = 1111: the 8-bit immediate encodes an FP constant (sign, 3-bit
// exponent, 4-bit fraction) replicated across all lanes.

class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
                      Operand immOpType, bit q, bit op>
  : NeonI_1VModImm<q, op,
                   (outs VPRC:$Rd), (ins immOpType:$Imm),
                   "fmov\t$Rd" # asmlane # ", $Imm",
                   [(set (OpTy VPRC:$Rd),
                      (OpTy (Neon_fmovi (timm:$Imm))))],
                   NoItinerary> {
     let cmode = 0b1111;
   }

let isReMaterializable = 1 in {
def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64,  v2f32, fmov32_operand, 0b0, 0b0>;
def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
}
1604
// Vector Shift (Immediate)
// Immediate in [0, 63]; parsed via the 6-bit unsigned-immediate asm operand.
def imm0_63 : Operand<i32> {
  let ParserMatchClass = uimm6_asmoperand;
}
1610
1611 // Shift Right/Left Immediate - The immh:immb field of these shifts are encoded
1612 // as follows:
1613 //
1614 //    Offset    Encoding
1615 //     8        immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1616 //     16       immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1617 //     32       immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1618 //     64       immh:immb<6>   = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1619 //
1620 // The shift right immediate amount, in the range 1 to element bits, is computed
1621 // as Offset - UInt(immh:immb).  The shift left immediate amount, in the range 0
1622 // to element bits - 1, is computed as UInt(immh:immb) - Offset.
1623
// Asm-operand class for shift-right immediates of element width OFFSET.
class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
  let Name = "ShrImm" # OFFSET;
  let RenderMethod = "addImmOperands";
  let DiagnosticType = "ShrImm" # OFFSET;
}

// Operand for shift-right immediates; the encoder/decoder hooks implement
// the Offset - UInt(immh:immb) bias described above.
class shr_imm<string OFFSET> : Operand<i32> {
  let EncoderMethod = "getShiftRightImm" # OFFSET;
  let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
  let ParserMatchClass =
    !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
}

def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;

// Shift-right amounts are in the range [1, element bits].
def shr_imm8 : shr_imm<"8">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 8;}]>;
def shr_imm16 : shr_imm<"16">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 16;}]>;
def shr_imm32 : shr_imm<"32">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 32;}]>;
def shr_imm64 : shr_imm<"64">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 64;}]>;
1646
// Asm-operand class for shift-left immediates of element width OFFSET.
class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
  let Name = "ShlImm" # OFFSET;
  let RenderMethod = "addImmOperands";
  let DiagnosticType = "ShlImm" # OFFSET;
}

// Operand for shift-left immediates; the encoder/decoder hooks implement
// the UInt(immh:immb) - Offset bias described above.
class shl_imm<string OFFSET> : Operand<i32> {
  let EncoderMethod = "getShiftLeftImm" # OFFSET;
  let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
  let ParserMatchClass =
    !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
}

def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;

// Shift-left amounts are in the range [0, element bits - 1].
def shl_imm8 : shl_imm<"8">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]>;
def shl_imm16 : shl_imm<"16">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]>;
def shl_imm32 : shl_imm<"32">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]>;
def shl_imm64 : shl_imm<"64">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 64;}]>;
1669
// Two-operand vector shift by immediate.  Matches OpNode (shl/sra/srl)
// applied to a register and a splat (Neon_vdup) of the immediate.
class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
               RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                     [(set (Ty VPRC:$Rd),
                        (Ty (OpNode (Ty VPRC:$Rn),
                          (Ty (Neon_vdup (i32 ImmTy:$Imm))))))],
                     NoItinerary>;
1679
// Shift-left (immediate) for every vector arrangement.  The element size is
// encoded by the position of the leading one in immh (Inst{22-19}).
multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
  // 64-bit vector types.
  def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // 128-bit vector types.
  def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
1711
// Shift-right (immediate) for every vector arrangement, parameterized on the
// DAG node (sra for signed, srl for unsigned).  Element size selection via
// immh mirrors NeonI_N2VShL above.
multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
  // 64-bit vector types.
  def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8,
                     shr_imm8, OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16,
                     shr_imm16, OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32,
                     shr_imm32, OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // 128-bit vector types.
  def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8,
                      shr_imm8, OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16,
                     shr_imm16, OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32,
                     shr_imm32, OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64,
                     shr_imm64, OpNode> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
1748
// Shift left
defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;

// Shift right (arithmetic for sshr, logical for ushr)
defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
1755
// PatFrags selecting the high half of a 128-bit vector register (upper lanes,
// starting at the given lane index).
// NOTE(review): unlike the Low frags below, these do not wrap the result in
// an explicit type — presumably the result type is inferred at use sites;
// confirm if touching these.
def Neon_High16B : PatFrag<(ops node:$in),
                           (extract_subvector (v16i8 node:$in), (iPTR 8))>;
def Neon_High8H  : PatFrag<(ops node:$in),
                           (extract_subvector (v8i16 node:$in), (iPTR 4))>;
def Neon_High4S  : PatFrag<(ops node:$in),
                           (extract_subvector (v4i32 node:$in), (iPTR 2))>;
def Neon_High2D  : PatFrag<(ops node:$in),
                           (extract_subvector (v2i64 node:$in), (iPTR 1))>;
def Neon_High4float : PatFrag<(ops node:$in),
                               (extract_subvector (v4f32 node:$in), (iPTR 2))>;
def Neon_High2double : PatFrag<(ops node:$in),
                               (extract_subvector (v2f64 node:$in), (iPTR 1))>;

// PatFrags selecting the low half (lanes from index 0) of a 128-bit vector.
def Neon_Low16B : PatFrag<(ops node:$in),
                          (v8i8 (extract_subvector (v16i8 node:$in),
                                                   (iPTR 0)))>;
def Neon_Low8H : PatFrag<(ops node:$in),
                         (v4i16 (extract_subvector (v8i16 node:$in),
                                                   (iPTR 0)))>;
def Neon_Low4S : PatFrag<(ops node:$in),
                         (v2i32 (extract_subvector (v4i32 node:$in),
                                                   (iPTR 0)))>;
def Neon_Low2D : PatFrag<(ops node:$in),
                         (v1i64 (extract_subvector (v2i64 node:$in),
                                                   (iPTR 0)))>;
def Neon_Low4float : PatFrag<(ops node:$in),
                             (v2f32 (extract_subvector (v4f32 node:$in),
                                                       (iPTR 0)))>;
def Neon_Low2double : PatFrag<(ops node:$in),
                              (v1f64 (extract_subvector (v2f64 node:$in),
                                                        (iPTR 0)))>;
1787
// Widening shift-left by immediate (SSHLL/USHLL): extend the 64-bit source
// with ExtOp (sext/zext), then shift left by a splat of the immediate.
class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                   string SrcT, ValueType DestTy, ValueType SrcTy,
                   Operand ImmTy, SDPatternOperator ExtOp>
  : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
                     (ins VPR64:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                     [(set (DestTy VPR128:$Rd),
                        (DestTy (shl
                          (DestTy (ExtOp (SrcTy VPR64:$Rn))),
                            (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
                     NoItinerary>;
1799
// Second-part widening shift-left (SSHLL2/USHLL2): like N2VShiftLong but the
// source is the high half of a 128-bit register, selected by `getTop`.
// NOTE(review): the StartIndex parameter is not referenced in this body —
// apparently only `getTop` determines the half; confirm before relying on it.
class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                       string SrcT, ValueType DestTy, ValueType SrcTy,
                       int StartIndex, Operand ImmTy,
                       SDPatternOperator ExtOp, PatFrag getTop>
  : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
                     (ins VPR128:$Rn, ImmTy:$Imm),
                     asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                     [(set (DestTy VPR128:$Rd),
                        (DestTy (shl
                          (DestTy (ExtOp
                            (SrcTy (getTop VPR128:$Rn)))),
                              (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
                     NoItinerary>;
1813
// Widening shift-left family: low-half forms (SSHLL/USHLL) plus high-half
// forms (SSHLL2/USHLL2), and zero-shift patterns so plain sext/zext of a
// half-width vector also selects these instructions.
multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
                         SDNode ExtOp> {
  // 64-bit vector types.
  def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
                         shl_imm8, ExtOp> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
                         shl_imm16, ExtOp> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
                         shl_imm32, ExtOp> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // 128-bit vector types
  def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
                              8, shl_imm8, ExtOp, Neon_High16B> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
                             4, shl_imm16, ExtOp, Neon_High8H> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
                             2, shl_imm32, ExtOp, Neon_High4S> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // Use other patterns to match when the immediate is 0.
  def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;

  def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;

  def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;

  def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
            (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;

  def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
            (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;

  def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
            (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
}
1867
// Shift left long (widening): signed uses sext, unsigned uses zext.
defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1871
// Alias mapping the integer-lengthen mnemonics (sxtl/uxtl) onto the
// corresponding SSHLL/USHLL instruction with a zero shift amount.
// Emission of the alias form is disabled (final 0b0 argument).
class NeonI_ext_len_alias<string asmop, string lane, string laneOp,
                       Instruction inst, RegisterOperand VPRC,
                       RegisterOperand VPRCOp>
  : NeonInstAlias<asmop # "\t$Rd" # lane #", $Rn" # laneOp,
                  (inst VPRC:$Rd, VPRCOp:$Rn, 0), 0b0>;
1877
// Signed integer lengthen (vector) is alias for SSHLL Vd, Vn, #0
// Signed integer lengthen (vector, second part) is alias for SSHLL2 Vd, Vn, #0
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
def SXTLvv_8B  : NeonI_ext_len_alias<"sxtl", ".8h", ".8b",  SSHLLvvi_8B, VPR128, VPR64>;
def SXTLvv_4H  : NeonI_ext_len_alias<"sxtl", ".4s", ".4h",  SSHLLvvi_4H, VPR128, VPR64>;
def SXTLvv_2S  : NeonI_ext_len_alias<"sxtl", ".2d", ".2s",  SSHLLvvi_2S, VPR128, VPR64>;
def SXTL2vv_16B : NeonI_ext_len_alias<"sxtl2", ".8h", ".16b",  SSHLLvvi_16B, VPR128, VPR128>;
def SXTL2vv_8H  : NeonI_ext_len_alias<"sxtl2", ".4s", ".8h",  SSHLLvvi_8H, VPR128, VPR128>;
def SXTL2vv_4S  : NeonI_ext_len_alias<"sxtl2", ".2d", ".4s",  SSHLLvvi_4S, VPR128, VPR128>;

// Unsigned integer lengthen (vector) is alias for USHLL Vd, Vn, #0
// Unsigned integer lengthen (vector, second part) is alias for USHLL2 Vd, Vn, #0
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
def UXTLvv_8B  : NeonI_ext_len_alias<"uxtl", ".8h", ".8b",  USHLLvvi_8B, VPR128, VPR64>;
def UXTLvv_4H  : NeonI_ext_len_alias<"uxtl", ".4s", ".4h",  USHLLvvi_4H, VPR128, VPR64>;
def UXTLvv_2S  : NeonI_ext_len_alias<"uxtl", ".2d", ".2s",  USHLLvvi_2S, VPR128, VPR64>;
def UXTL2vv_16B : NeonI_ext_len_alias<"uxtl2", ".8h", ".16b",  USHLLvvi_16B, VPR128, VPR128>;
def UXTL2vv_8H  : NeonI_ext_len_alias<"uxtl2", ".4s", ".8h",  USHLLvvi_8H, VPR128, VPR128>;
def UXTL2vv_4S  : NeonI_ext_len_alias<"uxtl2", ".2d", ".4s",  USHLLvvi_4S, VPR128, VPR128>;
1899
// Lower anyext through USHLL with a zero shift (any extension is acceptable,
// so the zero-extending form is used).
def : Pat<(v8i16 (anyext (v8i8 VPR64:$Rn))), (USHLLvvi_8B VPR64:$Rn, 0)>;
def : Pat<(v4i32 (anyext (v4i16 VPR64:$Rn))), (USHLLvvi_4H VPR64:$Rn, 0)>;
def : Pat<(v2i64 (anyext (v2i32 VPR64:$Rn))), (USHLLvvi_2S VPR64:$Rn, 0)>;
1903
// Rounding/Saturating shift
// Unlike N2VShift, the shift amount is passed to OpNode as a plain i32
// (OpNode is an intrinsic-style operator, not a generic shl/sra/srl).
class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                  SDPatternOperator OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                     [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
                        (i32 ImmTy:$Imm))))],
                     NoItinerary>;
1914
// shift right (vector by immediate)
// Rounding/saturating shift-right family for every vector arrangement;
// element size selection via immh matches the other shift multiclasses.
multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
                           SDPatternOperator OpNode> {
  def _8B  : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                         OpNode> {
    let Inst{22-19} = 0b0001;
  }

  def _4H  : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                         OpNode> {
    let Inst{22-20} = 0b001;
  }

  def _2S  : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                         OpNode> {
    let Inst{22-21} = 0b01;
  }

  def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                         OpNode> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                        OpNode> {
    let Inst{22} = 0b1;
  }
}
1953
// Left-shift counterpart of NeonI_N2VShR_RQ: same element-size
// encoding scheme, but uses the shl_imm* operand classes (left-shift
// immediates have a different legal range than right-shift ones).
1954 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1955                           SDPatternOperator OpNode> {
1956   // 64-bit vector types.
1957   def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
1958                         OpNode> {
1959     let Inst{22-19} = 0b0001;
1960   }
1961
1962   def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
1963                         OpNode> {
1964     let Inst{22-20} = 0b001;
1965   }
1966
1967   def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
1968                         OpNode> {
1969     let Inst{22-21} = 0b01;
1970   }
1971
1972   // 128-bit vector types.
1973   def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
1974                          OpNode> {
1975     let Inst{22-19} = 0b0001;
1976   }
1977
1978   def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
1979                         OpNode> {
1980     let Inst{22-20} = 0b001;
1981   }
1982
1983   def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
1984                         OpNode> {
1985     let Inst{22-21} = 0b01;
1986   }
1987
1988   def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
1989                         OpNode> {
1990     let Inst{22} = 0b1;
1991   }
1992 }
1993
1994 // Rounding shift right
1995 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1996                                 int_aarch64_neon_vsrshr>;
1997 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1998                                 int_aarch64_neon_vurshr>;
1999
2000 // Saturating shift left unsigned
2001 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
2002
2003 // Saturating shift left
// SQSHL/UQSHL by immediate select on the target-specific QSHL nodes
// (Neon_sqrshlImm/Neon_uqrshlImm, declared earlier in this file).
2004 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
2005 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
2006
// Shift-and-accumulate base class.  OpNode is a plain shift SDNode
// (sra/srl), so the scalar immediate is splatted with Neon_vdup before
// the vector shift; the result is then added to the accumulator $src,
// which is tied to the destination register via the constraint below.
2007 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
2008                   RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
2009                   SDNode OpNode>
2010   : NeonI_2VShiftImm<q, u, opcode,
2011            (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
2012            asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2013            [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
2014               (Ty (OpNode (Ty VPRC:$Rn),
2015                 (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))],
2016            NoItinerary> {
2017   let Constraints = "$src = $Rd";
2018 }
2019
2020 // Shift Right accumulate
// One N2VShiftAdd per integer element arrangement; the Inst{...}
// assignments encode the element size, as in the multiclasses above.
2021 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
2022   def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
2023                         OpNode> {
2024     let Inst{22-19} = 0b0001;
2025   }
2026
2027   def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
2028                         OpNode> {
2029     let Inst{22-20} = 0b001;
2030   }
2031
2032   def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
2033                         OpNode> {
2034     let Inst{22-21} = 0b01;
2035   }
2036
2037   def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
2038                          OpNode> {
2039     let Inst{22-19} = 0b0001;
2040   }
2041
2042   def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
2043                         OpNode> {
2044     let Inst{22-20} = 0b001;
2045   }
2046
2047   def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
2048                         OpNode> {
2049     let Inst{22-21} = 0b01;
2050   }
2051
2052   def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
2053                         OpNode> {
2054     let Inst{22} = 0b1;
2055   }
2056 }
2057
2058 // Shift right and accumulate
// SSRA uses an arithmetic (sra) and USRA a logical (srl) right shift.
2059 defm SSRAvvi    : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
2060 defm USRAvvi    : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
2061
2062 // Rounding shift accumulate
// Like N2VShiftAdd but for intrinsic-based (rounding) shifts: OpNode
// takes the raw i32 immediate, no Neon_vdup splat.  Accumulator $src
// is tied to $Rd.
2063 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
2064                     RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
2065                     SDPatternOperator OpNode>
2066   : NeonI_2VShiftImm<q, u, opcode,
2067                      (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
2068                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2069                      [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
2070                         (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))],
2071                      NoItinerary> {
2072   let Constraints = "$src = $Rd";
2073 }
2074
// All element arrangements of the rounding shift-right-accumulate
// class; encoding bits per variant as in the multiclasses above.
2075 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
2076                              SDPatternOperator OpNode> {
2077   def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
2078                           OpNode> {
2079     let Inst{22-19} = 0b0001;
2080   }
2081
2082   def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
2083                           OpNode> {
2084     let Inst{22-20} = 0b001;
2085   }
2086
2087   def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
2088                           OpNode> {
2089     let Inst{22-21} = 0b01;
2090   }
2091
2092   def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
2093                            OpNode> {
2094     let Inst{22-19} = 0b0001;
2095   }
2096
2097   def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
2098                           OpNode> {
2099     let Inst{22-20} = 0b001;
2100   }
2101
2102   def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
2103                           OpNode> {
2104     let Inst{22-21} = 0b01;
2105   }
2106
2107   def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
2108                           OpNode> {
2109     let Inst{22} = 0b1;
2110   }
2111 }
2112
2113 // Rounding shift right and accumulate
// Reuses the same rounding-shift intrinsics as SRSHR/URSHR above.
2114 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
2115 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
2116
2117 // Shift insert by immediate
// Base class for SLI/SRI: OpNode (a three-operand intrinsic) receives
// the insertion destination $src, the source $Rn, and the immediate.
// $src is tied to $Rd because only the shifted-in bit positions of the
// destination are replaced.
2118 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
2119                   RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
2120                   SDPatternOperator OpNode>
2121     : NeonI_2VShiftImm<q, u, opcode,
2122            (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
2123            asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2124            [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
2125              (i32 ImmTy:$Imm))))],
2126            NoItinerary> {
2127   let Constraints = "$src = $Rd";
2128 }
2129
2130 // shift left insert (vector by immediate)
// All arrangements of SLI, hard-wired to the vsli intrinsic and the
// left-shift immediate operand classes.
2131 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
2132   def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
2133                         int_aarch64_neon_vsli> {
2134     let Inst{22-19} = 0b0001;
2135   }
2136
2137   def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
2138                         int_aarch64_neon_vsli> {
2139     let Inst{22-20} = 0b001;
2140   }
2141
2142   def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
2143                         int_aarch64_neon_vsli> {
2144     let Inst{22-21} = 0b01;
2145   }
2146
2147     // 128-bit vector types
2148   def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
2149                          int_aarch64_neon_vsli> {
2150     let Inst{22-19} = 0b0001;
2151   }
2152
2153   def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
2154                         int_aarch64_neon_vsli> {
2155     let Inst{22-20} = 0b001;
2156   }
2157
2158   def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
2159                         int_aarch64_neon_vsli> {
2160     let Inst{22-21} = 0b01;
2161   }
2162
2163   def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
2164                         int_aarch64_neon_vsli> {
2165     let Inst{22} = 0b1;
2166   }
2167 }
2168
2169 // shift right insert (vector by immediate)
// All arrangements of SRI, hard-wired to the vsri intrinsic and the
// right-shift immediate operand classes.
2170 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
2171     // 64-bit vector types.
2172   def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
2173                         int_aarch64_neon_vsri> {
2174     let Inst{22-19} = 0b0001;
2175   }
2176
2177   def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
2178                         int_aarch64_neon_vsri> {
2179     let Inst{22-20} = 0b001;
2180   }
2181
2182   def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
2183                         int_aarch64_neon_vsri> {
2184     let Inst{22-21} = 0b01;
2185   }
2186
2187     // 128-bit vector types
2188   def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
2189                          int_aarch64_neon_vsri> {
2190     let Inst{22-19} = 0b0001;
2191   }
2192
2193   def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
2194                         int_aarch64_neon_vsri> {
2195     let Inst{22-20} = 0b001;
2196   }
2197
2198   def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
2199                         int_aarch64_neon_vsri> {
2200     let Inst{22-21} = 0b01;
2201   }
2202
2203   def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
2204                         int_aarch64_neon_vsri> {
2205     let Inst{22} = 0b1;
2206   }
2207 }
2208
2209 // Shift left and insert
2210 defm SLIvvi   : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
2211
2212 // Shift right and insert
2213 defm SRIvvi   : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
2214
// Narrowing shift right (128-bit source, 64-bit destination).  No
// selection pattern here ([]): the instructions are matched by the
// explicit Pat definitions in Neon_shiftNarrow_patterns below.
2215 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2216                     string SrcT, Operand ImmTy>
2217   : NeonI_2VShiftImm<q, u, opcode,
2218                      (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
2219                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2220                      [], NoItinerary>;
2221
// "High" narrowing shift right: writes the narrowed result into the
// upper half of a 128-bit register while keeping the lower half from
// $src (hence the tied-operand constraint).  Patterns come later.
2222 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2223                        string SrcT, Operand ImmTy>
2224   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
2225                      (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
2226                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2227                      [], NoItinerary> {
2228   let Constraints = "$src = $Rd";
2229 }
2230
2231 // shift right narrow (vector by immediate); the "2" variants narrow
// into the high half of the destination register.
2232 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
2233   def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
2234     let Inst{22-19} = 0b0001;
2235   }
2236
2237   def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
2238     let Inst{22-20} = 0b001;
2239   }
2240
2241   def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
2242     let Inst{22-21} = 0b01;
2243   }
2244
2245   // Shift Narrow High
2246   def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
2247                               shr_imm8> {
2248     let Inst{22-19} = 0b0001;
2249   }
2250
2251   def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
2252                              shr_imm16> {
2253     let Inst{22-20} = 0b001;
2254   }
2255
2256   def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
2257                              shr_imm32> {
2258     let Inst{22-21} = 0b01;
2259   }
2260 }
2261
2262 // Shift right narrow
2263 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
2264
2265 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
2266 defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2267 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2268 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2269 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2270 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2271 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2272 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
2273
// PatFrags that recognize the combining of two 64-bit halves into one
// 128-bit vector via concat_vectors, for each result type.
2274 def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
2275                               (v2i64 (concat_vectors (v1i64 node:$Rm),
2276                                                      (v1i64 node:$Rn)))>;
2277 def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
2278                               (v8i16 (concat_vectors (v4i16 node:$Rm),
2279                                                      (v4i16 node:$Rn)))>;
2280 def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
2281                               (v4i32 (concat_vectors (v2i32 node:$Rm),
2282                                                      (v2i32 node:$Rn)))>;
2283 def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
2284                               (v4f32 (concat_vectors (v2f32 node:$Rm),
2285                                                      (v2f32 node:$Rn)))>;
2286 def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
2287                               (v2f64 (concat_vectors (v1f64 node:$Rm),
2288                                                      (v1f64 node:$Rn)))>;
2289
// PatFrags matching a vector logical (srl) or arithmetic (sra) right
// shift where the shift amount is a splatted scalar immediate
// (Neon_vdup of an i32) — the DAG shape produced for shift-by-constant.
2290 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2291                              (v8i16 (srl (v8i16 node:$lhs),
2292                                (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2293 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2294                              (v4i32 (srl (v4i32 node:$lhs),
2295                                (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2296 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2297                              (v2i64 (srl (v2i64 node:$lhs),
2298                                (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2299 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2300                              (v8i16 (sra (v8i16 node:$lhs),
2301                                (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2302 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2303                              (v4i32 (sra (v4i32 node:$lhs),
2304                                (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2305 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2306                              (v2i64 (sra (v2i64 node:$lhs),
2307                                (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2308
2309 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
// "shr" is the PatFrag name stem: "lshr" or "ashr".  The first three
// patterns select the 64-bit SHRN forms; the last three match the
// concat-with-existing-low-half shape and select the SHRN2 ("high")
// forms, moving $src into the low half via SUBREG_TO_REG.
2310 multiclass Neon_shiftNarrow_patterns<string shr> {
2311   def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2312               (i32 shr_imm8:$Imm)))),
2313             (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2314   def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2315               (i32 shr_imm16:$Imm)))),
2316             (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2317   def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2318               (i32 shr_imm32:$Imm)))),
2319             (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2320
2321   def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2322               (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2323                 VPR128:$Rn, (i32 shr_imm8:$Imm))))))),
2324             (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
2325                          VPR128:$Rn, imm:$Imm)>;
2326   def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2327               (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2328                 VPR128:$Rn, (i32 shr_imm16:$Imm))))))),
2329             (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2330                         VPR128:$Rn, imm:$Imm)>;
2331   def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2332               (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2333                 VPR128:$Rn, (i32 shr_imm32:$Imm))))))),
2334             (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2335                         VPR128:$Rn, imm:$Imm)>;
2336 }
2337
// Same structure as Neon_shiftNarrow_patterns, but for the intrinsic-
// based saturating/rounding narrows: "op" is the intrinsic, "prefix"
// names the instruction family (e.g. "SQSHRNvvi") to cast to.
2338 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2339   def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)),
2340             (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2341   def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)),
2342             (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2343   def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)),
2344             (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2345
2346   def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2347                 (v1i64 (bitconvert (v8i8
2348                     (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))),
2349             (!cast<Instruction>(prefix # "_16B")
2350                 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2351                 VPR128:$Rn, imm:$Imm)>;
2352   def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2353                 (v1i64 (bitconvert (v4i16
2354                     (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))),
2355             (!cast<Instruction>(prefix # "_8H")
2356                 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2357                 VPR128:$Rn, imm:$Imm)>;
2358   def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2359                 (v1i64 (bitconvert (v2i32
2360                     (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))),
2361             (!cast<Instruction>(prefix # "_4S")
2362                   (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2363                   VPR128:$Rn, imm:$Imm)>;
2364 }
2365
// Instantiate the narrowing-shift selection patterns for both shift
// kinds and for every saturating/rounding narrow intrinsic.
2366 defm : Neon_shiftNarrow_patterns<"lshr">;
2367 defm : Neon_shiftNarrow_patterns<"ashr">;
2368
2369 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2370 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2371 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2372 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2373 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2374 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2375 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2376
2377 // Conversion between fixed-point and floating-point (by immediate).
// Base class for the fixed-point <-> floating-point conversions; the
// immediate is the number of fractional bits, passed raw to IntOp.
2378 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2379                 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2380                 Operand ImmTy, SDPatternOperator IntOp>
2381   : NeonI_2VShiftImm<q, u, opcode,
2382                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2383                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2384                      [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
2385                        (i32 ImmTy:$Imm))))],
2386                      NoItinerary>;
2387
// Fixed-point -> floating-point conversions (2s/4s/2d); only 32- and
// 64-bit element sizes exist for floating-point vectors.
2388 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2389                               SDPatternOperator IntOp> {
2390   def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2391                       shr_imm32, IntOp> {
2392     let Inst{22-21} = 0b01;
2393   }
2394
2395   def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2396                       shr_imm32, IntOp> {
2397     let Inst{22-21} = 0b01;
2398   }
2399
2400   def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
2401                       shr_imm64, IntOp> {
2402     let Inst{22} = 0b1;
2403   }
2404 }
2405
// Floating-point -> fixed-point conversions; mirror image of
// NeonI_N2VCvt_Fx2fp with the source/destination types swapped.
2406 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2407                               SDPatternOperator IntOp> {
2408   def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2409                       shr_imm32, IntOp> {
2410     let Inst{22-21} = 0b01;
2411   }
2412
2413   def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2414                       shr_imm32, IntOp> {
2415     let Inst{22-21} = 0b01;
2416   }
2417
2418   def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
2419                       shr_imm64, IntOp> {
2420     let Inst{22} = 0b1;
2421   }
2422 }
2423
2424 // Convert fixed-point to floating-point
2425 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2426                                    int_arm_neon_vcvtfxs2fp>;
2427 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2428                                    int_arm_neon_vcvtfxu2fp>;
2429
2430 // Convert floating-point to fixed-point
2431 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2432                                    int_arm_neon_vcvtfp2fxs>;
2433 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2434                                    int_arm_neon_vcvtfp2fxu>;
2435
// PatFrags matching an extension (sext/zext) of the HIGH half of a
// 128-bit vector (extracted via the Neon_High* fragments), used by the
// long "2" instruction variants below.
2436 multiclass Neon_sshll2_0<SDNode ext>
2437 {
2438   def _v8i8  : PatFrag<(ops node:$Rn),
2439                        (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
2440   def _v4i16 : PatFrag<(ops node:$Rn),
2441                        (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
2442   def _v2i32 : PatFrag<(ops node:$Rn),
2443                        (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
2444 }
2445
2446 defm NI_sext_high : Neon_sshll2_0<sext>;
2447 defm NI_zext_high : Neon_sshll2_0<zext>;
2448
2449
2450 //===----------------------------------------------------------------------===//
2451 // Multiclasses for NeonI_Across
2452 //===----------------------------------------------------------------------===//
2453
2454 // Variant 1
// Across-lanes reductions whose result element is WIDER than the
// source elements (e.g. SADDLV/UADDLV): 8b/16b -> h, 4h/8h -> s,
// 4s -> d.  The scalar result lives in an FPR of the widened size.
2455
2456 multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
2457                             string asmop, SDPatternOperator opnode>
2458 {
2459     def _1h8b:  NeonI_2VAcross<0b0, u, 0b00, opcode,
2460                 (outs FPR16:$Rd), (ins VPR64:$Rn),
2461                 asmop # "\t$Rd, $Rn.8b",
2462                 [(set (v1i16 FPR16:$Rd),
2463                     (v1i16 (opnode (v8i8 VPR64:$Rn))))],
2464                 NoItinerary>;
2465
2466     def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2467                 (outs FPR16:$Rd), (ins VPR128:$Rn),
2468                 asmop # "\t$Rd, $Rn.16b",
2469                 [(set (v1i16 FPR16:$Rd),
2470                     (v1i16 (opnode (v16i8 VPR128:$Rn))))],
2471                 NoItinerary>;
2472
2473     def _1s4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
2474                 (outs FPR32:$Rd), (ins VPR64:$Rn),
2475                 asmop # "\t$Rd, $Rn.4h",
2476                 [(set (v1i32 FPR32:$Rd),
2477                     (v1i32 (opnode (v4i16 VPR64:$Rn))))],
2478                 NoItinerary>;
2479
2480     def _1s8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
2481                 (outs FPR32:$Rd), (ins VPR128:$Rn),
2482                 asmop # "\t$Rd, $Rn.8h",
2483                 [(set (v1i32 FPR32:$Rd),
2484                     (v1i32 (opnode (v8i16 VPR128:$Rn))))],
2485                 NoItinerary>;
2486
2487     // _1d2s doesn't exist!
2488
2489     def _1d4s:  NeonI_2VAcross<0b1, u, 0b10, opcode,
2490                 (outs FPR64:$Rd), (ins VPR128:$Rn),
2491                 asmop # "\t$Rd, $Rn.4s",
2492                 [(set (v1i64 FPR64:$Rd),
2493                     (v1i64 (opnode (v4i32 VPR128:$Rn))))],
2494                 NoItinerary>;
2495 }
2496
// Signed/unsigned add-long across vector (widening reductions).
2497 defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
2498 defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
2499
2500 // Variant 2
// Across-lanes reductions whose result element has the SAME width as
// the source elements (e.g. SMAXV/ADDV): 8b/16b -> b, 4h/8h -> h,
// 4s -> s.
2501
2502 multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
2503                             string asmop, SDPatternOperator opnode>
2504 {
2505     def _1b8b:  NeonI_2VAcross<0b0, u, 0b00, opcode,
2506                 (outs FPR8:$Rd), (ins VPR64:$Rn),
2507                 asmop # "\t$Rd, $Rn.8b",
2508                 [(set (v1i8 FPR8:$Rd),
2509                     (v1i8 (opnode (v8i8 VPR64:$Rn))))],
2510                 NoItinerary>;
2511
2512     def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2513                 (outs FPR8:$Rd), (ins VPR128:$Rn),
2514                 asmop # "\t$Rd, $Rn.16b",
2515                 [(set (v1i8 FPR8:$Rd),
2516                     (v1i8 (opnode (v16i8 VPR128:$Rn))))],
2517                 NoItinerary>;
2518
2519     def _1h4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
2520                 (outs FPR16:$Rd), (ins VPR64:$Rn),
2521                 asmop # "\t$Rd, $Rn.4h",
2522                 [(set (v1i16 FPR16:$Rd),
2523                     (v1i16 (opnode (v4i16 VPR64:$Rn))))],
2524                 NoItinerary>;
2525
2526     def _1h8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
2527                 (outs FPR16:$Rd), (ins VPR128:$Rn),
2528                 asmop # "\t$Rd, $Rn.8h",
2529                 [(set (v1i16 FPR16:$Rd),
2530                     (v1i16 (opnode (v8i16 VPR128:$Rn))))],
2531                 NoItinerary>;
2532
2533     // _1s2s doesn't exist!
2534
2535     def _1s4s:  NeonI_2VAcross<0b1, u, 0b10, opcode,
2536                 (outs FPR32:$Rd), (ins VPR128:$Rn),
2537                 asmop # "\t$Rd, $Rn.4s",
2538                 [(set (v1i32 FPR32:$Rd),
2539                     (v1i32 (opnode (v4i32 VPR128:$Rn))))],
2540                 NoItinerary>;
2541 }
2542
// Integer min/max/add reductions across all lanes.
2543 defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
2544 defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
2545
2546 defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
2547 defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
2548
2549 defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
2550
2551 // Variant 3
// Floating-point across-lanes reductions: only the 4s -> s form
// exists, producing an f32 scalar.
2552
2553 multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
2554                             string asmop, SDPatternOperator opnode> {
2555     def _1s4s:  NeonI_2VAcross<0b1, u, size, opcode,
2556                 (outs FPR32:$Rd), (ins VPR128:$Rn),
2557                 asmop # "\t$Rd, $Rn.4s",
2558                 [(set (f32 FPR32:$Rd),
2559                     (f32 (opnode (v4f32 VPR128:$Rn))))],
2560                 NoItinerary>;
2561 }
2562
// FP max/min reductions; the *NM forms follow the IEEE-754 minNum/
// maxNum treatment of NaN operands, FMAXV/FMINV follow the FMAX/FMIN
// behavior.  The size field distinguishes max (0b00) from min (0b10).
2563 defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
2564                                 int_aarch64_neon_vmaxnmv>;
2565 defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
2566                                 int_aarch64_neon_vminnmv>;
2567
2568 defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
2569                               int_aarch64_neon_vmaxv>;
2570 defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
2571                               int_aarch64_neon_vminv>;
2572
2573 // The following definitions are for the instruction class (Perm)
2574
// Base class for the permute instructions (UZP/TRN/ZIP): three-operand
// form Rd = opnode(Rn, Rm) with all registers sharing one arrangement.
2575 class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
2576                     string asmop, RegisterOperand OpVPR, string OpS,
2577                     SDPatternOperator opnode, ValueType Ty>
2578   : NeonI_Perm<q, size, opcode,
2579                (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2580                asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
2581                [(set (Ty OpVPR:$Rd),
2582                   (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
2583                NoItinerary>;
2584
// Integer-typed permute variants for every arrangement (no 1d form:
// a 64-bit permute of one d-lane would be a no-op/move).
2585 multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
2586                           SDPatternOperator opnode> {
2587   def _8b  : NeonI_Permute<0b0, 0b00, opcode, asmop,
2588                            VPR64, "8b", opnode, v8i8>;
2589   def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop,
2590                            VPR128, "16b",opnode, v16i8>;
2591   def _4h  : NeonI_Permute<0b0, 0b01, opcode, asmop,
2592                            VPR64, "4h", opnode, v4i16>;
2593   def _8h  : NeonI_Permute<0b1, 0b01, opcode, asmop,
2594                            VPR128, "8h", opnode, v8i16>;
2595   def _2s  : NeonI_Permute<0b0, 0b10, opcode, asmop,
2596                            VPR64, "2s", opnode, v2i32>;
2597   def _4s  : NeonI_Permute<0b1, 0b10, opcode, asmop,
2598                            VPR128, "4s", opnode, v4i32>;
2599   def _2d  : NeonI_Permute<0b1, 0b11, opcode, asmop,
2600                            VPR128, "2d", opnode, v2i64>;
2601 }
2602
// Unzip / transpose / zip, parts 1 and 2, selected on the
// corresponding Neon_* permute DAG nodes.
2603 defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>;
2604 defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>;
2605 defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>;
2606 defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>;
2607 defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>;
2608 defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>;
2609
// Reuse the integer permute instructions for floating-point vectors:
// permutes are bit-pattern moves, so the same instruction serves both.
2610 multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> {
2611   def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
2612             (!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>;
2613
2614   def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
2615             (!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>;
2616
2617   def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
2618             (!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>;
2619 }
2620
// Floating-point selection patterns for all six permute families.
2621 defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>;
2622 defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>;
2623 defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>;
2624 defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>;
2625 defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>;
2626 defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>;
2627
2628 // The following definitions are for the instruction class (3V Diff)
2629
2630 // normal long/long2 pattern
// Three-operand "long" instruction: both 64-bit (or high-half 128-bit)
// sources are widened via "ext" (sext/zext or the NI_*_high fragments)
// before opnode combines them into a 128-bit result.
2631 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2632                  string asmop, string ResS, string OpS,
2633                  SDPatternOperator opnode, SDPatternOperator ext,
2634                  RegisterOperand OpVPR,
2635                  ValueType ResTy, ValueType OpTy>
2636   : NeonI_3VDiff<q, u, size, opcode,
2637                  (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2638                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2639                  [(set (ResTy VPR128:$Rd),
2640                     (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2641                                    (ResTy (ext (OpTy OpVPR:$Rm))))))],
2642                  NoItinerary>;
2643
// Signed long variants (sign-extend the low 64-bit halves).
2644 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2645                         string asmop, SDPatternOperator opnode,
2646                         bit Commutable = 0> {
2647   let isCommutable = Commutable in {
2648     def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2649                            opnode, sext, VPR64, v8i16, v8i8>;
2650     def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2651                            opnode, sext, VPR64, v4i32, v4i16>;
2652     def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2653                            opnode, sext, VPR64, v2i64, v2i32>;
2654   }
2655 }
2656
// Signed long2 variants: operate on the high 64-bit halves of 128-bit
// sources (q = 1), sign-extended through the NI_sext_high_* PatFrags.
multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                            opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
    def _4s8h  : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                            opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
    def _2d4s  : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                            opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
  }
}
2668
// Unsigned long variants: zero-extend the low 64-bit halves of the sources.
multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
                        SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                           opnode, zext, VPR64, v8i16, v8i8>;
    def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                           opnode, zext, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                           opnode, zext, VPR64, v2i64, v2i32>;
  }
}
2680
// Unsigned long2 variants: high 64-bit halves, zero-extended through the
// NI_zext_high_* PatFrags.
multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                            opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
    def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                           opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
    def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                           opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
  }
}
2692
// Widening add/subtract: SADDL/UADDL and SSUBL/USUBL plus their *2 (high
// half) forms.  Addition is commutable; subtraction is not.
defm SADDLvvv :  NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
defm UADDLvvv :  NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;

defm SADDL2vvv :  NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
defm UADDL2vvv :  NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;

defm SSUBLvvv :  NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
defm USUBLvvv :  NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;

defm SSUBL2vvv :  NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
defm USUBL2vvv :  NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2704
2705 // normal wide/wide2 pattern
// Normal wide/wide2 pattern: $Rn is already a full-width 128-bit vector
// (printed with the result arrangement ResS); only $Rm is widened via `ext`.
class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
                 string asmop, string ResS, string OpS,
                 SDPatternOperator opnode, SDPatternOperator ext,
                 RegisterOperand OpVPR,
                 ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode (ResTy VPR128:$Rn),
                                   (ResTy (ext (OpTy OpVPR:$Rm))))))],
                 NoItinerary>;
2718
// Signed wide variants (low-half $Rm, sign-extended): SADDW/SSUBW.
multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
                        SDPatternOperator opnode> {
  def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                         opnode, sext, VPR64, v8i16, v8i8>;
  def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                         opnode, sext, VPR64, v4i32, v4i16>;
  def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                         opnode, sext, VPR64, v2i64, v2i32>;
}

defm SADDWvvv :  NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
defm SSUBWvvv :  NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
2731
// Signed wide2 variants (high-half $Rm, sign-extended): SADDW2/SSUBW2.
multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode> {
  def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                          opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
  def _4s8h  : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                          opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
  def _2d4s  : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                          opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
}

defm SADDW2vvv :  NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
defm SSUBW2vvv :  NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
2744
// Unsigned wide variants (low-half $Rm, zero-extended): UADDW/USUBW.
multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
                        SDPatternOperator opnode> {
  def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                         opnode, zext, VPR64, v8i16, v8i8>;
  def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                         opnode, zext, VPR64, v4i32, v4i16>;
  def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                         opnode, zext, VPR64, v2i64, v2i32>;
}

defm UADDWvvv :  NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
defm USUBWvvv :  NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
2757
// Unsigned wide2 variants (high-half $Rm, zero-extended): UADDW2/USUBW2.
multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode> {
  def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                          opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
  def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                         opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
  def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                         opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
}

defm UADDW2vvv :  NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
defm USUBW2vvv :  NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2770
2771 // Get the high half part of the vector element.
// Select the high half of each vector element: logical shift right by half
// the (double-width) element size, then truncate to the narrow element type.
// Used to recognize addhn/subhn-style computations in the DAG.
multiclass NeonI_get_high {
  def _8h : PatFrag<(ops node:$Rn),
                    (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
                                             (v8i16 (Neon_vdup (i32 8)))))))>;
  def _4s : PatFrag<(ops node:$Rn),
                    (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
                                              (v4i32 (Neon_vdup (i32 16)))))))>;
  def _2d : PatFrag<(ops node:$Rn),
                    (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
                                              (v2i64 (Neon_vdup (i32 32)))))))>;
}

defm NI_get_hi : NeonI_get_high;
2785
2786 // pattern for addhn/subhn with 2 operands
// Narrowing pattern for addhn/subhn with 2 operands: apply `opnode` to two
// 128-bit sources and keep the high half of each element via `get_hi`,
// producing a 64-bit result.
class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
                           string asmop, string ResS, string OpS,
                           SDPatternOperator opnode, SDPatternOperator get_hi,
                           ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR64:$Rd),
                    (ResTy (get_hi
                      (OpTy (opnode (OpTy VPR128:$Rn),
                                    (OpTy VPR128:$Rm))))))],
                 NoItinerary>;
2799
// addhn/subhn across the three narrowing element sizes.
multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
                                SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
                                     opnode, NI_get_hi_8h, v8i8, v8i16>;
    def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
                                     opnode, NI_get_hi_4s, v4i16, v4i32>;
    def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
                                     opnode, NI_get_hi_2d, v2i32, v2i64>;
  }
}

defm ADDHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
defm SUBHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2814
2815 // pattern for operation with 2 operands
// Generic 2-operand pattern with independently parameterized result and
// operand register classes; reused by narrowing (raddhn) and long (smull)
// multiclasses below.
class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
                    string asmop, string ResS, string OpS,
                    SDPatternOperator opnode,
                    RegisterOperand ResVPR, RegisterOperand OpVPR,
                    ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy ResVPR:$Rd),
                    (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
                 NoItinerary>;
2827
2828 // normal narrow pattern
// Normal narrowing pattern (128-bit sources, 64-bit result), matched
// directly against an intrinsic: RADDHN/RSUBHN.
multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
                          SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
                              opnode, VPR64, VPR128, v8i8, v8i16>;
    def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
                              opnode, VPR64, VPR128, v4i16, v4i32>;
    def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
                              opnode, VPR64, VPR128, v2i32, v2i64>;
  }
}

defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2843
2844 // pattern for acle intrinsic with 3 operands
// Narrow-high (xxxHN2) instruction with a tied source: the result is
// inserted in the high half of $Rd while the low half is preserved from
// $src.  Patterns are attached separately below (NarrowHighHalfPat).
class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [], NoItinerary> {
  let Constraints = "$src = $Rd";
  let neverHasSideEffects = 1;
}

multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
  def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
  def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
  def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
}

defm ADDHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
defm SUBHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;

defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2866
2867 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
2868 // part.
// Match "narrow into the high half": combine an existing low half ($src)
// with the narrowed result of `coreop`, and feed $src through
// SUBREG_TO_REG so it occupies the low 64 bits of the 128-bit destination.
class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
                        SDPatternOperator coreop>
  : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
                      (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
                                                        (SrcTy VPR128:$Rm)))))),
        (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
              VPR128:$Rn, VPR128:$Rm)>;
2876
// addhn2 patterns: add then take the element high halves.
def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8,  v8i16,
          BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<ADDHN2vvv_8h4s,  v4i16, v4i32,
          BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<ADDHN2vvv_4s2d,  v2i32, v2i64,
          BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;

// subhn2 patterns: subtract then take the element high halves.
def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8,  v8i16,
          BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<SUBHN2vvv_8h4s,  v4i16, v4i32,
          BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<SUBHN2vvv_4s2d,  v2i32, v2i64,
          BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;

// raddhn2 patterns: rounding variant is matched via the intrinsic.
def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vraddhn>;
def : NarrowHighHalfPat<RADDHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vraddhn>;
def : NarrowHighHalfPat<RADDHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vraddhn>;

// rsubhn2 patterns: rounding variant is matched via the intrinsic.
def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vrsubhn>;
def : NarrowHighHalfPat<RSUBHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vrsubhn>;
def : NarrowHighHalfPat<RSUBHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vrsubhn>;
2902
2903 // pattern that need to extend result
// Long pattern whose narrow intermediate result (OpSTy) must be
// zero-extended up to the 128-bit result type; used for sabdl/uabdl, whose
// absolute difference always fits the narrow type.
class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS,
                     SDPatternOperator opnode,
                     RegisterOperand OpVPR,
                     ValueType ResTy, ValueType OpTy, ValueType OpSTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
                                                (OpTy OpVPR:$Rm))))))],
                 NoItinerary>;
2916
// sabdl/uabdl on the low 64-bit halves across the three widening sizes.
multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
                           SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                               opnode, VPR64, v8i16, v8i8, v8i8>;
    def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                               opnode, VPR64, v4i32, v4i16, v4i16>;
    def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                               opnode, VPR64, v2i64, v2i32, v2i32>;
  }
}

defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
2931
// Wrap an operator so it applies to the high 64-bit halves of two 128-bit
// operands; instantiated for the abd/mull/qdmull/pmull intrinsics used by
// the *2 (high-half) instructions.
multiclass NeonI_Op_High<SDPatternOperator op> {
  def _16B : PatFrag<(ops node:$Rn, node:$Rm),
                     (op (v8i8 (Neon_High16B node:$Rn)),
                         (v8i8 (Neon_High16B node:$Rm)))>;
  def _8H  : PatFrag<(ops node:$Rn, node:$Rm),
                     (op (v4i16 (Neon_High8H node:$Rn)),
                         (v4i16 (Neon_High8H node:$Rm)))>;
  def _4S  : PatFrag<(ops node:$Rn, node:$Rm),
                     (op (v2i32 (Neon_High4S node:$Rn)),
                         (v2i32 (Neon_High4S node:$Rm)))>;
}

defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
2950
// sabdl2/uabdl2: absolute difference of the high halves, widened.
// NOTE(review): the def suffixes (_8h8b etc.) name the low-half sizes even
// though these are high-half (16b/8h/4s) operations — inconsistent with
// NeonI_3VDL2_s's _8h16b naming; left as-is since other code may reference
// these instruction names.
multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
                            bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b  : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                !cast<PatFrag>(opnode # "_16B"),
                                VPR128, v8i16, v16i8, v8i8>;
    def _4s4h  : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                !cast<PatFrag>(opnode # "_8H"),
                                VPR128, v4i32, v8i16, v4i16>;
    def _2d2s  : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                !cast<PatFrag>(opnode # "_4S"),
                                VPR128, v2i64, v4i32, v2i32>;
  }
}

defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
2968
2969 // For pattern that need two operators being chained.
// Accumulating long pattern with two chained operators: the zext'd result
// of `subop` (an absolute-difference op) is combined with the tied
// accumulator $src by `opnode` (add).  Used for saba(l)/uaba(l).
class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS,
                     SDPatternOperator opnode, SDPatternOperator subop,
                     RegisterOperand OpVPR,
                     ValueType ResTy, ValueType OpTy, ValueType OpSTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode
                      (ResTy VPR128:$src),
                      (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
                                                 (OpTy OpVPR:$Rm))))))))],
                 NoItinerary> {
  let Constraints = "$src = $Rd";
}
2986
// sabal/uabal: accumulate the widened absolute difference of the low halves.
multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
                             SDPatternOperator opnode, SDPatternOperator subop>{
  def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                             opnode, subop, VPR64, v8i16, v8i8, v8i8>;
  def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                             opnode, subop, VPR64, v4i32, v4i16, v4i16>;
  def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                             opnode, subop, VPR64, v2i64, v2i32, v2i32>;
}

defm SABALvvv :  NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
                                   add, int_arm_neon_vabds>;
defm UABALvvv :  NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
                                   add, int_arm_neon_vabdu>;
3001
// sabal2/uabal2: accumulate the widened absolute difference of the high
// halves (subop is resolved by name to an NI_*_hi PatFrag).
multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
                              SDPatternOperator opnode, string subop> {
  def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                             opnode, !cast<PatFrag>(subop # "_16B"),
                             VPR128, v8i16, v16i8, v8i8>;
  def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                             opnode, !cast<PatFrag>(subop # "_8H"),
                             VPR128, v4i32, v8i16, v4i16>;
  def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                             opnode, !cast<PatFrag>(subop # "_4S"),
                             VPR128, v2i64, v4i32, v2i32>;
}

defm SABAL2vvv :  NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
                                     "NI_sabdl_hi">;
defm UABAL2vvv :  NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
                                     "NI_uabdl_hi">;
3019
3020 // Long pattern with 2 operands
// Long pattern with 2 operands: 64-bit sources, 128-bit result (smull/umull).
multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
                          SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                              opnode, VPR128, VPR64, v8i16, v8i8>;
    def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                              opnode, VPR128, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                              opnode, VPR128, VPR64, v2i64, v2i32>;
  }
}

defm SMULLvvv :  NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
defm UMULLvvv :  NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
3035
// Long2 multiply pattern: both operands are full 128-bit registers; the
// high-half selection is folded into `opnode` (an NI_*_hi PatFrag).
class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
                           string asmop, string ResS, string OpS,
                           SDPatternOperator opnode,
                           ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
                 NoItinerary>;
3046
// smull2/umull2 across the three widening sizes; the high-half PatFrag is
// resolved by name from `opnode`.
multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
                                   string opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                      !cast<PatFrag>(opnode # "_16B"),
                                      v8i16, v16i8>;
    def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                     !cast<PatFrag>(opnode # "_8H"),
                                     v4i32, v8i16>;
    def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                     !cast<PatFrag>(opnode # "_4S"),
                                     v2i64, v4i32>;
  }
}

defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
                                         "NI_smull_hi", 1>;
defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
                                         "NI_umull_hi", 1>;
3066
3067 // Long pattern with 3 operands
// Long pattern with 3 operands: `opnode` (a PatFrag such as Neon_smlal)
// combines the tied 128-bit accumulator $src with two 64-bit sources.
class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS,
                     SDPatternOperator opnode,
                     ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode
                      (ResTy VPR128:$src),
                      (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
               NoItinerary> {
  let Constraints = "$src = $Rd";
}

multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
                             SDPatternOperator opnode> {
  def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                             opnode, v8i16, v8i8>;
  def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                             opnode, v4i32, v4i16>;
  def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                             opnode, v2i64, v2i32>;
}
3092
// Multiply-accumulate-long PatFrags: accumulator +/- widening multiply.
def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (add node:$Rd,
                            (int_arm_neon_vmulls node:$Rn, node:$Rm))>;

def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (add node:$Rd,
                            (int_arm_neon_vmullu node:$Rn, node:$Rm))>;

def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (sub node:$Rd,
                            (int_arm_neon_vmulls node:$Rn, node:$Rm))>;

def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (sub node:$Rd,
                            (int_arm_neon_vmullu node:$Rn, node:$Rm))>;

defm SMLALvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
defm UMLALvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;

defm SMLSLvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
defm UMLSLvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
3114
// Accumulating multiply pattern: `subop` (add/sub or a saturating
// intrinsic) combines the tied accumulator $src with the result of
// `opnode` applied to $Rn/$Rm.  Reused for both high-half (q=1, VPR128)
// and low-half (q=0, VPR64) instantiations below.
class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
                           string asmop, string ResS, string OpS,
                           SDPatternOperator subop, SDPatternOperator opnode,
                           RegisterOperand OpVPR,
                           ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
               (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
               asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
               [(set (ResTy VPR128:$Rd),
                  (ResTy (subop
                    (ResTy VPR128:$src),
                    (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
               NoItinerary> {
  let Constraints = "$src = $Rd";
}
3130
// smlal2/umlal2/smlsl2/umlsl2: accumulate the high-half widening multiply.
multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
                                   SDPatternOperator subop, string opnode> {
  def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                    subop, !cast<PatFrag>(opnode # "_16B"),
                                    VPR128, v8i16, v16i8>;
  def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                   subop, !cast<PatFrag>(opnode # "_8H"),
                                   VPR128, v4i32, v8i16>;
  def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                   subop, !cast<PatFrag>(opnode # "_4S"),
                                   VPR128, v2i64, v4i32>;
}

defm SMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
                                          add, "NI_smull_hi">;
defm UMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
                                          add, "NI_umull_hi">;

defm SMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
                                          sub, "NI_smull_hi">;
defm UMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
                                          sub, "NI_umull_hi">;
3153
// sqdmlal/sqdmlsl (low halves): saturating accumulate of a saturating
// doubling multiply; reuses NeonI_3VDL2_3Op_mlas with q=0 and VPR64
// sources.  Only 4s4h/2d2s exist (no byte form for sqdmull).
multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
                                    SDPatternOperator opnode> {
  def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                                   opnode, int_arm_neon_vqdmull,
                                   VPR64, v4i32, v4i16>;
  def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                                   opnode, int_arm_neon_vqdmull,
                                   VPR64, v2i64, v2i32>;
}

defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
                                           int_arm_neon_vqadds>;
defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
                                           int_arm_neon_vqsubs>;
3168
// sqdmull (low halves): only the halfword and word forms exist.
multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                              opnode, VPR128, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                              opnode, VPR128, VPR64, v2i64, v2i32>;
  }
}

defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
                                int_arm_neon_vqdmull, 1>;
3181
// sqdmull2 (high halves): halfword and word forms only.
multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
                                   string opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                     !cast<PatFrag>(opnode # "_8H"),
                                     v4i32, v8i16>;
    def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                     !cast<PatFrag>(opnode # "_4S"),
                                     v2i64, v4i32>;
  }
}

defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
                                           "NI_qdmull_hi", 1>;
3196
// sqdmlal2/sqdmlsl2: saturating accumulate of the high-half saturating
// doubling multiply.
multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
                                     SDPatternOperator opnode> {
  def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                   opnode, NI_qdmull_hi_8H,
                                   VPR128, v4i32, v8i16>;
  def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                   opnode, NI_qdmull_hi_4S,
                                   VPR128, v2i64, v4i32>;
}

defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
                                             int_arm_neon_vqadds>;
defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
                                             int_arm_neon_vqsubs>;
3211
// pmull: 8b->8h polynomial multiply plus the 1d->1q (crypto) form.  The
// 1q result has no dedicated value type here, so v16i8 stands in for the
// 128-bit polynomial product.
multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode_8h8b,
                         SDPatternOperator opnode_1q1d, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                              opnode_8h8b, VPR128, VPR64, v8i16, v8i8>;

    def _1q1d : NeonI_3VD_2Op<0b0, u, 0b11, opcode, asmop, "1q", "1d",
                              opnode_1q1d, VPR128, VPR64, v16i8, v1i64>;
  }
}

defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp,
                              int_aarch64_neon_vmull_p64, 1>;
3226
// "Second part" (upper-half) counterpart of NeonI_3VDL_v3:
//   _8h16b: upper 8b of each 16b source -> 8h, via the PatFrag named
//           opnode # "_16B" (resolved with !cast).
//   _1q2d : element 1 (the high doubleword) of each 2d source -> 1q,
//           fed as v1i64 values to the p64 multiply intrinsic.
multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
                                   string opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                      !cast<PatFrag>(opnode # "_16B"),
                                      v8i16, v16i8>;

    // Written out longhand because the high doubleword must be extracted
    // from each source before being rebuilt as a v1i64 scalar vector.
    def _1q2d : 
      NeonI_3VDiff<0b1, u, 0b11, opcode,
                   (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                   asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
                   [(set (v16i8 VPR128:$Rd),
                      (v16i8 (int_aarch64_neon_vmull_p64 
                        (v1i64 (scalar_to_vector
                          (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))),
                        (v1i64 (scalar_to_vector
                          (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))],
                   NoItinerary>;
  }
}
3247
// PMULL2: polynomial multiply long, second part (upper source halves).
defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
                                         1>;
3250
3251 // End of implementation for instruction class (3V Diff)
3252
// The following are the vector load/store multiple N-element structure
// instructions (class SIMD lselem).
3255
3256 // ld1:         load multiple 1-element structure to 1/2/3/4 registers.
3257 // ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
3258 //              The structure consists of a sequence of sets of N values.
3259 //              The first element of the structure is placed in the first lane
//              of the first vector, the second element in the first lane
3261 //              of the second vector, and so on.
3262 // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
3263 // the three 64-bit vectors list {BA, DC, FE}.
3264 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
3265 // 64-bit vectors list {DA, EB, FC}.
// Store instructions store multiple structures from N registers, mirroring
// the loads.
3267
3268
// Base class for "load multiple structures" (ld1..ld4, ld1xN): loads from
// the base address [$Rn] into the vector register list $Rt.  No patterns
// here; selection is done via the Pat<> defs further down.
class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
                    RegisterOperand VecList, string asmop>
  : NeonI_LdStMult<q, 1, opcode, size,
                 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
                 asmop # "\t$Rt, [$Rn]",
                 [],
                 NoItinerary> {
  let mayLoad = 1;
  let neverHasSideEffects = 1;
}
3279
// Instantiate one load-multiple instruction per vector arrangement:
// 64-bit lists (q=0): 8b/4h/2s; 128-bit lists (q=1): 16b/8h/4s/2d.
// The .1d arrangement is only legal for ld1/ld1xN and is defined
// separately at the instantiation sites below.
multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
  def _8B : NeonI_LDVList<0, opcode, 0b00,
                          !cast<RegisterOperand>(List # "8B_operand"), asmop>;

  def _4H : NeonI_LDVList<0, opcode, 0b01,
                          !cast<RegisterOperand>(List # "4H_operand"), asmop>;

  def _2S : NeonI_LDVList<0, opcode, 0b10,
                          !cast<RegisterOperand>(List # "2S_operand"), asmop>;

  def _16B : NeonI_LDVList<1, opcode, 0b00,
                           !cast<RegisterOperand>(List # "16B_operand"), asmop>;

  def _8H : NeonI_LDVList<1, opcode, 0b01,
                          !cast<RegisterOperand>(List # "8H_operand"), asmop>;

  def _4S : NeonI_LDVList<1, opcode, 0b10,
                          !cast<RegisterOperand>(List # "4S_operand"), asmop>;

  def _2D : NeonI_LDVList<1, opcode, 0b11,
                          !cast<RegisterOperand>(List # "2D_operand"), asmop>;
}
3302
// Load multiple N-element structures to N consecutive registers (N = 1,2,3,4).
// Each *_1D variant supplies the .1d arrangement the multiclass omits.
defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;

defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;

defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;

defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;

// Load multiple 1-element structures to N consecutive registers (N = 2,3,4).
defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">;
def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;

defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">;
def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;

defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">;
def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
3322
// Base class for "store multiple structures" (st1..st4, st1xN): stores the
// vector register list $Rt to the base address [$Rn].  Mirrors
// NeonI_LDVList with L=0 and $Rt moved to the ins list.
class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
                    RegisterOperand VecList, string asmop>
  : NeonI_LdStMult<q, 0, opcode, size,
                 (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
                 asmop # "\t$Rt, [$Rn]",
                 [],
                 NoItinerary> {
  let mayStore = 1;
  let neverHasSideEffects = 1;
}
3333
// Store counterpart of LDVList_BHSD: one store-multiple instruction per
// arrangement (64-bit: 8b/4h/2s; 128-bit: 16b/8h/4s/2d).  The .1d form is
// again only legal for st1/st1xN and defined at the instantiation sites.
multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
  def _8B : NeonI_STVList<0, opcode, 0b00,
                          !cast<RegisterOperand>(List # "8B_operand"), asmop>;

  def _4H : NeonI_STVList<0, opcode, 0b01,
                          !cast<RegisterOperand>(List # "4H_operand"), asmop>;

  def _2S : NeonI_STVList<0, opcode, 0b10,
                          !cast<RegisterOperand>(List # "2S_operand"), asmop>;

  def _16B : NeonI_STVList<1, opcode, 0b00,
                           !cast<RegisterOperand>(List # "16B_operand"), asmop>;

  def _8H : NeonI_STVList<1, opcode, 0b01,
                          !cast<RegisterOperand>(List # "8H_operand"), asmop>;

  def _4S : NeonI_STVList<1, opcode, 0b10,
                          !cast<RegisterOperand>(List # "4S_operand"), asmop>;

  def _2D : NeonI_STVList<1, opcode, 0b11,
                          !cast<RegisterOperand>(List # "2D_operand"), asmop>;
}
3356
// Store multiple N-element structures from N registers (N = 1,2,3,4).
// Opcodes match the corresponding loads; only the L bit differs.
defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;

defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;

defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;

defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;

// Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;

defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;

defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
3376
// Select plain (unstructured) vector loads/stores to the LD1/ST1
// instruction with the matching arrangement.  128-bit types use the Q
// forms, 64-bit types the D forms.
def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;

def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;

def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>;
def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>;

def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;

def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;

def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;

def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
          (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
          (ST1_2D GPR64xsp:$addr, VPR128:$value)>;

def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr),
          (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr),
          (ST1_4S GPR64xsp:$addr, VPR128:$value)>;

def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr),
          (ST1_8H GPR64xsp:$addr, VPR128:$value)>;
def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr),
          (ST1_16B GPR64xsp:$addr, VPR128:$value)>;

def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr),
          (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr),
          (ST1_1D GPR64xsp:$addr, VPR64:$value)>;

def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr),
          (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr),
          (ST1_2S GPR64xsp:$addr, VPR64:$value)>;

def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
          (ST1_4H GPR64xsp:$addr, VPR64:$value)>;
def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
          (ST1_8B GPR64xsp:$addr, VPR64:$value)>;

// Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store.
// These single-element vector types live in the scalar FP registers, so an
// ordinary FP load/store (immediate offset 0) suffices.
// FIXME: for now we have v1i8, v1i16, v1i32 legal types; if they become
// illegal, these patterns are not needed any more.
def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>;
def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>;
def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>;

def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr),
          (LSFP8_STR $value, $addr, 0)>;
def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr),
          (LSFP16_STR $value, $addr, 0)>;
def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr),
          (LSFP32_STR $value, $addr, 0)>;
3438
3439
3440 // End of vector load/store multiple N-element structure(class SIMD lselem)
3441
// The following are the post-index vector load/store multiple N-element
// structure instructions (class SIMD lselem-post).
// Immediate operands that each accept exactly one value (1, 2, 3, 4, 6,
// 8, 12, 16, 24, 32, 48 or 64).  The post-index immediate forms below
// write back the base register by a fixed amount equal to the transfer
// size; the assembly syntax spells that amount as an immediate, so each
// instruction needs an operand class that matches only its own size.
def exact1_asmoperand : AsmOperandClass {
  let Name = "Exact1";
  let PredicateMethod = "isExactImm<1>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> {
  let ParserMatchClass = exact1_asmoperand;
}

def exact2_asmoperand : AsmOperandClass {
  let Name = "Exact2";
  let PredicateMethod = "isExactImm<2>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> {
  let ParserMatchClass = exact2_asmoperand;
}

def exact3_asmoperand : AsmOperandClass {
  let Name = "Exact3";
  let PredicateMethod = "isExactImm<3>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> {
  let ParserMatchClass = exact3_asmoperand;
}

def exact4_asmoperand : AsmOperandClass {
  let Name = "Exact4";
  let PredicateMethod = "isExactImm<4>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> {
  let ParserMatchClass = exact4_asmoperand;
}

def exact6_asmoperand : AsmOperandClass {
  let Name = "Exact6";
  let PredicateMethod = "isExactImm<6>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> {
  let ParserMatchClass = exact6_asmoperand;
}

def exact8_asmoperand : AsmOperandClass {
  let Name = "Exact8";
  let PredicateMethod = "isExactImm<8>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
  let ParserMatchClass = exact8_asmoperand;
}

def exact12_asmoperand : AsmOperandClass {
  let Name = "Exact12";
  let PredicateMethod = "isExactImm<12>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> {
  let ParserMatchClass = exact12_asmoperand;
}

def exact16_asmoperand : AsmOperandClass {
  let Name = "Exact16";
  let PredicateMethod = "isExactImm<16>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
  let ParserMatchClass = exact16_asmoperand;
}

def exact24_asmoperand : AsmOperandClass {
  let Name = "Exact24";
  let PredicateMethod = "isExactImm<24>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
  let ParserMatchClass = exact24_asmoperand;
}

def exact32_asmoperand : AsmOperandClass {
  let Name = "Exact32";
  let PredicateMethod = "isExactImm<32>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
  let ParserMatchClass = exact32_asmoperand;
}

def exact48_asmoperand : AsmOperandClass {
  let Name = "Exact48";
  let PredicateMethod = "isExactImm<48>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
  let ParserMatchClass = exact48_asmoperand;
}

def exact64_asmoperand : AsmOperandClass {
  let Name = "Exact64";
  let PredicateMethod = "isExactImm<64>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
  let ParserMatchClass = exact64_asmoperand;
}
3551
// Post-index (write-back) load multiple: defines both addressing forms.
//   _fixed   : post-increment by the fixed transfer size; Rm=0b11111
//              encodes the immediate variant in the instruction word.
//   _register: post-increment by register $Rm (XZR excluded).
// $wb is the updated base, tied to $Rn via the Constraints string.
multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
                           RegisterOperand VecList, Operand ImmTy,
                           string asmop> {
  let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
      DecoderMethod = "DecodeVLDSTPostInstruction" in {
    def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
                     (outs VecList:$Rt, GPR64xsp:$wb),
                     (ins GPR64xsp:$Rn, ImmTy:$amt),
                     asmop # "\t$Rt, [$Rn], $amt",
                     [],
                     NoItinerary> {
      let Rm = 0b11111;
    }

    def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
                        (outs VecList:$Rt, GPR64xsp:$wb),
                        (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
                        asmop # "\t$Rt, [$Rn], $Rm",
                        [],
                        NoItinerary>;
  }
}
3574
// Per-arrangement post-index loads.  ImmTy is the fixed post-increment
// for the 64-bit list forms; ImmTy2 (twice the size) for the 128-bit
// forms.
multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
    Operand ImmTy2, string asmop> {
  defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
                              !cast<RegisterOperand>(List # "8B_operand"),
                              ImmTy, asmop>;

  defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
                              !cast<RegisterOperand>(List # "4H_operand"),
                              ImmTy, asmop>;

  defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
                              !cast<RegisterOperand>(List # "2S_operand"),
                              ImmTy, asmop>;

  defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
                               !cast<RegisterOperand>(List # "16B_operand"),
                               ImmTy2, asmop>;

  defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
                              !cast<RegisterOperand>(List # "8H_operand"),
                              ImmTy2, asmop>;

  defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
                              !cast<RegisterOperand>(List # "4S_operand"),
                              ImmTy2, asmop>;

  defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
                              !cast<RegisterOperand>(List # "2D_operand"),
                              ImmTy2, asmop>;
}
3605
// Post-index load multiple N-element structures to N registers
// (N = 1,2,3,4).  The two immediates per defm are the 64-bit and
// 128-bit transfer sizes in bytes.
defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
                                 "ld1">;

defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;

defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
                             "ld3">;

defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;

// Post-index load multiple 1-element structures to N consecutive registers
// (N = 2,3,4)
defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
                               "ld1">;
defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
                                   uimm_exact16, "ld1">;

defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
                               "ld1">;
defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
                                   uimm_exact24, "ld1">;

defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
                                "ld1">;
defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
                                   uimm_exact32, "ld1">;
3634
// Post-index (write-back) store multiple: mirrors NeonI_LDWB_VList with
// L=0 and the vector list $Rt moved to the ins list.  _fixed encodes the
// immediate form via Rm=0b11111; _register post-increments by $Rm.
multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
                            RegisterOperand VecList, Operand ImmTy,
                            string asmop> {
  let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
      DecoderMethod = "DecodeVLDSTPostInstruction" in {
    def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
                     (outs GPR64xsp:$wb),
                     (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
                     asmop # "\t$Rt, [$Rn], $amt",
                     [],
                     NoItinerary> {
      let Rm = 0b11111;
    }

    def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
                      (outs GPR64xsp:$wb),
                      (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
                      asmop # "\t$Rt, [$Rn], $Rm",
                      [],
                      NoItinerary>;
  }
}
3657
// Per-arrangement post-index stores.  ImmTy is the fixed post-increment
// for the 64-bit list forms; ImmTy2 (twice the size) for the 128-bit
// forms.
multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
                           Operand ImmTy2, string asmop> {
  defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
                 !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;

  defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
                              !cast<RegisterOperand>(List # "4H_operand"),
                              ImmTy, asmop>;

  defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
                              !cast<RegisterOperand>(List # "2S_operand"),
                              ImmTy, asmop>;

  defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
                               !cast<RegisterOperand>(List # "16B_operand"),
                               ImmTy2, asmop>;

  defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
                              !cast<RegisterOperand>(List # "8H_operand"),
                              ImmTy2, asmop>;

  defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
                              !cast<RegisterOperand>(List # "4S_operand"),
                              ImmTy2, asmop>;

  defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
                              !cast<RegisterOperand>(List # "2D_operand"),
                              ImmTy2, asmop>;
}
3687
// Post-index store multiple N-element structures from N registers
// (N = 1,2,3,4).  (The original comment said "load"; these are stores.)
defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
                                 "st1">;

defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;

defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
                             "st3">;

defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;

// Post-index store multiple 1-element structures from N consecutive registers
// (N = 2,3,4)
defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
                               "st1">;
defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
                                   uimm_exact16, "st1">;

defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
                               "st1">;
defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
                                   uimm_exact24, "st1">;

defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
                               "st1">;
defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
                                   uimm_exact32, "st1">;
3716
3717 // End of post-index vector load/store multiple N-element structure
3718 // (class SIMD lselem-post)
3719
// The following are the vector load/store single N-element structure
// instructions (class SIMD lsone).
// Lane-index immediates, printed "bare" (without the '#' prefix) so they
// match the "$Rt[lane]" assembly syntax.  neon_uimmN_bare accepts lane
// numbers 0 .. 2^N-1.
def neon_uimm0_bare : Operand<i64>,
                        ImmLeaf<i64, [{return Imm == 0;}]> {
  let ParserMatchClass = neon_uimm0_asmoperand;
  let PrintMethod = "printUImmBareOperand";
}

def neon_uimm1_bare : Operand<i64>,
                        ImmLeaf<i64, [{return Imm < 2;}]> {
  let ParserMatchClass = neon_uimm1_asmoperand;
  let PrintMethod = "printUImmBareOperand";
}

def neon_uimm2_bare : Operand<i64>,
                        ImmLeaf<i64, [{return Imm < 4;}]> {
  let ParserMatchClass = neon_uimm2_asmoperand;
  let PrintMethod = "printUImmBareOperand";
}

// NOTE(review): the 3- and 4-bit variants reuse the generic uimm
// asmoperands rather than neon_-specific ones — confirm this asymmetry
// is intentional.
def neon_uimm3_bare : Operand<i64>,
                        ImmLeaf<i64, [{return Imm < 8;}]> {
  let ParserMatchClass = uimm3_asmoperand;
  let PrintMethod = "printUImmBareOperand";
}

def neon_uimm4_bare : Operand<i64>,
                        ImmLeaf<i64, [{return Imm < 16;}]> {
  let ParserMatchClass = uimm4_asmoperand;
  let PrintMethod = "printUImmBareOperand";
}
3751
// Base class for the load-and-replicate instructions (ld1r..ld4r): loads
// one structure from [$Rn] and duplicates it to all lanes of each
// register in the list $Rt.
class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
                    RegisterOperand VecList, string asmop>
    : NeonI_LdOne_Dup<q, r, opcode, size,
                      (outs VecList:$Rt), (ins GPR64xsp:$Rn),
                      asmop # "\t$Rt, [$Rn]",
                      [],
                      NoItinerary> {
  let mayLoad = 1;
  let neverHasSideEffects = 1;
}
3762
// One load-and-replicate instruction per arrangement.  Unlike the
// load-multiple multiclass, .1d is legal for every ldNr and is defined
// here directly.
multiclass LDN_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop> {
  def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00,
                          !cast<RegisterOperand>(List # "8B_operand"), asmop>;

  def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01,
                          !cast<RegisterOperand>(List # "4H_operand"), asmop>;

  def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10,
                          !cast<RegisterOperand>(List # "2S_operand"), asmop>;

  def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11,
                          !cast<RegisterOperand>(List # "1D_operand"), asmop>;

  def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00,
                           !cast<RegisterOperand>(List # "16B_operand"), asmop>;

  def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01,
                          !cast<RegisterOperand>(List # "8H_operand"), asmop>;

  def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10,
                          !cast<RegisterOperand>(List # "4S_operand"), asmop>;

  def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11,
                          !cast<RegisterOperand>(List # "2D_operand"), asmop>;
}
3788
// Load single 1-element structure to all lanes of 1 register.
defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">;

// Load single N-element structure to all lanes of N consecutive
// registers (N = 2,3,4).  The N is encoded in the {r, opcode} pair.
defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">;
defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">;
defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">;
3797
3798
// Select "duplicate a loaded scalar to every lane" (Neon_vdup of a load)
// to the corresponding LD1R instruction.  DTy is the scalar type
// produced by the load; VTy the resulting vector type.
class LD1R_pattern <ValueType VTy, ValueType DTy, PatFrag LoadOp,
                    Instruction INST>
    : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))),
          (VTy (INST GPR64xsp:$Rn))>;

// Match all LD1R instructions.  Sub-word loads use the any-extending
// loads since the element is narrower than i32.
def : LD1R_pattern<v8i8, i32, extloadi8, LD1R_8B>;

def : LD1R_pattern<v16i8, i32, extloadi8, LD1R_16B>;

def : LD1R_pattern<v4i16, i32, extloadi16, LD1R_4H>;

def : LD1R_pattern<v8i16, i32, extloadi16, LD1R_8H>;

def : LD1R_pattern<v2i32, i32, load, LD1R_2S>;
def : LD1R_pattern<v2f32, f32, load, LD1R_2S>;

def : LD1R_pattern<v4i32, i32, load, LD1R_4S>;
def : LD1R_pattern<v4f32, f32, load, LD1R_4S>;

def : LD1R_pattern<v2i64, i64, load, LD1R_2D>;
def : LD1R_pattern<v2f64, f64, load, LD1R_2D>;

// For the single-element v1 types there is no dup; a scalar_to_vector of
// the loaded value is already the whole vector, so LD1R_1D suffices.
class LD1R_pattern_v1 <ValueType VTy, ValueType DTy, PatFrag LoadOp,
                       Instruction INST>
  : Pat<(VTy (scalar_to_vector (DTy (LoadOp GPR64xsp:$Rn)))),
        (VTy (INST GPR64xsp:$Rn))>;

def : LD1R_pattern_v1<v1i64, i64, load, LD1R_1D>;
def : LD1R_pattern_v1<v1f64, f64, load, LD1R_1D>;
3829
// Build the four element-sized variants (B/H/S/D) of a vector-list
// operand over the given register class/tuple class.
multiclass VectorList_Bare_BHSD<string PREFIX, int Count,
                                RegisterClass RegList> {
  defm B : VectorList_operands<PREFIX, "B", Count, RegList>;
  defm H : VectorList_operands<PREFIX, "H", Count, RegList>;
  defm S : VectorList_operands<PREFIX, "S", Count, RegList>;
  defm D : VectorList_operands<PREFIX, "D", Count, RegList>;
}

// Special vector list operands of 128-bit vectors with bare layout,
// i.e. only showing ".b", ".h", ".s", ".d" — used by the lane-indexed
// load/store forms below.
defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>;
defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>;
defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>;
defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>;
3844
// Base class for "load single structure to one lane" (ldN lane forms):
// loads from [$Rn] into lane $lane of each register in $Rt.  The other
// lanes must be preserved, so the whole list is also a tied source
// ($src = $Rt) and the instruction defines multiple registers
// (hasExtraDefRegAllocReq).
class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                     Operand ImmOp, string asmop>
    : NeonI_LdStOne_Lane<1, r, op2_1, op0,
                         (outs VList:$Rt),
                         (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane),
                         asmop # "\t$Rt[$lane], [$Rn]",
                         [],
                         NoItinerary> {
  let mayLoad = 1;
  let neverHasSideEffects = 1;
  let hasExtraDefRegAllocReq = 1;
  let Constraints = "$src = $Rt";
}
3858
// Lane-indexed loads per element size.  The lane number is scattered
// across the encoding: its top bit goes in Inst{30} (the Q bit) and the
// remaining bits in Inst{12-10}, left-aligned with zero padding for the
// wider elements.
multiclass LDN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
  def _B : NeonI_LDN_Lane<r, 0b00, op0,
                          !cast<RegisterOperand>(List # "B_operand"),
                          neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _H : NeonI_LDN_Lane<r, 0b01, op0,
                          !cast<RegisterOperand>(List # "H_operand"),
                          neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _S : NeonI_LDN_Lane<r, 0b10, op0,
                          !cast<RegisterOperand>(List # "S_operand"),
                          neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  // The D form shares the S-form opcode space (op2_1 = 0b10); the fixed
  // 0b001 in Inst{12-10} selects the doubleword variant.
  def _D : NeonI_LDN_Lane<r, 0b10, op0,
                          !cast<RegisterOperand>(List # "D_operand"),
                          neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}
3888
// Load single 1-element structure to one lane of 1 register.
defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">;

// Load single N-element structure to one lane of N consecutive registers
// (N = 2,3,4), selected by the {r, op0} pair.
defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">;
defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">;
defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">;
3897
// Select vector_insert-of-a-load to LD1LN.  The instruction only has a
// 128-bit form, so the 64-bit (VTy) pattern widens $src with
// SUBREG_TO_REG, performs the lane load, then extracts the low half;
// the 128-bit (VTy2) pattern maps directly.
multiclass LD1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
                          Operand ImmOp, Operand ImmOp2, PatFrag LoadOp,
                          Instruction INST> {
  def : Pat<(VTy (vector_insert (VTy VPR64:$src),
                     (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))),
            (VTy (EXTRACT_SUBREG
                     (INST GPR64xsp:$Rn,
                           (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                           ImmOp:$lane),
                     sub_64))>;

  def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src),
                      (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))),
            (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>;
}
3913
3914 // Match all LD1LN instructions
3915 defm : LD1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
3916                       extloadi8, LD1LN_B>;
3917
3918 defm : LD1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
3919                       extloadi16, LD1LN_H>;
3920
3921 defm : LD1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
3922                       load, LD1LN_S>;
3923 defm : LD1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
3924                       load, LD1LN_S>;
3925
3926 defm : LD1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
3927                       load, LD1LN_D>;
3928 defm : LD1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
3929                       load, LD1LN_D>;
3930
// Base class for "store single structure from one lane" (stN lane forms):
// stores lane $lane of each register in the list $Rt to [$Rn].
class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                     Operand ImmOp, string asmop>
    : NeonI_LdStOne_Lane<0, r, op2_1, op0,
                         (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane),
                         asmop # "\t$Rt[$lane], [$Rn]",
                         [],
                         NoItinerary> {
  let mayStore = 1;
  let neverHasSideEffects = 1;
  // The register list is a *source* (the (outs) list is empty), so the
  // correct flag is hasExtraSrcRegAllocReq, as in ARM's VSTn-lane
  // classes.  The previous hasExtraDefRegAllocReq was meaningless on an
  // instruction with no defs.
  let hasExtraSrcRegAllocReq = 1;
}
3942
// Lane-indexed stores per element size.  Lane-number encoding matches
// LDN_Lane_BHSD: top lane bit in Inst{30} (Q), remainder left-aligned
// in Inst{12-10}.
multiclass STN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
  def _B : NeonI_STN_Lane<r, 0b00, op0,
                          !cast<RegisterOperand>(List # "B_operand"),
                          neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _H : NeonI_STN_Lane<r, 0b01, op0,
                          !cast<RegisterOperand>(List # "H_operand"),
                          neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _S : NeonI_STN_Lane<r, 0b10, op0,
                          !cast<RegisterOperand>(List # "S_operand"),
                           neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  // D form shares the S-form opcode space; fixed 0b001 selects it.
  def _D : NeonI_STN_Lane<r, 0b10, op0,
                          !cast<RegisterOperand>(List # "D_operand"),
                          neon_uimm1_bare, asmop>{
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}
3972
// Store single 1-element structure from one lane of 1 register.
defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">;

// Store single N-element structure from one lane of N consecutive
// registers (N = 2,3,4), selected by the {r, op0} pair.
defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">;
defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">;
defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">;
3981
// Selection patterns for ST1 (single lane): store one element extracted from
// either a 64-bit (D) or a 128-bit (Q) vector register.
multiclass ST1LN_patterns<ValueType HalfVTy, ValueType FullVTy, ValueType EltTy,
                          Operand HalfImm, Operand FullImm, PatFrag Store,
                          Instruction Inst> {
  // 64-bit source: widen the D register into the Q register class the
  // instruction expects before storing the selected lane.
  def : Pat<(Store (EltTy (vector_extract (HalfVTy VPR64:$Rt), HalfImm:$lane)),
                   GPR64xsp:$Rn),
            (Inst GPR64xsp:$Rn,
                  (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64),
                  HalfImm:$lane)>;

  // 128-bit source: the register is already in the required class.
  def : Pat<(Store (EltTy (vector_extract (FullVTy VPR128:$Rt), FullImm:$lane)),
                   GPR64xsp:$Rn),
            (Inst GPR64xsp:$Rn, VPR128:$Rt, FullImm:$lane)>;
}
3995
// Match all ST1LN instructions
// i8/i16 lanes are extracted as i32, so a truncating store is required.
defm : ST1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
                      truncstorei8, ST1LN_B>;

defm : ST1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
                      truncstorei16, ST1LN_H>;

// 32-bit lanes (integer and float) store the full extracted value.
defm : ST1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
                      store, ST1LN_S>;
defm : ST1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
                      store, ST1LN_S>;

// 64-bit lanes; the v1 types have only lane 0 (neon_uimm0_bare).
defm : ST1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
                      store, ST1LN_D>;
defm : ST1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
                      store, ST1LN_D>;
4012
4013 // End of vector load/store single N-element structure (class SIMD lsone).
4014
4015
4016 // The following are post-index load/store single N-element instructions
4017 // (class SIMD lsone-post)
4018
// Post-indexed "load one structure and replicate to all lanes" (LDnR).
// Emits two variants: _fixed post-increments the base by an immediate
// (Rm hard-wired to 0b11111), _register post-increments by register $Rm.
// The updated base is returned in $wb, tied back to $Rn.
multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
                            RegisterOperand VecList, Operand ImmTy,
                            string asmop> {
  let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn",
  DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
    def _fixed : NeonI_LdOne_Dup_Post<q, r, opcode, size,
                      (outs VecList:$Rt, GPR64xsp:$wb),
                      (ins GPR64xsp:$Rn, ImmTy:$amt),
                      asmop # "\t$Rt, [$Rn], $amt",
                      [],
                      NoItinerary> {
                        // Rm == 0b11111 selects the immediate post-index form.
                        let Rm = 0b11111;
                      }

    def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size,
                      (outs VecList:$Rt, GPR64xsp:$wb),
                      (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
                      asmop # "\t$Rt, [$Rn], $Rm",
                      [],
                      NoItinerary>;
  }
}
4041
// All arrangements of a post-indexed LDnR: four 64-bit (q = 0) and four
// 128-bit (q = 1) vector layouts, each with its own post-increment operand.
multiclass LDWB_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop,
                         Operand uimm_b, Operand uimm_h,
                         Operand uimm_s, Operand uimm_d> {
  // 64-bit arrangements (q = 0).
  defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00,
                              !cast<RegisterOperand>(List # "8B_operand"),
                              uimm_b, asmop>;

  defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01,
                              !cast<RegisterOperand>(List # "4H_operand"),
                              uimm_h, asmop>;

  defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10,
                              !cast<RegisterOperand>(List # "2S_operand"),
                              uimm_s, asmop>;

  defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11,
                              !cast<RegisterOperand>(List # "1D_operand"),
                              uimm_d, asmop>;

  // 128-bit arrangements (q = 1).
  defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00,
                               !cast<RegisterOperand>(List # "16B_operand"),
                               uimm_b, asmop>;

  defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01,
                              !cast<RegisterOperand>(List # "8H_operand"),
                              uimm_h, asmop>;

  defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10,
                              !cast<RegisterOperand>(List # "4S_operand"),
                              uimm_s, asmop>;

  defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11,
                              !cast<RegisterOperand>(List # "2D_operand"),
                              uimm_d, asmop>;
}
4077
// Post-index load single 1-element structure to all lanes of 1 register.
// The fixed post-increment equals the total bytes transferred:
// (element size in bytes) * N registers.
defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1,
                             uimm_exact2, uimm_exact4, uimm_exact8>;

// Post-index load single N-element structure to all lanes of N consecutive
// registers (N = 2,3,4)
defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2,
                             uimm_exact4, uimm_exact8, uimm_exact16>;
defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3,
                             uimm_exact6, uimm_exact12, uimm_exact24>;
defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4,
                             uimm_exact8, uimm_exact16, uimm_exact32>;
4090
// Post-indexed "load one structure to one lane" formats.  The untouched
// lanes are preserved, so the vector list appears both as input ($src) and
// output ($Rt), tied together; the base register is tied to the writeback.
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
    Constraints = "$Rn = $wb, $Rt = $src",
    DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
  // Fixed form: post-increment by immediate, Rm hard-wired to 0b11111.
  class LDN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                                Operand ImmTy, Operand ImmOp, string asmop>
      : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
                                (outs VList:$Rt, GPR64xsp:$wb),
                                (ins GPR64xsp:$Rn, ImmTy:$amt,
                                    VList:$src, ImmOp:$lane),
                                asmop # "\t$Rt[$lane], [$Rn], $amt",
                                [],
                                NoItinerary> {
    let Rm = 0b11111;
  }

  // Register form: post-increment by $Rm.
  class LDN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                                 Operand ImmTy, Operand ImmOp, string asmop>
      : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
                                (outs VList:$Rt, GPR64xsp:$wb),
                                (ins GPR64xsp:$Rn, GPR64noxzr:$Rm,
                                    VList:$src, ImmOp:$lane),
                                asmop # "\t$Rt[$lane], [$Rn], $Rm",
                                [],
                                NoItinerary>;
}
4116
// Fixed- and register-post-index LDn (single lane) for each element size.
// Lane-index encoding mirrors STN_Lane_BHSD: top bit in Inst{30}, remaining
// bits at the high end of Inst{12-10}, zero-padded per element size.
multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
                           Operand uimm_b, Operand uimm_h,
                           Operand uimm_s, Operand uimm_d> {
  // Byte lanes: 4-bit index.
  def _B_fixed : LDN_WBFx_Lane<r, 0b00, op0,
                               !cast<RegisterOperand>(List # "B_operand"),
                               uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _B_register : LDN_WBReg_Lane<r, 0b00, op0,
                                   !cast<RegisterOperand>(List # "B_operand"),
                                   uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  // Halfword lanes: 3-bit index.
  def _H_fixed : LDN_WBFx_Lane<r, 0b01, op0,
                               !cast<RegisterOperand>(List # "H_operand"),
                               uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _H_register : LDN_WBReg_Lane<r, 0b01, op0,
                                   !cast<RegisterOperand>(List # "H_operand"),
                                   uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  // Word lanes: 2-bit index.
  def _S_fixed : LDN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "S_operand"),
                               uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _S_register : LDN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "S_operand"),
                                   uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  // Doubleword lanes: 1-bit index; Inst{12-10} fixed at 0b001.
  def _D_fixed : LDN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "D_operand"),
                               uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }

  def _D_register : LDN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "D_operand"),
                                   uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}
4176
// Post-index load single 1-element structure to one lane of 1 register.
// The fixed increment equals the bytes transferred (element size * N).
defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1,
                                uimm_exact2, uimm_exact4, uimm_exact8>;

// Post-index load single N-element structure to one lane of N consecutive
// registers
// (N = 2,3,4)
defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2,
                                uimm_exact4, uimm_exact8, uimm_exact16>;
defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3,
                                uimm_exact6, uimm_exact12, uimm_exact24>;
defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4,
                                uimm_exact8, uimm_exact16, uimm_exact32>;
4190
// Post-indexed "store one structure from one lane" formats.  Only the
// writeback base is an output; the vector list is a source ($Rt).
let mayStore = 1, neverHasSideEffects = 1,
    hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb",
    DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
  // Fixed form: post-increment by immediate, Rm hard-wired to 0b11111.
  class STN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                      Operand ImmTy, Operand ImmOp, string asmop>
      : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
                                (outs GPR64xsp:$wb),
                                (ins GPR64xsp:$Rn, ImmTy:$amt,
                                    VList:$Rt, ImmOp:$lane),
                                asmop # "\t$Rt[$lane], [$Rn], $amt",
                                [],
                                NoItinerary> {
    let Rm = 0b11111;
  }

  // Register form: post-increment by $Rm.
  class STN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                       Operand ImmTy, Operand ImmOp, string asmop>
      : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
                                (outs GPR64xsp:$wb),
                                (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt,
                                    ImmOp:$lane),
                                asmop # "\t$Rt[$lane], [$Rn], $Rm",
                                [],
                                NoItinerary>;
}
4216
// Fixed- and register-post-index STn (single lane) for each element size.
// Lane-index encoding is identical to the load-lane multiclass above.
multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
                           Operand uimm_b, Operand uimm_h,
                           Operand uimm_s, Operand uimm_d> {
  // Byte lanes: 4-bit index.
  def _B_fixed : STN_WBFx_Lane<r, 0b00, op0,
                               !cast<RegisterOperand>(List # "B_operand"),
                               uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _B_register : STN_WBReg_Lane<r, 0b00, op0,
                                   !cast<RegisterOperand>(List # "B_operand"),
                                   uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  // Halfword lanes: 3-bit index.
  def _H_fixed : STN_WBFx_Lane<r, 0b01, op0,
                               !cast<RegisterOperand>(List # "H_operand"),
                               uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _H_register : STN_WBReg_Lane<r, 0b01, op0,
                                   !cast<RegisterOperand>(List # "H_operand"),
                                   uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  // Word lanes: 2-bit index.
  def _S_fixed : STN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "S_operand"),
                               uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _S_register : STN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "S_operand"),
                                   uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  // Doubleword lanes: 1-bit index; Inst{12-10} fixed at 0b001.
  def _D_fixed : STN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "D_operand"),
                               uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }

  def _D_register : STN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "D_operand"),
                                   uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}
4276
// Post-index store single 1-element structure from one lane of 1 register.
// The fixed increment equals the bytes transferred (element size * N).
defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1,
                                uimm_exact2, uimm_exact4, uimm_exact8>;

// Post-index store single N-element structure from one lane of N consecutive
// registers (N = 2,3,4)
defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2,
                                uimm_exact4, uimm_exact8, uimm_exact16>;
defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3,
                                uimm_exact6, uimm_exact12, uimm_exact24>;
defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4,
                                uimm_exact8, uimm_exact16, uimm_exact32>;
4289
4290 // End of post-index load/store single N-element instructions
4291 // (class SIMD lsone-post)
4292
4293 // Neon Scalar instructions implementation
4294 // Scalar Three Same
4295
// Scalar three-same format: $Rd = op($Rn, $Rm), all three in the same
// floating-point/SIMD register class.
class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
                             RegisterClass FPRC>
  : NeonI_Scalar3Same<u, size, opcode,
                      (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
                      !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                      [],
                      NoItinerary>;

// D-register-only variant (size = 0b11).
class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;

// H and S register variants; Commutable marks ops whose operands may swap.
multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
                                      bit Commutable = 0> {
  let isCommutable = Commutable in {
    def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
    def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
  }
}

// S and D variants: the low size bit selects S (0) vs D (1); size_high is
// fixed per instruction.
multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
                                      string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
    def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
  }
}

// All four element sizes (B/H/S/D).
multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
                                        string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
    def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
    def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
    def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
  }
}
4332
// Select a two-operand v1i64 scalar node onto its D-register instruction.
multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator OpNode,
                                            Instruction DInst> {
  def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
            (DInst FPR64:$Rn, FPR64:$Rm)>;
}
4338
// B/H/S patterns for a two-operand scalar node; the D pattern comes from
// the inherited Neon_Scalar3Same_D_size_patterns.
multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
                                               Instruction INSTB,
                                               Instruction INSTH,
                                               Instruction INSTS,
                                               Instruction INSTD>
  : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
  def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
           (INSTB FPR8:$Rn, FPR8:$Rm)>;
  def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
           (INSTH FPR16:$Rn, FPR16:$Rm)>;
  def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
           (INSTS FPR32:$Rn, FPR32:$Rm)>;
}
4352
// Select a two-operand H/S scalar node onto the FPR16/FPR32 instruction.
multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator OpNode,
                                             Instruction HInst,
                                             Instruction SInst> {
  def : Pat<(v1i16 (OpNode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (HInst FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i32 (OpNode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (SInst FPR32:$Rn, FPR32:$Rm)>;
}
4361
// S/D patterns with caller-supplied operand and result types, so the same
// multiclass covers float and vector-of-one forms.
multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
                                             ValueType SResTy, ValueType STy,
                                             Instruction INSTS, ValueType DResTy,
                                             ValueType DTy, Instruction INSTD> {
  def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))),
            (INSTD FPR64:$Rn, FPR64:$Rm)>;
}
4371
// Match a NEON compare of two v1f64 operands under condition CC onto the
// given D-register compare instruction.
class Neon_Scalar3Same_cmp_V1_D_size_patterns<CondCode CC,
                                              Instruction INSTD>
  : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)),
        (INSTD FPR64:$Rn, FPR64:$Rm)>;
4376
4377 // Scalar Three Different
4378
// Scalar three-different format: the destination register class (FPRCD) is
// wider than the source class (FPRCS).
class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
                             RegisterClass FPRCD, RegisterClass FPRCS>
  : NeonI_Scalar3Diff<u, size, opcode,
                      (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
                      !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                      [],
                      NoItinerary>;

// Widening forms: H x H -> S and S x S -> D.
multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
  def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
  def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
}

// Widening multiply-accumulate forms: the accumulator $Src is tied to $Rd.
multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
  let Constraints = "$Src = $Rd" in {
    def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
                       (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                       [],
                       NoItinerary>;
    def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
                       (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                       [],
                       NoItinerary>;
  }
}
4406
// Patterns for the widening forms: result type is one step wider than the
// operand type (v1i16 -> v1i32, v1i32 -> v1i64).
multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTH,
                                             Instruction INSTS> {
  def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (INSTH FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
}

// Patterns for the widening multiply-accumulate forms: the first operand is
// the wide accumulator, the remaining two are the narrow multiplicands.
multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTH,
                                             Instruction INSTS> {
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
}
4424
4425 // Scalar Two Registers Miscellaneous
4426
// Scalar two-register miscellaneous format: one source ($Rn, class FPRCS),
// one destination ($Rd, class FPRCD); the classes may differ for
// narrowing/widening conversions.
class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
                             RegisterClass FPRCD, RegisterClass FPRCS>
  : NeonI_Scalar2SameMisc<u, size, opcode,
                          (outs FPRCD:$Rd), (ins FPRCS:$Rn),
                          !strconcat(asmop, "\t$Rd, $Rn"),
                          [],
                          NoItinerary>;

// S and D variants; the low size bit selects S (0) vs D (1).
multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
                                         string asmop> {
  def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
                                      FPR32>;
  def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
                                      FPR64>;
}

// D-register-only variant (size = 0b11).
multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
  def dd : NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
}

// All four sizes; the D variant is inherited from the multiclass above.
multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
  def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
  def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
  def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
}

// Narrowing D -> S conversion (fcvtxn-style): FPR64 source, FPR32 result.
class NeonI_Scalar2SameMisc_fcvtxn_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR32, FPR64>;

// Narrowing forms: result class is one step narrower than the source.
multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
                                                 string asmop> {
  def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
  def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
  def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
}

// Accumulating format: an extra tied input $Src feeds the accumulation.
class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
                                       string asmop, RegisterClass FPRC>
  : NeonI_Scalar2SameMisc<u, size, opcode,
                          (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
                          !strconcat(asmop, "\t$Rd, $Rn"),
                          [],
                          NoItinerary>;

// All four sizes of the accumulating form; $Src is tied to $Rd.
multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
                                                 string asmop> {

  let Constraints = "$Src = $Rd" in {
    def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
    def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
    def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
    def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
  }
}
4482
// f64 -> f32 narrowing conversion pattern (fcvtxn-style).
class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode,
                                                  Instruction INSTD>
  : Pat<(f32 (opnode (f64 FPR64:$Rn))),
        (INSTD FPR64:$Rn)>;

// Float -> integer-vector-of-one conversions (f32 -> v1i32, f64 -> v1i64).
multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode,
                                                      Instruction INSTS,
                                                      Instruction INSTD> {
  def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
  def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;
}

// v1f64 -> v1i64 conversion, D register only.
class Neon_Scalar2SameMisc_vcvt_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD>
  : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;

// Integer-vector-of-one -> float conversions (v1i32 -> f32, v1i64 -> f64).
multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator opnode,
                                                     Instruction INSTS,
                                                     Instruction INSTD> {
  def : Pat<(f32 (opnode (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
  def : Pat<(f64 (opnode (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;
}
4510
// Select a unary float scalar node onto the S- or D-register instruction.
multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator OpNode,
                                                 Instruction SInst,
                                                 Instruction DInst> {
  def : Pat<(f32 (OpNode (f32 FPR32:$Rn))),
            (SInst FPR32:$Rn)>;
  def : Pat<(f64 (OpNode (f64 FPR64:$Rn))),
            (DInst FPR64:$Rn)>;
}
4519
// Unary v1f64 -> v1f64 pattern, D register only.
class Neon_Scalar2SameMisc_V1_D_size_patterns<SDPatternOperator opnode,
                                              Instruction INSTD>
  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
        (INSTD FPR64:$Rn)>;

// Integer compare-against-zero format: the zero appears as the literal
// immediate operand $Imm (neon_uimm0, i.e. always 0) in the assembly.
class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar2SameMisc<u, 0b11, opcode,
                          (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                          [],
                          NoItinerary>;

// FP compare-against-zero formats: the zero is the fpzz32 immediate (#0.0).
multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
                                              string asmop> {
  def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
                           (outs FPR32:$Rd), (ins FPR32:$Rn, fpzz32:$FPImm),
                           !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
                           [],
                           NoItinerary>;
  def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
                           (outs FPR64:$Rd), (ins FPR64:$Rn, fpzz32:$FPImm),
                           !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
                           [],
                           NoItinerary>;
}

// Match "compare v1i64 against an all-zero vector" onto the immediate form.
class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
                       (v1i64 (bitconvert (v8i8 Neon_AllZero))))),
        (INSTD FPR64:$Rn, 0)>;

// Match the generic Neon_cmpz node (explicit zero immediate + CondCode).
class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<CondCode CC,
                                                   Instruction INSTD>
  : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn),
                          (i32 neon_uimm0:$Imm), CC)),
        (INSTD FPR64:$Rn, neon_uimm0:$Imm)>;
4557
// FP compare-against-zero patterns: intrinsic forms for f32/f64 plus a
// Neon_cmpz form for v1f64 under condition CC; all keep the #0.0 immediate.
multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
                                                      CondCode CC,
                                                      Instruction INSTS,
                                                      Instruction INSTD> {
  def : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (f32 fpzz32:$FPImm))),
            (INSTS FPR32:$Rn, fpzz32:$FPImm)>;
  def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f32 fpzz32:$FPImm))),
            (INSTD FPR64:$Rn, fpzz32:$FPImm)>;
  def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpzz32:$FPImm), CC)),
            (INSTD FPR64:$Rn, fpzz32:$FPImm)>;
}
4569
// Unary v1i64 pattern, D register only.
multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD> {
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;
}

// Unary B/H/S patterns; the D pattern is inherited from the multiclass above.
multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
                                                   Instruction INSTB,
                                                   Instruction INSTH,
                                                   Instruction INSTS,
                                                   Instruction INSTD>
  : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
  def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
            (INSTB FPR8:$Rn)>;
  def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
            (INSTH FPR16:$Rn)>;
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
}

// Narrowing patterns: the result type is one step narrower than the source.
multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
                                                       SDPatternOperator opnode,
                                                       Instruction INSTH,
                                                       Instruction INSTS,
                                                       Instruction INSTD> {
  def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
            (INSTH FPR16:$Rn)>;
  def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
  def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;

}

// Accumulating patterns: first operand is the accumulator ($Src).
multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
                                                       SDPatternOperator opnode,
                                                       Instruction INSTB,
                                                       Instruction INSTH,
                                                       Instruction INSTS,
                                                       Instruction INSTD> {
  def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
            (INSTB FPR8:$Src, FPR8:$Rn)>;
  def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
            (INSTH FPR16:$Src, FPR16:$Rn)>;
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Src, FPR32:$Rn)>;
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Src, FPR64:$Rn)>;
}
4619
4620 // Scalar Shift By Immediate
4621
// Base class for a scalar shift-by-immediate instruction where source and
// destination use the same register class. FPRC selects the element size and
// ImmTy constrains the legal shift amount range; no ISel patterns attached.
class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
                                RegisterClass FPRC, Operand ImmTy>
  : NeonI_ScalarShiftImm<u, opcode,
                         (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
                         !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                         [], NoItinerary>;
4628
// Scalar shift right by immediate, D-size (64-bit element) only. The 6-bit
// immediate occupies Inst{21-16} and Inst{22} is set to mark the 64-bit
// element size (immh:immb = 1xxxxxx).
multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
                                            string asmop> {
  def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
  }
}
4637
// Scalar shift right by immediate in all four element sizes. Inherits the
// D-size "ddi" variant from NeonI_ScalarShiftRightImm_D_size and adds the
// B/H/S variants; the leading ones in immh select the element size and the
// remaining immh:immb bits hold the shift amount.
multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
                                               string asmop>
  : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
  def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
    bits<3> Imm;
    let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
    let Inst{18-16} = Imm;
  }
  def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
    bits<4> Imm;
    let Inst{22-20} = 0b001; // immh:immb = 001xxxx
    let Inst{19-16} = Imm;
  }
  def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
    bits<5> Imm;
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
    let Inst{20-16} = Imm;
  }
}
4657
// Scalar shift left by immediate, D-size (64-bit element) only; same field
// layout as the right-shift variant but uses the shl_imm64 operand range.
multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
                                            string asmop> {
  def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
  }
}
4666
// Scalar shift left by immediate in all four element sizes. Inherits the
// D-size "ddi" variant and adds B/H/S variants using shl_imm* operands.
multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
                                              string asmop>
  : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
  def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
    bits<3> Imm;
    let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
    let Inst{18-16} = Imm;
  }
  def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
    bits<4> Imm;
    let Inst{22-20} = 0b001; // immh:immb = 001xxxx
    let Inst{19-16} = Imm;
  }
  def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
    bits<5> Imm;
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
    let Inst{20-16} = Imm;
  }
}
4686
// Accumulating scalar shift right by immediate (D-size only). $Src is the
// accumulator; the tied-operand constraint forces it into the same register
// as the destination $Rd.
class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_ScalarShiftImm<u, opcode,
                         (outs FPR64:$Rd),
                         (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
                         !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                         [], NoItinerary> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
    let Constraints = "$Src = $Rd";
}
4698
// Accumulating scalar shift left by immediate (D-size only); same tied
// $Src = $Rd layout as the right-shift accumulate class, with a shl_imm64
// immediate operand.
class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_ScalarShiftImm<u, opcode,
                         (outs FPR64:$Rd),
                         (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
                         !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                         [], NoItinerary> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
    let Constraints = "$Src = $Rd";
}
4710
// Base class for narrowing scalar shift by immediate: destination (FPRCD) and
// source (FPRCS) use different register classes since the result element is
// narrower than the source element.
class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
                                       RegisterClass FPRCD, RegisterClass FPRCS,
                                       Operand ImmTy>
  : NeonI_ScalarShiftImm<u, opcode,
                         (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
                         !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                         [], NoItinerary>;
4718
// Narrowing scalar shift right by immediate: b<-h, h<-s, s<-d variants. The
// shift-amount operand is sized by the *destination* element width
// (e.g. shr_imm8 for the h-to-b variant).
multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
                                                string asmop> {
  def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
                                             shr_imm8> {
    bits<3> Imm;
    let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
    let Inst{18-16} = Imm;
  }
  def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
                                             shr_imm16> {
    bits<4> Imm;
    let Inst{22-20} = 0b001; // immh:immb = 001xxxx
    let Inst{19-16} = Imm;
  }
  def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
                                             shr_imm32> {
    bits<5> Imm;
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
    let Inst{20-16} = Imm;
  }
}
4740
// Fixed-point convert instructions with an immediate (number of fractional
// bits), in S and D sizes. Reuses the shift-by-immediate encoding; the
// immediate operand uses the shr_imm* ranges.
multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> {
  def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
    bits<5> Imm;
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
    let Inst{20-16} = Imm;
  }
  def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
  }
}
4753
// Pattern: 64-bit scalar shift right by immediate, where the shift amount is
// an i32 immediate node operand.
multiclass Neon_ScalarShiftRImm_D_size_patterns<SDPatternOperator opnode,
                                               Instruction INSTD> {
  def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
                (INSTD FPR64:$Rn, imm:$Imm)>;
}
4759
// Pattern: 64-bit scalar shift left by immediate (i32 immediate operand).
multiclass Neon_ScalarShiftLImm_D_size_patterns<SDPatternOperator opnode,
                                               Instruction INSTD> {
  def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))),
                (INSTD FPR64:$Rn, imm:$Imm)>;
}
4765
// Pattern: generic shift-left node whose shift amount arrives as a splatted
// (Neon_vdup) immediate vector; folded to the immediate-form instruction.
class Neon_ScalarShiftLImm_V1_D_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
            (v1i64 (Neon_vdup (i32 shl_imm64:$Imm))))),
        (INSTD FPR64:$Rn, imm:$Imm)>;
4771
// Pattern: generic shift-right node with a splatted (Neon_vdup) immediate
// shift amount; folded to the immediate-form instruction.
class Neon_ScalarShiftRImm_V1_D_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
            (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))),
        (INSTD FPR64:$Rn, imm:$Imm)>;
4777
// Shift-left-by-immediate patterns for all four element sizes; the D-size
// pattern is inherited from Neon_ScalarShiftLImm_D_size_patterns.
multiclass Neon_ScalarShiftLImm_BHSD_size_patterns<SDPatternOperator opnode,
                                                   Instruction INSTB,
                                                   Instruction INSTH,
                                                   Instruction INSTS,
                                                   Instruction INSTD>
  : Neon_ScalarShiftLImm_D_size_patterns<opnode, INSTD> {
  def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))),
                (INSTB FPR8:$Rn, imm:$Imm)>;
  def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))),
                (INSTH FPR16:$Rn, imm:$Imm)>;
  def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))),
                (INSTS FPR32:$Rn, imm:$Imm)>;
}
4791
// Accumulating shift-left-by-immediate pattern (D-size); $Src is the
// accumulator passed as the instruction's tied first operand.
class Neon_ScalarShiftLImm_accum_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
            (i32 shl_imm64:$Imm))),
        (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4797
// Accumulating shift-right-by-immediate pattern (D-size); mirrors the
// shift-left accumulate pattern with a shr_imm64 operand.
class Neon_ScalarShiftRImm_accum_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
            (i32 shr_imm64:$Imm))),
        (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4803
// Narrowing shift-right-by-immediate patterns. Note the immediate operand is
// sized by the *source* element width here (shr_imm16 for the h->b case),
// unlike the instruction definitions which use the destination width.
multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
                                                       SDPatternOperator opnode,
                                                       Instruction INSTH,
                                                       Instruction INSTS,
                                                       Instruction INSTD> {
  def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))),
                (INSTH FPR16:$Rn, imm:$Imm)>;
  def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
                (INSTS FPR32:$Rn, imm:$Imm)>;
  def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
                (INSTD FPR64:$Rn, imm:$Imm)>;
}
4816
// Patterns for fixed-point-to-floating-point converts with an immediate
// fractional-bits operand: v1i32 -> f32 and v1i64 -> f64.
multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator opnode,
                                                      Instruction INSTS,
                                                      Instruction INSTD> {
  def ssi : Pat<(f32 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
                (INSTS FPR32:$Rn, imm:$Imm)>;
  def ddi : Pat<(f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
                (INSTD FPR64:$Rn, imm:$Imm)>;
}
4825
// Patterns for floating-point-to-fixed-point converts with an immediate
// fractional-bits operand: f32 -> v1i32 and f64 -> v1i64.
multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator opnode,
                                                      Instruction INSTS,
                                                      Instruction INSTD> {
  def ssi : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
                (INSTS FPR32:$Rn, imm:$Imm)>;
  def ddi : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
                (INSTD FPR64:$Rn, imm:$Imm)>;
}
4834
// Instantiations of the scalar shift-right-by-immediate instructions and
// their intrinsic-matching patterns.

// Scalar Signed Shift Right (Immediate)
defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
// Pattern to match llvm.arm.* intrinsic.
def : Neon_ScalarShiftRImm_V1_D_size_patterns<sra, SSHRddi>;

// Scalar Unsigned Shift Right (Immediate)
defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
// Pattern to match llvm.arm.* intrinsic.
def : Neon_ScalarShiftRImm_V1_D_size_patterns<srl, USHRddi>;

// Scalar Signed Rounding Shift Right (Immediate)
defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;

// Scalar Unsigned Rounding Shift Right (Immediate)
defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;

// Scalar Signed Shift Right and Accumulate (Immediate)
def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
          <int_aarch64_neon_vsrads_n, SSRA>;

// Scalar Unsigned Shift Right and Accumulate (Immediate)
def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
          <int_aarch64_neon_vsradu_n, USRA>;

// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
          <int_aarch64_neon_vrsrads_n, SRSRA>;

// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
          <int_aarch64_neon_vrsradu_n, URSRA>;
4874
// Instantiations of the scalar shift-left-by-immediate instructions
// (plain, saturating, and insert forms) and their patterns.

// Scalar Shift Left (Immediate)
defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
defm : Neon_ScalarShiftLImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
// Pattern to match llvm.arm.* intrinsic.
def : Neon_ScalarShiftLImm_V1_D_size_patterns<shl, SHLddi>;

// Signed Saturating Shift Left (Immediate)
defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
                                               SQSHLbbi, SQSHLhhi,
                                               SQSHLssi, SQSHLddi>;
// Pattern to match llvm.arm.* intrinsic.
defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;

// Unsigned Saturating Shift Left (Immediate)
defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
                                               UQSHLbbi, UQSHLhhi,
                                               UQSHLssi, UQSHLddi>;
// Pattern to match llvm.arm.* intrinsic.
defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;

// Signed Saturating Shift Left Unsigned (Immediate)
defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
                                               SQSHLUbbi, SQSHLUhhi,
                                               SQSHLUssi, SQSHLUddi>;

// Shift Right And Insert (Immediate)
def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
          <int_aarch64_neon_vsri, SRI>;

// Shift Left And Insert (Immediate)
def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
def : Neon_ScalarShiftLImm_accum_D_size_patterns
          <int_aarch64_neon_vsli, SLI>;
4912
// Instantiations of the narrowing saturating shift-right-by-immediate
// instructions and their intrinsic-matching patterns.

// Signed Saturating Shift Right Narrow (Immediate)
defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
                                                    SQSHRNbhi, SQSHRNhsi,
                                                    SQSHRNsdi>;

// Unsigned Saturating Shift Right Narrow (Immediate)
defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
                                                    UQSHRNbhi, UQSHRNhsi,
                                                    UQSHRNsdi>;

// Signed Saturating Rounded Shift Right Narrow (Immediate)
defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
                                                    SQRSHRNbhi, SQRSHRNhsi,
                                                    SQRSHRNsdi>;

// Unsigned Saturating Rounded Shift Right Narrow (Immediate)
defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
                                                    UQRSHRNbhi, UQRSHRNhsi,
                                                    UQRSHRNsdi>;

// Signed Saturating Shift Right Unsigned Narrow (Immediate)
defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
                                                    SQSHRUNbhi, SQSHRUNhsi,
                                                    SQSHRUNsdi>;

// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
                                                    SQRSHRUNbhi, SQRSHRUNhsi,
                                                    SQRSHRUNsdi>;
4948
// Instantiations of the fixed-point convert-with-immediate instructions
// (integer <-> floating point with a fractional-bits immediate).

// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxs2fp_n,
                                                  SCVTF_Nssi, SCVTF_Nddi>;

// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxu2fp_n,
                                                  UCVTF_Nssi, UCVTF_Nddi>;

// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxs_n,
                                                  FCVTZS_Nssi, FCVTZS_Nddi>;

// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxu_n,
                                                  FCVTZU_Nssi, FCVTZU_Nddi>;
4968
// Patterns For Convert Instructions Between v1f64 and v1i64
// These map the llvm.arm.* fixed-point convert intrinsics (which use v1f64
// operands/results) onto the D-size immediate convert instructions above.
class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
                                             Instruction INST>
    : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
          (INST FPR64:$Rn, imm:$Imm)>;

class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
                                             Instruction INST>
    : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
          (INST FPR64:$Rn, imm:$Imm)>;

def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp,
                                             SCVTF_Nddi>;

def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp,
                                             UCVTF_Nddi>;

def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs,
                                             FCVTZS_Nddi>;

def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu,
                                             FCVTZU_Nddi>;
4991
// Scalar Integer Add
let isCommutable = 1 in {
def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
}

// Scalar Integer Sub
def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;

// Pattern for Scalar Integer Add and Sub with D register only
defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;

// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
// Signed and unsigned intrinsic variants lower to the same instruction since
// two's-complement add/sub are sign-agnostic.
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
5009
// Scalar Integer Saturating Add (Signed, Unsigned)
defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;

// Scalar Integer Saturating Sub (Signed, Unsigned)
defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;


// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Saturating Add, Sub  (Signed, Unsigned)
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
                                           SQADDhhh, SQADDsss, SQADDddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb,
                                           UQADDhhh, UQADDsss, UQADDddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb,
                                           SQSUBhhh, SQSUBsss, SQSUBddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
                                           UQSUBhhh, UQSUBsss, UQSUBddd>;
5029
// Scalar Integer Saturating Doubling Multiply Half High
defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;

// Scalar Integer Saturating Rounding Doubling Multiply Half High
defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Saturating Doubling Multiply Half High and
// Scalar Integer Saturating Rounding Doubling Multiply Half High
defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
                                                               SQDMULHsss>;
defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
                                                                SQRDMULHsss>;
5043
// Scalar Floating-point Multiply Extended
defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;

// Scalar Floating-point Reciprocal Step
defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrecps, f32, f32,
                                         FRECPSsss, f64, f64, FRECPSddd>;
// Additional v1f64 pattern for the llvm.arm.* intrinsic form.
def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FRECPSddd FPR64:$Rn, FPR64:$Rm)>;

// Scalar Floating-point Reciprocal Square Root Step
defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrsqrts, f32, f32,
                                         FRSQRTSsss, f64, f64, FRSQRTSddd>;
def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>;
// Generic fsqrt node on v1f64 maps to the scalar FSQRT instruction.
def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
5061
// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Floating-point Multiply Extended.
// Uses plain f32/f64 operand/result types (not v1 vector types).
multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
                                                  Instruction INSTS,
                                                  Instruction INSTD> {
  def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
            (INSTD FPR64:$Rn, FPR64:$Rm)>;
}

defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx,
                                              FMULXsss, FMULXddd>;
// Additional v1f64 pattern for the same intrinsic.
def : Pat<(v1f64 (int_aarch64_neon_vmulx (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMULXddd FPR64:$Rn, FPR64:$Rm)>;
5077
// Scalar Integer Shift Left (Signed, Unsigned) — register-operand form.
def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
5091
// Scalar Integer Saturating Shift Left (Signed, Unsigned) — register form.
defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Saturating Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
                                           SQSHLhhh, SQSHLsss, SQSHLddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
                                           UQSHLhhh, UQSHLsss, UQSHLddd>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Saturating Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
5107
// Scalar Integer Rounding Shift Left (Signed, Unsigned) — register form.
def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
5121
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) —
// register form, all four element sizes.
defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
                                           SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
                                           UQRSHLhhh, UQRSHLsss, UQRSHLddd>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
5137
// Signed Saturating Doubling Multiply-Add Long
defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
                                            SQDMLALshh, SQDMLALdss>;

// Signed Saturating Doubling Multiply-Subtract Long
defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
                                            SQDMLSLshh, SQDMLSLdss>;

// Signed Saturating Doubling Multiply Long
defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
                                         SQDMULLshh, SQDMULLdss>;
5152
// Scalar Signed Integer Convert To Floating-point
defm SCVTF  : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fps,
                                                 SCVTFss, SCVTFdd>;

// Scalar Unsigned Integer Convert To Floating-point
defm UCVTF  : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fpu,
                                                 UCVTFss, UCVTFdd>;
5162
// Scalar Floating-point Converts
// Each FCVT* rounding variant gets: the instruction defm, patterns for the
// llvm.aarch64.* intrinsic (S and D sizes), and a D-size pattern for the
// corresponding llvm.arm.* intrinsic.
def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">;
def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,
                                                  FCVTXN>;

defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
                                                  FCVTNSss, FCVTNSdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtns, FCVTNSdd>;

defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
                                                  FCVTNUss, FCVTNUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtnu, FCVTNUdd>;

defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
                                                  FCVTMSss, FCVTMSdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtms, FCVTMSdd>;

defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
                                                  FCVTMUss, FCVTMUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtmu, FCVTMUdd>;

defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
                                                  FCVTASss, FCVTASdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtas, FCVTASdd>;

defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
                                                  FCVTAUss, FCVTAUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtau, FCVTAUdd>;

defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
                                                  FCVTPSss, FCVTPSdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtps, FCVTPSdd>;

defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
                                                  FCVTPUss, FCVTPUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtpu, FCVTPUdd>;

defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
                                                  FCVTZSss, FCVTZSdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzs,
                                                FCVTZSdd>;

defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
                                                  FCVTZUss, FCVTZUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzu,
                                                FCVTZUdd>;
5219
// Patterns For Convert Instructions Between v1f64 and v1i64

// int -> FP: select the plain f64 scalar convert for a v1i64 operand.
class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
                                              Instruction INST>
    : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;

// FP -> int: select the f64 scalar convert for a v1f64 operand.
class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
                                              Instruction INST>
    : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>;
def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>;

def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>;
def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>;
5234
// Scalar Floating-point Reciprocal Estimate
// The aarch64 intrinsic covers both S and D sizes; the arm intrinsic is
// additionally matched onto the D-size instruction.
defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpe,
                                             FRECPEss, FRECPEdd>;
def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrecpe,
                                              FRECPEdd>;

// Scalar Floating-point Reciprocal Exponent
defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
                                             FRECPXss, FRECPXdd>;

// Scalar Floating-point Reciprocal Square Root Estimate
defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrsqrte,
                                                 FRSQRTEss, FRSQRTEdd>;
def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrsqrte,
                                              FRSQRTEdd>;
5253
// Scalar Floating-point Round
// Maps a unary v1f64 rounding node onto the corresponding FRINT* d-register
// instruction (generic ISD rounding nodes plus the frintn intrinsic).
class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>;
def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>;
def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>;
def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>;
def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>;
def : Neon_ScalarFloatRound_pattern<frnd, FRINTAdd>;
def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;
5265
// Scalar Integer Compare

// Scalar Compare Bitwise Equal
def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;

// Maps a Neon_cmp node carrying condition code CC onto a two-operand
// d-register compare instruction.
class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
                                              Instruction INSTD,
                                              CondCode CC>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
        (INSTD FPR64:$Rn, FPR64:$Rm)>;

def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;

// Scalar Compare Signed Greater Than Or Equal
def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>;

// Scalar Compare Unsigned Higher Or Same
def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>;

// Scalar Compare Unsigned Higher
def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>;

// Scalar Compare Signed Greater Than
def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>;

// Scalar Compare Bitwise Test Bits
// Note: same opcode as CMEQ (0b10001) but with u = 0.
def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
defm : Neon_Scalar3Same_D_size_patterns<Neon_tst, CMTSTddd>;
5304
// Scalar compares against immediate zero ("ddi" variants). Each gets both an
// intrinsic pattern and a Neon_cmpz condition-code pattern.

// Scalar Compare Bitwise Equal To Zero
def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
                                                CMEQddi>;
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETEQ, CMEQddi>;

// Scalar Compare Signed Greater Than Or Equal To Zero
def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
                                                CMGEddi>;
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGE, CMGEddi>;

// Scalar Compare Signed Greater Than Zero
def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
                                                CMGTddi>;
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGT, CMGTddi>;

// Scalar Compare Signed Less Than Or Equal To Zero
def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
                                                CMLEddi>;
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLE, CMLEddi>;

// Scalar Compare Signed Less Than Zero
def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
                                                CMLTddi>;
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>;
5334
// Scalar Floating-point Compare

// Scalar Floating-point Compare Mask Equal
defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fceq, v1i32, f32,
                                         FCMEQsss, v1i64, f64, FCMEQddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>;

// Scalar Floating-point Compare Mask Equal To Zero
defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fceq, SETEQ,
                                                  FCMEQZssi, FCMEQZddi>;

// Scalar Floating-point Compare Mask Greater Than Or Equal
defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcge, v1i32, f32,
                                         FCMGEsss, v1i64, f64, FCMGEddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>;

// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcge, SETGE,
                                                  FCMGEZssi, FCMGEZddi>;

// Scalar Floating-point Compare Mask Greater Than
defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcgt, v1i32, f32,
                                         FCMGTsss, v1i64, f64, FCMGTddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>;

// Scalar Floating-point Compare Mask Greater Than Zero
defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcgt, SETGT,
                                                  FCMGTZssi, FCMGTZddi>;

// Scalar Floating-point Compare Mask Less Than Or Equal To Zero
defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fclez, SETLE,
                                                  FCMLEZssi, FCMLEZddi>;

// Scalar Floating-point Compare Mask Less Than Zero
defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcltz, SETLT,
                                                  FCMLTZssi, FCMLTZddi>;
5379
// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcage, v1i32, f32,
                                         FACGEsss, v1i64, f64, FACGEddd>;
// Additional pattern for the vcage intrinsic with v1f64 operands.
def : Pat<(v1i64 (int_aarch64_neon_vcage (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FACGEddd FPR64:$Rn, FPR64:$Rm)>;

// Scalar Floating-point Absolute Compare Mask Greater Than
defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcagt, v1i32, f32,
                                         FACGTsss, v1i64, f64, FACGTddd>;
// Additional pattern for the vcagt intrinsic with v1f64 operands.
def : Pat<(v1i64 (int_aarch64_neon_vcagt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FACGTddd FPR64:$Rn, FPR64:$Rm)>;

// Scalar Floating-point Absolute Difference
defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vabd, f32, f32,
                                         FABDsss, f64, f64, FABDddd>;
5398
// Scalar Absolute Value
defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;

// Scalar Signed Saturating Absolute Value
defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
                                               SQABSbb, SQABShh, SQABSss, SQABSdd>;

// Scalar Negate
defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;

// Scalar Signed Saturating Negate
defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
                                               SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;

// Scalar Signed Saturating Accumulated of Unsigned Value
defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
                                                     SUQADDbb, SUQADDhh,
                                                     SUQADDss, SUQADDdd>;

// Scalar Unsigned Saturating Accumulated of Signed Value
defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
                                                     USQADDbb, USQADDhh,
                                                     USQADDss, USQADDdd>;

// v1i64 accumulate variants of the suqadd/usqadd intrinsics; $Src is the
// tied accumulator operand.
def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src),
                                          (v1i64 FPR64:$Rn))),
          (SUQADDdd FPR64:$Src, FPR64:$Rn)>;

def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src),
                                          (v1i64 FPR64:$Rn))),
          (USQADDdd FPR64:$Src, FPR64:$Rn)>;

// ARM-style intrinsic patterns for the v1i64 abs / saturating abs / neg.
def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))),
          (ABSdd FPR64:$Rn)>;

def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))),
          (SQABSdd FPR64:$Rn)>;

def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))),
          (SQNEGdd FPR64:$Rn)>;

// (0 - x) on v1i64 is selected as NEG; the zero is an all-zero v8i8
// bitconverted to v1i64.
def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))),
                      (v1i64 FPR64:$Rn))),
          (NEGdd FPR64:$Rn)>;
5449
// Scalar Signed Saturating Extract Unsigned Narrow
defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
                                                     SQXTUNbh, SQXTUNhs,
                                                     SQXTUNsd>;

// Scalar Signed Saturating Extract Narrow
defm SQXTN  : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
                                                     SQXTNbh, SQXTNhs,
                                                     SQXTNsd>;

// Scalar Unsigned Saturating Extract Narrow
defm UQXTN  : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
                                                     UQXTNbh, UQXTNhs,
                                                     UQXTNsd>;
5467
// Scalar Reduce Pairwise

// D-size only pairwise reduction: FPR64 result from a 128-bit ".2d" source.
multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
                                     string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
                                (outs FPR64:$Rd), (ins VPR128:$Rn),
                                !strconcat(asmop, "\t$Rd, $Rn.2d"),
                                [],
                                NoItinerary>;
  }
}

// Adds the S-size variant (FPR32 result from a 64-bit ".2s" source) on top
// of the D-size one.
multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
                                     string asmop, bit Commutable = 0>
  : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
  let isCommutable = Commutable in {
    def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
                                (outs FPR32:$Rd), (ins VPR64:$Rn),
                                !strconcat(asmop, "\t$Rd, $Rn.2s"),
                                [],
                                NoItinerary>;
  }
}
5492
// Scalar Reduce Addition Pairwise (Integer) with
// Pattern to match llvm.arm.* intrinsic
defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;

// Pattern to match llvm.aarch64.* intrinsic for
// Scalar Reduce Addition Pairwise (Integer)
def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
          (ADDPvv_D_2D VPR128:$Rn)>;
// vaddv on a v2i64 reduces to the same pairwise add.
def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))),
          (ADDPvv_D_2D VPR128:$Rn)>;

// Scalar Reduce Addition Pairwise (Floating Point)
defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;

// Scalar Reduce Maximum Pairwise (Floating Point)
defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;

// Scalar Reduce Minimum Pairwise (Floating Point)
defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;

// Scalar Reduce maxNum Pairwise (Floating Point)
defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;

// Scalar Reduce minNum Pairwise (Floating Point)
defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
5518
// Matches an FP pairwise-reduce intrinsic onto the S (v2f32 -> f32) and
// D (v2f64 -> f64) instruction variants.
multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnode,
                                            Instruction INSTS,
                                            Instruction INSTD> {
  def : Pat<(f32 (opnode (v2f32 VPR64:$Rn))),
            (INSTS VPR64:$Rn)>;
  def : Pat<(f64 (opnode (v2f64 VPR128:$Rn))),
            (INSTD VPR128:$Rn)>;
}

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
                                        FADDPvv_S_2S, FADDPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
                                        FMAXPvv_S_2S, FMAXPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
                                        FMINPvv_S_2S, FMINPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
                                        FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
                                        FMINNMPvv_S_2S, FMINNMPvv_D_2D>;

// v4f32 reduce-add: first a vector FADDP to fold 4 lanes to 2, then the
// scalar pairwise add on the low 64 bits.
def : Pat<(f32 (int_aarch64_neon_vpfadd (v4f32 VPR128:$Rn))),
          (FADDPvv_S_2S (v2f32
               (EXTRACT_SUBREG
                   (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))),
                   sub_64)))>;
5550
// Scalar by element Arithmetic

// Base class for scalar-by-element arithmetic: Rd = op(Rn, MRm[Imm]).
// Subclasses wire Imm/MRm into the encoding bits (h/l/m fields differ per
// element size, so that is done at each instruction def).
class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
                                    string rmlane, bit u, bit szhi, bit szlo,
                                    RegisterClass ResFPR, RegisterClass OpFPR,
                                    RegisterOperand OpVPR, Operand OpImm>
  : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
                             (outs ResFPR:$Rd),
                             (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                             asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
                             [],
                             NoItinerary> {
  bits<3> Imm;
  bits<5> MRm;
}

// Accumulating variant: $src is tied to $Rd (read-modify-write destination).
class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
                                                    string rmlane,
                                                    bit u, bit szhi, bit szlo,
                                                    RegisterClass ResFPR,
                                                    RegisterClass OpFPR,
                                                    RegisterOperand OpVPR,
                                                    Operand OpImm>
  : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
                             (outs ResFPR:$Rd),
                             (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                             asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
                             [],
                             NoItinerary> {
  let Constraints = "$src = $Rd";
  bits<3> Imm;
  bits<5> MRm;
}
5584
// Scalar Floating Point  multiply (scalar, by element)
def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
  0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
// D-size lane index is a single bit; the l bit is fixed to zero.
def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
  0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0;    // l
  let Inst{20-16} = MRm;
}

// Scalar Floating Point  multiply extended (scalar, by element)
def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
  0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
  0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0;    // l
  let Inst{20-16} = MRm;
}
5612
// Matches (op scalar, extract(vector, lane)) onto the by-element instruction.
// Covers a 128-bit source directly and a 64-bit source via SUBREG_TO_REG,
// plus both operand orders since the ops used here are commutative.
multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns<
  SDPatternOperator opnode,
  Instruction INST,
  ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
  ValueType OpNTy, ValueType ExTy, Operand OpNImm> {

  def  : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))),
             (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  // 64-bit vector source: widen to 128 bits before using the instruction.
  def  : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))),
             (ResTy (INST (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // swapped operands
  def  : Pat<(ResTy (opnode
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
               (ResTy FPRC:$Rn))),
             (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (opnode
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
               (ResTy FPRC:$Rn))),
             (ResTy (INST (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;
}

// Patterns for Scalar Floating Point  multiply (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S,
  f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D,
  f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;

// Patterns for Scalar Floating Point  multiply extended (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
  FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare,
  v2f32, v4f32, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
  FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare,
  v1f64, v2f64, neon_uimm0_bare>;
5656
// Scalar Floating Point fused multiply-add (scalar, by element)
def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
  0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
// D-size lane index is a single bit; the l bit is fixed to zero.
def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
  0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0;    // l
  let Inst{20-16} = MRm;
}

// Scalar Floating Point fused multiply-subtract (scalar, by element)
def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
  0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
  0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0;    // l
  let Inst{20-16} = MRm;
}
// We are allowed to match the fma instruction regardless of compile options.
// Selects fma(a, extract(v, lane), acc) onto the tied FMLA instruction and
// fma(a, fneg(extract(v, lane)), acc) onto FMLS, for both 128-bit and
// (widened via SUBREG_TO_REG) 64-bit vector sources, in both operand orders.
multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
  Instruction FMLAI, Instruction FMLSI,
  ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
  ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
  // fmla
  def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // swapped fmla operands
  def  : Pat<(ResTy (fma
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (fma
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // fmls
  def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // swapped fmls operands
  def  : Pat<(ResTy (fma
               (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (fma
               (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;
}
5753
// Scalar Floating Point fused multiply-add and
// multiply-subtract (scalar, by element)
// One instantiation per element size: S (f32 from v4f32 lanes) and
// D (f64 from v2f64 lanes). The previous code instantiated the D-size
// patterns twice with identical arguments; the redundant copy is removed.
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
  f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
  f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
5762
// Scalar Signed saturating doubling multiply long (scalar, by element)
// H-size lanes use a 4-bit register field (MRm{3-0}) with h/l/m index bits;
// S-size lanes use the full 5-bit register field with h/l index bits.
def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}
5790
// Matches op(scalar, scalar_to_vector(extract(vector, lane))) onto a widening
// by-element instruction, in both operand orders (op is commutative here).
multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
  SDPatternOperator opnode,
  Instruction INST,
  ValueType ResTy, RegisterClass FPRC,
  ValueType OpVTy, ValueType OpTy,
  ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def  : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
               (OpVTy (scalar_to_vector
                 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
             (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;

  //swapped operands
  def  : Pat<(ResTy (opnode
               (OpVTy (scalar_to_vector
                 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
                 (OpVTy FPRC:$Rn))),
             (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
}


// Patterns for Scalar Signed saturating doubling
// multiply long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLshv_4H, v1i32, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLshv_8H, v1i32, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLdsv_2S, v1i64, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLdsv_4S, v1i64, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
5826
// Scalar Signed saturating doubling multiply-add long (scalar, by element)
// Accumulating form ($src tied to $Rd via the Constraint_Impl base class).
def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}

// Scalar Signed saturating doubling
// multiply-subtract long (scalar, by element)
def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}
5883
// Select "Ra op (coreop Rn, element)" onto a fused multiply-accumulate
// by-element instruction.  'opnode' is the accumulate step (e.g. vqadds),
// 'coreopnode' the multiply step (e.g. vqdmull).  The element operand is
// a lane extracted from vector VPRC:$MRm and rebuilt as a 1-element vector.
multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
  SDPatternOperator opnode,
  SDPatternOperator coreopnode,
  Instruction INST,
  ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
  ValueType OpTy,
  ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def  : Pat<(ResTy (opnode
               (ResTy ResFPRC:$Ra),
               (ResTy (coreopnode (OpTy FPRC:$Rn),
                 (OpTy (scalar_to_vector
                   (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
             (ResTy (INST (ResTy ResFPRC:$Ra),
               (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;

  // swapped operands
  // The core multiply is commutative, so also match the element as the
  // first multiplicand.
  def  : Pat<(ResTy (opnode
               (ResTy ResFPRC:$Ra),
               (ResTy (coreopnode
                 (OpTy (scalar_to_vector
                   (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
                 (OpTy FPRC:$Rn))))),
             (ResTy (INST (ResTy ResFPRC:$Ra),
               (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
}
5910
// Patterns for Scalar Signed saturating
// doubling multiply-add long (scalar, by element)
// vqadds(Ra, vqdmull(Rn, lane)) --> SQDMLAL (by element).
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
5925
// Patterns for Scalar Signed saturating
// doubling multiply-sub long (scalar, by element)
// vqsubs(Ra, vqdmull(Rn, lane)) --> SQDMLSL (by element).
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
5940
// Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
// Result stays the same width as the operands; lane index uses the same
// h/l/m bit-scatter as the SQDMLAL definitions above.
def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}
5969
// Patterns for Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
// vqdmulh(Rn, lane) --> SQDMULH (by element).
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
5984
// Scalar Signed saturating rounding doubling multiply
// returning high half (scalar, by element)
// Rounding variant of SQDMULH; identical encoding scheme, opcode 0b1101.
def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}
6013
// Patterns for Scalar Signed saturating rounding doubling multiply
// returning high half: vqrdmulh(Rn, lane) --> SQRDMULH (by element).
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, i32,
  VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, i32,
  VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, i32,
  VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32,
  VPR128Lo, neon_uimm2_bare>;
6026
// Scalar general arithmetic operation
// Helper pattern classes mapping a unary / binary / ternary operation on
// v1f64 onto the corresponding scalar double-precision FP instruction.
// (Fix: removed trailing whitespace from the class headers.)
class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (INST FPR64:$Rn, FPR64:$Rm)>;

class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
              (v1f64 FPR64:$Ra))),
          (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
6042
// Binary v1f64 operations use the plain scalar FP instructions.
def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;

// Unary v1f64 operations.
def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;

// Fused multiply-add / multiply-subtract on v1f64.
def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
6058
// Scalar Copy - DUP element to scalar
// Base class for "dup Rd, Vn.T[Imm]": copy lane Imm of a 128-bit vector
// into a scalar FP register.  Subclasses encode Imm into the imm5 field.
class NeonI_Scalar_DUP<string asmop, string asmlane,
                       RegisterClass ResRC, RegisterOperand VPRC,
                       Operand OpImm>
  : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
                     asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
                     [],
                     NoItinerary> {
  // Lane index; widest variant (byte lanes) needs 4 bits.
  bits<4> Imm;
}
6069
// The imm5 field (Inst{20-16}) holds the lane index shifted above a
// one-hot size marker: xxxx1 = byte, xxx10 = half, xx100 = word,
// x1000 = doubleword.
def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}
6082
// FP lane extraction: lane 0 is just a subregister read (no instruction);
// any other lane is a scalar DUP of that lane.
def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 0)),
          (f32 (EXTRACT_SUBREG (v4f32 VPR128:$Rn), sub_32))>;
def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 1)),
          (f32 (DUPsv_S (v4f32 VPR128:$Rn), 1))>;
def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 2)),
          (f32 (DUPsv_S (v4f32 VPR128:$Rn), 2))>;
def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 3)),
          (f32 (DUPsv_S (v4f32 VPR128:$Rn), 3))>;

def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 0)),
          (f64 (EXTRACT_SUBREG (v2f64 VPR128:$Rn), sub_64))>;
def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 1)),
          (f64 (DUPdv_D (v2f64 VPR128:$Rn), 1))>;

// 64-bit sources: DUP only reads 128-bit registers, so promote the V64
// source with SUBREG_TO_REG before extracting a non-zero lane.
def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 0)),
          (f32 (EXTRACT_SUBREG (v2f32 VPR64:$Rn), sub_32))>;
def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 1)),
          (f32 (DUPsv_S (v4f32 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            1))>;

def : Pat<(f64 (vector_extract (v1f64 VPR64:$Rn), 0)),
          (f64 (EXTRACT_SUBREG (v1f64 VPR64:$Rn), sub_64))>;
6105
// Select extract_subvector of a single element as a scalar DUP.  The
// second pattern handles 64-bit sources by first widening the register
// to 128 bits (SUBREG_TO_REG), since DUP reads the full Q register.
multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI,
  ValueType ResTy, ValueType OpTy,Operand OpLImm,
  ValueType NOpTy, ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)),
            (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>;

  def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
                OpNImm:$Imm))>;
}
6118
// Patterns for extract subvectors of v1ix data using scalar DUP instructions.
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare,
                                        v8i8, v16i8, neon_uimm3_bare>;
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare,
                                        v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare,
                                        v2i32, v4i32, neon_uimm1_bare>;
6126
// Select "insert (extracted lane) into element 0 of an undef vector" as a
// scalar DUP of that lane.  Second pattern widens a 64-bit source first.
multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy,
                                          ValueType OpTy, ValueType ElemTy,
                                          Operand OpImm, ValueType OpNTy,
                                          ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (vector_insert (ResTy undef),
              (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
              (neon_uimm0_bare:$Imm))),
            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;

  def : Pat<(ResTy (vector_insert (ResTy undef),
              (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
              (OpNImm:$Imm))),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              OpNImm:$Imm))>;
}
6144
// Same as pattern1 but matching the scalar_to_vector form of the copy
// instead of vector_insert-into-undef.
multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy,
                                          ValueType OpTy, ValueType ElemTy,
                                          Operand OpImm, ValueType OpNTy,
                                          ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (scalar_to_vector
              (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))),
            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;

  def : Pat<(ResTy (scalar_to_vector
              (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              OpNImm:$Imm))>;
}
6160
// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP
// instructions.
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
  v1i64, v2i64, i64, neon_uimm1_bare,
  v1i64, v2i64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
  v1i32, v4i32, i32, neon_uimm2_bare,
  v2i32, v4i32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H,
  v1i16, v8i16, i32, neon_uimm3_bare,
  v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B,
  v1i8, v16i8, i32, neon_uimm4_bare,
  v8i8, v16i8, neon_uimm3_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
  v1i64, v2i64, i64, neon_uimm1_bare,
  v1i64, v2i64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
  v1i32, v4i32, i32, neon_uimm2_bare,
  v2i32, v4i32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H,
  v1i16, v8i16, i32, neon_uimm3_bare,
  v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B,
  v1i8, v16i8, i32, neon_uimm4_bare,
  v8i8, v16i8, neon_uimm3_bare>;
6187
// Parser-only alias (emit bit = 0): accept "mov Rd, Vn.T[Imm]" spelling
// for the scalar DUP instruction.
multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
                                  Instruction DUPI, Operand OpImm,
                                  RegisterClass ResRC> {
  def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"),
          (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
}
6194
// Aliases for Scalar copy - DUP element (scalar)
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
6202
// Select the low/high half of a 128-bit vector: the low half is a plain
// sub_64 subregister read; the high half is a DUP of doubleword lane 1.
multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy,
                      ValueType OpTy> {
  def : Pat<(ResTy (GetLow VPR128:$Rn)),
            (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
  def : Pat<(ResTy (GetHigh VPR128:$Rn)),
            (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
}
6210
// Low/high-half selection for every 128-bit element arrangement.
defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>;
defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>;
defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>;
defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;
6217
// The following is for sext/zext from v1xx to v1xx
// One widening step is a shift-left-long by #0 (USHLL for zext, SSHLL for
// sext); 'prefix' names the instruction family.  The 1-element operand is
// re-labelled as the matching multi-element type via SUBREG_TO_REG so the
// vector form of the instruction can be used, then the low element of the
// result is read back out.  (Fix: removed trailing whitespace.)
multiclass NeonI_ext<string prefix, SDNode ExtOp> {
  // v1i32 -> v1i64
  def : Pat<(v1i64 (ExtOp (v1i32 FPR32:$Rn))),
            (EXTRACT_SUBREG
              (v2i64 (!cast<Instruction>(prefix # "_2S")
                (v2i32 (SUBREG_TO_REG (i64 0), $Rn, sub_32)), 0)),
              sub_64)>;

  // v1i16 -> v1i32
  def : Pat<(v1i32 (ExtOp (v1i16 FPR16:$Rn))),
            (EXTRACT_SUBREG
              (v4i32 (!cast<Instruction>(prefix # "_4H")
                (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
              sub_32)>;

  // v1i8 -> v1i16
  def : Pat<(v1i16 (ExtOp (v1i8 FPR8:$Rn))),
            (EXTRACT_SUBREG
              (v8i16 (!cast<Instruction>(prefix # "_8B")
                (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
              sub_16)>;
}

defm NeonI_zext : NeonI_ext<"USHLLvvi", zext>;
defm NeonI_sext : NeonI_ext<"SSHLLvvi", sext>;
6244
// Multi-step extensions of 1-element vectors that NeonI_ext above does not
// cover.  Zero-extension uses a scalar DUP of lane 0 (writing the narrow
// scalar register leaves the rest of the vector register zero, so only
// subregister re-labelling is needed afterwards); sign-extension chains
// SSHLL-by-#0 widening steps.  (Fix: removed stray whitespace-only lines
// and trailing whitespace.)

// zext v1i8 -> v1i32
def : Pat<(v1i32 (zext (v1i8 FPR8:$Rn))),
          (v1i32 (EXTRACT_SUBREG
            (v1i64 (SUBREG_TO_REG (i64 0),
              (v1i8 (DUPbv_B
                (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
                0)),
              sub_8)),
            sub_32))>;

// zext v1i8 -> v1i64
def : Pat<(v1i64 (zext (v1i8 FPR8:$Rn))),
          (v1i64 (SUBREG_TO_REG (i64 0),
            (v1i8 (DUPbv_B
              (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
              0)),
            sub_8))>;

// zext v1i16 -> v1i64
def : Pat<(v1i64 (zext (v1i16 FPR16:$Rn))),
          (v1i64 (SUBREG_TO_REG (i64 0),
            (v1i16 (DUPhv_H
              (v8i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)),
              0)),
            sub_16))>;

// sext v1i8 -> v1i32 (two SSHLL #0 steps: 8->16, 16->32)
def : Pat<(v1i32 (sext (v1i8 FPR8:$Rn))),
          (EXTRACT_SUBREG
            (v4i32 (SSHLLvvi_4H
              (v4i16 (SUBREG_TO_REG (i64 0),
                (v1i16 (EXTRACT_SUBREG
                  (v8i16 (SSHLLvvi_8B
                    (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
                  sub_16)),
                sub_16)), 0)),
            sub_32)>;

// sext v1i8 -> v1i64 (three SSHLL #0 steps: 8->16, 16->32, 32->64)
def : Pat<(v1i64 (sext (v1i8 FPR8:$Rn))),
          (EXTRACT_SUBREG
            (v2i64 (SSHLLvvi_2S
              (v2i32 (SUBREG_TO_REG (i64 0),
                (v1i32 (EXTRACT_SUBREG
                  (v4i32 (SSHLLvvi_4H
                    (v4i16 (SUBREG_TO_REG (i64 0),
                      (v1i16 (EXTRACT_SUBREG
                        (v8i16 (SSHLLvvi_8B
                          (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
                        sub_16)),
                      sub_16)), 0)),
                  sub_32)),
                sub_32)), 0)),
            sub_64)>;

// sext v1i16 -> v1i64 (two SSHLL #0 steps: 16->32, 32->64)
def : Pat<(v1i64 (sext (v1i16 FPR16:$Rn))),
          (EXTRACT_SUBREG
            (v2i64 (SSHLLvvi_2S
              (v2i32 (SUBREG_TO_REG (i64 0),
                (v1i32 (EXTRACT_SUBREG
                  (v4i32 (SSHLLvvi_4H
                    (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
                  sub_32)),
                sub_32)), 0)),
            sub_64)>;
6312
6313 //===----------------------------------------------------------------------===//
6314 // Non-Instruction Patterns
6315 //===----------------------------------------------------------------------===//
6316
6317 // 64-bit vector bitcasts...
6318
6319 def : Pat<(v1i64 (bitconvert (v8i8  VPR64:$src))), (v1i64 VPR64:$src)>;
6320 def : Pat<(v2f32 (bitconvert (v8i8  VPR64:$src))), (v2f32 VPR64:$src)>;
6321 def : Pat<(v2i32 (bitconvert (v8i8  VPR64:$src))), (v2i32 VPR64:$src)>;
6322 def : Pat<(v4i16 (bitconvert (v8i8  VPR64:$src))), (v4i16 VPR64:$src)>;
6323
6324 def : Pat<(v1i64 (bitconvert (v4i16  VPR64:$src))), (v1i64 VPR64:$src)>;
6325 def : Pat<(v2i32 (bitconvert (v4i16  VPR64:$src))), (v2i32 VPR64:$src)>;
6326 def : Pat<(v2f32 (bitconvert (v4i16  VPR64:$src))), (v2f32 VPR64:$src)>;
6327 def : Pat<(v8i8  (bitconvert (v4i16  VPR64:$src))), (v8i8 VPR64:$src)>;
6328
6329 def : Pat<(v1i64 (bitconvert (v2i32  VPR64:$src))), (v1i64 VPR64:$src)>;
6330 def : Pat<(v2f32 (bitconvert (v2i32  VPR64:$src))), (v2f32 VPR64:$src)>;
6331 def : Pat<(v4i16 (bitconvert (v2i32  VPR64:$src))), (v4i16 VPR64:$src)>;
6332 def : Pat<(v8i8  (bitconvert (v2i32  VPR64:$src))), (v8i8 VPR64:$src)>;
6333
6334 def : Pat<(v1i64 (bitconvert (v2f32  VPR64:$src))), (v1i64 VPR64:$src)>;
6335 def : Pat<(v2i32 (bitconvert (v2f32  VPR64:$src))), (v2i32 VPR64:$src)>;
6336 def : Pat<(v4i16 (bitconvert (v2f32  VPR64:$src))), (v4i16 VPR64:$src)>;
6337 def : Pat<(v8i8  (bitconvert (v2f32  VPR64:$src))), (v8i8 VPR64:$src)>;
6338
6339 def : Pat<(v2f32 (bitconvert (v1i64  VPR64:$src))), (v2f32 VPR64:$src)>;
6340 def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;
6341 def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
6342 def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;
6343
6344 def : Pat<(v1i64 (bitconvert (v1f64  VPR64:$src))), (v1i64 VPR64:$src)>;
6345 def : Pat<(v2f32 (bitconvert (v1f64  VPR64:$src))), (v2f32 VPR64:$src)>;
6346 def : Pat<(v2i32 (bitconvert (v1f64  VPR64:$src))), (v2i32 VPR64:$src)>;
6347 def : Pat<(v4i16 (bitconvert (v1f64  VPR64:$src))), (v4i16 VPR64:$src)>;
6348 def : Pat<(v8i8 (bitconvert (v1f64  VPR64:$src))), (v8i8 VPR64:$src)>;
6349 def : Pat<(f64   (bitconvert (v1f64  VPR64:$src))), (f64 VPR64:$src)>;
6350
6351 def : Pat<(v1f64 (bitconvert (v1i64  VPR64:$src))), (v1f64 VPR64:$src)>;
6352 def : Pat<(v1f64 (bitconvert (v2f32  VPR64:$src))), (v1f64 VPR64:$src)>;
6353 def : Pat<(v1f64 (bitconvert (v2i32  VPR64:$src))), (v1f64 VPR64:$src)>;
6354 def : Pat<(v1f64 (bitconvert (v4i16  VPR64:$src))), (v1f64 VPR64:$src)>;
6355 def : Pat<(v1f64 (bitconvert (v8i8  VPR64:$src))), (v1f64 VPR64:$src)>;
6356 def : Pat<(v1f64 (bitconvert (f64  VPR64:$src))), (v1f64 VPR64:$src)>;
6357
// ..and 128-bit vector bitcasts...
// Likewise free between 128-bit arrangements of the same Q register.

def : Pat<(v2f64 (bitconvert (v16i8  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8  VPR128:$src))), (v8i16 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v8i16  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16  VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v4i32  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32  VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32  VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v4f32  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32  VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32  VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v2i64  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64  VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64  VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2i64 (bitconvert (v2f64  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64  VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64  VPR128:$src))), (v16i8 VPR128:$src)>;
6395
// ...and scalar bitcasts...
// Same-register-file casts are free; integer<->FP register casts need an
// FMOV between the GPR and FPR banks.
def : Pat<(f16 (bitconvert (v1i16  FPR16:$src))), (f16 FPR16:$src)>;
def : Pat<(f32 (bitconvert (v1i32  FPR32:$src))), (f32 FPR32:$src)>;
def : Pat<(f64 (bitconvert (v1i64  FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64  FPR64:$src))), (f64 FPR64:$src)>;

// FPR -> GPR: FMOV Xd, Dn / FMOV Wd, Sn.
def : Pat<(i64 (bitconvert (v1i64  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v1f64  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v2i32  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v2f32  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v4i16  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v8i8  FPR64:$src))), (FMOVxd $src)>;

def : Pat<(i32 (bitconvert (v1i32  FPR32:$src))), (FMOVws $src)>;

def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;

def : Pat<(f64   (bitconvert (v8i8  VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64   (bitconvert (v4i16  VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64   (bitconvert (v2i32  VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64   (bitconvert (v2f32  VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64   (bitconvert (v1i64  VPR64:$src))), (f64 VPR64:$src)>;

def : Pat<(f128  (bitconvert (v16i8  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v8i16  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v4i32  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v2i64  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v4f32  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v2f64  VPR128:$src))), (f128 VPR128:$src)>;

def : Pat<(v1i16 (bitconvert (f16  FPR16:$src))), (v1i16 FPR16:$src)>;
def : Pat<(v1i32 (bitconvert (f32  FPR32:$src))), (v1i32 FPR32:$src)>;
def : Pat<(v1i64 (bitconvert (f64  FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64  FPR64:$src))), (v1f64 FPR64:$src)>;

// GPR -> FPR: FMOV Dd, Xn / FMOV Sd, Wn.
def : Pat<(v1i64 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v1f64 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v2i32 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v2f32 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v4i16 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v8i8 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;

def : Pat<(v1i32 (bitconvert (i32  GPR32:$src))), (FMOVsw $src)>;

def : Pat<(v8i8   (bitconvert (f64   FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16  (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32  (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32  (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64  (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;

def : Pat<(v16i8  (bitconvert (f128   FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16  (bitconvert (f128   FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32  (bitconvert (f128   FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64  (bitconvert (f128   FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32  (bitconvert (f128   FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64  (bitconvert (f128   FPR128:$src))), (v2f64 FPR128:$src)>;
6454
// Scalar Three Same

// Unsigned 3-bit immediate (0-7), used as the EXT index for 8b vectors.
def neon_uimm3 : Operand<i64>,
                   ImmLeaf<i64, [{return Imm < 8;}]> {
  let ParserMatchClass = uimm3_asmoperand;
  let PrintMethod = "printUImmHexOperand";
}

// Unsigned 4-bit immediate (0-15), used as the EXT index for 16b vectors.
def neon_uimm4 : Operand<i64>,
                   ImmLeaf<i64, [{return Imm < 16;}]> {
  let ParserMatchClass = uimm4_asmoperand;
  let PrintMethod = "printUImmHexOperand";
}
6468
// Bitwise Extract
// EXT: extract a vector from a pair of vectors at a byte offset $Index.
class NeonI_Extract<bit q, bits<2> op2, string asmop,
                    string OpS, RegisterOperand OpVPR, Operand OpImm>
  : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
                     (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
                     asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
                     ", $Rm." # OpS # ", $Index",
                     [],
                     NoItinerary>{
  bits<4> Index;
}

// 64-bit form: index is only 3 bits, so Inst{14} is forced to 0.
def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
                               VPR64, neon_uimm3> {
  let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
}

// 128-bit form: full 4-bit index.
def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
                               VPR128, neon_uimm4> {
  let Inst{14-11} = Index;
}
6490
// Select Neon_vextract onto EXT; the byte-level EXT serves every element
// type of the matching register width.
class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
                 Operand OpImm>
  : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
                                 (i64 OpImm:$Imm))),
              (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;

def : NI_Extract<v8i8,  VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v4i16, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v2i32, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v1i64, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v2f32, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v1f64, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
6509
// Table lookup
// TBL: per-byte lookup of $Rm into a list of table registers $Rn.
// 'len' encodes the number of table registers minus one; 'op' selects
// TBL vs TBX in the shared NeonI_TBL encoding.
class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
             string asmop, string OpS, RegisterOperand OpVPR,
             RegisterOperand VecList>
  : NeonI_TBL<q, op2, len, op,
              (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
              asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
              [],
              NoItinerary>;

// The vectors in look up table are always 16b
multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
  def _8b  : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
                    !cast<RegisterOperand>(List # "16B_operand")>;

  def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
                    !cast<RegisterOperand>(List # "16B_operand")>;
}

// One-, two-, three- and four-register table variants.
defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
6533
// Table lookup extension
// TBX: like TBL, but a read-modify-write of $Rd — the previous destination
// value is threaded in via $src and tied to $Rd by the constraint below.
class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
             string asmop, string OpS, RegisterOperand OpVPR,
             RegisterOperand VecList>
  : NeonI_TBL<q, op2, len, op,
              (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
              asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
              [],
              NoItinerary> {
  let Constraints = "$src = $Rd";
}

// The vectors in look up table are always 16b
multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
  def _8b  : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
                    !cast<RegisterOperand>(List # "16B_operand")>;

  def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
                    !cast<RegisterOperand>(List # "16B_operand")>;
}

// One-, two-, three- and four-register table variants.
defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
6559
// INS (general register): insert a GPR value into lane $Imm of a 128-bit
// vector, leaving the other lanes of $src unchanged ($src is tied to $Rd).
class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
                     RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
  : NeonI_copy<0b1, 0b0, 0b0011,
               (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
               asmop # "\t$Rd." # Res # "[$Imm], $Rn",
               [(set (ResTy VPR128:$Rd),
                 (ResTy (vector_insert
                   (ResTy VPR128:$src),
                   (OpTy OpGPR:$Rn),
                   (OpImm:$Imm))))],
               NoItinerary> {
  // Lane index; each concrete def encodes it into Inst{20-16}.
  bits<4> Imm;
  let Constraints = "$src = $Rd";
}
6574
//Insert element (vector, from main)
// The imm5 field (Inst{20-16}) holds the lane index shifted left past a
// set size-marker bit: xxxx1 for bytes, xxx10 for halves, xx100 for words,
// x1000 for doublewords.
def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
                           neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
                           neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
                           neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
                           neon_uimm1_bare> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}

// Accept the "mov" spelling for these inserts (final 0 = alias is parsed
// but never used for printing).
def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn",
                    (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn",
                    (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn",
                    (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn",
                    (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>;
6601
// vector_insert on a 64-bit vector: widen the source to 128 bits with
// SUBREG_TO_REG, perform the 128-bit INS, then extract the low 64 bits.
class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
                             RegisterClass OpGPR, ValueType OpTy,
                             Operand OpImm, Instruction INS>
  : Pat<(ResTy (vector_insert
              (ResTy VPR64:$src),
              (OpTy OpGPR:$Rn),
              (OpImm:$Imm))),
        (ResTy (EXTRACT_SUBREG
          (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
            OpGPR:$Rn, OpImm:$Imm)), sub_64))>;

def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
                                          neon_uimm3_bare, INSbw>;
def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
                                          neon_uimm2_bare, INShw>;
def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
                                          neon_uimm1_bare, INSsw>;
def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
                                          neon_uimm0_bare, INSdx>;
6621
// INS (element): copy lane $Immn of $Rn into lane $Immd of $Rd, leaving
// the other destination lanes ($src, tied to $Rd) unchanged.
class NeonI_INS_element<string asmop, string Res, Operand ResImm>
  : NeonI_insert<0b1, 0b1,
                 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
                 ResImm:$Immd, ResImm:$Immn),
                 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
                 [],
                 NoItinerary> {
  let Constraints = "$src = $Rd";
  // Destination and source lane indices; encoded per element size below.
  bits<4> Immd;
  bits<4> Immn;
}

//Insert element (vector, from element)
def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
  let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
  let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
}
def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
  let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
  let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0};
  // bit 11 is unspecified, but should be set to zero.
}
def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
  let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
  let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0};
  // bits 11-12 are unspecified, but should be set to zero.
}
def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
  let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
  let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0};
  // bits 11-13 are unspecified, but should be set to zero.
}

// "mov" spelling for element-to-element inserts (never used for printing).
def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]",
                    (INSELb VPR128:$Rd, VPR128:$Rn,
                      neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>;
def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]",
                    (INSELh VPR128:$Rd, VPR128:$Rn,
                      neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>;
def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]",
                    (INSELs VPR128:$Rd, VPR128:$Rn,
                      neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>;
def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]",
                    (INSELd VPR128:$Rd, VPR128:$Rn,
                      neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>;
6667
// insert(extract) -> INSEL, for all four combinations of 128-bit (ResTy,
// StImm) and 64-bit (NaTy, NaImm) source/destination vectors.  64-bit
// operands are widened with SUBREG_TO_REG; 64-bit results are narrowed
// back with EXTRACT_SUBREG on sub_64.
multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
                                ValueType MidTy, Operand StImm, Operand NaImm,
                                Instruction INS> {
// 128-bit destination, 128-bit source.
def : Pat<(ResTy (vector_insert
            (ResTy VPR128:$src),
            (MidTy (vector_extract
              (ResTy VPR128:$Rn),
              (StImm:$Immn))),
            (StImm:$Immd))),
          (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
              StImm:$Immd, StImm:$Immn)>;

// 128-bit destination, 64-bit source.
def : Pat <(ResTy (vector_insert
             (ResTy VPR128:$src),
             (MidTy (vector_extract
               (NaTy VPR64:$Rn),
               (NaImm:$Immn))),
             (StImm:$Immd))),
           (INS (ResTy VPR128:$src),
             (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
             StImm:$Immd, NaImm:$Immn)>;

// 64-bit destination, 128-bit source.
def : Pat <(NaTy (vector_insert
             (NaTy VPR64:$src),
             (MidTy (vector_extract
               (ResTy VPR128:$Rn),
               (StImm:$Immn))),
             (NaImm:$Immd))),
           (NaTy (EXTRACT_SUBREG
             (ResTy (INS
               (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
               (ResTy VPR128:$Rn),
               NaImm:$Immd, StImm:$Immn)),
             sub_64))>;

// 64-bit destination, 64-bit source.
def : Pat <(NaTy (vector_insert
             (NaTy VPR64:$src),
             (MidTy (vector_extract
               (NaTy VPR64:$Rn),
               (NaImm:$Immn))),
             (NaImm:$Immd))),
           (NaTy (EXTRACT_SUBREG
             (ResTy (INS
               (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
               (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
               NaImm:$Immd, NaImm:$Immn)),
             sub_64))>;
}

defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
                            neon_uimm1_bare, INSELs>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
                            neon_uimm0_bare, INSELd>;
defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
                            neon_uimm3_bare, INSELb>;
defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
                            neon_uimm2_bare, INSELh>;
defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                            neon_uimm1_bare, INSELs>;
defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
                            neon_uimm0_bare, INSELd>;
6729
// Insert of a floating-point scalar register: view the FPR as lane 0 of a
// 128-bit vector (SUBREG_TO_REG) and use INSEL with source lane 0.
multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
                                      ValueType MidTy,
                                      RegisterClass OpFPR, Operand ResImm,
                                      SubRegIndex SubIndex, Instruction INS> {
// 128-bit destination vector.
def : Pat <(ResTy (vector_insert
             (ResTy VPR128:$src),
             (MidTy OpFPR:$Rn),
             (ResImm:$Imm))),
           (INS (ResTy VPR128:$src),
             (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
             ResImm:$Imm,
             (i64 0))>;

// 64-bit destination vector: widen, insert, narrow back.
def : Pat <(NaTy (vector_insert
             (NaTy VPR64:$src),
             (MidTy OpFPR:$Rn),
             (ResImm:$Imm))),
           (NaTy (EXTRACT_SUBREG
             (ResTy (INS
               (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
               (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
               ResImm:$Imm,
               (i64 0))),
             sub_64))>;
}

defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
                                  sub_32, INSELs>;
defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
                                  sub_64, INSELd>;
6760
// SMOV: sign-extending move of vector lane $Imm to a general register,
// expressed as vector_extract followed by sext_inreg from eleTy.
class NeonI_SMOV<string asmop, string Res, bit Q,
                 ValueType OpTy, ValueType eleTy,
                 Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
  : NeonI_copy<Q, 0b0, 0b0101,
               (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
               asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
               [(set (ResTy ResGPR:$Rd),
                 (ResTy (sext_inreg
                   (ResTy (vector_extract
                     (OpTy VPR128:$Rn), (OpImm:$Imm))),
                   eleTy)))],
               NoItinerary> {
  // Lane index, encoded into Inst{20-16} by the concrete defs below.
  bits<4> Imm;
}

//Signed integer move (main, from element)
// imm5 encoding as for INS: lane index shifted past a size-marker bit.
def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
                        GPR32, i32> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
                        GPR32, i32> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
                        GPR64, i64> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
                        GPR64, i64> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
                        GPR64, i64> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
6797
// Extra selection patterns for the 64-bit SMOV forms: fold the various
// DAG shapes of a sign-extended lane extract (sext, sext_inreg of anyext,
// sext_inreg of an i64 extract) into a single SMOVx, for both 128-bit
// (StTy/StImm) and widened 64-bit (NaTy/NaImm) source vectors.
multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
                               ValueType eleTy, Operand StImm,  Operand NaImm,
                               Instruction SMOVI> {
  def : Pat<(i64 (sext_inreg
              (i64 (anyext
                (i32 (vector_extract
                  (StTy VPR128:$Rn), (StImm:$Imm))))),
              eleTy)),
            (SMOVI VPR128:$Rn, StImm:$Imm)>;

  def : Pat<(i64 (sext
              (i32 (vector_extract
                (StTy VPR128:$Rn), (StImm:$Imm))))),
            (SMOVI VPR128:$Rn, StImm:$Imm)>;

  def : Pat<(i64 (sext_inreg
              (i64 (vector_extract
                (NaTy VPR64:$Rn), (NaImm:$Imm))),
              eleTy)),
            (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              NaImm:$Imm)>;

  def : Pat<(i64 (sext_inreg
              (i64 (anyext
                (i32 (vector_extract
                  (NaTy VPR64:$Rn), (NaImm:$Imm))))),
              eleTy)),
            (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              NaImm:$Imm)>;

  def : Pat<(i64 (sext
              (i32 (vector_extract
                (NaTy VPR64:$Rn), (NaImm:$Imm))))),
            (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              NaImm:$Imm)>;
}

defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
                          neon_uimm3_bare, SMOVxb>;
defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
                          neon_uimm2_bare, SMOVxh>;
defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                          neon_uimm1_bare, SMOVxs>;
6841
// 32-bit SMOV from a 64-bit source vector: widen with SUBREG_TO_REG and
// reuse the 128-bit-source instruction.
class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
                          ValueType eleTy, Operand StImm,  Operand NaImm,
                          Instruction SMOVI>
  : Pat<(i32 (sext_inreg
          (i32 (vector_extract
            (NaTy VPR64:$Rn), (NaImm:$Imm))),
          eleTy)),
        (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
          NaImm:$Imm)>;

def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
                         neon_uimm3_bare, SMOVwb>;
def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
                         neon_uimm2_bare, SMOVwh>;
6856
// UMOV: move vector lane $Imm to a general register without sign
// extension, expressed as a plain vector_extract.
class NeonI_UMOV<string asmop, string Res, bit Q,
                 ValueType OpTy, Operand OpImm,
                 RegisterClass ResGPR, ValueType ResTy>
  : NeonI_copy<Q, 0b0, 0b0111,
               (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
               asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
               [(set (ResTy ResGPR:$Rd),
                  (ResTy (vector_extract
                    (OpTy VPR128:$Rn), (OpImm:$Imm))))],
               NoItinerary> {
  // Lane index, encoded into Inst{20-16} by the concrete defs below.
  bits<4> Imm;
}

//Unsigned integer move (main, from element)
def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
                         GPR32, i32> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
                         GPR32, i32> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
                         GPR32, i32> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
                         GPR64, i64> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}

// "mov" spelling for the whole-word/doubleword forms (not used when
// printing).
def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]",
                    (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]",
                    (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>;
6892
// UMOV from a 64-bit source vector: widen with SUBREG_TO_REG and reuse
// the 128-bit-source instruction.
// NOTE(review): the instruction parameter is named SMOVI but is always
// instantiated with UMOV instructions; the name is historical.
class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
                         Operand StImm,  Operand NaImm,
                         Instruction SMOVI>
  : Pat<(ResTy (vector_extract
          (NaTy VPR64:$Rn), NaImm:$Imm)),
        (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
          NaImm:$Imm)>;

def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
                        neon_uimm3_bare, UMOVwb>;
def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
                        neon_uimm2_bare, UMOVwh>;
def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                        neon_uimm1_bare, UMOVws>;
6907
// Fold an explicit zero-extension of a lane extract (and-with-mask for
// sub-word lanes, zext for doubleword lanes) into the inherently
// zero-extending UMOV.

def : Pat<(i32 (and
            (i32 (vector_extract
              (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
            255)),
          (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;

def : Pat<(i32 (and
            (i32 (vector_extract
              (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
            65535)),
          (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;

// NOTE(review): this extracts an i32 from a v2i64 vector — the element
// type looks inconsistent with the i64 lanes; verify the pattern actually
// matches anything.
def : Pat<(i64 (zext
            (i32 (vector_extract
              (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
          (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;

// 64-bit source vectors: widen to 128 bits first.
def : Pat<(i32 (and
            (i32 (vector_extract
              (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
            255)),
          (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
            neon_uimm3_bare:$Imm)>;

def : Pat<(i32 (and
            (i32 (vector_extract
              (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
            65535)),
          (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
            neon_uimm2_bare:$Imm)>;

// NOTE(review): same i32-from-v1i64 type oddity as the v2i64 case above.
def : Pat<(i64 (zext
            (i32 (vector_extract
              (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
          (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
            neon_uimm0_bare:$Imm)>;
6944
// Additional copy patterns for scalar types
// Extract of lane 0 from a one-element vector held in an FP register:
// sub-word types go through UMOV after widening; word/doubleword types
// are plain FPR->GPR moves; f64 extract is a no-op.
def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
          (UMOVwb (v16i8
            (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;

def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
          (UMOVwh (v8i16
            (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;

def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
          (FMOVws FPR32:$Rn)>;

def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
          (FMOVxd FPR64:$Rn)>;

def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
          (f64 FPR64:$Rn)>;
6962
// scalar_to_vector into one-element and 64-bit vectors: insert into lane 0
// of an undefined 128-bit register, then take the appropriate subregister.
def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
          (v1i8 (EXTRACT_SUBREG (v16i8
            (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_8))>;

def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
          (v1i16 (EXTRACT_SUBREG (v8i16
            (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_16))>;

// Word/doubleword scalars are direct GPR->FPR moves.
def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
          (FMOVsw $src)>;

def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
          (FMOVdx $src)>;

def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (v8i8 (EXTRACT_SUBREG (v16i8
            (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_64))>;

def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (v4i16 (EXTRACT_SUBREG (v8i16
            (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_64))>;
6988
// v2i32 scalar_to_vector: insert into lane 0 of an undef v4i32, then take
// the low 64 bits.  The intermediate cast must be v4i32 — the result type
// of INSsw — matching the v16i8/INSbw and v8i16/INShw patterns above
// (the previous v16i8 cast here was a copy-paste error).
def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
          (v2i32 (EXTRACT_SUBREG (v4i32
            (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_64))>;
6993
// 128-bit scalar_to_vector: INS into lane 0 of an undefined register;
// only lane 0 is defined, which is all scalar_to_vector requires.
def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))>;

def : Pat<(v4i32 (scalar_to_vector GPR32:$Rn)),
          (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))>;

def : Pat<(v2i64 (scalar_to_vector GPR64:$Rn)),
          (INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>;

// Floating-point scalars already live in the vector register file, so a
// subregister reinterpretation suffices.
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>;
def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>;

def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (v1f64 FPR64:$Rn)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
                         (f64 FPR64:$src), sub_64)>;
7017
// DUP (element): broadcast lane $Imm of $Rn to every lane of $Rd.
// Selection patterns are attached separately via NeonI_DUP_Elt_pattern.
class NeonI_DUP_Elt<bit Q, string asmop, string rdlane,  string rnlane,
                    RegisterOperand ResVPR, Operand OpImm>
  : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
               (ins VPR128:$Rn, OpImm:$Imm),
               asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
               [],
               NoItinerary> {
  // Source lane index, encoded into Inst{20-16} with a size-marker bit.
  bits<4> Imm;
}

def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
                              neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}

def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
                              neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}

def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
                              neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}

def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
                              neon_uimm1_bare> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}

def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
                              neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}

def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
                              neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}

def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
                              neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
7062
// Neon_vduplane -> DUPELT, for a 128-bit source (OpTy/OpLImm) and a
// 64-bit source (NaTy/OpNImm) that is first widened to ExTy.
multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
                                       ValueType OpTy,ValueType NaTy,
                                       ValueType ExTy, Operand OpLImm,
                                       Operand OpNImm> {
def  : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
        (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;

def : Pat<(ResTy (Neon_vduplane
            (NaTy VPR64:$Rn), OpNImm:$Imm)),
          (ResTy (DUPELT
            (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
}
defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
                             neon_uimm4_bare, neon_uimm3_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
                             neon_uimm4_bare, neon_uimm3_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
                             neon_uimm3_bare, neon_uimm2_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
                             neon_uimm3_bare, neon_uimm2_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
                             neon_uimm1_bare, neon_uimm0_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
                             neon_uimm1_bare, neon_uimm0_bare>;
7095
// Broadcast of an FP scalar register: view it as lane 0 of a 128-bit
// vector and DUP that lane.
def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
          (v2f32 (DUPELT2s
            (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
            (i64 0)))>;
def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
          (v4f32 (DUPELT4s
            (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
            (i64 0)))>;
def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
          (v2f64 (DUPELT2d
            (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
            (i64 0)))>;
7108
// Neon_vduplane on a one-element vector held in an FP register: widen the
// scalar register to a 128-bit vector and DUP the requested lane.
multiclass NeonI_DUP_pattern<Instruction DUPELT, ValueType ResTy,
                             ValueType OpTy, RegisterClass OpRC,
                             Operand OpNImm, SubRegIndex SubIndex> {
def : Pat<(ResTy (Neon_vduplane (OpTy OpRC:$Rn), OpNImm:$Imm)),
          (ResTy (DUPELT
            (SUBREG_TO_REG (i64 0), OpRC:$Rn, SubIndex), OpNImm:$Imm))>;
}

defm : NeonI_DUP_pattern<DUPELT4h, v4i16, v1i16, FPR16, neon_uimm2_bare,sub_16>;
defm : NeonI_DUP_pattern<DUPELT4s, v4i32, v1i32, FPR32, neon_uimm2_bare,sub_32>;
defm : NeonI_DUP_pattern<DUPELT8b, v8i8, v1i8, FPR8, neon_uimm3_bare, sub_8>;
defm : NeonI_DUP_pattern<DUPELT8h, v8i16, v1i16, FPR16, neon_uimm3_bare,sub_16>;
defm : NeonI_DUP_pattern<DUPELT16b, v16i8, v1i8, FPR8, neon_uimm4_bare, sub_8>;
7122
// DUP (general register): broadcast a GPR value to every lane, with the
// Neon_vdup selection pattern attached directly.
class NeonI_DUP<bit Q, string asmop, string rdlane,
                RegisterOperand ResVPR, ValueType ResTy,
                RegisterClass OpGPR, ValueType OpTy>
  : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
               asmop # "\t$Rd" # rdlane # ", $Rn",
               [(set (ResTy ResVPR:$Rd),
                 (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
               NoItinerary>;

// Inst{20-16} carries only the element-size marker bit (no lane index).
def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
  let Inst{20-16} = 0b00001;
  // bits 17-20 are unspecified, but should be set to zero.
}

def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
  let Inst{20-16} = 0b00010;
  // bits 18-20 are unspecified, but should be set to zero.
}

def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
  let Inst{20-16} = 0b00100;
  // bits 19-20 are unspecified, but should be set to zero.
}

def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
  let Inst{20-16} = 0b01000;
  // bit 20 is unspecified, but should be set to zero.
}

def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
  let Inst{20-16} = 0b00001;
  // bits 17-20 are unspecified, but should be set to zero.
}

def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
  let Inst{20-16} = 0b00010;
  // bits 18-20 are unspecified, but should be set to zero.
}

def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
  let Inst{20-16} = 0b00100;
  // bits 19-20 are unspecified, but should be set to zero.
}
7166
// patterns for CONCAT_VECTORS
// Three cases: (lo, undef) is just a subregister widen; (lo, hi) inserts
// the second half as lane 1 of a v2i64 view; (x, x) duplicates the low
// doubleword.
multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
          (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
          (INSELd
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
            (i64 1),
            (i64 0))>;
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
          (DUPELT2d
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            (i64 0))> ;
}

defm : Concat_Vector_Pattern<v16i8, v8i8>;
defm : Concat_Vector_Pattern<v8i16, v4i16>;
defm : Concat_Vector_Pattern<v4i32, v2i32>;
defm : Concat_Vector_Pattern<v2i64, v1i64>;
defm : Concat_Vector_Pattern<v4f32, v2f32>;
defm : Concat_Vector_Pattern<v2f64, v1f64>;
7189
// CONCAT_VECTORS of two v1i32 scalars, mirroring Concat_Vector_Pattern:
// (lo, undef) widens the FPR into a v2i32 view; (lo, hi) inserts the
// second scalar as lane 1 and takes the low half; (x, x) duplicates
// lane 0.  The DUPELT2s lane index is spelled (i64 0) for consistency
// with the DUPELT2d case in Concat_Vector_Pattern.
def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), undef)),
          (v2i32 (SUBREG_TO_REG (i64 0), $Rn, sub_32))>;
def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
          (EXTRACT_SUBREG
            (v4i32 (INSELs
              (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)),
              (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
              (i64 1),
              (i64 0))),
            sub_64)>;
def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rn))),
          (DUPELT2s (v4i32 (SUBREG_TO_REG (i64 0), $Rn, sub_32)), (i64 0))>;
7202
//patterns for EXTRACT_SUBVECTOR
// Taking the low half of a 128-bit vector is a plain sub_64 subregister
// extraction (only offset 0 is handled here).
def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
          (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
          (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
          (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
          (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
          (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
          (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
7216
// The following definitions are for the instruction class (3V Elem).
7218
7219 // Variant 1
7220
// Base class for 3-operand (accumulating) vector-by-element instructions,
// e.g. "mla vd.2s, vn.2s, vm.s[idx]". The destination is tied to $src, so
// these read-modify-write the accumulator. Index/Re encoding bits are left
// to the instantiating multiclasses since the split varies by element size.
class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
             string asmop, string ResS, string OpS, string EleOpS,
             Operand OpImm, RegisterOperand ResVPR,
             RegisterOperand OpVPR, RegisterOperand EleOpVPR>
  : NeonI_2VElem<q, u, size, opcode,
                 (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
                                         EleOpVPR:$Re, OpImm:$Index),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
                 ", $Re." # EleOpS # "[$Index]",
                 [],
                 NoItinerary> {
  // Lane index: up to 3 bits (H elements); encoding assigned per-variant.
  bits<3> Index;
  // Element source register; H variants only encode the low 4 bits.
  bits<5> Re;

  // Accumulator in = accumulator out.
  let Constraints = "$src = $Rd";
}
7237
// Variant 1 accumulating by-element forms (integer MLA/MLS): 2S/4S with a
// 2-bit lane index in Inst{11,21}, and 4H/8H with a 3-bit index in
// Inst{11,21,20}.
multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                     neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                     neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15.
  def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
                     neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    // Only 4 bits of Re: bit 20 is claimed by the index, hence v0-v15.
    let Inst{19-16} = Re{3-0};
  }

  def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
                     neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}
7271
// Integer multiply-accumulate / multiply-subtract by element.
defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
7274
// Pattern for lane in 128-bit vector: select an accumulating by-element
// instruction when one multiplicand is a lane-duplicate of a Q-register
// element. The element register can be used directly.
class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                   RegisterOperand ResVPR, RegisterOperand OpVPR,
                   RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
                   ValueType EleOpTy>
  : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7283
// Pattern for lane in 64-bit vector: same as NI_2VE_laneq, but the element
// source is a D register, so it is first widened to a Q register with
// SUBREG_TO_REG (the instruction always takes a 128-bit element operand).
class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                  RegisterOperand ResVPR, RegisterOperand OpVPR,
                  RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
                  ValueType EleOpTy>
  : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST ResVPR:$src, OpVPR:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7293
// Selection patterns for the Variant-1 accumulating instructions: one
// laneq pattern per instruction form, plus lane patterns for D-register
// element sources (whose max lane index is half that of a Q register).
multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
{
  def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
                     op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
                     op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
                     op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
                     op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
                    op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>;

  def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
                    op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
}
7316
// Wire the MLA/MLS by-element instructions to their DAG operations.
defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
7319
// Base class for 2-operand (non-accumulating) vector-by-element
// instructions, e.g. "mul vd.2s, vn.2s, vm.s[idx]". Identical to NI_2VE
// except there is no tied accumulator input.
class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
                 string asmop, string ResS, string OpS, string EleOpS,
                 Operand OpImm, RegisterOperand ResVPR,
                 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
  : NeonI_2VElem<q, u, size, opcode,
                 (outs ResVPR:$Rd), (ins OpVPR:$Rn,
                                         EleOpVPR:$Re, OpImm:$Index),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
                 ", $Re." # EleOpS # "[$Index]",
                 [],
                 NoItinerary> {
  // Lane index and element register; encoding assigned per-variant.
  bits<3> Index;
  bits<5> Re;
}
7334
// Variant 1, non-accumulating by-element forms (MUL/SQDMULH/SQRDMULH):
// same shape/encoding scheme as NI_2VE_v1 but built on NI_2VE_2op.
multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                         neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                         neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15.
  def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
                         neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    // 4-bit element register field (bit 20 carries part of the index).
    let Inst{19-16} = Re{3-0};
  }

  def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
                         neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}
7368
// Integer multiply and saturating doubling multiply-high by element.
defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
7372
// Pattern for lane in 128-bit vector (non-accumulating forms): match a
// binary op whose second operand is a lane-duplicate of a Q-reg element.
class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                       RegisterOperand OpVPR, RegisterOperand EleOpVPR,
                       ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
  : Pat<(ResTy (op (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7380
// Pattern for lane in 64-bit vector (non-accumulating forms): the D-reg
// element source is widened to a Q register before feeding the instruction.
class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                      RegisterOperand OpVPR, RegisterOperand EleOpVPR,
                      ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
  : Pat<(ResTy (op (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST OpVPR:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7389
// Selection patterns for Variant-1 non-accumulating instructions
// (MUL/SQDMULH/SQRDMULH by element).
multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
                         op, VPR64, VPR128, v2i32, v2i32, v4i32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
                         op, VPR128, VPR128, v4i32, v4i32, v4i32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
                         op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
                         op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
                        op, VPR64, VPR64, v2i32, v2i32, v2i32>;

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
                        op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
}
7411
// MUL uses the generic ISD mul; the saturating forms reuse the shared
// ARM NEON intrinsics.
defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
7415
7416 // Variant 2
7417
// Variant 2, non-accumulating FP by-element forms: 2S/4S with a 2-bit lane
// index, and 2D with a 1-bit index (Inst{11}; Inst{21} fixed to 0).
multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                         neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                         neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // _1d2d doesn't exist!

  def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
                         neon_uimm1_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{0}};
    let Inst{21} = 0b0;
    let Inst{20-16} = Re;
  }
}
7443
// FP multiply and multiply-extended by element.
defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
7446
// 2D-specific lane pattern: a v1f64 element combined with itself (coreop,
// e.g. Neon_combine_2d) is a broadcast, so it maps to the by-element
// instruction with a hard-coded lane index of 0.
class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
                         RegisterOperand OpVPR, RegisterOperand EleOpVPR,
                         ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
                         SDPatternOperator coreop>
  : Pat<(ResTy (op (OpTy OpVPR:$Rn),
          (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
        (INST OpVPR:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
7455
// Selection patterns for Variant-2 FP multiply by element (FMUL/FMULX).
multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
                         op, VPR64, VPR128, v2f32, v2f32, v4f32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
                         op, VPR128, VPR128, v4f32, v4f32, v4f32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
                         op, VPR128, VPR128, v2f64, v2f64, v2f64>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
                        op, VPR64, VPR64, v2f32, v2f32, v2f32>;

  // v1f64 has a single element, so a self-combine is a lane-0 broadcast.
  def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
                           op, VPR128, VPR64, v2f64, v2f64, v1f64,
                           BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
}
7475
defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;

// fmul by a dup'ed FP scalar: widen the scalar register into a vector and
// use the by-element form with lane 0 instead of materialising the dup.
def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))),
                       (v2f32 VPR64:$Rn))),
          (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;

def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))),
                       (v4f32 VPR128:$Rn))),
          (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;

def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))),
                       (v2f64 VPR128:$Rn))),
          (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>;
7490
// The following patterns use fma; -ffp-contract=fast generates fma nodes.
7493
// Variant 2 accumulating FP by-element forms (FMLA/FMLS): same encoding
// layout as NI_2VE_v2_2op but with the tied-accumulator base class.
multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                     neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                     neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // _1d2d doesn't exist!

  def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
                     neon_uimm1_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{0}};
    let Inst{21} = 0b0;
    let Inst{20-16} = Re;
  }
}
7519
// FP fused multiply-add / multiply-subtract by element.
defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
7522
// Pattern for lane in 128-bit vector, fma operand order: fma's first
// operand is the dup'ed lane (hence "swap" relative to NI_2VE_laneq, where
// the dup is last); the accumulator is fma's third operand.
class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                       RegisterOperand ResVPR, RegisterOperand OpVPR,
                       ValueType ResTy, ValueType OpTy,
                       SDPatternOperator coreop>
  : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
                   (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
        (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
7531
// Pattern for lane 0: fma with a dup'ed f32 scalar maps to the by-element
// instruction after widening the scalar register, using lane index 0.
class NI_2VEfma_lane0<Instruction INST, SDPatternOperator op,
                      RegisterOperand ResVPR, ValueType ResTy>
  : Pat<(ResTy (op (ResTy ResVPR:$Rn),
                   (ResTy (Neon_vdup (f32 FPR32:$Re))),
                   (ResTy ResVPR:$src))),
        (INST ResVPR:$src, ResVPR:$Rn,
              (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7540
// Pattern for lane in 64-bit vector, fma operand order: like
// NI_2VEswap_laneq but the element source is a D register, widened with
// SUBREG_TO_REG before use.
class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                      RegisterOperand ResVPR, RegisterOperand OpVPR,
                      ValueType ResTy, ValueType OpTy,
                      SDPatternOperator coreop>
  : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
                   (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
        (INST ResVPR:$src, ResVPR:$Rn,
          (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
7550
// 2D fma variant: the element operand is a v1f64 combined with itself
// (a broadcast), so the instruction is emitted with lane index 0.
class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
                           SDPatternOperator op,
                           RegisterOperand ResVPR, RegisterOperand OpVPR,
                           ValueType ResTy, ValueType OpTy,
                           SDPatternOperator coreop>
  : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
                   (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
        (INST ResVPR:$src, ResVPR:$Rn,
          (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
7561
7562
// Selection patterns for FMLA by element (fma with a lane-dup operand).
multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
                         neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;

  def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_2s4s"),
                        op, VPR64, v2f32>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
                         neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;

  def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_4s4s"),
                        op, VPR128, v4f32>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
                         neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
                        neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
                        BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;

  def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
                             neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
                             BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
}
7592
// FMLA by element selects from the generic fma node.
defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
7594
// Pattern for lane 0, FMLS form: (fma (fneg Rn), dup(scalar), acc) — the
// negation lives on the vector operand, so FMLS with lane 0 matches it.
class NI_2VEfms_lane0<Instruction INST, SDPatternOperator op,
                      RegisterOperand ResVPR, ValueType ResTy>
  : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)),
                   (ResTy (Neon_vdup (f32 FPR32:$Re))),
                   (ResTy ResVPR:$src))),
        (INST ResVPR:$src, ResVPR:$Rn,
              (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7603
// Selection patterns for FMLS by element. Each form is matched twice
// because the fneg can appear either around the whole lane-dup or on the
// value being dup'ed — both fold to the same FMLS instruction.
multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
{
  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
                         neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
                         BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
                         neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
                         BinOpFrag<(Neon_vduplane
                                     (fneg node:$LHS), node:$RHS)>>;

  def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_2s4s"),
                        op, VPR64, v2f32>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
                         neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
                         BinOpFrag<(fneg (Neon_vduplane
                                     node:$LHS, node:$RHS))>>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
                         neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
                         BinOpFrag<(Neon_vduplane
                                     (fneg node:$LHS), node:$RHS)>>;

  def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_4s4s"),
                        op, VPR128, v4f32>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
                         neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
                         BinOpFrag<(fneg (Neon_vduplane
                                     node:$LHS, node:$RHS))>>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
                         neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
                         BinOpFrag<(Neon_vduplane
                                     (fneg node:$LHS), node:$RHS)>>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
                        neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
                        BinOpFrag<(fneg (Neon_vduplane
                                    node:$LHS, node:$RHS))>>;

  def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
                        neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
                        BinOpFrag<(Neon_vduplane
                                    (fneg node:$LHS), node:$RHS)>>;

  def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
                        neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
                        BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;

  def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
                        neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
                        BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>;

  def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
                             neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
                             BinOpFrag<(fneg (Neon_combine_2d
                                         node:$LHS, node:$RHS))>>;

  // Negating both halves of a 2d combine equals negating the combine.
  def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
                             neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
                             BinOpFrag<(Neon_combine_2d
                                         (fneg node:$LHS), (fneg node:$RHS))>>;
}
7671
// FMLS by element also selects from fma (the fneg is inside the pattern).
defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
7673
7674 // Variant 3: Long type
7675 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
7676 //      SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
7677
// Variant 3 accumulating long-type by-element forms: result is double the
// operand width. The q=1 forms ("2" suffix) operate on the high half of Rn.
multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
                     neon_uimm2_bare, VPR128, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
                     neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15.
  def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
                     neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }

  def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
                     neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}
7711
// Long accumulating multiplies by element (signed/unsigned and saturating).
defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
7718
// Variant 3 non-accumulating long-type by-element forms (SMULL/UMULL/
// SQDMULL): same layout as NI_2VE_v3 but with no tied accumulator.
multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
                         neon_uimm2_bare, VPR128, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
                         neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15.
  def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
                         neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }

  def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
                         neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}
7752
// Long multiplies by element.
defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;

// A v1f64 is just an f64 in an FPR64; FMOVdd (presumably a plain FP
// register move — confirm against its definition) realises the cast.
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
          (FMOVdd $src)>;
7759
// Pattern for lane in 128-bit vector, "2" (high-half) accumulating long
// forms: hiop extracts Rn's high half before the widening multiply.
class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                     RegisterOperand EleOpVPR, ValueType ResTy,
                     ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
                     SDPatternOperator hiop>
  : Pat<(ResTy (op (ResTy VPR128:$src),
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (Neon_vduplane
                      (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7770
// Pattern for lane in 64-bit vector, high-half accumulating long forms:
// like NI_2VEL2_laneq with the D-reg element widened via SUBREG_TO_REG.
class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                    RegisterOperand EleOpVPR, ValueType ResTy,
                    ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
                    SDPatternOperator hiop>
  : Pat<(ResTy (op (ResTy VPR128:$src),
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (Neon_vduplane
                      (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST VPR128:$src, VPR128:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7782
// Fixed lane-0 pattern for the high-half accumulating long forms: a dup
// from a GPR becomes an explicit DUP instruction, indexed at lane 0.
class NI_2VEL2_lane0<Instruction INST, SDPatternOperator op,
                     ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
                     SDPatternOperator hiop, Instruction DupInst>
  : Pat<(ResTy (op (ResTy VPR128:$src),
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
        (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>;
7790
// Selection patterns for the accumulating long-type instructions: low-half
// forms use the plain NI_2VE patterns, high-half ("2") forms use the
// NI_2VEL2 patterns with Neon_High8H/Neon_High4S extraction.
multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
  def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
                     op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
                     op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>;

  def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
                       op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;

  def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
                       op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;

  def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
                       op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;

  def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
                       op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
                    op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;

  def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
                    op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>;

  def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
                      op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;

  def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
                      op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
}
7824
// Wire the long accumulating by-element instructions to their DAG nodes.
defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
7829
// Pattern for lane in 128-bit vector, high-half non-accumulating long
// forms (e.g. smull2 by element).
class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                         RegisterOperand EleOpVPR, ValueType ResTy,
                         ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
                         SDPatternOperator hiop>
  : Pat<(ResTy (op
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (Neon_vduplane
                      (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7840
// Pattern for lane in 64-bit vector, high-half non-accumulating long
// forms: D-reg element widened via SUBREG_TO_REG.
class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                        RegisterOperand EleOpVPR, ValueType ResTy,
                        ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
                        SDPatternOperator hiop>
  : Pat<(ResTy (op
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (Neon_vduplane
                      (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST VPR128:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7852
// Pattern for fixed lane 0, high-half non-accumulating long forms: the
// GPR scalar is materialised with a DUP and indexed at lane 0.
class NI_2VEL2_mul_lane0<Instruction INST, SDPatternOperator op,
                         ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
                         SDPatternOperator hiop, Instruction DupInst>
  : Pat<(ResTy (op
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
        (INST VPR128:$Rn, (DupInst $Re), 0)>;
7861
// Selection patterns for the non-accumulating long-type instructions
// (SMULL/UMULL/SQDMULL by element), mirroring NI_2VEL_v3_pat.
multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
                         op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
                         op, VPR64, VPR128, v2i64, v2i32, v4i32>;

  def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
                         op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;

  def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
                           op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;

  def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_4s8h"),
                           op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;

  def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_2d4s"),
                           op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
                        op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
                        op, VPR64, VPR64, v2i64, v2i32, v2i32>;

  def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
                          op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;

  def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
                          op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
}
7895
// By-element patterns for the SMULL/UMULL/SQDMULL instruction families,
// selected from the corresponding ARM NEON long-multiply intrinsics.
defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
7899
// PatFrags for saturating doubling multiply-accumulate shapes:
// op($Ra, vqdmull($Rn, $Rm)) at the two long result widths.  With op =
// vqadds/vqsubs these describe the SQDMLAL/SQDMLSL dags.
multiclass NI_qdma<SDPatternOperator op> {
  def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                    (op node:$Ra,
                      (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;

  def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                    (op node:$Ra,
                      (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
}
7909
// qdmlal = saturating add of the doubled product; qdmlsl = saturating sub.
defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
7912
// Same variant coverage as NI_2VEL_mul_v3_pat, but for the accumulating
// SQDMLAL/SQDMLSL families: the matched dag is the three-operand PatFrag
// (<op>_4s / <op>_2d) built by NI_qdma above.
multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
  def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
                     !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
                     v4i32, v4i16, v8i16>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
                     !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
                     v2i64, v2i32, v4i32>;

  def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
                       !cast<PatFrag>(op # "_4s"), VPR128Lo,
                       v4i32, v8i16, v8i16, v4i16, Neon_High8H>;

  def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
                       !cast<PatFrag>(op # "_2d"), VPR128,
                       v2i64, v4i32, v4i32, v2i32, Neon_High4S>;

  def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
                       !cast<PatFrag>(op # "_4s"),
                       v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;

  def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
                       !cast<PatFrag>(op # "_2d"),
                       v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
                    !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
                    v4i32, v4i16, v4i16>;

  def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
                    !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
                    v2i64, v2i32, v2i32>;

  def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
                      !cast<PatFrag>(op # "_4s"), VPR64Lo,
                      v4i32, v8i16, v4i16, v4i16, Neon_High8H>;

  def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
                      !cast<PatFrag>(op # "_2d"), VPR64,
                      v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
}
7956
// By-element patterns for SQDMLAL/SQDMLSL (saturating doubling mul-acc).
defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
7959
7960 // End of implementation for instruction class (3V Elem)
7961
// Common class for the REV64/REV32/REV16 element-reversal instructions.
// Res is the arrangement suffix (e.g. "16b"); size/Q/U/opcode are the
// encoding fields; Neon_Rev is the target-specific reversal DAG node matched.
class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
                bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
                SDPatternOperator Neon_Rev>
  : NeonI_2VMisc<Q, U, size, opcode,
               (outs ResVPR:$Rd), (ins ResVPR:$Rn),
               asmop # "\t$Rd." # Res # ", $Rn." # Res,
               [(set (ResTy ResVPR:$Rd),
                  (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
               NoItinerary> ;
7971
// REV64: reverse elements within each 64-bit doubleword (all integer
// arrangements narrower than d).
def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
                          v16i8, Neon_rev64>;
def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
                         v8i16, Neon_rev64>;
def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
                         v4i32, Neon_rev64>;
def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
                         v8i8, Neon_rev64>;
def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
                         v4i16, Neon_rev64>;
def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
                         v2i32, Neon_rev64>;

// f32 vectors reuse the integer .4s/.2s instructions (pure lane move).
def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;
7987
// REV32: reverse elements within each 32-bit word (b and h arrangements).
def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
                          v16i8, Neon_rev32>;
def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
                          v8i16, Neon_rev32>;
def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
                         v8i8, Neon_rev32>;
def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
                         v4i16, Neon_rev32>;

// REV16: reverse bytes within each 16-bit halfword (b arrangements only).
def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
                          v16i8, Neon_rev16>;
def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
                         v8i8, Neon_rev16>;
8001
// Pairwise add-long (SADDLP/UADDLP): each pair of adjacent source elements is
// added into one destination element of twice the width, so the destination
// arrangement has half as many elements as the source.
multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
                             SDPatternOperator Neon_Padd> {
  def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.8h, $Rn.16b",
                           [(set (v8i16 VPR128:$Rd),
                              (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
                           NoItinerary>;

  def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.4h, $Rn.8b",
                          [(set (v4i16 VPR64:$Rd),
                             (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
                          NoItinerary>;

  def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.4s, $Rn.8h",
                           [(set (v4i32 VPR128:$Rd),
                              (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
                           NoItinerary>;

  def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.2s, $Rn.4h",
                          [(set (v2i32 VPR64:$Rd),
                             (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
                          NoItinerary>;

  def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.2d, $Rn.4s",
                           [(set (v2i64 VPR128:$Rd),
                              (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
                           NoItinerary>;

  def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.1d, $Rn.2s",
                          [(set (v1i64 VPR64:$Rd),
                             (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
                          NoItinerary>;
}
8046
defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
                                int_arm_neon_vpaddls>;
defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
                                int_arm_neon_vpaddlu>;

// An across-vector long add of a v2i32 has only one pair, so it is exactly
// the .2s -> .1d pairwise add-long.
def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))),
          (SADDLP2s1d $Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))),
          (UADDLP2s1d $Rn)>;
8056
// Pairwise add-long and accumulate (SADALP/UADALP): like NeonI_PairwiseAdd
// but the widened pair sums are added into the destination, so $Rd is also a
// source ($src tied to $Rd via Constraints).
multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                             SDPatternOperator Neon_Padd> {
  let Constraints = "$src = $Rd" in {
    def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "\t$Rd.8h, $Rn.16b",
                             [(set (v8i16 VPR128:$Rd),
                                (v8i16 (Neon_Padd
                                  (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
                             NoItinerary>;

    def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                            (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                            asmop # "\t$Rd.4h, $Rn.8b",
                            [(set (v4i16 VPR64:$Rd),
                               (v4i16 (Neon_Padd
                                 (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
                            NoItinerary>;

    def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "\t$Rd.4s, $Rn.8h",
                            [(set (v4i32 VPR128:$Rd),
                               (v4i32 (Neon_Padd
                                 (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
                            NoItinerary>;

    def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                            (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                            asmop # "\t$Rd.2s, $Rn.4h",
                            [(set (v2i32 VPR64:$Rd),
                               (v2i32 (Neon_Padd
                                 (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
                            NoItinerary>;

    def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "\t$Rd.2d, $Rn.4s",
                            [(set (v2i64 VPR128:$Rd),
                               (v2i64 (Neon_Padd
                                 (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
                            NoItinerary>;

    def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                            (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                            asmop # "\t$Rd.1d, $Rn.2s",
                            [(set (v1i64 VPR64:$Rd),
                               (v1i64 (Neon_Padd
                                 (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
                            NoItinerary>;
  }
}
8109
defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
                                   int_arm_neon_vpadals>;
defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
                                   int_arm_neon_vpadalu>;
8114
// One-operand 2VMisc instructions across all B/H/S/D arrangements.  The defs
// carry no selection patterns; those are added separately by
// NeonI_2VMisc_BHSD_1Arg_Pattern (not every user has a pattern, e.g. the .2d
// forms of some ops).
multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
  def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.16b, $Rn.16b",
                         [], NoItinerary>;

  def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.8h, $Rn.8h",
                        [], NoItinerary>;

  def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [], NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.2d, $Rn.2d",
                        [], NoItinerary>;

  def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.8b, $Rn.8b",
                         [], NoItinerary>;

  def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.4h, $Rn.4h",
                        [], NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [], NoItinerary>;
}
8151
defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>;
defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>;
defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>;
8156
// Selection patterns mapping a unary operator onto the instruction family
// named by Prefix (defined via NeonI_2VMisc_BHSDsize_1Arg), one per
// arrangement.
multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix,
                                          SDPatternOperator Neon_Op> {
  def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))),
            (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>;

  def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))),
            (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>;

  def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))),
            (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>;

  def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))),
            (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>;

  def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))),
            (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>;

  def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))),
            (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>;

  def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))),
            (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>;
}
8180
// NEG is not listed here: it is selected from (sub 0, x) by the explicit
// Pats below rather than from an intrinsic.
defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>;
defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>;
defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>;
8184
// Select vector negate from (sub all-zero, x).  For non-i8 element types the
// all-zero vector appears bitconverted from the byte form Neon_AllZero.
def : Pat<(v16i8 (sub
            (v16i8 Neon_AllZero),
            (v16i8 VPR128:$Rn))),
          (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (sub
            (v8i8 Neon_AllZero),
            (v8i8 VPR64:$Rn))),
          (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>;
def : Pat<(v8i16 (sub
            (v8i16 (bitconvert (v16i8 Neon_AllZero))),
            (v8i16 VPR128:$Rn))),
          (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>;
def : Pat<(v4i16 (sub
            (v4i16 (bitconvert (v8i8 Neon_AllZero))),
            (v4i16 VPR64:$Rn))),
          (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>;
def : Pat<(v4i32 (sub
            (v4i32 (bitconvert (v16i8 Neon_AllZero))),
            (v4i32 VPR128:$Rn))),
          (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>;
def : Pat<(v2i32 (sub
            (v2i32 (bitconvert (v8i8 Neon_AllZero))),
            (v2i32 VPR64:$Rn))),
          (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>;
def : Pat<(v2i64 (sub
            (v2i64 (bitconvert (v16i8 Neon_AllZero))),
            (v2i64 VPR128:$Rn))),
          (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>;
8213
// Two-operand (accumulating) 2VMisc instructions across B/H/S/D arrangements,
// with $src tied to $Rd.  Patterns are attached separately via
// NeonI_2VMisc_BHSD_2Args_Pattern.
multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
  let Constraints = "$src = $Rd" in {
    def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                           asmop # "\t$Rd.16b, $Rn.16b",
                           [], NoItinerary>;

    def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "\t$Rd.8h, $Rn.8h",
                          [], NoItinerary>;

    def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "\t$Rd.4s, $Rn.4s",
                          [], NoItinerary>;

    def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "\t$Rd.2d, $Rn.2d",
                          [], NoItinerary>;

    def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                          asmop # "\t$Rd.8b, $Rn.8b",
                          [], NoItinerary>;

    def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4h",
                          [], NoItinerary>;

    def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2s",
                          [], NoItinerary>;
  }
}
8252
defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>;
defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>;
8255
// Selection patterns mapping a binary (accumulating) operator onto the
// instruction family named by Prefix, one per arrangement.
multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix,
                                           SDPatternOperator Neon_Op> {
  def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))),
            (v16i8 (!cast<Instruction>(Prefix # 16b)
              (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>;

  def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))),
            (v8i16 (!cast<Instruction>(Prefix # 8h)
              (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>;

  def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))),
            (v4i32 (!cast<Instruction>(Prefix # 4s)
              (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>;

  def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))),
            (v2i64 (!cast<Instruction>(Prefix # 2d)
              (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>;

  def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))),
            (v8i8 (!cast<Instruction>(Prefix # 8b)
              (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>;

  def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))),
            (v4i16 (!cast<Instruction>(Prefix # 4h)
              (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>;

  def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))),
            (v2i32 (!cast<Instruction>(Prefix # 2s)
              (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>;
}
8286
defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>;
defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>;
8289
// One-operand 2VMisc instructions over B/H/S arrangements only (no .2d/.1d),
// with the selection pattern built in.  Used for CLS/CLZ.
multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
                          SDPatternOperator Neon_Op> {
  def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.16b, $Rn.16b",
                         [(set (v16i8 VPR128:$Rd),
                            (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
                         NoItinerary>;

  def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.8h, $Rn.8h",
                        [(set (v8i16 VPR128:$Rd),
                           (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
                        NoItinerary>;

  def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4i32 VPR128:$Rd),
                           (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
                        NoItinerary>;

  def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.8b, $Rn.8b",
                        [(set (v8i8 VPR64:$Rd),
                           (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
                        NoItinerary>;

  def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.4h, $Rn.4h",
                        [(set (v4i16 VPR64:$Rd),
                           (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2i32 VPR64:$Rd),
                           (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
                        NoItinerary>;
}
8334
// CLS counts leading sign bits (target intrinsic); CLZ maps straight onto
// the generic ctlz node.
defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>;
8337
// Byte-only one-operand 2VMisc instructions (.16b/.8b).  No built-in
// patterns; CNT/NOT/RBIT are selected by the explicit Pats below.
multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
                              bits<5> Opcode> {
  def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.16b, $Rn.16b",
                         [], NoItinerary>;

  def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.8b, $Rn.8b",
                        [], NoItinerary>;
}
8350
defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>;
defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>;

// "mvn" is an accepted assembler alias for vector NOT (not the canonical
// printed form, hence the trailing 0).
def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b",
                    (NOT16b VPR128:$Rd, VPR128:$Rn), 0>;
def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b",
                    (NOT8b VPR64:$Rd, VPR64:$Rn), 0>;
8359
// Population count maps onto CNT (byte arrangements only).
def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))),
          (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))),
          (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>;

// Bitwise complement: (xor x, all-ones) selects NOT.  NOT only exists in byte
// form, but it is bit-wise, so wider element types use the same instruction
// with the all-ones vector matched through a bitconvert.
def : Pat<(v16i8 (xor
            (v16i8 VPR128:$Rn),
            (v16i8 Neon_AllOne))),
          (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (xor
            (v8i8 VPR64:$Rn),
            (v8i8 Neon_AllOne))),
          (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>;
def : Pat<(v8i16 (xor
            (v8i16 VPR128:$Rn),
            (v8i16 (bitconvert (v16i8 Neon_AllOne))))),
          (NOT16b VPR128:$Rn)>;
def : Pat<(v4i16 (xor
            (v4i16 VPR64:$Rn),
            (v4i16 (bitconvert (v8i8 Neon_AllOne))))),
          (NOT8b VPR64:$Rn)>;
def : Pat<(v4i32 (xor
            (v4i32 VPR128:$Rn),
            (v4i32 (bitconvert (v16i8 Neon_AllOne))))),
          (NOT16b VPR128:$Rn)>;
def : Pat<(v2i32 (xor
            (v2i32 VPR64:$Rn),
            (v2i32 (bitconvert (v8i8 Neon_AllOne))))),
          (NOT8b VPR64:$Rn)>;
def : Pat<(v2i64 (xor
            (v2i64 VPR128:$Rn),
            (v2i64 (bitconvert (v16i8 Neon_AllOne))))),
          (NOT16b VPR128:$Rn)>;

// Bit reversal within each byte.
def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))),
          (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))),
          (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>;
8398
// One-operand floating-point 2VMisc instructions over the S/D arrangements
// (.4s/.2d/.2s), with the selection pattern built in.  Used for FABS/FNEG.
multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
                                SDPatternOperator Neon_Op> {
  def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4f32 VPR128:$Rd),
                           (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
                        NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.2d, $Rn.2d",
                        [(set (v2f64 VPR128:$Rd),
                           (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2f32 VPR64:$Rd),
                           (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
                        NoItinerary>;
}
8422
defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>;
8425
// Narrowing 2VMisc instructions (XTN/SQXTUN/SQXTN/UQXTN): a 128-bit source is
// narrowed into a 64-bit result.  The "2" variants write the high half of a
// 128-bit destination, so their $Rd is tied to $src to keep the low half.
multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
  def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.8b, $Rn.8h",
                          [], NoItinerary>;

  def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4s",
                          [], NoItinerary>;

  def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  let Constraints = "$Rd = $src" in {
    def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "2\t$Rd.16b, $Rn.8h",
                             [], NoItinerary>;

    def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.8h, $Rn.4s",
                            [], NoItinerary>;

    def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.4s, $Rn.2d",
                            [], NoItinerary>;
  }
}
8459
defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>;
defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>;
defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>;
defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>;
8464
// Selection patterns for the narrowing family named by Prefix: the plain
// forms match Neon_Op directly; the "2" forms match a concat_vectors whose
// low half is an existing 64-bit value ($src, widened into the 128-bit
// destination with SUBREG_TO_REG) and whose high half is the narrowed result.
multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix,
                                        SDPatternOperator Neon_Op> {
  def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))),
            (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>;

  def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
            (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;

  def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
            (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;

  def : Pat<(v16i8 (concat_vectors
              (v8i8 VPR64:$src),
              (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 8h16b)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;

  def : Pat<(v8i16 (concat_vectors
              (v4i16 VPR64:$src),
              (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 4s8h)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;

  def : Pat<(v4i32 (concat_vectors
              (v2i32 VPR64:$src),
              (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 2d4s)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;
}
8497
// XTN is a plain truncate; the others are saturating narrows.
defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;
8502
// SHLL/SHLL2: shift left long where the immediate must equal the source
// element width exactly (uimm_exact8/16/32), hence the custom decoder.
multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
  let DecoderMethod = "DecodeSHLLInstruction" in {
    def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR64:$Rn, uimm_exact8:$Imm),
                            asmop # "\t$Rd.8h, $Rn.8b, $Imm",
                            [], NoItinerary>;

    def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR64:$Rn, uimm_exact16:$Imm),
                            asmop # "\t$Rd.4s, $Rn.4h, $Imm",
                            [], NoItinerary>;

    def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR64:$Rn, uimm_exact32:$Imm),
                            asmop # "\t$Rd.2d, $Rn.2s, $Imm",
                            [], NoItinerary>;

    // "2" variants operate on the high half of a 128-bit source.
    def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR128:$Rn, uimm_exact8:$Imm),
                            asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
                            [], NoItinerary>;

    def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR128:$Rn, uimm_exact16:$Imm),
                            asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
                            [], NoItinerary>;

    def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR128:$Rn, uimm_exact32:$Imm),
                            asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
                            [], NoItinerary>;
  }
}
8542
defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;
8544
// Selects SHLL from (shl (ext x), (vdup imm)) on a 64-bit source; the
// immediate operand class (uimm_exact*) constrains imm to the element width.
class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
                          SDPatternOperator ExtOp, Operand Neon_Imm,
                          string suffix>
  : Pat<(DesTy (shl
          (DesTy (ExtOp (OpTy VPR64:$Rn))),
            (DesTy (Neon_vdup
              (i32 Neon_Imm:$Imm))))),
        (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;
8553
// Selects SHLL2 (upper-half form): like NeonI_SHLL_Patterns, except the
// source is the high half of a 128-bit register, matched via HighHalf.
class NeonI_SHLL_High_Patterns<ValueType SrcVT, ValueType DstVT,
                               SDPatternOperator Extend, Operand ImmOp,
                               string Suffix, PatFrag HighHalf>
  : Pat<(DstVT (shl (DstVT (Extend (SrcVT (HighHalf VPR128:$Rn)))),
                    (DstVT (Neon_vdup (i32 ImmOp:$Imm))))),
        (!cast<Instruction>("SHLL" # Suffix) VPR128:$Rn, ImmOp:$Imm)>;
8563
// SHLL selection.  A left shift by exactly the source element width makes the
// extension bits irrelevant, so the zext and sext forms of each widening
// shift both map onto the same SHLL instruction.
def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
// SHLL2 variants: the operand is the high 64 bits of a 128-bit register,
// matched through the Neon_High* extract_subvector fragments.
def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
                               Neon_High16B>;
def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
                               Neon_High16B>;
def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
                               Neon_High8H>;
def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
                               Neon_High8H>;
def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
                               Neon_High4S>;
def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
                               Neon_High4S>;
8582
// Two-register-misc narrowing conversions: 4s->4h and 2d->2s write a 64-bit
// result; the "2" (upper-half) variants write only the top half of a 128-bit
// destination and therefore tie the incoming destination value to $Rd.
multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
  def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4s",
                          [], NoItinerary>;

  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  // Upper-half forms must preserve the low 64 bits of the destination, so
  // $src is tied to $Rd.
  let Constraints = "$src = $Rd" in {
    def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.8h, $Rn.4s",
                            [], NoItinerary>;

    def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.4s, $Rn.2d",
                            [], NoItinerary>;
  }
}
8606
8607 defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;
8608
// Map the f32->f16 (by position: f32_to_f16_Op) and f64->f32 (f64_to_f32_Op)
// narrowing operations onto the instructions built by
// NeonI_2VMisc_SD_Narrow.  The concat_vectors patterns select the "2" forms:
// $src (the existing low half) is widened with SUBREG_TO_REG so it can feed
// the tied 128-bit destination operand.
multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
                                       SDPatternOperator f32_to_f16_Op,
                                       SDPatternOperator f64_to_f32_Op> {

  def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
              (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;

  def : Pat<(v8i16 (concat_vectors
                (v4i16 VPR64:$src),
                (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
                  (!cast<Instruction>(prefix # "4s8h")
                    (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
                    (v4f32 VPR128:$Rn))>;

  def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
            (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;

  def : Pat<(v4f32 (concat_vectors
              (v2f32 VPR64:$src),
              (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
                (!cast<Instruction>(prefix # "2d4s")
                  (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
                  (v2f64 VPR128:$Rn))>;
}
8633
8634 defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;
8635
// Double-to-single narrowing conversion selected through the
// int_aarch64_neon_vcvtxn intrinsic (used for FCVTXN below).  Unlike the SD
// variant there is no 4s->4h shape: only 2d->2s and the "2" upper-half form,
// which ties $src to $Rd to preserve the destination's low 64 bits.
multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
                                 bits<5> opcode> {
  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "2\t$Rd.4s, $Rn.2d",
                          [], NoItinerary> {
    let Constraints = "$src = $Rd";
  }

  def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))),
            (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;

  // concat(low, vcvtxn(x)) selects the "2" form; SUBREG_TO_REG sets up the
  // tied 128-bit operand from the existing 64-bit low half.
  def : Pat<(v4f32 (concat_vectors
              (v2f32 VPR64:$src),
              (v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "2d4s")
               (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
               VPR128:$Rn)>;
}
8660
8661 defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>;
8662
// Extracts the high two floats (elements 2-3) of a v4f32, mirroring the
// integer Neon_High* fragments used elsewhere in this file.
def Neon_High4Float : PatFrag<(ops node:$in),
                              (extract_subvector (v4f32 node:$in), (iPTR 2))>;
8665
// Widening conversions (used for FCVTL): 4h->4s and 2s->2d read a 64-bit
// source; the "2" variants read the upper half of a 128-bit source.  No tied
// operands are needed since the full 128-bit destination is written.
multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
  def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.4s, $Rn.4h",
                          [], NoItinerary>;

  def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.2d, $Rn.2s",
                          [], NoItinerary>;

  def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "2\t$Rd.4s, $Rn.8h",
                          [], NoItinerary>;

  def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "2\t$Rd.2d, $Rn.4s",
                          [], NoItinerary>;
}
8687
8688 defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;
8689
// Select the HS_Extend instructions: f16->f32 goes through the
// int_arm_neon_vcvthf2fp intrinsic (f16 lanes carried as i16), f32->f64
// through the generic fextend node.  The Neon_High8H / Neon_High4Float
// fragments route upper-half sources to the "2" forms.
multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;

  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
              (v4i16 (Neon_High8H
                (v8i16 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;

  def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;

  def : Pat<(v2f64 (fextend
              (v2f32 (Neon_High4Float
                (v4f32 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
}
8707
8708 defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;
8709
// Generic lane-wise conversion multiclass covering the three same-width
// shapes 4s, 2d and 2s.  The six ValueType parameters let the same skeleton
// express fp->int, int->fp and fp->fp conversions; Neon_Op is the DAG node
// or intrinsic selected into each instruction.
multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
                                ValueType ResTy4s, ValueType OpTy4s,
                                ValueType ResTy2d, ValueType OpTy2d,
                                ValueType ResTy2s, ValueType OpTy2s,
                                SDPatternOperator Neon_Op> {

  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (ResTy4s VPR128:$Rd),
                           (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
                        NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.2d, $Rn.2d",
                        [(set (ResTy2d VPR128:$Rd),
                           (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (ResTy2s VPR64:$Rd),
                           (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
                        NoItinerary>;
}
8737
// fp->int specialization of NeonI_2VMisc_SD_Conv: float sources, integer
// results of matching lane counts.
multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
                                  bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
                                v2f64, v2i32, v2f32, Neon_Op>;
}
8743
// fp->int conversions with explicit rounding modes.  Most are only reachable
// through their intrinsics; FCVTZS/FCVTZU (round toward zero) also select the
// generic fp_to_sint/fp_to_uint nodes, which share that rounding behavior.
defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
                                     int_arm_neon_vcvtns>;
defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
                                     int_arm_neon_vcvtnu>;
defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
                                     int_arm_neon_vcvtps>;
defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
                                     int_arm_neon_vcvtpu>;
defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
                                     int_arm_neon_vcvtms>;
defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
                                     int_arm_neon_vcvtmu>;
defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
                                     int_arm_neon_vcvtas>;
defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
                                     int_arm_neon_vcvtau>;
8762
// int->fp specialization of NeonI_2VMisc_SD_Conv: integer sources, float
// results of matching lane counts.
multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
                                  bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
                                v2i64, v2f32, v2i32, Neon_Op>;
}
8768
// Signed/unsigned integer to floating-point conversions.
defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;
8771
// fp->fp specialization of NeonI_2VMisc_SD_Conv: same float type in and out
// (rounding, reciprocal estimates, square root).
multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
                                 bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
                                v2f64, v2f32, v2f32, Neon_Op>;
}
8777
// Round-to-integral family mapped onto the generic rounding nodes:
// frinta->frnd (to nearest, ties away), frintp->fceil, frintm->ffloor,
// frintx->frint (current mode, exceptions), frintz->ftrunc,
// frinti->fnearbyint; frintn has no generic node and uses an intrinsic.
defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
                                     int_aarch64_neon_frintn>;
defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
// Reciprocal / reciprocal-sqrt estimates and full square root.
defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
                                    int_arm_neon_vrecpe>;
defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
                                     int_arm_neon_vrsqrte>;
defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
8791
// 32-bit-lane-only conversion multiclass (no 2d form): used for the
// integer reciprocal estimates URECPE/URSQRTE, which only exist for .4s/.2s.
multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
                               bits<5> opcode, SDPatternOperator Neon_Op> {
  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4i32 VPR128:$Rd),
                           (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2i32 VPR64:$Rd),
                           (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
                        NoItinerary>;
}
8808
// Unsigned integer reciprocal / reciprocal-square-root estimates.
defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
                                  int_arm_neon_vrecpe>;
defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
                                   int_arm_neon_vrsqrte>;
8813
// Crypto Class
// AES instructions operate on 16-byte state vectors.

// Two-operand AES round (aese/aesd): the state register is read and written,
// so $src is tied to $Rd.  Requires both NEON and Crypto features.
class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_AES<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                     asmop # "\t$Rd.16b, $Rn.16b",
                     [(set (v16i8 VPR128:$Rd),
                        (v16i8 (opnode (v16i8 VPR128:$src),
                                       (v16i8 VPR128:$Rn))))],
                     NoItinerary>{
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;

// One-operand AES mix-columns (aesmc/aesimc): pure function of $Rn.
// NOTE(review): unlike NeonI_Cryptoaes_2v this class sets no Predicates;
// presumably NeonI_Crypto_AES supplies them -- confirm in the base class.
class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
                      string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_AES<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$Rn),
                     asmop # "\t$Rd.16b, $Rn.16b",
                     [(set (v16i8 VPR128:$Rd),
                        (v16i8 (opnode (v16i8 VPR128:$Rn))))],
                     NoItinerary>;

def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
8842
// Two-operand SHA schedule-update helper (sha1su1/sha256su0) on 4x32-bit
// state: destructive, so $src is tied to $Rd.
class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_SHA<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                     asmop # "\t$Rd.4s, $Rn.4s",
                     [(set (v4i32 VPR128:$Rd),
                        (v4i32 (opnode (v4i32 VPR128:$src),
                                       (v4i32 VPR128:$Rn))))],
                     NoItinerary> {
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
                                 int_arm_neon_sha1su1>;
def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
                                   int_arm_neon_sha256su0>;

// Scalar 32-bit SHA helper (sha1h): single FPR32 in, single FPR32 out,
// non-destructive.
class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_SHA<size, opcode,
                     (outs FPR32:$Rd), (ins FPR32:$Rn),
                     asmop # "\t$Rd, $Rn",
                     [(set (v1i32 FPR32:$Rd),
                        (v1i32 (opnode (v1i32 FPR32:$Rn))))],
                     NoItinerary> {
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
8873
// Three-operand SHA schedule update (sha1su0/sha256su1): three .4s vector
// operands, destructive in $Rd.
class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs VPR128:$Rd),
                       (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
                       [(set (v4i32 VPR128:$Rd),
                          (v4i32 (opnode (v4i32 VPR128:$src),
                                         (v4i32 VPR128:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
                                   int_arm_neon_sha1su0>;
def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
                                     int_arm_neon_sha256su1>;

// SHA-256 hash update (sha256h/sha256h2): Rd and Rn are whole 128-bit
// scalars (FPR128, printed without a lane suffix), Rm is a .4s vector.
class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs FPR128:$Rd),
                       (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd, $Rn, $Rm.4s",
                       [(set (v4i32 FPR128:$Rd),
                          (v4i32 (opnode (v4i32 FPR128:$src),
                                         (v4i32 FPR128:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
                                   int_arm_neon_sha256h>;
def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
                                    int_arm_neon_sha256h2>;

// SHA-1 hash update (sha1c/sha1p/sha1m): 128-bit state in Rd, 32-bit scalar
// hash element in Rn (FPR32), message schedule in Rm (.4s vector).
class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs FPR128:$Rd),
                       (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd, $Rn, $Rm.4s",
                       [(set (v4i32 FPR128:$Rd),
                          (v4i32 (opnode (v4i32 FPR128:$src),
                                         (v1i32 FPR32:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;
8932
// Additional patterns to match shl to USHL.
// A left shift by a vector of (non-negative) amounts maps directly onto
// USHL, the register-operand shift instruction.
def : Pat<(v8i8 (shl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
          (USHLvvv_8B $Rn, $Rm)>;
def : Pat<(v4i16 (shl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
          (USHLvvv_4H $Rn, $Rm)>;
def : Pat<(v2i32 (shl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
          (USHLvvv_2S $Rn, $Rm)>;
// The v1i64 scalar case uses the FPR64 (ddd) form of USHL.
def : Pat<(v1i64 (shl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
          (USHLddd $Rn, $Rm)>;
def : Pat<(v16i8 (shl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
          (USHLvvv_16B $Rn, $Rm)>;
def : Pat<(v8i16 (shl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
          (USHLvvv_8H $Rn, $Rm)>;
def : Pat<(v4i32 (shl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
          (USHLvvv_4S $Rn, $Rm)>;
def : Pat<(v2i64 (shl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
          (USHLvvv_2D $Rn, $Rm)>;
8950
// Additional patterns to match sra, srl.
// For a vector right shift by vector, the shift amounts of SSHL/USHL are
// negative. Negate the vector of shift amount first.
def : Pat<(v8i8 (srl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
          (USHLvvv_8B $Rn, (NEG8b $Rm))>;
def : Pat<(v4i16 (srl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
          (USHLvvv_4H $Rn, (NEG4h $Rm))>;
def : Pat<(v2i32 (srl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
          (USHLvvv_2S $Rn, (NEG2s $Rm))>;
def : Pat<(v1i64 (srl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
          (USHLddd $Rn, (NEGdd $Rm))>;
def : Pat<(v16i8 (srl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
          (USHLvvv_16B $Rn, (NEG16b $Rm))>;
def : Pat<(v8i16 (srl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
          (USHLvvv_8H $Rn, (NEG8h $Rm))>;
def : Pat<(v4i32 (srl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
          (USHLvvv_4S $Rn, (NEG4s $Rm))>;
def : Pat<(v2i64 (srl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
          (USHLvvv_2D $Rn, (NEG2d $Rm))>;

// Arithmetic right shift: same negation trick, but with the signed shift
// instruction SSHL so the sign bits are replicated.
def : Pat<(v8i8 (sra (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
          (SSHLvvv_8B $Rn, (NEG8b $Rm))>;
def : Pat<(v4i16 (sra (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
          (SSHLvvv_4H $Rn, (NEG4h $Rm))>;
def : Pat<(v2i32 (sra (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
          (SSHLvvv_2S $Rn, (NEG2s $Rm))>;
def : Pat<(v1i64 (sra (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
          (SSHLddd $Rn, (NEGdd $Rm))>;
def : Pat<(v16i8 (sra (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
          (SSHLvvv_16B $Rn, (NEG16b $Rm))>;
def : Pat<(v8i16 (sra (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
          (SSHLvvv_8H $Rn, (NEG8h $Rm))>;
def : Pat<(v4i32 (sra (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
          (SSHLvvv_4S $Rn, (NEG4s $Rm))>;
def : Pat<(v2i64 (sra (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
          (SSHLvvv_2D $Rn, (NEG2d $Rm))>;
8987
8988 //
8989 // Patterns for handling half-precision values
8990 //
8991
// Convert between f16 value and f32 value
// The f16 travels in the low 16 bits of a GPR32; FMOVsw/FMOVws move between
// GPR and FPR, and FCVTsh/FCVThs do the half<->single conversion on the
// sub_16 sub-register.
def : Pat<(f32 (f16_to_f32 (i32 GPR32:$Rn))),
          (FCVTsh (EXTRACT_SUBREG (FMOVsw $Rn), sub_16))>;
def : Pat<(i32 (f32_to_f16 (f32 FPR32:$Rn))),
          (FMOVws (SUBREG_TO_REG (i64 0), (f16 (FCVThs $Rn)), sub_16))>;
8997
// Convert f16 value coming in as i16 value to f32
// The 'and 65535' / assertzext wrappers are the forms the DAG uses for an
// i16-carried half value; both reduce to the same FMOV+FCVT sequence.
def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))),
          (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))),
          (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;

// Fold an f32 -> f16 -> f32 round trip to the original register.
// NOTE(review): this discards the intermediate narrowing; presumably the DAG
// only forms this shape where that is acceptable -- confirm.
def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 (
            f32_to_f16 (f32 FPR32:$Rn))))))),
          (f32 FPR32:$Rn)>;
9007
// Patterns for vector extract of half-precision FP value in i16 storage type
// DUPhv_H copies the requested lane to an f16 scalar register, which is then
// widened with FCVTsh; 64-bit sources are first placed in a 128-bit register
// via SUBREG_TO_REG.
def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
            (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))),
          (FCVTsh (f16 (DUPhv_H
            (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            neon_uimm2_bare:$Imm)))>;

def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
            (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))),
          (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>;
9018
// Patterns for vector insert of half-precision FP value 0 in i16 storage type
// The matched value is fp_to_sint(f16_to_f32(0)), i.e. zero; it is
// materialized by FMOV'ing WZR into an FPR and inserting the sub_16 half
// with INSELh.  The v4i16 case is done in a 128-bit register and the low 64
// bits extracted back out.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
            (neon_uimm3_bare:$Imm))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn),
            (v8i16 (SUBREG_TO_REG (i64 0),
              (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
              sub_16)),
            neon_uimm3_bare:$Imm, 0))>;

def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
            (neon_uimm2_bare:$Imm))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0),
                (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
                sub_16)),
              neon_uimm2_bare:$Imm, 0)),
            sub_64))>;
9040
// Patterns for vector insert of half-precision FP value in i16 storage type
// The inserted i16 comes in through a GPR (masked to 16 bits); it is moved
// into an FPR and lane-inserted with INSELh, avoiding any actual f16<->f32
// conversion at runtime.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint
              (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
            (neon_uimm3_bare:$Imm))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn),
            (v8i16 (SUBREG_TO_REG (i64 0),
              (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
              sub_16)),
            neon_uimm3_bare:$Imm, 0))>;

// v4i16 destination: work in a 128-bit register, then take the low half.
def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint
              (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
            (neon_uimm2_bare:$Imm))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0),
                (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
                sub_16)),
              neon_uimm2_bare:$Imm, 0)),
            sub_64))>;

// Plain lane-to-lane copy between two v8i16 vectors: a single INSELh.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
              (neon_uimm3_bare:$Imm1))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
            neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
9070
// Patterns for vector copy of half-precision FP value in i16 storage type
// extract -> f16_to_f32 -> f32_to_... -> insert collapses to a plain INSELh
// lane copy, since the bit pattern is unchanged by the round trip.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
              (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
              65535)))))))),
            (neon_uimm3_bare:$Imm1))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
            neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
9079
// v4i16 variant of the half-precision lane copy above: work in 128-bit
// registers (SUBREG_TO_REG) and extract the low half of the INSELh result.
// Use neon_uimm2_bare for both lane indices: a v4i16 vector has only 4
// lanes, matching the sibling v4i16 patterns in this section (the previous
// neon_uimm3_bare operand admitted out-of-range lane numbers 4-7).
def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
              (vector_extract (v4i16 VPR64:$src), neon_uimm2_bare:$Imm2)),
              65535)))))))),
            (neon_uimm2_bare:$Imm1))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
              neon_uimm2_bare:$Imm1, neon_uimm2_bare:$Imm2)),
            sub_64))>;
9091
9092