[AArch64] Fix the problem that mul/add/sub of v1i8/v1i16/v1i32 types could not be selected.
[oota-llvm.git] / lib / Target / AArch64 / AArch64InstrNEON.td
1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file describes the AArch64 NEON instruction set.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
17
// Custom SelectionDAG nodes produced by AArch64 lowering for NEON operations.
// Each SDNode name must match the corresponding AArch64ISD enumerator.
18 // (outs Result), (ins Imm, OpCmode)
19 def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
20
21 def Neon_movi     : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
22
23 def Neon_mvni     : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
24
25 // (outs Result), (ins Imm)
26 def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
27                         [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
28
29 // (outs Result), (ins LHS, RHS, CondCode)
30 def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
31                  [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;
32
33 // (outs Result), (ins LHS, 0/0.0 constant, CondCode)
34 def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
35                  [SDTCisVec<0>,  SDTCisVec<1>]>>;
36
37 // (outs Result), (ins LHS, RHS)
38 def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
39                  [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;
40
// Shift profile: result and operand 1 share one vector type; operand 2 is an
// i32 shift amount.
41 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
42                                      SDTCisVT<2, i32>]>;
43 def Neon_sqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
44 def Neon_uqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
45
// Two-input permutes (UZP/ZIP/TRN): both inputs and the result share one type.
46 def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
47                                SDTCisSameAs<0, 2>]>;
48 def Neon_uzp1    : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>;
49 def Neon_uzp2    : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>;
50 def Neon_zip1    : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>;
51 def Neon_zip2    : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>;
52 def Neon_trn1    : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>;
53 def Neon_trn2    : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>;
54
// Single-input shuffles (element reversal) plus duplication/extraction nodes.
55 def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
56 def Neon_rev64    : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
57 def Neon_rev32    : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
58 def Neon_rev16    : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
59 def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
60                        [SDTCisVec<0>]>>;
61 def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
62                            [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
63 def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
64                            [SDTCisVec<0>,  SDTCisSameAs<0, 1>,
65                            SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
66
67 //===----------------------------------------------------------------------===//
68 // Addressing-mode instantiations
69 //===----------------------------------------------------------------------===//
70
// Load/store patterns for 64-bit (D-register) vector types: substitutes the
// dword scaled-offset operand and 8-byte alignment into the generic
// ls_neutral_pats, selecting the 64-bit FP load/store instructions.
71 multiclass ls_64_pats<dag address, dag Base, dag Offset, ValueType Ty> {
72 defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
73                       !foreach(decls.pattern, Offset,
74                                !subst(OFFSET, dword_uimm12, decls.pattern)),
75                       !foreach(decls.pattern, address,
76                                !subst(OFFSET, dword_uimm12,
77                                !subst(ALIGN, min_align8, decls.pattern))),
78                       Ty>;
79 }
80
// Load/store patterns for 128-bit (Q-register) vector types: same scheme as
// ls_64_pats but with the qword offset operand and 16-byte alignment,
// selecting the 128-bit FP load/store instructions.
81 multiclass ls_128_pats<dag address, dag Base, dag Offset, ValueType Ty> {
82 defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
83                        !foreach(decls.pattern, Offset,
84                                 !subst(OFFSET, qword_uimm12, decls.pattern)),
85                        !foreach(decls.pattern, address,
86                                 !subst(OFFSET, qword_uimm12,
87                                 !subst(ALIGN, min_align16, decls.pattern))),
88                       Ty>;
89 }
90
// Instantiates load/store patterns for every supported NEON vector type:
// the six 64-bit types go through ls_64_pats, the six 128-bit types through
// ls_128_pats.
91 multiclass uimm12_neon_pats<dag address, dag Base, dag Offset> {
92   defm : ls_64_pats<address, Base, Offset, v8i8>;
93   defm : ls_64_pats<address, Base, Offset, v4i16>;
94   defm : ls_64_pats<address, Base, Offset, v2i32>;
95   defm : ls_64_pats<address, Base, Offset, v1i64>;
96   defm : ls_64_pats<address, Base, Offset, v2f32>;
97   defm : ls_64_pats<address, Base, Offset, v1f64>;
98
99   defm : ls_128_pats<address, Base, Offset, v16i8>;
100   defm : ls_128_pats<address, Base, Offset, v8i16>;
101   defm : ls_128_pats<address, Base, Offset, v4i32>;
102   defm : ls_128_pats<address, Base, Offset, v2i64>;
103   defm : ls_128_pats<address, Base, Offset, v4f32>;
104   defm : ls_128_pats<address, Base, Offset, v2f64>;
105 }
106
// Constant-pool addressing: ADRP of the pool's page plus a low-12-bits offset.
107 defm : uimm12_neon_pats<(A64WrapperSmall
108                           tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
109                         (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
110
111 //===----------------------------------------------------------------------===//
112 // Multiclasses
113 //===----------------------------------------------------------------------===//
114
// Three-same-operand instruction with byte elements only: defines the _8B
// (64-bit, VPR64) and _16B (128-bit, VPR128) variants.  Separate opnode8B /
// opnode16B operators allow width-specific pattern fragments (e.g. ORN/BIC).
115 multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size,  bits<5> opcode,
116                                 string asmop, SDPatternOperator opnode8B,
117                                 SDPatternOperator opnode16B,
118                                 bit Commutable = 0> {
119   let isCommutable = Commutable in {
120     def _8B :  NeonI_3VSame<0b0, u, size, opcode,
121                (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
122                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
123                [(set (v8i8 VPR64:$Rd),
124                   (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
125                NoItinerary>;
126
127     def _16B : NeonI_3VSame<0b1, u, size, opcode,
128                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
129                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
130                [(set (v16i8 VPR128:$Rd),
131                   (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
132                NoItinerary>;
133   }
134
135 }
136
// Three-same-operand instruction with halfword and word elements: defines the
// _4H/_8H (size = 0b01) and _2S/_4S (size = 0b10) variants for 64-bit and
// 128-bit register widths respectively.
137 multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
138                                   string asmop, SDPatternOperator opnode,
139                                   bit Commutable = 0> {
140   let isCommutable = Commutable in {
141     def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
142               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
143               asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
144               [(set (v4i16 VPR64:$Rd),
145                  (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
146               NoItinerary>;
147
148     def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
149               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
150               asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
151               [(set (v8i16 VPR128:$Rd),
152                  (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
153               NoItinerary>;
154
155     def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
156               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
157               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
158               [(set (v2i32 VPR64:$Rd),
159                  (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
160               NoItinerary>;
161
162     def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
163               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
164               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
165               [(set (v4i32 VPR128:$Rd),
166                  (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
167               NoItinerary>;
168   }
169 }
// Extends the H/S multiclass with byte-element variants (_8B/_16B, size 0b00),
// giving B, H and S element sizes.  Used by ops with no D-lane form (e.g. MUL).
170 multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
171                                   string asmop, SDPatternOperator opnode,
172                                   bit Commutable = 0>
173    : NeonI_3VSame_HS_sizes<u, opcode,  asmop, opnode, Commutable> {
174   let isCommutable = Commutable in {
175     def _8B :  NeonI_3VSame<0b0, u, 0b00, opcode,
176                (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
177                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
178                [(set (v8i8 VPR64:$Rd),
179                   (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
180                NoItinerary>;
181
182     def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
183                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
184                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
185                [(set (v16i8 VPR128:$Rd),
186                   (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
187                NoItinerary>;
188   }
189 }
190
// Extends the B/H/S multiclass with the 128-bit doubleword variant (_2D,
// size 0b11), giving the full B, H, S and D element-size set.
191 multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
192                                    string asmop, SDPatternOperator opnode,
193                                    bit Commutable = 0>
194    : NeonI_3VSame_BHS_sizes<u, opcode,  asmop, opnode, Commutable> {
195   let isCommutable = Commutable in {
196     def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
197               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
198               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
199               [(set (v2i64 VPR128:$Rd),
200                  (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
201               NoItinerary>;
202   }
203 }
204
205 // Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
206 // but Result types can be integer or floating point types.
// The result type of each variant (ResTy2S/ResTy4S/ResTy2D) is a parameter so
// the same multiclass serves both FP arithmetic (fadd -> v2f32 etc.) and FP
// comparisons that produce integer masks.
207 multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
208                                  string asmop, SDPatternOperator opnode,
209                                  ValueType ResTy2S, ValueType ResTy4S,
210                                  ValueType ResTy2D, bit Commutable = 0> {
211   let isCommutable = Commutable in {
212     def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
213               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
214               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
215               [(set (ResTy2S VPR64:$Rd),
216                  (ResTy2S (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
217               NoItinerary>;
218
219     def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
220               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
221               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
222               [(set (ResTy4S VPR128:$Rd),
223                  (ResTy4S (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
224               NoItinerary>;
225
226     def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
227               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
228               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
229               [(set (ResTy2D VPR128:$Rd),
230                  (ResTy2D (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
231                NoItinerary>;
232   }
233 }
234
235 //===----------------------------------------------------------------------===//
236 // Instruction Definitions
237 //===----------------------------------------------------------------------===//
238
239 // Vector Arithmetic Instructions
240
241 // Vector Add (Integer and Floating-Point)
242
243 defm ADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
244 defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd,
245                                      v2f32, v4f32, v2f64, 1>;
246
247 // Patterns to match add of v1i8/v1i16/v1i32 types
// The scalar-in-vector types have no dedicated instruction; widen the FPR
// operand into the low lane of a 64-bit vector (SUBREG_TO_REG), run the
// vector ADD, and pull the low lane back out with EXTRACT_SUBREG.
248 def : Pat<(v1i8 (add FPR8:$Rn, FPR8:$Rm)),
249           (EXTRACT_SUBREG
250               (ADDvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
251                          (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
252               sub_8)>;
253 def : Pat<(v1i16 (add FPR16:$Rn, FPR16:$Rm)),
254           (EXTRACT_SUBREG
255               (ADDvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
256                          (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
257               sub_16)>;
258 def : Pat<(v1i32 (add FPR32:$Rn, FPR32:$Rm)),
259           (EXTRACT_SUBREG
260               (ADDvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
261                          (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
262               sub_32)>;
263
264 // Vector Sub (Integer and Floating-Point)
265
266 defm SUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
267 defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub,
268                                      v2f32, v4f32, v2f64, 0>;
269
270 // Patterns to match sub of v1i8/v1i16/v1i32 types
// Same widen/operate/narrow scheme as the v1 add patterns above: insert the
// scalar FPR operands into 64-bit vectors, subtract, extract the low lane.
271 def : Pat<(v1i8 (sub FPR8:$Rn, FPR8:$Rm)),
272           (EXTRACT_SUBREG
273               (SUBvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
274                          (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
275               sub_8)>;
276 def : Pat<(v1i16 (sub FPR16:$Rn, FPR16:$Rm)),
277           (EXTRACT_SUBREG
278               (SUBvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
279                          (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
280               sub_16)>;
281 def : Pat<(v1i32 (sub FPR32:$Rn, FPR32:$Rm)),
282           (EXTRACT_SUBREG
283               (SUBvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
284                          (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
285               sub_32)>;
286
287 // Vector Multiply (Integer and Floating-Point)
288
// Note: MUL uses the BHS multiclass (no doubleword-element variant is defined
// here, unlike ADD/SUB which use BHSD).
289 defm MULvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
290 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul,
291                                      v2f32, v4f32, v2f64, 1>;
292
293 // Patterns to match mul of v1i8/v1i16/v1i32 types
// Same widen/operate/narrow scheme as the v1 add patterns: insert the scalar
// FPR operands into the low lane of a 64-bit vector (SUBREG_TO_REG), run the
// vector MUL, and extract the low lane again (EXTRACT_SUBREG).
294 def : Pat<(v1i8 (mul FPR8:$Rn, FPR8:$Rm)),
295           (EXTRACT_SUBREG
296               (MULvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
297                          (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
298               sub_8)>;
299 def : Pat<(v1i16 (mul FPR16:$Rn, FPR16:$Rm)),
300           (EXTRACT_SUBREG
301               (MULvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
302                          (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
303               sub_16)>;
304 def : Pat<(v1i32 (mul FPR32:$Rn, FPR32:$Rm)),
305           (EXTRACT_SUBREG
306               (MULvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
307                          (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
308               sub_32)>;
309
310 // Vector Multiply (Polynomial)
311
// Polynomial multiply exists only for byte elements, so the B-only multiclass
// is used; the same ARM intrinsic serves both the 8B and 16B variants.
312 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
313                                     int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
314
315 // Vector Multiply-accumulate and Multiply-subtract (Integer)
316
317 // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
318 // two operands constraints.
// Accumulating three-same-operand instruction: $src is the accumulator input
// tied to the destination register ($src = $Rd), as required by MLA/MLS/BSL
// and the ABA family below.
319 class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
320   RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
321   bits<5> opcode, SDPatternOperator opnode>
322   : NeonI_3VSame<q, u, size, opcode,
323     (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
324     asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
325     [(set (OpTy VPRC:$Rd),
326        (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
327     NoItinerary> {
328   let Constraints = "$src = $Rd";
329 }
330
// Multiply-accumulate / multiply-subtract expressed as add/sub of a mul.
331 def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
332                        (add node:$Ra, (mul node:$Rn, node:$Rm))>;
333
334 def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
335                        (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
336
337
// Integer multiply-accumulate (u = 0) and multiply-subtract (u = 1) for every
// B/H/S element arrangement; all use the tied-accumulator class above.
338 def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
339                                              0b0, 0b0, 0b00, 0b10010, Neon_mla>;
340 def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
341                                              0b1, 0b0, 0b00, 0b10010, Neon_mla>;
342 def MLAvvv_4H:  NeonI_3VSame_Constraint_impl<"mla", ".4h",  VPR64,  v4i16,
343                                              0b0, 0b0, 0b01, 0b10010, Neon_mla>;
344 def MLAvvv_8H:  NeonI_3VSame_Constraint_impl<"mla", ".8h",  VPR128, v8i16,
345                                              0b1, 0b0, 0b01, 0b10010, Neon_mla>;
346 def MLAvvv_2S:  NeonI_3VSame_Constraint_impl<"mla", ".2s",  VPR64,  v2i32,
347                                              0b0, 0b0, 0b10, 0b10010, Neon_mla>;
348 def MLAvvv_4S:  NeonI_3VSame_Constraint_impl<"mla", ".4s",  VPR128, v4i32,
349                                              0b1, 0b0, 0b10, 0b10010, Neon_mla>;
350
351 def MLSvvv_8B:  NeonI_3VSame_Constraint_impl<"mls", ".8b",  VPR64,  v8i8,
352                                              0b0, 0b1, 0b00, 0b10010, Neon_mls>;
353 def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
354                                              0b1, 0b1, 0b00, 0b10010, Neon_mls>;
355 def MLSvvv_4H:  NeonI_3VSame_Constraint_impl<"mls", ".4h",  VPR64,  v4i16,
356                                              0b0, 0b1, 0b01, 0b10010, Neon_mls>;
357 def MLSvvv_8H:  NeonI_3VSame_Constraint_impl<"mls", ".8h",  VPR128, v8i16,
358                                              0b1, 0b1, 0b01, 0b10010, Neon_mls>;
359 def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
360                                              0b0, 0b1, 0b10, 0b10010, Neon_mls>;
361 def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
362                                              0b1, 0b1, 0b10, 0b10010, Neon_mls>;
363
364 // Vector Multiply-accumulate and Multiply-subtract (Floating Point)
365
// FP fused-accumulate fragments; fmul_su restricts the match so these only
// fire when fusing the multiply is permitted.
366 def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
367                         (fadd node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
368
369 def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
370                         (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
371
// FP multiply-accumulate/subtract instructions; only matched through the
// fadd/fsub-of-fmul fragments when fused-MAC codegen is enabled.
372 let Predicates = [HasNEON, UseFusedMAC] in {
373 def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s",  VPR64,  v2f32,
374                                              0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
375 def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s",  VPR128, v4f32,
376                                              0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
377 def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d",  VPR128, v2f64,
378                                              0b1, 0b0, 0b01, 0b11001, Neon_fmla>;
379
380 def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s",  VPR64,  v2f32,
381                                              0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
382 def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s",  VPR128, v4f32,
383                                              0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
384 def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d",  VPR128, v2f64,
385                                              0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
386 }
387
388 // We're also allowed to match the fma instruction regardless of compile
389 // options.
// Explicit fma / fma-of-fneg intrinsic patterns select FMLA/FMLS directly;
// note the accumulator ($Ra) moves to the tied first operand position.
390 def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
391           (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
392 def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
393           (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
394 def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
395           (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
396
397 def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
398           (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
399 def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
400           (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
401 def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
402           (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
403
404 // Vector Divide (Floating-Point)
405
406 defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv,
407                                      v2f32, v4f32, v2f64, 0>;
408
409 // Vector Bitwise Operations
410
411 // Vector Bitwise AND
412
// Bitwise ops are untyped at the bit level, so the byte-element multiclass
// carries them; extra-type patterns are added by Neon_bitwise2V_patterns
// further down.
413 defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;
414
415 // Vector Bitwise Exclusive OR
416
417 defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;
418
419 // Vector Bitwise OR
420
421 defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
422
423 // ORR disassembled as MOV if Vn==Vm
424
425 // Vector Move - register
426 // Alias for ORR if Vn=Vm.
427 // FIXME: This is actually the preferred syntax but TableGen can't deal with
428 // custom printing of aliases.
429 def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
430                     (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
431 def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
432                     (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
433
434 // The MOVI instruction takes two immediate operands.  The first is the
435 // immediate encoding, while the second is the cmode.  A cmode of 14, or
436 // 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC.
437 def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
438 def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;
439
// Bitwise NOT expressed as XOR with an all-ones MOVI, per register width.
440 def Neon_not8B  : PatFrag<(ops node:$in),
441                           (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>;
442 def Neon_not16B : PatFrag<(ops node:$in),
443                           (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>;
444
// ORN / BIC fragments: OR / AND with the complemented second operand.
445 def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
446                          (or node:$Rn, (Neon_not8B node:$Rm))>;
447
448 def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
449                           (or node:$Rn, (Neon_not16B node:$Rm))>;
450
451 def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
452                          (and node:$Rn, (Neon_not8B node:$Rm))>;
453
454 def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
455                           (and node:$Rn, (Neon_not16B node:$Rm))>;
456
457
458 // Vector Bitwise OR NOT - register
459
// The width-specific not-fragments force separate 8B/16B operators here,
// which is why NeonI_3VSame_B_sizes takes two opnode parameters.
460 defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
461                                    Neon_orn8B, Neon_orn16B, 0>;
462
463 // Vector Bitwise Bit Clear (AND NOT) - register
464
465 defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
466                                    Neon_bic8B, Neon_bic16B, 0>;
467
// The bitwise instructions above are defined only with byte-vector patterns;
// this multiclass adds patterns for the remaining integer vector types so the
// same instructions are selected for them too.
468 multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
469                                    SDPatternOperator opnode16B,
470                                    Instruction INST8B,
471                                    Instruction INST16B> {
472   def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
473             (INST8B VPR64:$Rn, VPR64:$Rm)>;
474   def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
475             (INST8B VPR64:$Rn, VPR64:$Rm)>;
476   def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
477             (INST8B VPR64:$Rn, VPR64:$Rm)>;
478   def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
479             (INST16B VPR128:$Rn, VPR128:$Rm)>;
480   def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
481             (INST16B VPR128:$Rn, VPR128:$Rm)>;
482   def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
483             (INST16B VPR128:$Rn, VPR128:$Rm)>;
484 }
485
486 // Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
487 defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
488 defm : Neon_bitwise2V_patterns<or,  or,  ORRvvv_8B, ORRvvv_16B>;
489 defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
490 defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
491 defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
492
493 //   Vector Bitwise Select
// BSL selects bits from $Rn/$Rm according to the mask in the tied
// accumulator register; matched from vselect.
494 def BSLvvv_8B  : NeonI_3VSame_Constraint_impl<"bsl", ".8b",  VPR64, v8i8,
495                                               0b0, 0b1, 0b01, 0b00011, vselect>;
496
497 def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
498                                               0b1, 0b1, 0b01, 0b00011, vselect>;
499
// Extra selection patterns for the three-operand bitwise-select instructions:
// (1) type-disassociated vselect patterns for every vector type, (2) the
// open-coded (or (and Rn, Rd), (and Rm, (not Rd))) form of BSL, and (3) the
// llvm.arm.neon.vbsl intrinsic.  Note some result types deliberately differ
// from the $src operand type (e.g. v2f32 result with v2i32 $src) — BSL is
// bit-level, so only register width matters.
500 multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
501                                    Instruction INST8B,
502                                    Instruction INST16B> {
503   // Disassociate type from instruction definition
504   def : Pat<(v8i8 (opnode (v8i8 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
505             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
506   def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
507             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
508   def : Pat<(v2f32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
509             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
510   def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
511             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
512   def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
513             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
514   def : Pat<(v1f64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
515             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
516   def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
517             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
518   def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
519             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
520   def : Pat<(v8i16 (opnode (v8i16 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
521             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
522   def : Pat<(v2i64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
523             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
524   def : Pat<(v2f64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
525             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
526   def : Pat<(v4f32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
527             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
528
529   // Allow to match BSL instruction pattern with non-constant operand
530   def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
531                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
532           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
533   def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
534                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
535           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
536   def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
537                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
538           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
539   def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
540                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
541           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
542   def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
543                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
544           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
545   def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
546                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
547           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
548   def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
549                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
550           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
551   def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
552                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
553           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
554
555   // Allow to match llvm.arm.* intrinsics.
556   def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
557                     (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
558             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
559   def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
560                     (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
561             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
562   def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
563                     (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
564             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
565   def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
566                     (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
567             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
568   def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
569                     (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
570             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
571   def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src),
572                     (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))),
573             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
574   def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
575                     (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
576             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
577   def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
578                     (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
579             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
580   def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
581                     (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
582             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
583   def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
584                     (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
585             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
586   def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
587                     (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
588             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
589   def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
590                     (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
591             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
592 }
593
594 // Additional patterns for bitwise instruction BSL
595 defm: Neon_bitwise3V_patterns<vselect, BSLvvv_8B, BSLvvv_16B>;
596
// A deliberately never-matching operator: BIT/BIF below need a pattern
// operator for their class instantiation but must not be selected from DAG
// patterns, so this fragment's predicate always returns false.
597 def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
598                            (vselect node:$src, node:$Rn, node:$Rm),
599                            [{ (void)N; return false; }]>;
600
601 // Vector Bitwise Insert if True
602
// BIT/BIF are assembler-only here (Neon_NoBSLop never matches), so they are
// available to the assembler/disassembler but never auto-selected.
603 def BITvvv_8B  : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64,   v8i8,
604                    0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
605 def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
606                    0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
607
608 // Vector Bitwise Insert if False
609
610 def BIFvvv_8B  : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64,  v8i8,
611                                 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
612 def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
613                                 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
614
615 // Vector Absolute Difference and Accumulate (Signed, Unsigned)
616
// ABA = accumulator plus absolute-difference intrinsic (vabdu/vabds).
617 def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
618                        (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
619 def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
620                        (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;
621
// Vector Absolute Difference and Accumulate (Unsigned)
// Accumulating form reads and writes Rd, hence the _Constraint_impl class.
def UABAvvv_8B :  NeonI_3VSame_Constraint_impl<"uaba", ".8b",  VPR64,  v8i8,
                    0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
                    0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_4H :  NeonI_3VSame_Constraint_impl<"uaba", ".4h",  VPR64,  v4i16,
                    0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_8H :  NeonI_3VSame_Constraint_impl<"uaba", ".8h",  VPR128, v8i16,
                    0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_2S :  NeonI_3VSame_Constraint_impl<"uaba", ".2s",  VPR64,  v2i32,
                    0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
def UABAvvv_4S :  NeonI_3VSame_Constraint_impl<"uaba", ".4s",  VPR128, v4i32,
                    0b1, 0b1, 0b10, 0b01111, Neon_uaba>;

// Vector Absolute Difference and Accumulate (Signed)
// Identical encodings to UABA except the u bit (second bit argument) is 0.
def SABAvvv_8B :  NeonI_3VSame_Constraint_impl<"saba", ".8b",  VPR64,  v8i8,
                    0b0, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
                    0b1, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_4H :  NeonI_3VSame_Constraint_impl<"saba", ".4h",  VPR64,  v4i16,
                    0b0, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_8H :  NeonI_3VSame_Constraint_impl<"saba", ".8h",  VPR128, v8i16,
                    0b1, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_2S :  NeonI_3VSame_Constraint_impl<"saba", ".2s",  VPR64,  v2i32,
                    0b0, 0b0, 0b10, 0b01111, Neon_saba>;
def SABAvvv_4S :  NeonI_3VSame_Constraint_impl<"saba", ".4s",  VPR128, v4i32,
                    0b1, 0b0, 0b10, 0b01111, Neon_saba>;
649
650
// Vector Absolute Difference (Signed, Unsigned)
defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;

// Vector Absolute Difference (Floating Point)
// NOTE: the FP form reuses the int_arm_neon_vabds intrinsic name with FP
// element types (v2f32/v4f32/v2f64).
defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
                                    int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Step (Floating Point)
defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
                                       int_arm_neon_vrecps,
                                       v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Square Root Step (Floating Point)
defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
                                        int_arm_neon_vrsqrts,
                                        v2f32, v4f32, v2f64, 0>;
668
// Vector Comparisons

// PatFrags that bind a fixed condition code onto the generic Neon_cmp
// node, one per integer compare instruction below.
def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
                         (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
681
// NeonI_compare_aliases class: swaps register operands to implement
// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
// Note $Rm and $Rn are exchanged in the result instruction; the trailing
// 0b0 is the InstAlias Emit bit, so the alias is parse-only (never used
// when printing).
class NeonI_compare_aliases<string asmop, string asmlane,
                            Instruction inst, RegisterOperand VPRC>
  : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
                    ", $Rm" # asmlane,
                  (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
689
// Vector Comparisons (Integer)

// Vector Compare Mask Equal (Integer)
// CMEQ is the only register-register compare marked commutable.
let isCommutable =1 in {
defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
}

// Vector Compare Mask Higher or Same (Unsigned Integer)
defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;

// Vector Compare Mask Greater Than or Equal (Integer)
defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;

// Vector Compare Mask Higher (Unsigned Integer)
defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;

// Vector Compare Mask Greater Than (Integer)
defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;

// Vector Compare Mask Bitwise Test (Integer)
defm CMTSTvvv:  NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
711
// Vector Compare Mask Less or Same (Unsigned Integer)
// CMLS is alias for CMHS with operands reversed.
def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b",  CMHSvvv_8B,  VPR64>;
def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h",  CMHSvvv_4H,  VPR64>;
def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h",  CMHSvvv_8H,  VPR128>;
def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s",  CMHSvvv_2S,  VPR64>;
def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s",  CMHSvvv_4S,  VPR128>;
def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d",  CMHSvvv_2D,  VPR128>;

// Vector Compare Mask Less Than or Equal (Integer)
// CMLE is alias for CMGE with operands reversed.
def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b",  CMGEvvv_8B,  VPR64>;
def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h",  CMGEvvv_4H,  VPR64>;
def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h",  CMGEvvv_8H,  VPR128>;
def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s",  CMGEvvv_2S,  VPR64>;
def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s",  CMGEvvv_4S,  VPR128>;
def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d",  CMGEvvv_2D,  VPR128>;

// Vector Compare Mask Lower (Unsigned Integer)
// CMLO is alias for CMHI with operands reversed.
def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b",  CMHIvvv_8B,  VPR64>;
def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h",  CMHIvvv_4H,  VPR64>;
def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h",  CMHIvvv_8H,  VPR128>;
def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s",  CMHIvvv_2S,  VPR64>;
def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s",  CMHIvvv_4S,  VPR128>;
def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d",  CMHIvvv_2D,  VPR128>;

// Vector Compare Mask Less Than (Integer)
// CMLT is alias for CMGT with operands reversed.
def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b",  CMGTvvv_8B,  VPR64>;
def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h",  CMGTvvv_4H,  VPR64>;
def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h",  CMGTvvv_8H,  VPR128>;
def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s",  CMGTvvv_2S,  VPR64>;
def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s",  CMGTvvv_4S,  VPR128>;
def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d",  CMGTvvv_2D,  VPR128>;
751
752
// Assembler operand class for the literal immediate #0.
def neon_uimm0_asmoperand : AsmOperandClass
{
  let Name = "UImm0";
  let PredicateMethod = "isUImm<0>";
  let RenderMethod = "addImmOperands";
}

// Immediate operand that only matches the constant 0; used by the
// compare-against-zero instructions below.
def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
  let ParserMatchClass = neon_uimm0_asmoperand;
  let PrintMethod = "printNeonUImm0Operand";

}
765
// Expands one compare-against-zero instruction per vector arrangement
// (8B/16B/4H/8H/2S/4S/2D).  The immediate operand is constrained to #0 by
// neon_uimm0, and the condition code CC is baked into the Neon_cmpz
// select pattern.
multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
{
  def _8B :  NeonI_2VMisc<0b0, u, 0b00, opcode,
             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.8b, $Rn.8b, $Imm",
             [(set (v8i8 VPR64:$Rd),
                (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
             NoItinerary>;

  def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.16b, $Rn.16b, $Imm",
             [(set (v16i8 VPR128:$Rd),
                (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
             NoItinerary>;

  def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4h, $Rn.4h, $Imm",
            [(set (v4i16 VPR64:$Rd),
               (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.8h, $Rn.8h, $Imm",
            [(set (v8i16 VPR128:$Rd),
               (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2s, $Rn.2s, $Imm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4s, $Rn.4s, $Imm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  // No 64-bit _1D form; only the 128-bit 2D arrangement exists here.
  def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2d, $Rn.2d, $Imm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;
}
817
// Vector Compare Mask Equal to Zero (Integer)
defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;

// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;

// Vector Compare Mask Greater Than Zero (Signed Integer)
defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;

// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;

// Vector Compare Mask Less Than Zero (Signed Integer)
defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
832
// Vector Comparisons (Floating Point)
// FP compares produce all-ones/all-zeros integer element masks, hence the
// v2i32/v4i32/v2i64 result types passed to NeonI_3VSame_SD_sizes.

// Vector Compare Mask Equal (Floating Point)
let isCommutable =1 in {
defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
                                      v2i32, v4i32, v2i64, 0>;
}

// Vector Compare Mask Greater Than Or Equal (Floating Point)
defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Greater Than (Floating Point)
defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Less Than Or Equal (Floating Point)
// FCMLE is alias for FCMGE with operands reversed.
def FCMLEvvv_2S  : NeonI_compare_aliases<"fcmle", ".2s",  FCMGEvvv_2S,  VPR64>;
def FCMLEvvv_4S  : NeonI_compare_aliases<"fcmle", ".4s",  FCMGEvvv_4S,  VPR128>;
def FCMLEvvv_2D  : NeonI_compare_aliases<"fcmle", ".2d",  FCMGEvvv_2D,  VPR128>;

// Vector Compare Mask Less Than (Floating Point)
// FCMLT is alias for FCMGT with operands reversed.
def FCMLTvvv_2S  : NeonI_compare_aliases<"fcmlt", ".2s",  FCMGTvvv_2S,  VPR64>;
def FCMLTvvv_4S  : NeonI_compare_aliases<"fcmlt", ".4s",  FCMGTvvv_4S,  VPR128>;
def FCMLTvvv_2D  : NeonI_compare_aliases<"fcmlt", ".2d",  FCMGTvvv_2D,  VPR128>;
860
// Assembler operand class accepting both #0.0 and #0 spellings.
def fpzero_izero_asmoperand : AsmOperandClass {
  let Name = "FPZeroIZero";
  let ParserMethod = "ParseFPImm0AndImm0Operand";
  let DiagnosticType = "FPZero";
}

// Operand for the floating-point zero immediate of the FP
// compare-against-zero instructions; selection goes through
// SelectFPZeroOperand on an fpimm node.
def fpzz32 : Operand<f32>,
             ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
  let ParserMatchClass = fpzero_izero_asmoperand;
  let PrintMethod = "printFPZeroOperand";
  let DecoderMethod = "DecodeFPZeroOperand";
}
873
// Expands one FP compare-against-zero instruction per FP arrangement
// (2S/4S/2D).  The size bit is combined with a fixed low bit to form the
// 2-bit size field; results are integer element masks (v2i32/v4i32/v2i64).
multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
                              string asmop, CondCode CC>
{
  def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, fpzz32:$FPImm),
            asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpzz32:$FPImm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm),
            asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))],
            NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm),
            asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))],
            NoItinerary>;
}
898
// Vector Compare Mask Equal to Zero (Floating Point)
defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;

// Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;

// Vector Compare Mask Greater Than Zero (Floating Point)
defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;

// Vector Compare Mask Less Than or Equal To Zero (Floating Point)
defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;

// Vector Compare Mask Less Than Zero (Floating Point)
defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
913
// Vector Absolute Comparisons (Floating Point)

// Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
                                      int_arm_neon_vacge,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Absolute Compare Mask Greater Than (Floating Point)
defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
                                      int_arm_neon_vacgt,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
// FACLE is alias for FACGE with operands reversed.
def FACLEvvv_2S  : NeonI_compare_aliases<"facle", ".2s",  FACGEvvv_2S,  VPR64>;
def FACLEvvv_4S  : NeonI_compare_aliases<"facle", ".4s",  FACGEvvv_4S,  VPR128>;
def FACLEvvv_2D  : NeonI_compare_aliases<"facle", ".2d",  FACGEvvv_2D,  VPR128>;

// Vector Absolute Compare Mask Less Than (Floating Point)
// FACLT is alias for FACGT with operands reversed.
def FACLTvvv_2S  : NeonI_compare_aliases<"faclt", ".2s",  FACGTvvv_2S,  VPR64>;
def FACLTvvv_4S  : NeonI_compare_aliases<"faclt", ".4s",  FACGTvvv_4S,  VPR128>;
def FACLTvvv_2D  : NeonI_compare_aliases<"faclt", ".2d",  FACGTvvv_2D,  VPR128>;
937
// Vector halving add (Integer Signed, Unsigned)
defm SHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
                                        int_arm_neon_vhadds, 1>;
defm UHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
                                        int_arm_neon_vhaddu, 1>;

// Vector halving sub (Integer Signed, Unsigned)
defm SHSUBvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
                                        int_arm_neon_vhsubs, 0>;
defm UHSUBvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
                                        int_arm_neon_vhsubu, 0>;

// Vector rounding halving add (Integer Signed, Unsigned)
defm SRHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
                                         int_arm_neon_vrhadds, 1>;
defm URHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
                                         int_arm_neon_vrhaddu, 1>;

// Vector Saturating add (Integer Signed, Unsigned)
defm SQADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
                   int_arm_neon_vqadds, 1>;
defm UQADDvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
                   int_arm_neon_vqaddu, 1>;

// Vector Saturating sub (Integer Signed, Unsigned)
// NOTE(review): the trailing argument appears to be a commutability flag
// (adds/max/min pass 1, shsub/uhsub pass 0), yet sqsub/uqsub and the
// shift-left families below also pass 1 even though those operations are
// not commutative -- verify against the NeonI_3VSame_BHSD_sizes
// definition.
defm SQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
                   int_arm_neon_vqsubs, 1>;
defm UQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
                   int_arm_neon_vqsubu, 1>;

// Vector Shift Left (Signed and Unsigned Integer)
defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
                 int_arm_neon_vshifts, 1>;
defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
                 int_arm_neon_vshiftu, 1>;

// Vector Saturating Shift Left (Signed and Unsigned Integer)
defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
                  int_arm_neon_vqshifts, 1>;
defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
                  int_arm_neon_vqshiftu, 1>;

// Vector Rounding Shift Left (Signed and Unsigned Integer)
defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
                  int_arm_neon_vrshifts, 1>;
defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
                  int_arm_neon_vrshiftu, 1>;

// Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
                   int_arm_neon_vqrshifts, 1>;
defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
                   int_arm_neon_vqrshiftu, 1>;
991
// Vector Maximum (Signed and Unsigned Integer)
defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;

// Vector Minimum (Signed and Unsigned Integer)
defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;

// Vector Maximum (Floating Point)
defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
                                     int_arm_neon_vmaxs,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Minimum (Floating Point)
defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
                                     int_arm_neon_vmins,
                                     v2f32, v4f32, v2f64, 1>;

// Vector maxNum (Floating Point) - prefer a number over a quiet NaN
defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
                                       int_aarch64_neon_vmaxnm,
                                       v2f32, v4f32, v2f64, 1>;

// Vector minNum (Floating Point) - prefer a number over a quiet NaN
defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
                                       int_aarch64_neon_vminnm,
                                       v2f32, v4f32, v2f64, 1>;
1019
// Vector Maximum Pairwise (Signed and Unsigned Integer)
defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;

// Vector Minimum Pairwise (Signed and Unsigned Integer)
defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;

// Vector Maximum Pairwise (Floating Point)
defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
                                     int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;

// Vector Minimum Pairwise (Floating Point)
defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
                                     int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;

// Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
                                       int_aarch64_neon_vpmaxnm,
                                       v2f32, v4f32, v2f64, 1>;

// Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
                                       int_aarch64_neon_vpminnm,
                                       v2f32, v4f32, v2f64, 1>;
1045
// Vector Addition Pairwise (Integer)
defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;

// Vector Addition Pairwise (Floating Point)
// Same int_arm_neon_vpadd intrinsic as ADDP, instantiated at FP types.
defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
                                       int_arm_neon_vpadd,
                                       v2f32, v4f32, v2f64, 1>;

// Vector Saturating Doubling Multiply High
defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
                    int_arm_neon_vqdmulh, 1>;

// Vector Saturating Rounding Doubling Multiply High
defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
                     int_arm_neon_vqrdmulh, 1>;

// Vector Multiply Extended (Floating Point)
defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
                                      int_aarch64_neon_vmulx,
                                      v2f32, v4f32, v2f64, 1>;
1066
// Patterns to match llvm.aarch64.* intrinsic for
// ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output
//
// A v1i32 reduction of a v2i32 vector is implemented as the pairwise op
// applied to the vector with itself, then extracting element 0 via the
// sub_32 subregister.
class Neon_VectorPair_v2i32_pattern<SDPatternOperator opnode, Instruction INST>
  : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))),
        (EXTRACT_SUBREG
             (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))),
             sub_32)>;

def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_sminv, SMINPvvv_2S>;
def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_uminv, UMINPvvv_2S>;
def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_smaxv, SMAXPvvv_2S>;
def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_umaxv, UMAXPvvv_2S>;
def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_vaddv, ADDP_2S>;
1080
// Vector Immediate Instructions

// Generates one AsmOperandClass per shift-operator flavour (LSL, MSL,
// LSLH); the render/predicate method names are derived from PREFIX.
multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
{
  def _asmoperand : AsmOperandClass
    {
      let Name = "NeonMovImmShift" # PREFIX;
      let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
      let PredicateMethod = "isNeonMovImmShift" # PREFIX;
    }
}
1092
// Definition of vector immediates shift operands

// The selectable use-cases extract the shift operation
// information from the OpCmode fields encoded in the immediate.
// Returns an empty SDValue when the cmode encodes no shift, which makes
// the containing pattern fail to match.
def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
  uint64_t OpCmode = N->getZExtValue();
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  unsigned HasShift =
    A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
  if (!HasShift) return SDValue();
  return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
}]>;
1106
// Vector immediates shift operands which accept LSL and MSL
// shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
// or 0, 8 (LSLH) or 8, 16 (MSL).
defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
// LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24
defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
1114
// Generates the i32 shift operand itself: `pred` is the ImmLeaf predicate
// on the encoded OpCmode, and the print/decode helpers are templated on
// the shift-extend kind (PREFIX) and whether it is the halfword-restricted
// form (ISHALF).
multiclass neon_mov_imm_shift_operands<string PREFIX,
                                       string HALF, string ISHALF, code pred>
{
   def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
    {
      let PrintMethod =
        "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
      let DecoderMethod =
        "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
      let ParserMatchClass =
        !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
    }
}
1128
// The three predicates below differ only in the ShiftOnesIn test: LSL
// forms require a shift-in-zeros encoding, MSL a shift-in-ones encoding.
defm neon_mov_imm_LSL  : neon_mov_imm_shift_operands<"LSL", "", "false", [{
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  unsigned HasShift =
    A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
  return (HasShift && !ShiftOnesIn);
}]>;

defm neon_mov_imm_MSL  : neon_mov_imm_shift_operands<"MSL", "", "false", [{
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  unsigned HasShift =
    A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
  return (HasShift && ShiftOnesIn);
}]>;

// Same ImmLeaf predicate as the LSL form; the 0/8 restriction comes from
// the LSLH AsmOperandClass selected via HALF = "H".
defm neon_mov_imm_LSLH  : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  unsigned HasShift =
    A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
  return (HasShift && !ShiftOnesIn);
}]>;
1152
def neon_uimm1_asmoperand : AsmOperandClass
{
  let Name = "UImm1";
  let PredicateMethod = "isUImm<1>";
  let RenderMethod = "addImmOperands";
}

def neon_uimm2_asmoperand : AsmOperandClass
{
  let Name = "UImm2";
  let PredicateMethod = "isUImm<2>";
  let RenderMethod = "addImmOperands";
}

def neon_uimm8_asmoperand : AsmOperandClass
{
  let Name = "UImm8";
  let PredicateMethod = "isUImm<8>";
  let RenderMethod = "addImmOperands";
}

// 8-bit immediate for the modified-immediate instructions; the ImmLeaf
// accepts every value (range checking is done by the asm operand class).
def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
  let ParserMatchClass = neon_uimm8_asmoperand;
  let PrintMethod = "printUImmHexOperand";
}
1178
def neon_uimm64_mask_asmoperand : AsmOperandClass
{
  let Name = "NeonUImm64Mask";
  let PredicateMethod = "isNeonUImm64Mask";
  let RenderMethod = "addNeonUImm64MaskOperands";
}

// MCOperand for 64-bit bytemask with each byte having only the
// value 0x00 and 0xff is encoded as an unsigned 8-bit value
// (one mask bit per byte); the ImmLeaf accepts every value, validation
// happens in the asm operand class.
def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
  let ParserMatchClass = neon_uimm64_mask_asmoperand;
  let PrintMethod = "printNeonUImm64MaskOperand";
}
1192
// Move-immediate instructions with an LSL-shifted 8-bit immediate
// (MOVI/MVNI family).  Word forms take a 2-bit shift selector (0/8/16/24),
// halfword forms a single bit (0/8); the selector is spliced into cmode.
multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
                                   SDPatternOperator opnode>
{
    // shift zeros, per word
    def _2S  : NeonI_1VModImm<0b0, op,
                              (outs VPR64:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_LSL_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
                              [(set (v2i32 VPR64:$Rd),
                                 (v2i32 (opnode (timm:$Imm),
                                   (neon_mov_imm_LSL_operand:$Simm))))],
                              NoItinerary> {
       bits<2> Simm;
       let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
     }

    def _4S  : NeonI_1VModImm<0b1, op,
                              (outs VPR128:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_LSL_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
                              [(set (v4i32 VPR128:$Rd),
                                 (v4i32 (opnode (timm:$Imm),
                                   (neon_mov_imm_LSL_operand:$Simm))))],
                              NoItinerary> {
      bits<2> Simm;
      let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
    }

    // shift zeros, per halfword
    def _4H  : NeonI_1VModImm<0b0, op,
                              (outs VPR64:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_LSLH_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
                              [(set (v4i16 VPR64:$Rd),
                                 (v4i16 (opnode (timm:$Imm),
                                   (neon_mov_imm_LSLH_operand:$Simm))))],
                              NoItinerary> {
      bit  Simm;
      let cmode = {0b1, 0b0, Simm, 0b0};
    }

    def _8H  : NeonI_1VModImm<0b1, op,
                              (outs VPR128:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_LSLH_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
                              [(set (v8i16 VPR128:$Rd),
                                 (v8i16 (opnode (timm:$Imm),
                                   (neon_mov_imm_LSLH_operand:$Simm))))],
                              NoItinerary> {
      bit Simm;
      let cmode = {0b1, 0b0, Simm, 0b0};
     }
}
1250
// Accumulating move-immediate forms (BIC/ORR-immediate style): the
// destination is tied to $src via the Constraints below, and the pattern
// matches `opnode(src, neonopnode(Imm, Simm))`.
//
// NOTE(review): in the _4H and _8H variants the instruction operand is
// neon_mov_imm_LSLH_operand (shift 0 or 8 only) but the select pattern
// uses neon_mov_imm_LSL_operand (shift 0/8/16/24), unlike
// NeonI_mov_imm_lsl_sizes above which uses the LSLH operand in both
// places.  Verify this is intentional: a 16/24 shift cannot be encoded in
// the single Simm bit here.
multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
                                                   SDPatternOperator opnode,
                                                   SDPatternOperator neonopnode>
{
  let Constraints = "$src = $Rd" in {
    // shift zeros, per word
    def _2S  : NeonI_1VModImm<0b0, op,
                 (outs VPR64:$Rd),
                 (ins VPR64:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSL_operand:$Simm),
                 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
                 [(set (v2i32 VPR64:$Rd),
                    (v2i32 (opnode (v2i32 VPR64:$src),
                      (v2i32 (neonopnode timm:$Imm,
                        neon_mov_imm_LSL_operand:$Simm)))))],
                 NoItinerary> {
      bits<2> Simm;
      let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
    }

    def _4S  : NeonI_1VModImm<0b1, op,
                 (outs VPR128:$Rd),
                 (ins VPR128:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSL_operand:$Simm),
                 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
                 [(set (v4i32 VPR128:$Rd),
                    (v4i32 (opnode (v4i32 VPR128:$src),
                      (v4i32 (neonopnode timm:$Imm,
                        neon_mov_imm_LSL_operand:$Simm)))))],
                 NoItinerary> {
      bits<2> Simm;
      let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
    }

    // shift zeros, per halfword
    def _4H  : NeonI_1VModImm<0b0, op,
                 (outs VPR64:$Rd),
                 (ins VPR64:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSLH_operand:$Simm),
                 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
                 [(set (v4i16 VPR64:$Rd),
                    (v4i16 (opnode (v4i16 VPR64:$src),
                       (v4i16 (neonopnode timm:$Imm,
                          neon_mov_imm_LSL_operand:$Simm)))))],
                 NoItinerary> {
      bit  Simm;
      let cmode = {0b1, 0b0, Simm, 0b1};
    }

    def _8H  : NeonI_1VModImm<0b1, op,
                 (outs VPR128:$Rd),
                 (ins VPR128:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSLH_operand:$Simm),
                 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
                 [(set (v8i16 VPR128:$Rd),
                    (v8i16 (opnode (v8i16 VPR128:$src),
                      (v8i16 (neonopnode timm:$Imm,
                        neon_mov_imm_LSL_operand:$Simm)))))],
                 NoItinerary> {
      bit Simm;
      let cmode = {0b1, 0b0, Simm, 0b1};
    }
  }
}
1315
// Vector move modified immediate with MSL ("shift ones") shift: the shifted-in
// bits are ones rather than zeros.  MSL only exists for 32-bit lanes, so only
// 2S and 4S variants are defined; Simm selects MSL #8 vs MSL #16 via cmode.
multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
                                   SDPatternOperator opnode>
{
    // shift ones, per word
    def _2S  : NeonI_1VModImm<0b0, op,
                             (outs VPR64:$Rd),
                             (ins neon_uimm8:$Imm,
                               neon_mov_imm_MSL_operand:$Simm),
                             !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
                              [(set (v2i32 VPR64:$Rd),
                                 (v2i32 (opnode (timm:$Imm),
                                   (neon_mov_imm_MSL_operand:$Simm))))],
                             NoItinerary> {
       bit Simm;
       let cmode = {0b1, 0b1, 0b0, Simm};
     }

   def _4S  : NeonI_1VModImm<0b1, op,
                              (outs VPR128:$Rd),
                              (ins neon_uimm8:$Imm,
                                neon_mov_imm_MSL_operand:$Simm),
                              !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
                              [(set (v4i32 VPR128:$Rd),
                                 (v4i32 (opnode (timm:$Imm),
                                   (neon_mov_imm_MSL_operand:$Simm))))],
                              NoItinerary> {
     bit Simm;
     let cmode = {0b1, 0b1, 0b0, Simm};
   }
}
1346
// Vector Move Immediate Shifted
// Rematerializable: the result depends only on the encoded immediate.
let isReMaterializable = 1 in {
defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
}

// Vector Move Inverted Immediate Shifted
let isReMaterializable = 1 in {
defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
}

// Vector Bitwise Bit Clear (AND NOT) - immediate
// BIC clears the bits of the inverted-immediate mask, hence (and x, Neon_mvni).
let isReMaterializable = 1 in {
defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
                                                         and, Neon_mvni>;
}

// Vector Bitwise OR - immediate

let isReMaterializable = 1 in {
defm ORRvi_lsl   : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
                                                           or, Neon_movi>;
}
1369
// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// LowerBUILD_VECTOR favors lowering MOVI over MVNI.
// BIC immediate instructions selection requires additional patterns to
// transform Neon_movi operands into BIC immediate operands

// SDNodeXForm: given a MOVI OpCmode immediate, flip its encoded LSLH shift
// (0 <-> 8) to produce the complementary BIC shift operand.
def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
  uint64_t OpCmode = N->getZExtValue();
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
  // LSLH restricts shift amount to  0, 8 which are encoded as 0 and 1
  // Transform encoded shift amount 0 to 1 and 1 to 0.
  return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
}]>;

// Immediate leaf that only matches shift-zeros (LSL-style) encodings, and
// applies the XFORM above when the pattern is selected.
def neon_mov_imm_LSLH_transform_operand
  : ImmLeaf<i32, [{
    unsigned ShiftImm;
    unsigned ShiftOnesIn;
    unsigned HasShift =
      A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
    return (HasShift && !ShiftOnesIn); }],
  neon_mov_imm_LSLH_transform_XFORM>;
1393
// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0xff, LSL 8)
// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0xff)
def : Pat<(v4i16 (and VPR64:$src,
            (v4i16 (Neon_movi 255,
              neon_mov_imm_LSLH_transform_operand:$Simm)))),
          (BICvi_lsl_4H VPR64:$src, 255,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;

// Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0xff, LSL 8)
// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0xff)
def : Pat<(v8i16 (and VPR128:$src,
            (v8i16 (Neon_movi 255,
              neon_mov_imm_LSLH_transform_operand:$Simm)))),
          (BICvi_lsl_8H VPR128:$src, 255,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;

// Same transforms when the AND sees the movi result through a bitconvert,
// i.e. when the AND is performed on one of the other 64-bit element types.
def : Pat<(v8i8 (and VPR64:$src,
                  (bitconvert(v4i16 (Neon_movi 255,
                    neon_mov_imm_LSLH_transform_operand:$Simm))))),
          (BICvi_lsl_4H VPR64:$src, 255,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;
def : Pat<(v2i32 (and VPR64:$src,
                 (bitconvert(v4i16 (Neon_movi 255,
                   neon_mov_imm_LSLH_transform_operand:$Simm))))),
          (BICvi_lsl_4H VPR64:$src, 255,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;
def : Pat<(v1i64 (and VPR64:$src,
                (bitconvert(v4i16 (Neon_movi 255,
                  neon_mov_imm_LSLH_transform_operand:$Simm))))),
        (BICvi_lsl_4H VPR64:$src, 255,
          neon_mov_imm_LSLH_transform_operand:$Simm)>;

// ... and for the 128-bit element types.
def : Pat<(v16i8 (and VPR128:$src,
                 (bitconvert(v8i16 (Neon_movi 255,
                   neon_mov_imm_LSLH_transform_operand:$Simm))))),
        (BICvi_lsl_8H VPR128:$src, 255,
          neon_mov_imm_LSLH_transform_operand:$Simm)>;
def : Pat<(v4i32 (and VPR128:$src,
                 (bitconvert(v8i16 (Neon_movi 255,
                   neon_mov_imm_LSLH_transform_operand:$Simm))))),
        (BICvi_lsl_8H VPR128:$src, 255,
          neon_mov_imm_LSLH_transform_operand:$Simm)>;
def : Pat<(v2i64 (and VPR128:$src,
                 (bitconvert(v8i16 (Neon_movi 255,
                   neon_mov_imm_LSLH_transform_operand:$Simm))))),
        (BICvi_lsl_8H VPR128:$src, 255,
          neon_mov_imm_LSLH_transform_operand:$Simm)>;
1441
// Patterns to select BIC/ORR (immediate) when the bitwise op is performed on
// a vector type other than the one the instruction nominally operates on:
// the immediate vector (neonopnode) is seen through a bitconvert.
multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
                                   SDPatternOperator neonopnode,
                                   Instruction INST4H,
                                   Instruction INST8H,
                                   Instruction INST2S,
                                   Instruction INST4S> {
  def : Pat<(v8i8 (opnode VPR64:$src,
                    (bitconvert(v4i16 (neonopnode timm:$Imm,
                      neon_mov_imm_LSLH_operand:$Simm))))),
            (INST4H VPR64:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v2i32 (opnode VPR64:$src,
                   (bitconvert(v4i16 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
            (INST4H VPR64:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v1i64 (opnode VPR64:$src,
                  (bitconvert(v4i16 (neonopnode timm:$Imm,
                    neon_mov_imm_LSLH_operand:$Simm))))),
          (INST4H VPR64:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;

  def : Pat<(v16i8 (opnode VPR128:$src,
                   (bitconvert(v8i16 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST8H VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v4i32 (opnode VPR128:$src,
                   (bitconvert(v8i16 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST8H VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v2i64 (opnode VPR128:$src,
                   (bitconvert(v8i16 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST8H VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;

  // NOTE(review): the 2S/4S instructions take an LSL operand (shift 0/8/16/24)
  // but these patterns match only the LSLH subset (shift 0/8) — presumably a
  // conservative restriction; confirm against the per-word operand definition.
  def : Pat<(v8i8 (opnode VPR64:$src,
                    (bitconvert(v2i32 (neonopnode timm:$Imm,
                      neon_mov_imm_LSLH_operand:$Simm))))),
            (INST2S VPR64:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v4i16 (opnode VPR64:$src,
                   (bitconvert(v2i32 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
            (INST2S VPR64:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v1i64 (opnode VPR64:$src,
                  (bitconvert(v2i32 (neonopnode timm:$Imm,
                    neon_mov_imm_LSLH_operand:$Simm))))),
          (INST2S VPR64:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;

  def : Pat<(v16i8 (opnode VPR128:$src,
                   (bitconvert(v4i32 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST4S VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v8i16 (opnode VPR128:$src,
                   (bitconvert(v4i32 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST4S VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v2i64 (opnode VPR128:$src,
                   (bitconvert(v4i32 (neonopnode timm:$Imm,
                     neon_mov_imm_LSLH_operand:$Simm))))),
          (INST4S VPR128:$src, neon_uimm8:$Imm,
            neon_mov_imm_LSLH_operand:$Simm)>;
}
1512
// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H,
                               BICvi_lsl_2S, BICvi_lsl_4S>;

// Additional patterns for Vector Bitwise OR - immediate
defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H,
                               ORRvi_lsl_2S, ORRvi_lsl_4S>;


// Vector Move Immediate Masked (MSL shift: shifted-in bits are ones)
let isReMaterializable = 1 in {
defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
}

// Vector Move Inverted Immediate Masked
let isReMaterializable = 1 in {
defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
}
1531
// Parse-only alias (EmitAlias = 0b0): accepts the no-shift form
// "<asmop> Vd.<lane>, #Imm" and maps it to the LSL variant with shift 0.
class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
                                Instruction inst, RegisterOperand VPRC>
  : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
                        (inst VPRC:$Rd, neon_uimm8:$Imm,  0), 0b0>;
1536
// Aliases for Vector Move Immediate Shifted
def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;

// Aliases for Vector Move Inverted Immediate Shifted
def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;

// Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;

// Aliases for Vector Bitwise OR - immediate
def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1560
//  Vector Move Immediate - per byte
// cmode 0b1110 with op=0: replicate the 8-bit immediate into every byte lane.
let isReMaterializable = 1 in {
def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
                               (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
                               "movi\t$Rd.8b, $Imm",
                               [(set (v8i8 VPR64:$Rd),
                                  (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
                                NoItinerary> {
  let cmode = 0b1110;
}

def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
                                (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
                                "movi\t$Rd.16b, $Imm",
                                [(set (v16i8 VPR128:$Rd),
                                   (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
                                 NoItinerary> {
  let cmode = 0b1110;
}
}
1581
// Vector Move Immediate - bytemask, per double word
// cmode 0b1110 with op=1: each bit of the 8-bit immediate expands to a full
// byte (0x00 or 0xff) of the 64-bit result.
// NOTE(review): the asm strings below have a stray space after '\t'
// ("movi\t $Rd...") unlike the other MOVI forms — cosmetic only; confirm
// before normalizing, since asm tests may match the printed form.
let isReMaterializable = 1 in {
def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
                               (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
                               "movi\t $Rd.2d, $Imm",
                               [(set (v2i64 VPR128:$Rd),
                                  (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
                               NoItinerary> {
  let cmode = 0b1110;
}
}

// Vector Move Immediate - bytemask, one doubleword

let isReMaterializable = 1 in {
def MOVIdi : NeonI_1VModImm<0b0, 0b1,
                           (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
                           "movi\t $Rd, $Imm",
                           [(set (v1i64 FPR64:$Rd),
                             (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))],
                           NoItinerary> {
  let cmode = 0b1110;
}
}
1606
// Vector Floating Point Move Immediate
// cmode 0b1111: replicate an 8-bit encoded FP constant into each lane.

class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
                      Operand immOpType, bit q, bit op>
  : NeonI_1VModImm<q, op,
                   (outs VPRC:$Rd), (ins immOpType:$Imm),
                   "fmov\t$Rd" # asmlane # ", $Imm",
                   [(set (OpTy VPRC:$Rd),
                      (OpTy (Neon_fmovi (timm:$Imm))))],
                   NoItinerary> {
     let cmode = 0b1111;
   }

let isReMaterializable = 1 in {
def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64,  v2f32, fmov32_operand, 0b0, 0b0>;
def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
}
1625
1626 // Vector Shift (Immediate)
1627
1628 // Shift Right/Left Immediate - The immh:immb field of these shifts are encoded
1629 // as follows:
1630 //
1631 //    Offset    Encoding
1632 //     8        immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1633 //     16       immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1634 //     32       immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1635 //     64       immh:immb<6>   = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1636 //
1637 // The shift right immediate amount, in the range 1 to element bits, is computed
1638 // as Offset - UInt(immh:immb).  The shift left immediate amount, in the range 0
1639 // to element bits - 1, is computed as UInt(immh:immb) - Offset.
1640
// AsmOperandClass for right-shift immediates of a given element width.
class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
  let Name = "ShrImm" # OFFSET;
  let RenderMethod = "addImmOperands";
  let DiagnosticType = "ShrImm" # OFFSET;
}

// Operand wrapper wiring encoder/decoder/parser methods by element width.
class shr_imm<string OFFSET> : Operand<i32> {
  let EncoderMethod = "getShiftRightImm" # OFFSET;
  let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
  let ParserMatchClass =
    !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
}

def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;

// Right-shift amounts range over 1..element-bits (inclusive).
def shr_imm8 : shr_imm<"8">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 8;}]>;
def shr_imm16 : shr_imm<"16">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 16;}]>;
def shr_imm32 : shr_imm<"32">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 32;}]>;
def shr_imm64 : shr_imm<"64">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 64;}]>;
1663
// AsmOperandClass for left-shift immediates of a given element width.
class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
  let Name = "ShlImm" # OFFSET;
  let RenderMethod = "addImmOperands";
  let DiagnosticType = "ShlImm" # OFFSET;
}

// Operand wrapper wiring encoder/decoder/parser methods by element width.
class shl_imm<string OFFSET> : Operand<i32> {
  let EncoderMethod = "getShiftLeftImm" # OFFSET;
  let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
  let ParserMatchClass =
    !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
}

def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;

// Left-shift amounts range over 0..element-bits-1 (inclusive).
def shl_imm8 : shl_imm<"8">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]>;
def shl_imm16 : shl_imm<"16">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]>;
def shl_imm32 : shl_imm<"32">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]>;
def shl_imm64 : shl_imm<"64">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 64;}]>;
1686
// Two-operand vector shift by immediate.  The DAG pattern matches a shift
// whose amount operand is a splat (Neon_vdup) of the immediate, which is how
// vector-by-scalar shifts appear after legalization.
class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
               RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                     [(set (Ty VPRC:$Rd),
                        (Ty (OpNode (Ty VPRC:$Rn),
                          (Ty (Neon_vdup (i32 ImmTy:$Imm))))))],
                     NoItinerary>;
1696
// All lane arrangements of a vector shift-left by immediate.  The Inst bit
// assignments fix the leading immh bits per the element-size encoding table
// above; the shl_imm* operand supplies the low immh:immb bits.
multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
  // 64-bit vector types.
  def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // 128-bit vector types.
  def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
1728
// All lane arrangements of a vector shift-right by immediate; OpNode is sra
// (signed) or srl (unsigned).  Note there is no _1D form: 64-bit scalar
// shifts use the 2D/scalar encodings elsewhere.
multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
  def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                     OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                     OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                     OpNode> {
     let Inst{22-21} = 0b01;   // immh:immb = 01xxxxx
  }

  def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                      OpNode> {
                      let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
                    }

  def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                     OpNode> {
                     let Inst{22-20} = 0b001;    // immh:immb = 001xxxx
                    }

  def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                     OpNode> {
                      let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
                    }

  def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                     OpNode> {
                      let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
                    }
}
1765
// Shift left

defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;

// Additional patterns to match vector shift left by immediate.
// (v1i8/v1i16/v1i32 types)
// A v1 scalar is widened into the low lane of a 64-bit vector register
// (SUBREG_TO_REG), shifted with the vector instruction, and the scalar
// sub-register extracted back out; the other lanes are don't-care.
def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn),
                     (v1i8 (Neon_vdup (i32 (shl_imm8:$Imm)))))),
          (EXTRACT_SUBREG
              (SHLvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
                          shl_imm8:$Imm),
              sub_8)>;
def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn),
                      (v1i16 (Neon_vdup (i32 (shl_imm16:$Imm)))))),
          (EXTRACT_SUBREG
              (SHLvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
                          shl_imm16:$Imm),
              sub_16)>;
def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn),
                      (v1i32 (Neon_vdup (i32 (shl_imm32:$Imm)))))),
          (EXTRACT_SUBREG
              (SHLvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
                          shl_imm32:$Imm),
              sub_32)>;
1790
// Shift right
defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;

// Additional patterns to match vector shift right by immediate.
// (v1i8/v1i16/v1i32 types)
// Same widen-shift-extract technique as the v1 shift-left patterns above.
// NOTE: the high lanes of the widened register are undefined, but a
// right-shift result in the low lane depends only on the low lane's input.
def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn),
                     (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))),
          (EXTRACT_SUBREG
              (SSHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
                          shr_imm8:$Imm),
              sub_8)>;
def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn),
                      (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))),
          (EXTRACT_SUBREG
              (SSHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
                          shr_imm16:$Imm),
              sub_16)>;
def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn),
                      (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))),
          (EXTRACT_SUBREG
              (SSHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
                          shr_imm32:$Imm),
              sub_32)>;
def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn),
                     (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))),
          (EXTRACT_SUBREG
              (USHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
                          shr_imm8:$Imm),
              sub_8)>;
def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn),
                      (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))),
          (EXTRACT_SUBREG
              (USHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
                          shr_imm16:$Imm),
              sub_16)>;
def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn),
                      (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))),
          (EXTRACT_SUBREG
              (USHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
                          shr_imm32:$Imm),
              sub_32)>;
1833
// PatFrags extracting the high half of a 128-bit vector (extract_subvector
// starting at the midpoint index).
def Neon_High16B : PatFrag<(ops node:$in),
                           (extract_subvector (v16i8 node:$in), (iPTR 8))>;
def Neon_High8H  : PatFrag<(ops node:$in),
                           (extract_subvector (v8i16 node:$in), (iPTR 4))>;
def Neon_High4S  : PatFrag<(ops node:$in),
                           (extract_subvector (v4i32 node:$in), (iPTR 2))>;
def Neon_High2D  : PatFrag<(ops node:$in),
                           (extract_subvector (v2i64 node:$in), (iPTR 1))>;
def Neon_High4float : PatFrag<(ops node:$in),
                               (extract_subvector (v4f32 node:$in), (iPTR 2))>;
def Neon_High2double : PatFrag<(ops node:$in),
                               (extract_subvector (v2f64 node:$in), (iPTR 1))>;

// PatFrags extracting the low half (index 0) of a 128-bit vector.
def Neon_Low16B : PatFrag<(ops node:$in),
                          (v8i8 (extract_subvector (v16i8 node:$in),
                                                   (iPTR 0)))>;
def Neon_Low8H : PatFrag<(ops node:$in),
                         (v4i16 (extract_subvector (v8i16 node:$in),
                                                   (iPTR 0)))>;
def Neon_Low4S : PatFrag<(ops node:$in),
                         (v2i32 (extract_subvector (v4i32 node:$in),
                                                   (iPTR 0)))>;
def Neon_Low2D : PatFrag<(ops node:$in),
                         (v1i64 (extract_subvector (v2i64 node:$in),
                                                   (iPTR 0)))>;
def Neon_Low4float : PatFrag<(ops node:$in),
                             (v2f32 (extract_subvector (v4f32 node:$in),
                                                       (iPTR 0)))>;
def Neon_Low2double : PatFrag<(ops node:$in),
                              (v1f64 (extract_subvector (v2f64 node:$in),
                                                        (iPTR 0)))>;
1865
// Widening shift-left (SSHLL/USHLL): extend the 64-bit source (ExtOp is
// sext/zext), then shift the widened result left by the immediate.
class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                   string SrcT, ValueType DestTy, ValueType SrcTy,
                   Operand ImmTy, SDPatternOperator ExtOp>
  : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
                     (ins VPR64:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                     [(set (DestTy VPR128:$Rd),
                        (DestTy (shl
                          (DestTy (ExtOp (SrcTy VPR64:$Rn))),
                            (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
                     NoItinerary>;

// "2" variant (SSHLL2/USHLL2): same operation applied to the high half of a
// 128-bit source, selected via the getTop PatFrag.
class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                       string SrcT, ValueType DestTy, ValueType SrcTy,
                       int StartIndex, Operand ImmTy,
                       SDPatternOperator ExtOp, PatFrag getTop>
  : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
                     (ins VPR128:$Rn, ImmTy:$Imm),
                     asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                     [(set (DestTy VPR128:$Rd),
                        (DestTy (shl
                          (DestTy (ExtOp
                            (SrcTy (getTop VPR128:$Rn)))),
                              (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
                     NoItinerary>;
1891
// All lane arrangements of widening shift-left long, plus patterns selecting
// the #0-shift forms directly from plain sext/zext nodes.
multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
                         SDNode ExtOp> {
  // 64-bit vector types.
  def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
                         shl_imm8, ExtOp> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
                         shl_imm16, ExtOp> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
                         shl_imm32, ExtOp> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // 128-bit vector types
  def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
                              8, shl_imm8, ExtOp, Neon_High16B> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
                             4, shl_imm16, ExtOp, Neon_High8H> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
                             2, shl_imm32, ExtOp, Neon_High4S> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // Use other patterns to match when the immediate is 0.
  // (a bare extend is the shift-by-zero case of the instructions above)
  def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;

  def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;

  def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;

  def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
            (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;

  def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
            (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;

  def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
            (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
}
1945
// Shift left long
defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;

// Parse-only alias mapping the lengthen mnemonics (SXTL/UXTL) onto the
// corresponding shift-left-long instruction with a zero immediate.
class NeonI_ext_len_alias<string asmop, string lane, string laneOp,
                       Instruction inst, RegisterOperand VPRC,
                       RegisterOperand VPRCOp>
  : NeonInstAlias<asmop # "\t$Rd" # lane #", $Rn" # laneOp,
                  (inst VPRC:$Rd, VPRCOp:$Rn, 0), 0b0>;
1955
// Signed integer lengthen (vector) is alias for SSHLL Vd, Vn, #0
// Signed integer lengthen (vector, second part) is alias for SSHLL2 Vd, Vn, #0
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
def SXTLvv_8B  : NeonI_ext_len_alias<"sxtl", ".8h", ".8b",  SSHLLvvi_8B, VPR128, VPR64>;
def SXTLvv_4H  : NeonI_ext_len_alias<"sxtl", ".4s", ".4h",  SSHLLvvi_4H, VPR128, VPR64>;
def SXTLvv_2S  : NeonI_ext_len_alias<"sxtl", ".2d", ".2s",  SSHLLvvi_2S, VPR128, VPR64>;
// The "2" (second-part) forms read the high half of a 128-bit source.
def SXTL2vv_16B : NeonI_ext_len_alias<"sxtl2", ".8h", ".16b",  SSHLLvvi_16B, VPR128, VPR128>;
def SXTL2vv_8H  : NeonI_ext_len_alias<"sxtl2", ".4s", ".8h",  SSHLLvvi_8H, VPR128, VPR128>;
def SXTL2vv_4S  : NeonI_ext_len_alias<"sxtl2", ".2d", ".4s",  SSHLLvvi_4S, VPR128, VPR128>;

// Unsigned integer lengthen (vector) is alias for USHLL Vd, Vn, #0
// Unsigned integer lengthen (vector, second part) is alias for USHLL2 Vd, Vn, #0
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
def UXTLvv_8B  : NeonI_ext_len_alias<"uxtl", ".8h", ".8b",  USHLLvvi_8B, VPR128, VPR64>;
def UXTLvv_4H  : NeonI_ext_len_alias<"uxtl", ".4s", ".4h",  USHLLvvi_4H, VPR128, VPR64>;
def UXTLvv_2S  : NeonI_ext_len_alias<"uxtl", ".2d", ".2s",  USHLLvvi_2S, VPR128, VPR64>;
def UXTL2vv_16B : NeonI_ext_len_alias<"uxtl2", ".8h", ".16b",  USHLLvvi_16B, VPR128, VPR128>;
def UXTL2vv_8H  : NeonI_ext_len_alias<"uxtl2", ".4s", ".8h",  USHLLvvi_8H, VPR128, VPR128>;
def UXTL2vv_4S  : NeonI_ext_len_alias<"uxtl2", ".2d", ".4s",  USHLLvvi_4S, VPR128, VPR128>;
1977
// anyext leaves the high bits unspecified, so it can be selected as a
// zero-extension: USHLL Vd, Vn, #0.
def : Pat<(v8i16 (anyext (v8i8 VPR64:$Rn))), (USHLLvvi_8B VPR64:$Rn, 0)>;
def : Pat<(v4i32 (anyext (v4i16 VPR64:$Rn))), (USHLLvvi_4H VPR64:$Rn, 0)>;
def : Pat<(v2i64 (anyext (v2i32 VPR64:$Rn))), (USHLLvvi_2S VPR64:$Rn, 0)>;
1981
// Rounding/Saturating shift
// Vector shift by immediate whose semantics are given by an arbitrary
// operator (typically an intrinsic), rather than a plain shl/srl/sra node:
// matches OpNode(Rn, Imm) directly.
class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                  SDPatternOperator OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                     [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
                        (i32 ImmTy:$Imm))))],
                     NoItinerary>;
1992
// shift right (vector by immediate)
// One instruction per element type; the immh prefix bits encode the element
// size (immh:immb together hold the shift amount).
multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
                           SDPatternOperator OpNode> {
  // 64-bit vector types.
  def _8B  : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                         OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H  : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                         OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S  : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                         OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // 128-bit vector types.
  def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                         OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                        OpNode> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
2031
// shift left (vector by immediate), with operator/intrinsic semantics.
// Same per-element-size layout as NeonI_N2VShR_RQ but with shl immediates.
multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
                          SDPatternOperator OpNode> {
  // 64-bit vector types.
  def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
                        OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // 128-bit vector types.
  def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
                         OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
                        OpNode> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
2071
// Rounding shift right
defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
                                int_aarch64_neon_vsrshr>;
defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
                                int_aarch64_neon_vurshr>;

// Saturating shift left unsigned (signed input, unsigned saturated result)
defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;

// Saturating shift left (by immediate; matched via the NEON_QSHL DAG nodes)
defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
2084
// Shift-right-and-accumulate: Rd += OpNode(Rn, splat(Imm)). The destination
// is tied to $src since the instruction both reads and writes Rd.
// OpNode is a plain shift node (sra/srl), so the immediate is matched as a
// vector splat (Neon_vdup) of the scalar shift amount.
class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                  SDNode OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
           (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
           asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
           [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
              (Ty (OpNode (Ty VPRC:$Rn),
                (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))],
           NoItinerary> {
  // Accumulator input and output share one register.
  let Constraints = "$src = $Rd";
}
2097
// Shift Right accumulate
multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
  // 64-bit vector types.
  def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                        OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // 128-bit vector types.
  def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                         OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                        OpNode> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
2135
// Shift right and accumulate: signed form uses arithmetic shift (sra),
// unsigned form uses logical shift (srl).
defm SSRAvvi    : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
defm USRAvvi    : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
2139
// Rounding shift accumulate
// Like N2VShiftAdd but the shift semantics come from an intrinsic, so the
// immediate is passed as a scalar i32 rather than a vector splat.
class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
                    RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                    SDPatternOperator OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                     [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
                        (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))],
                     NoItinerary> {
  // Accumulator input and output share one register.
  let Constraints = "$src = $Rd";
}
2152
// Rounding shift right and accumulate, one def per element type.
multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
                             SDPatternOperator OpNode> {
  // 64-bit vector types.
  def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                          OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                          OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                          OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // 128-bit vector types.
  def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                           OpNode> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                          OpNode> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                          OpNode> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                          OpNode> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
2190
// Rounding shift right and accumulate
defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
2194
// Shift insert by immediate
// SLI/SRI-style operation: shifted bits of Rn are inserted into Rd, so the
// destination is both read and written (tied to $src). The three-operand
// intrinsic receives (old Rd, Rn, shift amount).
class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                  SDPatternOperator OpNode>
    : NeonI_2VShiftImm<q, u, opcode,
           (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
           asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
           [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
             (i32 ImmTy:$Imm))))],
           NoItinerary> {
  // Insert-target input and output share one register.
  let Constraints = "$src = $Rd";
}
2207
// shift left insert (vector by immediate)
multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
  // 64-bit vector types.
  def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
                        int_aarch64_neon_vsli> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
                        int_aarch64_neon_vsli> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
                        int_aarch64_neon_vsli> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

    // 128-bit vector types
  def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
                         int_aarch64_neon_vsli> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
                        int_aarch64_neon_vsli> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
                        int_aarch64_neon_vsli> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
                        int_aarch64_neon_vsli> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
2246
// shift right insert (vector by immediate)
multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
    // 64-bit vector types.
  def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                        int_aarch64_neon_vsri> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                        int_aarch64_neon_vsri> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                        int_aarch64_neon_vsri> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

    // 128-bit vector types
  def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                         int_aarch64_neon_vsri> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                        int_aarch64_neon_vsri> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                        int_aarch64_neon_vsri> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                        int_aarch64_neon_vsri> {
    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
  }
}
2286
// Shift left and insert
defm SLIvvi   : NeonI_N2VShLIns<0b1, 0b01010, "sli">;

// Shift right and insert
defm SRIvvi   : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
2292
// Shift right and narrow: 128-bit source, 64-bit (half-width element)
// result. No ISel pattern here; selection is done via the PatFrag-based
// patterns further below.
class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                    string SrcT, Operand ImmTy>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                     [], NoItinerary>;
2299
// Second-part ("2") variant: the narrowed result is written into the high
// half of the 128-bit destination, so Rd is both read and written (tied).
class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                       string SrcT, Operand ImmTy>
  : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
                     (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                     [], NoItinerary> {
  // Low half of the destination is preserved from $src.
  let Constraints = "$src = $Rd";
}
2308
// shift right and narrow (vector by immediate)
// One narrow-shift instruction per destination element type; the "2"
// (high-half) forms get the asmop suffix appended.
multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
  def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }

  // Shift Narrow High
  def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
                              shr_imm8> {
    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
  }

  def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
                             shr_imm16> {
    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
  }

  def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
                             shr_imm32> {
    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
  }
}
2339
// Shift right narrow
defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;

// Shift right narrow (prefix Q is saturating, prefix R is rounding)
defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
2351
// PatFrags that match the concatenation of two 64-bit vectors into one
// 128-bit vector, for each result element type.
def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
                              (v2i64 (concat_vectors (v1i64 node:$Rm),
                                                     (v1i64 node:$Rn)))>;
def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
                              (v8i16 (concat_vectors (v4i16 node:$Rm),
                                                     (v4i16 node:$Rn)))>;
def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
                              (v4i32 (concat_vectors (v2i32 node:$Rm),
                                                     (v2i32 node:$Rn)))>;
def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
                              (v4f32 (concat_vectors (v2f32 node:$Rm),
                                                     (v2f32 node:$Rn)))>;
def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
                              (v2f64 (concat_vectors (v1f64 node:$Rm),
                                                     (v1f64 node:$Rn)))>;
2367
// PatFrags matching a logical (srl) or arithmetic (sra) shift right whose
// shift amount is an i32 immediate splatted across all lanes.
def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
                             (v8i16 (srl (v8i16 node:$lhs),
                               (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
                             (v4i32 (srl (v4i32 node:$lhs),
                               (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
                             (v2i64 (srl (v2i64 node:$lhs),
                               (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
                             (v8i16 (sra (v8i16 node:$lhs),
                               (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
                             (v4i32 (sra (v4i32 node:$lhs),
                               (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
                             (v2i64 (sra (v2i64 node:$lhs),
                               (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2386
// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
// The "shr" parameter selects the PatFrag family ("lshr" or "ashr"); the
// concat_vectors forms select the high-half (SHRN2) instructions, where the
// existing low half is placed in the full register via SUBREG_TO_REG.
multiclass Neon_shiftNarrow_patterns<string shr> {
  def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
              (i32 shr_imm8:$Imm)))),
            (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
              (i32 shr_imm16:$Imm)))),
            (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
              (i32 shr_imm32:$Imm)))),
            (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;

  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
              (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
                VPR128:$Rn, (i32 shr_imm8:$Imm))))))),
            (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
                         VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
              (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
                VPR128:$Rn, (i32 shr_imm16:$Imm))))))),
            (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                        VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
              (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
                VPR128:$Rn, (i32 shr_imm32:$Imm))))))),
            (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                        VPR128:$Rn, imm:$Imm)>;
}
2415
// Saturating/rounding shift-right-narrow patterns: the operation is an
// intrinsic (op), and "prefix" names the instruction family to select.
// The concat_vectors forms select the high-half ("2") instructions.
multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
  def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)),
            (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)),
            (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)),
            (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;

  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
                (v1i64 (bitconvert (v8i8
                    (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))),
            (!cast<Instruction>(prefix # "_16B")
                (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
                (v1i64 (bitconvert (v4i16
                    (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))),
            (!cast<Instruction>(prefix # "_8H")
                (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
                (v1i64 (bitconvert (v2i32
                    (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))),
            (!cast<Instruction>(prefix # "_4S")
                  (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                  VPR128:$Rn, imm:$Imm)>;
}
2443
// Instantiate the narrow-shift selection patterns for both shift kinds and
// for each saturating/rounding intrinsic.
defm : Neon_shiftNarrow_patterns<"lshr">;
defm : Neon_shiftNarrow_patterns<"ashr">;

defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2454
// Conversions between fixed-point and floating-point
// Fixed-point <-> floating-point conversion with an immediate number of
// fractional bits; direction is determined by DestTy/SrcTy and the intrinsic.
class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
                RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
                Operand ImmTy, SDPatternOperator IntOp>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                     [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
                       (i32 ImmTy:$Imm))))],
                     NoItinerary>;
2465
// fixed-point -> floating-point, for 2s/4s/2d (no 8/16-bit float elements).
multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
                              SDPatternOperator IntOp> {
  def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;  // immh:immb = 01xxxxx (32-bit elements)
  }

  def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;  // immh:immb = 01xxxxx (32-bit elements)
  }

  def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
                      shr_imm64, IntOp> {
    let Inst{22} = 0b1;      // immh:immb = 1xxxxxx (64-bit elements)
  }
}
2483
// floating-point -> fixed-point, for 2s/4s/2d.
multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
                              SDPatternOperator IntOp> {
  def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;  // immh:immb = 01xxxxx (32-bit elements)
  }

  def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;  // immh:immb = 01xxxxx (32-bit elements)
  }

  def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
                      shr_imm64, IntOp> {
    let Inst{22} = 0b1;      // immh:immb = 1xxxxxx (64-bit elements)
  }
}
2501
// Convert fixed-point to floating-point (SCVTF/UCVTF with #fbits)
defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
                                   int_arm_neon_vcvtfxs2fp>;
defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
                                   int_arm_neon_vcvtfxu2fp>;

// Convert floating-point to fixed-point (FCVTZS/FCVTZU with #fbits)
defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
                                   int_arm_neon_vcvtfp2fxs>;
defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
                                   int_arm_neon_vcvtfp2fxu>;
2513
// PatFrags matching the widening extension of the high half of a 128-bit
// vector (as produced by the Neon_High* extractors), parameterized by the
// extension kind (sext/zext).
multiclass Neon_sshll2_0<SDNode ext>
{
  def _v8i8  : PatFrag<(ops node:$Rn),
                       (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
  def _v4i16 : PatFrag<(ops node:$Rn),
                       (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
  def _v2i32 : PatFrag<(ops node:$Rn),
                       (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
}
2523
// High-half sign/zero-extension fragments for use by later patterns.
defm NI_sext_high : Neon_sshll2_0<sext>;
defm NI_zext_high : Neon_sshll2_0<zext>;
2526
2527
2528 //===----------------------------------------------------------------------===//
2529 // Multiclasses for NeonI_Across
2530 //===----------------------------------------------------------------------===//
2531
2532 // Variant 1
2533
// Across-lanes reductions with a widened (double-width) scalar result,
// e.g. SADDLV/UADDLV: the destination element is twice the source element
// size, so the result lands in the next-larger FPR class.
multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
                            string asmop, SDPatternOperator opnode>
{
    def _1h8b:  NeonI_2VAcross<0b0, u, 0b00, opcode,
                (outs FPR16:$Rd), (ins VPR64:$Rn),
                asmop # "\t$Rd, $Rn.8b",
                [(set (v1i16 FPR16:$Rd),
                    (v1i16 (opnode (v8i8 VPR64:$Rn))))],
                NoItinerary>;

    def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
                (outs FPR16:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.16b",
                [(set (v1i16 FPR16:$Rd),
                    (v1i16 (opnode (v16i8 VPR128:$Rn))))],
                NoItinerary>;

    def _1s4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
                (outs FPR32:$Rd), (ins VPR64:$Rn),
                asmop # "\t$Rd, $Rn.4h",
                [(set (v1i32 FPR32:$Rd),
                    (v1i32 (opnode (v4i16 VPR64:$Rn))))],
                NoItinerary>;

    def _1s8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
                (outs FPR32:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.8h",
                [(set (v1i32 FPR32:$Rd),
                    (v1i32 (opnode (v8i16 VPR128:$Rn))))],
                NoItinerary>;

    // _1d2s doesn't exist!

    def _1d4s:  NeonI_2VAcross<0b1, u, 0b10, opcode,
                (outs FPR64:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.4s",
                [(set (v1i64 FPR64:$Rd),
                    (v1i64 (opnode (v4i32 VPR128:$Rn))))],
                NoItinerary>;
}
2574
// Signed/unsigned add-long across vector (widened accumulator result).
defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
2577
2578 // Variant 2
2579
// Across-lanes reductions whose scalar result keeps the source element size,
// e.g. SMAXV/UMINV/ADDV.
multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
                            string asmop, SDPatternOperator opnode>
{
    def _1b8b:  NeonI_2VAcross<0b0, u, 0b00, opcode,
                (outs FPR8:$Rd), (ins VPR64:$Rn),
                asmop # "\t$Rd, $Rn.8b",
                [(set (v1i8 FPR8:$Rd),
                    (v1i8 (opnode (v8i8 VPR64:$Rn))))],
                NoItinerary>;

    def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
                (outs FPR8:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.16b",
                [(set (v1i8 FPR8:$Rd),
                    (v1i8 (opnode (v16i8 VPR128:$Rn))))],
                NoItinerary>;

    def _1h4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
                (outs FPR16:$Rd), (ins VPR64:$Rn),
                asmop # "\t$Rd, $Rn.4h",
                [(set (v1i16 FPR16:$Rd),
                    (v1i16 (opnode (v4i16 VPR64:$Rn))))],
                NoItinerary>;

    def _1h8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
                (outs FPR16:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.8h",
                [(set (v1i16 FPR16:$Rd),
                    (v1i16 (opnode (v8i16 VPR128:$Rn))))],
                NoItinerary>;

    // _1s2s doesn't exist!

    def _1s4s:  NeonI_2VAcross<0b1, u, 0b10, opcode,
                (outs FPR32:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.4s",
                [(set (v1i32 FPR32:$Rd),
                    (v1i32 (opnode (v4i32 VPR128:$Rn))))],
                NoItinerary>;
}
2620
// Integer min/max/add reductions across all lanes.
defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;

defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;

defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
2628
2629 // Variant 3
2630
// Floating-point across-lanes reductions: only the 4s form exists.
// The size field distinguishes max (0b00) from min (0b10) variants.
multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
                            string asmop, SDPatternOperator opnode> {
    def _1s4s:  NeonI_2VAcross<0b1, u, size, opcode,
                (outs FPR32:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.4s",
                [(set (f32 FPR32:$Rd),
                    (f32 (opnode (v4f32 VPR128:$Rn))))],
                NoItinerary>;
}
2640
// FP max/min reductions; the NM variants use the IEEE maxNum/minNum
// NaN-propagation rules.
defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
                                int_aarch64_neon_vmaxnmv>;
defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
                                int_aarch64_neon_vminnmv>;

defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
                              int_aarch64_neon_vmaxv>;
defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
                              int_aarch64_neon_vminv>;
2650
// The following definitions are for the instruction class (Perm)
2652
// Two-source permute (UZP/TRN/ZIP): both sources, and the result, share one
// register class and element arrangement.
class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
                    string asmop, RegisterOperand OpVPR, string OpS,
                    SDPatternOperator opnode, ValueType Ty>
  : NeonI_Perm<q, size, opcode,
               (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
               asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
               [(set (Ty OpVPR:$Rd),
                  (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
               NoItinerary>;
2662
// Integer permute instructions for every arrangement (no 1d form: permuting
// a single 64-bit element is meaningless).
multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
                          SDPatternOperator opnode> {
  def _8b  : NeonI_Permute<0b0, 0b00, opcode, asmop,
                           VPR64, "8b", opnode, v8i8>;
  def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop,
                           VPR128, "16b",opnode, v16i8>;
  def _4h  : NeonI_Permute<0b0, 0b01, opcode, asmop,
                           VPR64, "4h", opnode, v4i16>;
  def _8h  : NeonI_Permute<0b1, 0b01, opcode, asmop,
                           VPR128, "8h", opnode, v8i16>;
  def _2s  : NeonI_Permute<0b0, 0b10, opcode, asmop,
                           VPR64, "2s", opnode, v2i32>;
  def _4s  : NeonI_Permute<0b1, 0b10, opcode, asmop,
                           VPR128, "4s", opnode, v4i32>;
  def _2d  : NeonI_Permute<0b1, 0b11, opcode, asmop,
                           VPR128, "2d", opnode, v2i64>;
}
2680
2681 defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>;
2682 defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>;
2683 defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>;
2684 defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>;
2685 defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>;
2686 defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>;
2687
2688 multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> {
2689   def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
2690             (!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>;
2691
2692   def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
2693             (!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>;
2694
2695   def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
2696             (!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>;
2697 }
2698
2699 defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>;
2700 defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>;
2701 defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>;
2702 defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>;
2703 defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>;
2704 defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>;
2705
2706 // The following definitions are for the instruction class (3V Diff).
2707
2708 // normal long/long2 pattern
// Widening binary op: both source operands are extended (via ext — sext,
// zext, or one of the NI_*ext_high_* frags) to the result element width
// before opnode is applied; result is always a 128-bit vector.
2709 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2710                  string asmop, string ResS, string OpS,
2711                  SDPatternOperator opnode, SDPatternOperator ext,
2712                  RegisterOperand OpVPR,
2713                  ValueType ResTy, ValueType OpTy>
2714   : NeonI_3VDiff<q, u, size, opcode,
2715                  (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2716                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2717                  [(set (ResTy VPR128:$Rd),
2718                     (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2719                                    (ResTy (ext (OpTy OpVPR:$Rm))))))],
2720                  NoItinerary>;
2721
// Signed long: sign-extend 64-bit sources to double-width elements.
2722 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2723                         string asmop, SDPatternOperator opnode,
2724                         bit Commutable = 0> {
2725   let isCommutable = Commutable in {
2726     def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2727                            opnode, sext, VPR64, v8i16, v8i8>;
2728     def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2729                            opnode, sext, VPR64, v4i32, v4i16>;
2730     def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2731                            opnode, sext, VPR64, v2i64, v2i32>;
2732   }
2733 }
2734
// Signed long2: same, but sign-extends the high halves of 128-bit sources.
2735 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
2736                          SDPatternOperator opnode, bit Commutable = 0> {
2737   let isCommutable = Commutable in {
2738     def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2739                             opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2740     def _4s8h  : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2741                             opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2742     def _2d4s  : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2743                             opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2744   }
2745 }
2746
// Unsigned long: zero-extend 64-bit sources.
2747 multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
2748                         SDPatternOperator opnode, bit Commutable = 0> {
2749   let isCommutable = Commutable in {
2750     def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2751                            opnode, zext, VPR64, v8i16, v8i8>;
2752     def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2753                            opnode, zext, VPR64, v4i32, v4i16>;
2754     def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2755                            opnode, zext, VPR64, v2i64, v2i32>;
2756   }
2757 }
2758
// Unsigned long2: zero-extends the high halves of 128-bit sources.
2759 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
2760                          SDPatternOperator opnode, bit Commutable = 0> {
2761   let isCommutable = Commutable in {
2762     def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2763                             opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2764     def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2765                            opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2766     def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2767                            opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2768   }
2769 }
2770
// Additions are commutable (1), subtractions are not (0).
2771 defm SADDLvvv :  NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2772 defm UADDLvvv :  NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2773
2774 defm SADDL2vvv :  NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2775 defm UADDL2vvv :  NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2776
2777 defm SSUBLvvv :  NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2778 defm USUBLvvv :  NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2779
2780 defm SSUBL2vvv :  NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2781 defm USUBL2vvv :  NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2782
2783 // normal wide/wide2 pattern
// Wide binary op: the first operand is already the wide (128-bit) type;
// only the second operand is extended before opnode is applied.
2784 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2785                  string asmop, string ResS, string OpS,
2786                  SDPatternOperator opnode, SDPatternOperator ext,
2787                  RegisterOperand OpVPR,
2788                  ValueType ResTy, ValueType OpTy>
2789   : NeonI_3VDiff<q, u, size, opcode,
2790                  (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2791                  asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2792                  [(set (ResTy VPR128:$Rd),
2793                     (ResTy (opnode (ResTy VPR128:$Rn),
2794                                    (ResTy (ext (OpTy OpVPR:$Rm))))))],
2795                  NoItinerary>;
2796
// Signed wide: sign-extend the 64-bit second operand.
2797 multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
2798                         SDPatternOperator opnode> {
2799   def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2800                          opnode, sext, VPR64, v8i16, v8i8>;
2801   def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2802                          opnode, sext, VPR64, v4i32, v4i16>;
2803   def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2804                          opnode, sext, VPR64, v2i64, v2i32>;
2805 }
2806
2807 defm SADDWvvv :  NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2808 defm SSUBWvvv :  NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
2809
// Signed wide2: sign-extends the high half of a 128-bit second operand.
2810 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
2811                          SDPatternOperator opnode> {
2812   def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2813                           opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2814   def _4s8h  : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2815                           opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2816   def _2d4s  : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2817                           opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2818 }
2819
2820 defm SADDW2vvv :  NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2821 defm SSUBW2vvv :  NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
2822
// Unsigned wide: zero-extend the 64-bit second operand.
2823 multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
2824                         SDPatternOperator opnode> {
2825   def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2826                          opnode, zext, VPR64, v8i16, v8i8>;
2827   def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2828                          opnode, zext, VPR64, v4i32, v4i16>;
2829   def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2830                          opnode, zext, VPR64, v2i64, v2i32>;
2831 }
2832
2833 defm UADDWvvv :  NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2834 defm USUBWvvv :  NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
2835
// Unsigned wide2: zero-extends the high half of a 128-bit second operand.
2836 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
2837                          SDPatternOperator opnode> {
2838   def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2839                           opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2840   def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2841                          opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2842   def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2843                          opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2844 }
2845
2846 defm UADDW2vvv :  NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2847 defm USUBW2vvv :  NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2848
2849 // Get the high half part of the vector element.
// Each frag matches: shift every lane right by half the element width,
// then truncate to the narrower element type — i.e. keep the high half
// of each double-width lane.
2850 multiclass NeonI_get_high {
2851   def _8h : PatFrag<(ops node:$Rn),
2852                     (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2853                                              (v8i16 (Neon_vdup (i32 8)))))))>;
2854   def _4s : PatFrag<(ops node:$Rn),
2855                     (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2856                                               (v4i32 (Neon_vdup (i32 16)))))))>;
2857   def _2d : PatFrag<(ops node:$Rn),
2858                     (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2859                                               (v2i64 (Neon_vdup (i32 32)))))))>;
2860 }
2861
2862 defm NI_get_hi : NeonI_get_high;
2863
2864 // pattern for addhn/subhn with 2 operands
// Narrowing op: apply opnode at full width, then take the high half of
// each result lane via get_hi; the narrow result lands in a 64-bit reg.
2865 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2866                            string asmop, string ResS, string OpS,
2867                            SDPatternOperator opnode, SDPatternOperator get_hi,
2868                            ValueType ResTy, ValueType OpTy>
2869   : NeonI_3VDiff<q, u, size, opcode,
2870                  (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2871                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2872                  [(set (ResTy VPR64:$Rd),
2873                     (ResTy (get_hi
2874                       (OpTy (opnode (OpTy VPR128:$Rn),
2875                                     (OpTy VPR128:$Rm))))))],
2876                  NoItinerary>;
2877
2878 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
2879                                 SDPatternOperator opnode, bit Commutable = 0> {
2880   let isCommutable = Commutable in {
2881     def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2882                                      opnode, NI_get_hi_8h, v8i8, v8i16>;
2883     def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2884                                      opnode, NI_get_hi_4s, v4i16, v4i32>;
2885     def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2886                                      opnode, NI_get_hi_2d, v2i32, v2i64>;
2887   }
2888 }
2889
2890 defm ADDHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2891 defm SUBHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2892
2893 // pattern for operation with 2 operands
// Generic two-operand 3VDiff instruction: Rd = opnode(Rn, Rm) with
// independently chosen result/operand register classes and types.
2894 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2895                     string asmop, string ResS, string OpS,
2896                     SDPatternOperator opnode,
2897                     RegisterOperand ResVPR, RegisterOperand OpVPR,
2898                     ValueType ResTy, ValueType OpTy>
2899   : NeonI_3VDiff<q, u, size, opcode,
2900                  (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2901                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2902                  [(set (ResTy ResVPR:$Rd),
2903                     (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2904                  NoItinerary>;
2905
2906 // normal narrow pattern
// Narrowing: 128-bit sources, 64-bit result of half element width.
2907 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
2908                           SDPatternOperator opnode, bit Commutable = 0> {
2909   let isCommutable = Commutable in {
2910     def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2911                               opnode, VPR64, VPR128, v8i8, v8i16>;
2912     def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2913                               opnode, VPR64, VPR128, v4i16, v4i32>;
2914     def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2915                               opnode, VPR64, VPR128, v2i32, v2i64>;
2916   }
2917 }
2918
2919 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2920 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2921
2922 // pattern for acle intrinsic with 3 operands
// Narrow-high ("2") form: writes the upper half of Rd while $src supplies
// the preserved lower half ($src = $Rd tie). No ISel pattern here —
// selection is done by the NarrowHighHalfPat patterns below.
2923 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2924                      string asmop, string ResS, string OpS>
2925   : NeonI_3VDiff<q, u, size, opcode,
2926                  (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2927                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2928                  [], NoItinerary> {
2929   let Constraints = "$src = $Rd";
2930   let neverHasSideEffects = 1;
2931 }
2932
2933 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
2934   def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2935   def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2936   def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
2937 }
2938
2939 defm ADDHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2940 defm SUBHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2941
2942 defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2943 defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2944
2945 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
2946 // part.
// Matches concatenating an untouched 64-bit low half ($src) with the
// narrowed result of coreop on two 128-bit operands; SUBREG_TO_REG
// places $src into the low half of the 128-bit destination.
2947 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2948                         SDPatternOperator coreop>
2949   : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2950                       (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2951                                                         (SrcTy VPR128:$Rm)))))),
2952         (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2953               VPR128:$Rn, VPR128:$Rm)>;
2954
2955 // addhn2 patterns
2956 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8,  v8i16,
2957           BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2958 def : NarrowHighHalfPat<ADDHN2vvv_8h4s,  v4i16, v4i32,
2959           BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2960 def : NarrowHighHalfPat<ADDHN2vvv_4s2d,  v2i32, v2i64,
2961           BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
2962
2963 // subhn2 patterns
2964 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8,  v8i16,
2965           BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2966 def : NarrowHighHalfPat<SUBHN2vvv_8h4s,  v4i16, v4i32,
2967           BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2968 def : NarrowHighHalfPat<SUBHN2vvv_4s2d,  v2i32, v2i64,
2969           BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
2970
2971 // raddhn2 patterns
2972 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vraddhn>;
2973 def : NarrowHighHalfPat<RADDHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vraddhn>;
2974 def : NarrowHighHalfPat<RADDHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vraddhn>;
2975
2976 // rsubhn2 patterns
2977 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vrsubhn>;
2978 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vrsubhn>;
2979 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vrsubhn>;
2980
2981 // pattern that need to extend result
// Long op where opnode itself produces a narrow (OpSTy) result that is
// then zero-extended to the full-width ResTy (used for sabdl/uabdl,
// whose absolute difference always fits in the source element width).
2982 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2983                      string asmop, string ResS, string OpS,
2984                      SDPatternOperator opnode,
2985                      RegisterOperand OpVPR,
2986                      ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2987   : NeonI_3VDiff<q, u, size, opcode,
2988                  (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2989                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2990                  [(set (ResTy VPR128:$Rd),
2991                     (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2992                                                 (OpTy OpVPR:$Rm))))))],
2993                  NoItinerary>;
2994
2995 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
2996                            SDPatternOperator opnode, bit Commutable = 0> {
2997   let isCommutable = Commutable in {
2998     def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2999                                opnode, VPR64, v8i16, v8i8, v8i8>;
3000     def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3001                                opnode, VPR64, v4i32, v4i16, v4i16>;
3002     def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3003                                opnode, VPR64, v2i64, v2i32, v2i32>;
3004   }
3005 }
3006
3007 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
3008 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
3009
// Wraps op so it applies to the high halves of two 128-bit operands
// (via the Neon_High* extract frags); used by all the "2" variants.
3010 multiclass NeonI_Op_High<SDPatternOperator op> {
3011   def _16B : PatFrag<(ops node:$Rn, node:$Rm),
3012                      (op (v8i8 (Neon_High16B node:$Rn)),
3013                          (v8i8 (Neon_High16B node:$Rm)))>;
3014   def _8H  : PatFrag<(ops node:$Rn, node:$Rm),
3015                      (op (v4i16 (Neon_High8H node:$Rn)),
3016                          (v4i16 (Neon_High8H node:$Rm)))>;
3017   def _4S  : PatFrag<(ops node:$Rn, node:$Rm),
3018                      (op (v2i32 (Neon_High4S node:$Rn)),
3019                          (v2i32 (Neon_High4S node:$Rm)))>;
3020 }
3021
3022 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
3023 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
3024 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
3025 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
3026 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
3027 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
3028
// sabdl2/uabdl2: abs-diff of high halves, then zext to the long result.
// NOTE(review): the def suffixes (_8h8b etc.) describe the narrow source
// halves while the asm operands are 16b/8h/4s — presumably intentional,
// but inconsistent with the _8h16b naming used by other "2" multiclasses.
3029 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
3030                             bit Commutable = 0> {
3031   let isCommutable = Commutable in {
3032     def _8h8b  : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3033                                 !cast<PatFrag>(opnode # "_16B"),
3034                                 VPR128, v8i16, v16i8, v8i8>;
3035     def _4s4h  : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3036                                 !cast<PatFrag>(opnode # "_8H"),
3037                                 VPR128, v4i32, v8i16, v4i16>;
3038     def _2d2s  : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3039                                 !cast<PatFrag>(opnode # "_4S"),
3040                                 VPR128, v2i64, v4i32, v2i32>;
3041   }
3042 }
3043
3044 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
3045 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
3046
3047 // For pattern that need two operators being chained.
// Accumulating absolute difference: Rd = opnode($src, zext(subop(Rn, Rm))),
// with the accumulator tied to the destination ($src = $Rd).
3048 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
3049                      string asmop, string ResS, string OpS,
3050                      SDPatternOperator opnode, SDPatternOperator subop,
3051                      RegisterOperand OpVPR,
3052                      ValueType ResTy, ValueType OpTy, ValueType OpSTy>
3053   : NeonI_3VDiff<q, u, size, opcode,
3054                  (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
3055                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3056                  [(set (ResTy VPR128:$Rd),
3057                     (ResTy (opnode
3058                       (ResTy VPR128:$src),
3059                       (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
3060                                                  (OpTy OpVPR:$Rm))))))))],
3061                  NoItinerary> {
3062   let Constraints = "$src = $Rd";
3063 }
3064
// sabal/uabal: accumulate abs-diff of 64-bit sources.
3065 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
3066                              SDPatternOperator opnode, SDPatternOperator subop>{
3067   def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3068                              opnode, subop, VPR64, v8i16, v8i8, v8i8>;
3069   def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3070                              opnode, subop, VPR64, v4i32, v4i16, v4i16>;
3071   def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3072                              opnode, subop, VPR64, v2i64, v2i32, v2i32>;
3073 }
3074
3075 defm SABALvvv :  NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
3076                                    add, int_arm_neon_vabds>;
3077 defm UABALvvv :  NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
3078                                    add, int_arm_neon_vabdu>;
3079
// sabal2/uabal2: same, but the abs-diff is taken of the high halves of
// 128-bit sources via the NI_*abdl_hi frags (selected by name).
3080 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
3081                               SDPatternOperator opnode, string subop> {
3082   def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3083                              opnode, !cast<PatFrag>(subop # "_16B"),
3084                              VPR128, v8i16, v16i8, v8i8>;
3085   def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3086                              opnode, !cast<PatFrag>(subop # "_8H"),
3087                              VPR128, v4i32, v8i16, v4i16>;
3088   def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3089                              opnode, !cast<PatFrag>(subop # "_4S"),
3090                              VPR128, v2i64, v4i32, v2i32>;
3091 }
3092
3093 defm SABAL2vvv :  NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
3094                                      "NI_sabdl_hi">;
3095 defm UABAL2vvv :  NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
3096                                      "NI_uabdl_hi">;
3097
3098 // Long pattern with 2 operands
// smull/umull: long multiply of 64-bit sources into a 128-bit result.
3099 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
3100                           SDPatternOperator opnode, bit Commutable = 0> {
3101   let isCommutable = Commutable in {
3102     def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3103                               opnode, VPR128, VPR64, v8i16, v8i8>;
3104     def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3105                               opnode, VPR128, VPR64, v4i32, v4i16>;
3106     def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3107                               opnode, VPR128, VPR64, v2i64, v2i32>;
3108   }
3109 }
3110
3111 defm SMULLvvv :  NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
3112 defm UMULLvvv :  NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
3113
// "2" form of the long two-operand op: both sources are 128-bit and
// opnode (an NI_*_hi frag) consumes their high halves.
3114 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
3115                            string asmop, string ResS, string OpS,
3116                            SDPatternOperator opnode,
3117                            ValueType ResTy, ValueType OpTy>
3118   : NeonI_3VDiff<q, u, size, opcode,
3119                  (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
3120                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3121                  [(set (ResTy VPR128:$Rd),
3122                     (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
3123                  NoItinerary>;
3124
3125 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
3126                                    string opnode, bit Commutable = 0> {
3127   let isCommutable = Commutable in {
3128     def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3129                                       !cast<PatFrag>(opnode # "_16B"),
3130                                       v8i16, v16i8>;
3131     def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3132                                      !cast<PatFrag>(opnode # "_8H"),
3133                                      v4i32, v8i16>;
3134     def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3135                                      !cast<PatFrag>(opnode # "_4S"),
3136                                      v2i64, v4i32>;
3137   }
3138 }
3139
3140 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
3141                                          "NI_smull_hi", 1>;
3142 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
3143                                          "NI_umull_hi", 1>;
3144
3145 // Long pattern with 3 operands
// Long multiply-accumulate: Rd = opnode($src, Rn, Rm), with the 128-bit
// accumulator tied to the destination and 64-bit multiplicands.
3146 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
3147                      string asmop, string ResS, string OpS,
3148                      SDPatternOperator opnode,
3149                      ValueType ResTy, ValueType OpTy>
3150   : NeonI_3VDiff<q, u, size, opcode,
3151                  (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
3152                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3153                  [(set (ResTy VPR128:$Rd),
3154                     (ResTy (opnode
3155                       (ResTy VPR128:$src),
3156                       (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
3157                NoItinerary> {
3158   let Constraints = "$src = $Rd";
3159 }
3160
3161 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
3162                              SDPatternOperator opnode> {
3163   def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3164                              opnode, v8i16, v8i8>;
3165   def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3166                              opnode, v4i32, v4i16>;
3167   def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3168                              opnode, v2i64, v2i32>;
3169 }
3170
// Three-operand frags matching accumulate-into-Rd of a long multiply:
// add/sub of the signed/unsigned vmull intrinsic.
3171 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3172                          (add node:$Rd,
3173                             (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
3174
3175 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3176                          (add node:$Rd,
3177                             (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
3178
3179 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3180                          (sub node:$Rd,
3181                             (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
3182
3183 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3184                          (sub node:$Rd,
3185                             (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
3186
3187 defm SMLALvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
3188 defm UMLALvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
3189
3190 defm SMLSLvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
3191 defm UMLSLvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
3192
// Multiply-accumulate where the multiply (opnode) and the accumulate
// (subop: add/sub or a saturating-q intrinsic) are separate operators:
// Rd = subop($src, opnode(Rn, Rm)); accumulator tied to destination.
3193 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
3194                            string asmop, string ResS, string OpS,
3195                            SDPatternOperator subop, SDPatternOperator opnode,
3196                            RegisterOperand OpVPR,
3197                            ValueType ResTy, ValueType OpTy>
3198   : NeonI_3VDiff<q, u, size, opcode,
3199                (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
3200                asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3201                [(set (ResTy VPR128:$Rd),
3202                   (ResTy (subop
3203                     (ResTy VPR128:$src),
3204                     (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
3205                NoItinerary> {
3206   let Constraints = "$src = $Rd";
3207 }
3208
// "2" forms: the long multiply consumes the high halves of 128-bit
// sources via the NI_*_hi frags (selected by name).
3209 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
3210                                    SDPatternOperator subop, string opnode> {
3211   def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3212                                     subop, !cast<PatFrag>(opnode # "_16B"),
3213                                     VPR128, v8i16, v16i8>;
3214   def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3215                                    subop, !cast<PatFrag>(opnode # "_8H"),
3216                                    VPR128, v4i32, v8i16>;
3217   def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3218                                    subop, !cast<PatFrag>(opnode # "_4S"),
3219                                    VPR128, v2i64, v4i32>;
3220 }
3221
3222 defm SMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
3223                                           add, "NI_smull_hi">;
3224 defm UMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
3225                                           add, "NI_umull_hi">;
3226
3227 defm SMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
3228                                           sub, "NI_smull_hi">;
3229 defm UMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
3230                                           sub, "NI_umull_hi">;
3231
// sqdmlal/sqdmlsl: reuses the mlas class with q = 0 and 64-bit sources;
// opnode here is the saturating accumulate (vqadds/vqsubs) applied to
// the vqdmull product. Only 4h/2s element sizes exist for this op.
3232 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
3233                                     SDPatternOperator opnode> {
3234   def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3235                                    opnode, int_arm_neon_vqdmull,
3236                                    VPR64, v4i32, v4i16>;
3237   def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3238                                    opnode, int_arm_neon_vqdmull,
3239                                    VPR64, v2i64, v2i32>;
3240 }
3241
3242 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
3243                                            int_arm_neon_vqadds>;
3244 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
3245                                            int_arm_neon_vqsubs>;
3246
3247 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
3248                          SDPatternOperator opnode, bit Commutable = 0> {
3249   let isCommutable = Commutable in {
3250     def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3251                               opnode, VPR128, VPR64, v4i32, v4i16>;
3252     def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3253                               opnode, VPR128, VPR64, v2i64, v2i32>;
3254   }
3255 }
3256
3257 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
3258                                 int_arm_neon_vqdmull, 1>;
3259
// Second-half ("...2") long multiplies: `opnode` is the string name of a
// PatFrag, suffixed here with the source arrangement ("_8H"/"_4S") and
// resolved via !cast.  Used below for SQDMULL2.
3260 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
3261                                    string opnode, bit Commutable = 0> {
3262   let isCommutable = Commutable in {
3263     def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3264                                      !cast<PatFrag>(opnode # "_8H"),
3265                                      v4i32, v8i16>;
3266     def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3267                                      !cast<PatFrag>(opnode # "_4S"),
3268                                      v2i64, v4i32>;
3269   }
3270 }
3271
3272 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
3273                                            "NI_qdmull_hi", 1>;
3274
// Saturating doubling multiply-accumulate long, second-half variants
// (SQDMLAL2/SQDMLSL2): NI_qdmull_hi_* produce the doubled long product of
// the high halves; `opnode` saturating-adds/subtracts it into the
// accumulator.
3275 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
3276                                      SDPatternOperator opnode> {
3277   def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3278                                    opnode, NI_qdmull_hi_8H,
3279                                    VPR128, v4i32, v8i16>;
3280   def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3281                                    opnode, NI_qdmull_hi_4S,
3282                                    VPR128, v2i64, v4i32>;
3283 }
3284
3285 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
3286                                              int_arm_neon_vqadds>;
3287 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
3288                                              int_arm_neon_vqsubs>;
3289
// Polynomial multiply long (PMULL): 8b->8h via the ARM vmullp intrinsic,
// and the crypto 1d->1q form via the AArch64 p64 intrinsic.
// NOTE(review): the _1q1d result is modeled as v16i8 since there is no
// i128 vector element type — the "1q" result occupies the full Q register.
3290 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
3291                          SDPatternOperator opnode_8h8b,
3292                          SDPatternOperator opnode_1q1d, bit Commutable = 0> {
3293   let isCommutable = Commutable in {
3294     def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3295                               opnode_8h8b, VPR128, VPR64, v8i16, v8i8>;
3296
3297     def _1q1d : NeonI_3VD_2Op<0b0, u, 0b11, opcode, asmop, "1q", "1d",
3298                               opnode_1q1d, VPR128, VPR64, v16i8, v1i64>;
3299   }
3300 }
3301
3302 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp,
3303                               int_aarch64_neon_vmull_p64, 1>;
3304
// Polynomial multiply long, second half (PMULL2): 16b->8h through the
// NI_pmull_hi PatFrag, plus a hand-written 2d->1q crypto form whose
// pattern extracts element 1 of each 2d source and feeds the p64
// intrinsic.  (Fixed: removed trailing whitespace on two lines.)
3305 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
3306                                    string opnode, bit Commutable = 0> {
3307   let isCommutable = Commutable in {
3308     def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3309                                       !cast<PatFrag>(opnode # "_16B"),
3310                                       v8i16, v16i8>;
3311
3312     def _1q2d :
3313       NeonI_3VDiff<0b1, u, 0b11, opcode,
3314                    (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
3315                    asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
3316                    [(set (v16i8 VPR128:$Rd),
3317                       (v16i8 (int_aarch64_neon_vmull_p64
3318                         (v1i64 (scalar_to_vector
3319                           (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))),
3320                         (v1i64 (scalar_to_vector
3321                           (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))],
3322                    NoItinerary>;
3323   }
3324 }
3325
3326 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
3327                                          1>;
3328
3329 // End of implementation for instruction class (3V Diff)
3330
3331 // The followings are vector load/store multiple N-element structure
3332 // (class SIMD lselem).
3333
3334 // ld1:         load multiple 1-element structure to 1/2/3/4 registers.
3335 // ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
3336 //              The structure consists of a sequence of sets of N values.
3337 //              The first element of the structure is placed in the first lane
3338 //              of the first vector, the second element in the first lane
3339 //              of the second vector, and so on.
3340 // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
3341 // the three 64-bit vectors list {BA, DC, FE}.
3342 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
3343 // 64-bit vectors list {DA, EB, FC}.
3344 // Store instructions store multiple structure to N registers like load.
3345
3346
// Base class for the non-writeback vector-list loads (LD1-LD4 and LD1xN):
// loads a register list $Rt from the base register [$Rn].  No patterns are
// attached; selection happens elsewhere, hence mayLoad/neverHasSideEffects.
3347 class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
3348                     RegisterOperand VecList, string asmop>
3349   : NeonI_LdStMult<q, 1, opcode, size,
3350                  (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3351                  asmop # "\t$Rt, [$Rn]",
3352                  [],
3353                  NoItinerary> {
3354   let mayLoad = 1;
3355   let neverHasSideEffects = 1;
3356 }
3357
// Instantiates one load per vector arrangement: 64-bit (8B/4H/2S, q=0) and
// 128-bit (16B/8H/4S/2D, q=1).  The 64-bit 1D form is deliberately absent
// and is defined separately where needed.
3358 multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
3359   def _8B : NeonI_LDVList<0, opcode, 0b00,
3360                           !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3361
3362   def _4H : NeonI_LDVList<0, opcode, 0b01,
3363                           !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3364
3365   def _2S : NeonI_LDVList<0, opcode, 0b10,
3366                           !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3367
3368   def _16B : NeonI_LDVList<1, opcode, 0b00,
3369                            !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3370
3371   def _8H : NeonI_LDVList<1, opcode, 0b01,
3372                           !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3373
3374   def _4S : NeonI_LDVList<1, opcode, 0b10,
3375                           !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3376
3377   def _2D : NeonI_LDVList<1, opcode, 0b11,
3378                           !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3379 }
3380
3381 // Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
// The *_1D variants are defined individually because LDVList_BHSD only
// covers the 8B/4H/2S and 16B/8H/4S/2D arrangements.
3382 defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
3383 def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
3384
3385 defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
3386
3387 defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
3388
3389 defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
3390
3391 // Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
3392 defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">;
3393 def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
3394
3395 defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">;
3396 def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
3397
3398 defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">;
3399 def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
3400
// Base class for the non-writeback vector-list stores (ST1-ST4 and ST1xN):
// mirrors NeonI_LDVList with the list $Rt moved to the ins and mayStore set.
3401 class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
3402                     RegisterOperand VecList, string asmop>
3403   : NeonI_LdStMult<q, 0, opcode, size,
3404                  (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
3405                  asmop # "\t$Rt, [$Rn]",
3406                  [],
3407                  NoItinerary> {
3408   let mayStore = 1;
3409   let neverHasSideEffects = 1;
3410 }
3411
// Store counterpart of LDVList_BHSD: one store per vector arrangement,
// again without the 64-bit 1D form (defined separately below).
3412 multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
3413   def _8B : NeonI_STVList<0, opcode, 0b00,
3414                           !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3415
3416   def _4H : NeonI_STVList<0, opcode, 0b01,
3417                           !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3418
3419   def _2S : NeonI_STVList<0, opcode, 0b10,
3420                           !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3421
3422   def _16B : NeonI_STVList<1, opcode, 0b00,
3423                            !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3424
3425   def _8H : NeonI_STVList<1, opcode, 0b01,
3426                           !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3427
3428   def _4S : NeonI_STVList<1, opcode, 0b10,
3429                           !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3430
3431   def _2D : NeonI_STVList<1, opcode, 0b11,
3432                           !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3433 }
3434
3435 // Store multiple N-element structures from N registers (N = 1,2,3,4)
// As with the loads, the *_1D variants are added individually since
// STVList_BHSD does not produce a 1D arrangement.
3436 defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
3437 def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
3438
3439 defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
3440
3441 defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
3442
3443 defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
3444
3445 // Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
3446 defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
3447 def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
3448
3449 defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
3450 def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
3451
3452 defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
3453 def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
3454
// Map generic vector loads/stores of each legal vector type onto the
// corresponding LD1/ST1 instruction of matching width.
3455 def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
3456 def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
3457
3458 def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
3459 def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
3460
3461 def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>;
3462 def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>;
3463
3464 def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
3465 def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
3466
3467 def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
3468 def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
3469
3470 def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
3471 def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;
3472
3473 def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
3474           (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
3475 def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
3476           (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
3477
3478 def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr),
3479           (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
3480 def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr),
3481           (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
3482
3483 def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr),
3484           (ST1_8H GPR64xsp:$addr, VPR128:$value)>;
3485 def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr),
3486           (ST1_16B GPR64xsp:$addr, VPR128:$value)>;
3487
3488 def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr),
3489           (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
3490 def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr),
3491           (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
3492
3493 def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr),
3494           (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
3495 def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr),
3496           (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
3497
3498 def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
3499           (ST1_4H GPR64xsp:$addr, VPR64:$value)>;
3500 def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
3501           (ST1_8B GPR64xsp:$addr, VPR64:$value)>;
3502
3503 // Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store.
3504 // FIXME: for now we have v1i8, v1i16, v1i32 legal types, if they are illegal,
3505 // these patterns are not needed any more.
3506 def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>;
3507 def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>;
3508 def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>;
3509
3510 def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr),
3511           (LSFP8_STR $value, $addr, 0)>;
3512 def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr),
3513           (LSFP16_STR $value, $addr, 0)>;
3514 def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr),
3515           (LSFP32_STR $value, $addr, 0)>;
3516
3517
3518 // End of vector load/store multiple N-element structure(class SIMD lselem)
3519
3520 // The followings are post-index vector load/store multiple N-element
3521 // structure(class SIMD lselem-post)
// Immediate operands that accept exactly one value each (1..64).  They are
// used as the post-index increment of the fixed-stride writeback load/store
// forms below, where the increment is implied by the transfer size.
3522 def exact1_asmoperand : AsmOperandClass {
3523   let Name = "Exact1";
3524   let PredicateMethod = "isExactImm<1>";
3525   let RenderMethod = "addImmOperands";
3526 }
3527 def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> {
3528   let ParserMatchClass = exact1_asmoperand;
3529 }
3530
3531 def exact2_asmoperand : AsmOperandClass {
3532   let Name = "Exact2";
3533   let PredicateMethod = "isExactImm<2>";
3534   let RenderMethod = "addImmOperands";
3535 }
3536 def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> {
3537   let ParserMatchClass = exact2_asmoperand;
3538 }
3539
3540 def exact3_asmoperand : AsmOperandClass {
3541   let Name = "Exact3";
3542   let PredicateMethod = "isExactImm<3>";
3543   let RenderMethod = "addImmOperands";
3544 }
3545 def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> {
3546   let ParserMatchClass = exact3_asmoperand;
3547 }
3548
3549 def exact4_asmoperand : AsmOperandClass {
3550   let Name = "Exact4";
3551   let PredicateMethod = "isExactImm<4>";
3552   let RenderMethod = "addImmOperands";
3553 }
3554 def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> {
3555   let ParserMatchClass = exact4_asmoperand;
3556 }
3557
3558 def exact6_asmoperand : AsmOperandClass {
3559   let Name = "Exact6";
3560   let PredicateMethod = "isExactImm<6>";
3561   let RenderMethod = "addImmOperands";
3562 }
3563 def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> {
3564   let ParserMatchClass = exact6_asmoperand;
3565 }
3566
3567 def exact8_asmoperand : AsmOperandClass {
3568   let Name = "Exact8";
3569   let PredicateMethod = "isExactImm<8>";
3570   let RenderMethod = "addImmOperands";
3571 }
3572 def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
3573   let ParserMatchClass = exact8_asmoperand;
3574 }
3575
3576 def exact12_asmoperand : AsmOperandClass {
3577   let Name = "Exact12";
3578   let PredicateMethod = "isExactImm<12>";
3579   let RenderMethod = "addImmOperands";
3580 }
3581 def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> {
3582   let ParserMatchClass = exact12_asmoperand;
3583 }
3584
3585 def exact16_asmoperand : AsmOperandClass {
3586   let Name = "Exact16";
3587   let PredicateMethod = "isExactImm<16>";
3588   let RenderMethod = "addImmOperands";
3589 }
3590 def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
3591   let ParserMatchClass = exact16_asmoperand;
3592 }
3593
3594 def exact24_asmoperand : AsmOperandClass {
3595   let Name = "Exact24";
3596   let PredicateMethod = "isExactImm<24>";
3597   let RenderMethod = "addImmOperands";
3598 }
3599 def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
3600   let ParserMatchClass = exact24_asmoperand;
3601 }
3602
3603 def exact32_asmoperand : AsmOperandClass {
3604   let Name = "Exact32";
3605   let PredicateMethod = "isExactImm<32>";
3606   let RenderMethod = "addImmOperands";
3607 }
3608 def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
3609   let ParserMatchClass = exact32_asmoperand;
3610 }
3611
3612 def exact48_asmoperand : AsmOperandClass {
3613   let Name = "Exact48";
3614   let PredicateMethod = "isExactImm<48>";
3615   let RenderMethod = "addImmOperands";
3616 }
3617 def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
3618   let ParserMatchClass = exact48_asmoperand;
3619 }
3620
3621 def exact64_asmoperand : AsmOperandClass {
3622   let Name = "Exact64";
3623   let PredicateMethod = "isExactImm<64>";
3624   let RenderMethod = "addImmOperands";
3625 }
3626 def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
3627   let ParserMatchClass = exact64_asmoperand;
3628 }
3629
// Post-index (writeback) vector-list load: $Rn is updated into $wb.
// _fixed uses an exact immediate increment (Rm field forced to 0b11111);
// _register takes the increment from a GPR (XZR excluded via GPR64noxzr).
3630 multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
3631                            RegisterOperand VecList, Operand ImmTy,
3632                            string asmop> {
3633   let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
3634       DecoderMethod = "DecodeVLDSTPostInstruction" in {
3635     def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
3636                      (outs VecList:$Rt, GPR64xsp:$wb),
3637                      (ins GPR64xsp:$Rn, ImmTy:$amt),
3638                      asmop # "\t$Rt, [$Rn], $amt",
3639                      [],
3640                      NoItinerary> {
3641       let Rm = 0b11111;
3642     }
3643
3644     def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
3645                         (outs VecList:$Rt, GPR64xsp:$wb),
3646                         (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
3647                         asmop # "\t$Rt, [$Rn], $Rm",
3648                         [],
3649                         NoItinerary>;
3650   }
3651 }
3652
// One writeback load per arrangement.  ImmTy is the exact increment for
// the 64-bit (q=0) forms, ImmTy2 for the 128-bit (q=1) forms — twice the
// data is transferred, so the fixed post-index amount doubles.
3653 multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3654     Operand ImmTy2, string asmop> {
3655   defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
3656                               !cast<RegisterOperand>(List # "8B_operand"),
3657                               ImmTy, asmop>;
3658
3659   defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
3660                               !cast<RegisterOperand>(List # "4H_operand"),
3661                               ImmTy, asmop>;
3662
3663   defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
3664                               !cast<RegisterOperand>(List # "2S_operand"),
3665                               ImmTy, asmop>;
3666
3667   defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
3668                                !cast<RegisterOperand>(List # "16B_operand"),
3669                                ImmTy2, asmop>;
3670
3671   defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
3672                               !cast<RegisterOperand>(List # "8H_operand"),
3673                               ImmTy2, asmop>;
3674
3675   defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
3676                               !cast<RegisterOperand>(List # "4S_operand"),
3677                               ImmTy2, asmop>;
3678
3679   defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
3680                               !cast<RegisterOperand>(List # "2D_operand"),
3681                               ImmTy2, asmop>;
3682 }
3683
3684 // Post-index load multiple N-element structures to N registers (N = 1,2,3,4)
3685 defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
3686 defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3687                                  "ld1">;
3688
3689 defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
3690
3691 defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3692                              "ld3">;
3693
3694 defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
3695
3696 // Post-index load multiple 1-element structures from N consecutive registers
3697 // (N = 2,3,4)
3698 defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3699                                "ld1">;
3700 defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3701                                    uimm_exact16, "ld1">;
3702
3703 defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3704                                "ld1">;
3705 defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3706                                    uimm_exact24, "ld1">;
3707
3708 defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3709                                 "ld1">;
3710 defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3711                                    uimm_exact32, "ld1">;
3712
// Post-index (writeback) vector-list store: mirror of NeonI_LDWB_VList,
// with the list $Rt in the ins and only $wb produced.
3713 multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
3714                             RegisterOperand VecList, Operand ImmTy,
3715                             string asmop> {
3716   let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
3717       DecoderMethod = "DecodeVLDSTPostInstruction" in {
3718     def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
3719                      (outs GPR64xsp:$wb),
3720                      (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
3721                      asmop # "\t$Rt, [$Rn], $amt",
3722                      [],
3723                      NoItinerary> {
3724       let Rm = 0b11111;
3725     }
3726
3727     def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
3728                       (outs GPR64xsp:$wb),
3729                       (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
3730                       asmop # "\t$Rt, [$Rn], $Rm",
3731                       [],
3732                       NoItinerary>;
3733   }
3734 }
3735
// One writeback store per arrangement; ImmTy/ImmTy2 are the 64-/128-bit
// fixed post-index increments, as in LDWB_VList_BHSD.
3736 multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3737                            Operand ImmTy2, string asmop> {
3738   defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
3739                  !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;
3740
3741   defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
3742                               !cast<RegisterOperand>(List # "4H_operand"),
3743                               ImmTy, asmop>;
3744
3745   defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
3746                               !cast<RegisterOperand>(List # "2S_operand"),
3747                               ImmTy, asmop>;
3748
3749   defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
3750                                !cast<RegisterOperand>(List # "16B_operand"),
3751                                ImmTy2, asmop>;
3752
3753   defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
3754                               !cast<RegisterOperand>(List # "8H_operand"),
3755                               ImmTy2, asmop>;
3756
3757   defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
3758                               !cast<RegisterOperand>(List # "4S_operand"),
3759                               ImmTy2, asmop>;
3760
3761   defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
3762                               !cast<RegisterOperand>(List # "2D_operand"),
3763                               ImmTy2, asmop>;
3764 }
3765
3766 // Post-index store multiple N-element structures from N registers (N = 1,2,3,4)
3767 defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
3768 defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3769                                  "st1">;
3770
3771 defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
3772
3773 defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3774                              "st3">;
3775
3776 defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
3777
3778 // Post-index store multiple 1-element structures from N consecutive registers
3779 // (N = 2,3,4)
3780 defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3781                                "st1">;
3782 defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3783                                    uimm_exact16, "st1">;
3784
3785 defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3786                                "st1">;
3787 defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3788                                    uimm_exact24, "st1">;
3789
3790 defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3791                                "st1">;
3792 defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3793                                    uimm_exact32, "st1">;
3794
3795 // End of post-index vector load/store multiple N-element structure
3796 // (class SIMD lselem-post)
3797
3798 // The followings are vector load/store single N-element structure
3799 // (class SIMD lsone).
// Lane-index operands printed without the '#' prefix: neon_uimmN_bare
// accepts 0 .. 2^N-1 and is used as the $lane operand of the
// load/store-single-lane instructions below.
3800 def neon_uimm0_bare : Operand<i64>,
3801                         ImmLeaf<i64, [{return Imm == 0;}]> {
3802   let ParserMatchClass = neon_uimm0_asmoperand;
3803   let PrintMethod = "printUImmBareOperand";
3804 }
3805
3806 def neon_uimm1_bare : Operand<i64>,
3807                         ImmLeaf<i64, [{return Imm < 2;}]> {
3808   let ParserMatchClass = neon_uimm1_asmoperand;
3809   let PrintMethod = "printUImmBareOperand";
3810 }
3811
3812 def neon_uimm2_bare : Operand<i64>,
3813                         ImmLeaf<i64, [{return Imm < 4;}]> {
3814   let ParserMatchClass = neon_uimm2_asmoperand;
3815   let PrintMethod = "printUImmBareOperand";
3816 }
3817
3818 def neon_uimm3_bare : Operand<i64>,
3819                         ImmLeaf<i64, [{return Imm < 8;}]> {
3820   let ParserMatchClass = uimm3_asmoperand;
3821   let PrintMethod = "printUImmBareOperand";
3822 }
3823
3824 def neon_uimm4_bare : Operand<i64>,
3825                         ImmLeaf<i64, [{return Imm < 16;}]> {
3826   let ParserMatchClass = uimm4_asmoperand;
3827   let PrintMethod = "printUImmBareOperand";
3828 }
3829
// Base class for LD1R/LD2R/LD3R/LD4R: load one element per register in the
// list and replicate it to every lane of that register.
3830 class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
3831                     RegisterOperand VecList, string asmop>
3832     : NeonI_LdOne_Dup<q, r, opcode, size,
3833                       (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3834                       asmop # "\t$Rt, [$Rn]",
3835                       [],
3836                       NoItinerary> {
3837   let mayLoad = 1;
3838   let neverHasSideEffects = 1;
3839 }
3840
// One load-and-replicate per arrangement; unlike LDVList_BHSD this also
// includes the 64-bit 1D form.
3841 multiclass LDN_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop> {
3842   def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00,
3843                           !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3844
3845   def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01,
3846                           !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3847
3848   def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10,
3849                           !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3850
3851   def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11,
3852                           !cast<RegisterOperand>(List # "1D_operand"), asmop>;
3853
3854   def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00,
3855                            !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3856
3857   def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01,
3858                           !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3859
3860   def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10,
3861                           !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3862
3863   def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11,
3864                           !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3865 }
3866
3867 // Load single 1-element structure to all lanes of 1 register
3868 defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">;
3869
3870 // Load single N-element structure to all lanes of N consecutive
3871 // registers (N = 2,3,4)
3872 defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">;
3873 defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">;
3874 defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">;
3875
3876
// Select a load-then-broadcast (Neon_vdup of a loaded scalar) as a single
// LD1R.  The 8/16-bit cases use extloads since i8/i16 are promoted to i32.
3877 class LD1R_pattern <ValueType VTy, ValueType DTy, PatFrag LoadOp,
3878                     Instruction INST>
3879     : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))),
3880           (VTy (INST GPR64xsp:$Rn))>;
3881
3882 // Match all LD1R instructions
3883 def : LD1R_pattern<v8i8, i32, extloadi8, LD1R_8B>;
3884
3885 def : LD1R_pattern<v16i8, i32, extloadi8, LD1R_16B>;
3886
3887 def : LD1R_pattern<v4i16, i32, extloadi16, LD1R_4H>;
3888
3889 def : LD1R_pattern<v8i16, i32, extloadi16, LD1R_8H>;
3890
3891 def : LD1R_pattern<v2i32, i32, load, LD1R_2S>;
3892 def : LD1R_pattern<v2f32, f32, load, LD1R_2S>;
3893
3894 def : LD1R_pattern<v4i32, i32, load, LD1R_4S>;
3895 def : LD1R_pattern<v4f32, f32, load, LD1R_4S>;
3896
3897 def : LD1R_pattern<v2i64, i64, load, LD1R_2D>;
3898 def : LD1R_pattern<v2f64, f64, load, LD1R_2D>;
3899
// Single-element vectors have no vdup: a scalar_to_vector of a loaded
// scalar IS the broadcast, so match that shape to LD1R_1D.
3900 class LD1R_pattern_v1 <ValueType VTy, ValueType DTy, PatFrag LoadOp,
3901                        Instruction INST>
3902   : Pat<(VTy (scalar_to_vector (DTy (LoadOp GPR64xsp:$Rn)))),
3903         (VTy (INST GPR64xsp:$Rn))>;
3904
3905 def : LD1R_pattern_v1<v1i64, i64, load, LD1R_1D>;
3906 def : LD1R_pattern_v1<v1f64, f64, load, LD1R_1D>;
3907
// Vector-list operands with bare element layout (".b"/".h"/".s"/".d",
// no element count), as used by the single-lane load/store syntax.
3908 multiclass VectorList_Bare_BHSD<string PREFIX, int Count,
3909                                 RegisterClass RegList> {
3910   defm B : VectorList_operands<PREFIX, "B", Count, RegList>;
3911   defm H : VectorList_operands<PREFIX, "H", Count, RegList>;
3912   defm S : VectorList_operands<PREFIX, "S", Count, RegList>;
3913   defm D : VectorList_operands<PREFIX, "D", Count, RegList>;
3914 }
3915
3916 // Special vector list operand of 128-bit vectors with bare layout.
3917 // i.e. only show ".b", ".h", ".s", ".d"
3918 defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>;
3919 defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>;
3920 defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>;
3921 defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>;
3922
// Base class for single-lane loads (LD1-LD4 to one lane): only lane $lane
// of $Rt is written, so the old value is threaded through via
// "$src = $Rt" and hasExtraDefRegAllocReq.
3923 class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3924                      Operand ImmOp, string asmop>
3925     : NeonI_LdStOne_Lane<1, r, op2_1, op0,
3926                          (outs VList:$Rt),
3927                          (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane),
3928                          asmop # "\t$Rt[$lane], [$Rn]",
3929                          [],
3930                          NoItinerary> {
3931   let mayLoad = 1;
3932   let neverHasSideEffects = 1;
3933   let hasExtraDefRegAllocReq = 1;
3934   let Constraints = "$src = $Rt";
3935 }
3936
// One single-lane load per element size.  The lane index is scattered
// across Inst{30} (Q) and Inst{12-10} per element size; for _D the low
// bits are fixed at 0b001.
3937 multiclass LDN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
3938   def _B : NeonI_LDN_Lane<r, 0b00, op0,
3939                           !cast<RegisterOperand>(List # "B_operand"),
3940                           neon_uimm4_bare, asmop> {
3941     let Inst{12-10} = lane{2-0};
3942     let Inst{30} = lane{3};
3943   }
3944
3945   def _H : NeonI_LDN_Lane<r, 0b01, op0,
3946                           !cast<RegisterOperand>(List # "H_operand"),
3947                           neon_uimm3_bare, asmop> {
3948     let Inst{12-10} = {lane{1}, lane{0}, 0b0};
3949     let Inst{30} = lane{2};
3950   }
3951
3952   def _S : NeonI_LDN_Lane<r, 0b10, op0,
3953                           !cast<RegisterOperand>(List # "S_operand"),
3954                           neon_uimm2_bare, asmop> {
3955     let Inst{12-10} = {lane{0}, 0b0, 0b0};
3956     let Inst{30} = lane{1};
3957   }
3958
3959   def _D : NeonI_LDN_Lane<r, 0b10, op0,
3960                           !cast<RegisterOperand>(List # "D_operand"),
3961                           neon_uimm1_bare, asmop> {
3962     let Inst{12-10} = 0b001;
3963     let Inst{30} = lane{0};
3964   }
3965 }
3966
3967 // Load single 1-element structure to one lane of 1 register.
3968 defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">;
3969
3970 // Load single N-element structure to one lane of N consecutive registers
3971 // (N = 2,3,4)
3972 defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">;
3973 defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">;
3974 defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">;
3975
// Selection patterns mapping a scalar load + vector_insert to LD1 (single
// lane).  VTy is the 64-bit vector type, VTy2 the 128-bit type, DTy the
// scalar element type produced by LoadOp, and ImmOp/ImmOp2 the matching
// lane-index operands for each width.
multiclass LD1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
                          Operand ImmOp, Operand ImmOp2, PatFrag LoadOp,
                          Instruction INST> {
  // 64-bit vector: the instruction operates on the full 128-bit register,
  // so widen $src with SUBREG_TO_REG, run the lane load, then take the low
  // 64 bits back out with EXTRACT_SUBREG.
  def : Pat<(VTy (vector_insert (VTy VPR64:$src),
                     (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))),
            (VTy (EXTRACT_SUBREG
                     (INST GPR64xsp:$Rn,
                           (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                           ImmOp:$lane),
                     sub_64))>;

  // 128-bit vector: direct mapping, no subregister shuffling needed.
  def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src),
                      (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))),
            (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>;
}
3991
// Match all LD1LN instructions.
// Sub-32-bit elements (b, h) arrive as any-extending loads of i32, since
// there is no i8/i16 scalar type at the SelectionDAG level here.
defm : LD1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
                      extloadi8, LD1LN_B>;

defm : LD1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
                      extloadi16, LD1LN_H>;

// 32-bit elements: integer and float lanes both select LD1LN_S.
defm : LD1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
                      load, LD1LN_S>;
defm : LD1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
                      load, LD1LN_S>;

// 64-bit elements: integer and float lanes both select LD1LN_D.
defm : LD1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
                      load, LD1LN_D>;
defm : LD1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
                      load, LD1LN_D>;
4008
// Store single structure from one lane of one or more registers.  The
// leading 0 passed to NeonI_LdStOne_Lane selects the store direction (the
// post-index classes below use 1 for loads, 0 for stores).  Stores produce
// no register results, hence the empty (outs).
class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                     Operand ImmOp, string asmop>
    : NeonI_LdStOne_Lane<0, r, op2_1, op0,
                         (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane),
                         asmop # "\t$Rt[$lane], [$Rn]",
                         [],
                         NoItinerary> {
  let mayStore = 1;
  let neverHasSideEffects = 1;
  let hasExtraDefRegAllocReq = 1;
}
4020
// Expands one STn (store single structure from one lane) mnemonic into its
// B/H/S/D element-size variants.  Mirrors LDN_Lane_BHSD above: the lane
// index's low bits occupy Inst{12-10} and the top bit goes into the Q bit,
// Inst{30}; unused low positions are encoded as 0.
// (Formatting normalized to match LDN_Lane_BHSD; no semantic change.)
multiclass STN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
  // Byte element: 4-bit lane index (0-15).
  def _B : NeonI_STN_Lane<r, 0b00, op0,
                          !cast<RegisterOperand>(List # "B_operand"),
                          neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  // Halfword element: 3-bit lane index (0-7).
  def _H : NeonI_STN_Lane<r, 0b01, op0,
                          !cast<RegisterOperand>(List # "H_operand"),
                          neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  // Word element: 2-bit lane index (0-3).
  def _S : NeonI_STN_Lane<r, 0b10, op0,
                          !cast<RegisterOperand>(List # "S_operand"),
                          neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  // Doubleword element: 1-bit lane index; distinguished from the S form by
  // the fixed Inst{12-10} = 0b001 field.
  def _D : NeonI_STN_Lane<r, 0b10, op0,
                          !cast<RegisterOperand>(List # "D_operand"),
                          neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}
4050
// Store single 1-element structure from one lane of 1 register.
defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">;

// Store single N-element structure from one lane of N consecutive registers
// (N = 2,3,4).  The {r, op0} bit pair selects which of ST1/ST2/ST3/ST4 is
// encoded, matching the LDnLN defms above.
defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">;
defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">;
defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">;
4059
// Selection patterns mapping vector_extract + scalar store to ST1 (single
// lane).  Parameters mirror LD1LN_patterns: VTy/ImmOp for the 64-bit vector
// form, VTy2/ImmOp2 for the 128-bit form, DTy the stored scalar type.
multiclass ST1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
                          Operand ImmOp, Operand ImmOp2, PatFrag StoreOp,
                          Instruction INST> {
  // 64-bit vector: widen $Rt to the full 128-bit register with
  // SUBREG_TO_REG before storing the selected lane.
  def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)),
                     GPR64xsp:$Rn),
            (INST GPR64xsp:$Rn,
                  (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64),
                  ImmOp:$lane)>;

  // 128-bit vector: direct mapping.
  def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)),
                     GPR64xsp:$Rn),
            (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>;
}
4073
// Match all ST1LN instructions.
// Sub-32-bit elements (b, h) are extracted as i32 and stored with a
// truncating store, mirroring the extending loads used by LD1LN above.
defm : ST1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
                      truncstorei8, ST1LN_B>;

defm : ST1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
                      truncstorei16, ST1LN_H>;

// 32-bit elements: integer and float lanes both select ST1LN_S.
defm : ST1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
                      store, ST1LN_S>;
defm : ST1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
                      store, ST1LN_S>;

// 64-bit elements: integer and float lanes both select ST1LN_D.
defm : ST1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
                      store, ST1LN_D>;
defm : ST1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
                      store, ST1LN_D>;
4090
4091 // End of vector load/store single N-element structure (class SIMD lsone).
4092
4093
4094 // The following are post-index load/store single N-element instructions
4095 // (class SIMD lsone-post)
4096
// Post-index (writeback) load-and-duplicate: defines the fixed-immediate
// and register post-increment forms of one LDnR variant.  $wb returns the
// updated base; the "$wb = $Rn" constraint ties it to the input base
// register.
multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
                            RegisterOperand VecList, Operand ImmTy,
                            string asmop> {
  let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn",
  DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
    // Fixed post-increment by the transfer size: encoded with Rm = 0b11111
    // (the all-ones register number stands for the immediate form).
    def _fixed : NeonI_LdOne_Dup_Post<q, r, opcode, size,
                      (outs VecList:$Rt, GPR64xsp:$wb),
                      (ins GPR64xsp:$Rn, ImmTy:$amt),
                      asmop # "\t$Rt, [$Rn], $amt",
                      [],
                      NoItinerary> {
                        let Rm = 0b11111;
                      }

    // Post-increment by a general-purpose register (XZR excluded).
    def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size,
                      (outs VecList:$Rt, GPR64xsp:$wb),
                      (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
                      asmop # "\t$Rt, [$Rn], $Rm",
                      [],
                      NoItinerary>;
  }
}
4119
// Expands one LDnR (load and replicate, post-index) mnemonic into all eight
// arrangement variants: q = 0 for the 64-bit forms (8B/4H/2S/1D), q = 1 for
// the 128-bit forms (16B/8H/4S/2D).  The size field selects the element
// width, and the uimm_* operands give the fixed post-increment amount for
// each element size.
multiclass LDWB_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop,
                         Operand uimm_b, Operand uimm_h,
                         Operand uimm_s, Operand uimm_d> {
  defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00,
                              !cast<RegisterOperand>(List # "8B_operand"),
                              uimm_b, asmop>;

  defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01,
                              !cast<RegisterOperand>(List # "4H_operand"),
                              uimm_h, asmop>;

  defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10,
                              !cast<RegisterOperand>(List # "2S_operand"),
                              uimm_s, asmop>;

  defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11,
                              !cast<RegisterOperand>(List # "1D_operand"),
                              uimm_d, asmop>;

  defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00,
                               !cast<RegisterOperand>(List # "16B_operand"),
                               uimm_b, asmop>;

  defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01,
                              !cast<RegisterOperand>(List # "8H_operand"),
                              uimm_h, asmop>;

  defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10,
                              !cast<RegisterOperand>(List # "4S_operand"),
                              uimm_s, asmop>;

  defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11,
                              !cast<RegisterOperand>(List # "2D_operand"),
                              uimm_d, asmop>;
}
4155
// Post-index load single 1-element structure to all lanes of 1 register.
// The four uimm_exact* operands are the legal fixed post-increments: N
// registers times 1/2/4/8 bytes for b/h/s/d elements respectively.
defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1,
                             uimm_exact2, uimm_exact4, uimm_exact8>;

// Post-index load single N-element structure to all lanes of N consecutive
// registers (N = 2,3,4)
defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2,
                             uimm_exact4, uimm_exact8, uimm_exact16>;
defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3,
                             uimm_exact6, uimm_exact12, uimm_exact24>;
defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4,
                             uimm_exact8, uimm_exact16, uimm_exact32>;
4168
// Post-index single-lane load base classes.  $src carries the prior value
// of the destination list (only one lane is overwritten), tied via
// "$Rt = $src"; the updated base address comes back in $wb, tied to $Rn.
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
    Constraints = "$Rn = $wb, $Rt = $src",
    DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
  // Fixed post-increment form; Rm = 0b11111 encodes the immediate variant.
  class LDN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                                Operand ImmTy, Operand ImmOp, string asmop>
      : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
                                (outs VList:$Rt, GPR64xsp:$wb),
                                (ins GPR64xsp:$Rn, ImmTy:$amt,
                                    VList:$src, ImmOp:$lane),
                                asmop # "\t$Rt[$lane], [$Rn], $amt",
                                [],
                                NoItinerary> {
    let Rm = 0b11111;
  }

  // Register post-increment form (XZR excluded from $Rm).
  class LDN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                                 Operand ImmTy, Operand ImmOp, string asmop>
      : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
                                (outs VList:$Rt, GPR64xsp:$wb),
                                (ins GPR64xsp:$Rn, GPR64noxzr:$Rm,
                                    VList:$src, ImmOp:$lane),
                                asmop # "\t$Rt[$lane], [$Rn], $Rm",
                                [],
                                NoItinerary>;
}
4194
// Expands one post-index LDn-lane mnemonic into B/H/S/D element sizes, each
// in fixed-immediate and register post-increment forms.  The lane-index bit
// layout (low bits in Inst{12-10}, top bit in Q/Inst{30}) is identical to
// the non-writeback LDN_Lane_BHSD above.
multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
                           Operand uimm_b, Operand uimm_h,
                           Operand uimm_s, Operand uimm_d> {
  def _B_fixed : LDN_WBFx_Lane<r, 0b00, op0,
                               !cast<RegisterOperand>(List # "B_operand"),
                               uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _B_register : LDN_WBReg_Lane<r, 0b00, op0,
                                   !cast<RegisterOperand>(List # "B_operand"),
                                   uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _H_fixed : LDN_WBFx_Lane<r, 0b01, op0,
                               !cast<RegisterOperand>(List # "H_operand"),
                               uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _H_register : LDN_WBReg_Lane<r, 0b01, op0,
                                   !cast<RegisterOperand>(List # "H_operand"),
                                   uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _S_fixed : LDN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "S_operand"),
                               uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _S_register : LDN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "S_operand"),
                                   uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  // D forms share op2_1 = 0b10 with S and are distinguished by the fixed
  // Inst{12-10} = 0b001 field.
  def _D_fixed : LDN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "D_operand"),
                               uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }

  def _D_register : LDN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "D_operand"),
                                   uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}
4254
// Post-index load single 1-element structure to one lane of 1 register.
// The uimm_exact* operands give the fixed post-increment per element size
// (N registers times the element width in bytes).
defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1,
                                uimm_exact2, uimm_exact4, uimm_exact8>;

// Post-index load single N-element structure to one lane of N consecutive
// registers
// (N = 2,3,4)
defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2,
                                uimm_exact4, uimm_exact8, uimm_exact16>;
defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3,
                                uimm_exact6, uimm_exact12, uimm_exact24>;
defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4,
                                uimm_exact8, uimm_exact16, uimm_exact32>;
4268
// Post-index single-lane store base classes.  Stores produce no vector
// result, so only the updated base address $wb (tied to $Rn) is defined.
let mayStore = 1, neverHasSideEffects = 1,
    hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb",
    DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
  // Fixed post-increment form; Rm = 0b11111 encodes the immediate variant.
  class STN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                      Operand ImmTy, Operand ImmOp, string asmop>
      : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
                                (outs GPR64xsp:$wb),
                                (ins GPR64xsp:$Rn, ImmTy:$amt,
                                    VList:$Rt, ImmOp:$lane),
                                asmop # "\t$Rt[$lane], [$Rn], $amt",
                                [],
                                NoItinerary> {
    let Rm = 0b11111;
  }

  // Register post-increment form (XZR excluded from $Rm).
  class STN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                       Operand ImmTy, Operand ImmOp, string asmop>
      : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
                                (outs GPR64xsp:$wb),
                                (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt,
                                    ImmOp:$lane),
                                asmop # "\t$Rt[$lane], [$Rn], $Rm",
                                [],
                                NoItinerary>;
}
4294
// Expands one post-index STn-lane mnemonic into B/H/S/D element sizes, each
// in fixed-immediate and register post-increment forms.  Lane-index bit
// placement matches STN_Lane_BHSD / LD_Lane_WB_BHSD above.
multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
                           Operand uimm_b, Operand uimm_h,
                           Operand uimm_s, Operand uimm_d> {
  def _B_fixed : STN_WBFx_Lane<r, 0b00, op0,
                               !cast<RegisterOperand>(List # "B_operand"),
                               uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _B_register : STN_WBReg_Lane<r, 0b00, op0,
                                   !cast<RegisterOperand>(List # "B_operand"),
                                   uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _H_fixed : STN_WBFx_Lane<r, 0b01, op0,
                               !cast<RegisterOperand>(List # "H_operand"),
                               uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _H_register : STN_WBReg_Lane<r, 0b01, op0,
                                   !cast<RegisterOperand>(List # "H_operand"),
                                   uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _S_fixed : STN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "S_operand"),
                               uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _S_register : STN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "S_operand"),
                                   uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  // D forms share op2_1 = 0b10 with S; fixed Inst{12-10} = 0b001 field
  // distinguishes them.
  def _D_fixed : STN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "D_operand"),
                               uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }

  def _D_register : STN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "D_operand"),
                                   uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}
4354
// Post-index store single 1-element structure from one lane of 1 register.
// The uimm_exact* operands give the fixed post-increment per element size.
defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1,
                                uimm_exact2, uimm_exact4, uimm_exact8>;

// Post-index store single N-element structure from one lane of N consecutive
// registers (N = 2,3,4)
defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2,
                                uimm_exact4, uimm_exact8, uimm_exact16>;
defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3,
                                uimm_exact6, uimm_exact12, uimm_exact24>;
defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4,
                                uimm_exact8, uimm_exact16, uimm_exact32>;
4367
4368 // End of post-index load/store single N-element instructions
4369 // (class SIMD lsone-post)
4370
4371 // Neon Scalar instructions implementation
4372 // Scalar Three Same
4373
// Base class for a scalar "three same" instruction: Rd = op(Rn, Rm) with
// all three operands in the same scalar FP/SIMD register class.
class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
                             RegisterClass FPRC>
  : NeonI_Scalar3Same<u, size, opcode,
                      (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
                      !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                      [],
                      NoItinerary>;

// Doubleword-only form (size = 0b11, FPR64 operands).
class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;

// Halfword and word integer forms (e.g. saturating ops defined on h/s only).
multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
                                      bit Commutable = 0> {
  let isCommutable = Commutable in {
    def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
    def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
  }
}

// Single/double floating-point forms; size_high is the top size bit and the
// low bit selects S (0) vs. D (1).
multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
                                      string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
    def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
  }
}

// All four integer element sizes (b/h/s/d).
multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
                                        string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
    def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
    def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
    def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
  }
}
4410
// Selects the doubleword scalar instruction for a v1i64 node.
multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
                                            Instruction INSTD> {
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
            (INSTD FPR64:$Rn, FPR64:$Rm)>;
}

// Extends the D patterns with v1i8/v1i16/v1i32 forms so that operations on
// those 1-element vector types (e.g. scalar mul/add/sub) can be selected to
// the b/h/s scalar instructions rather than failing isel.
multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
                                               Instruction INSTB,
                                               Instruction INSTH,
                                               Instruction INSTS,
                                               Instruction INSTD>
  : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
  def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
           (INSTB FPR8:$Rn, FPR8:$Rm)>;
  def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
           (INSTH FPR16:$Rn, FPR16:$Rm)>;
  def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
           (INSTS FPR32:$Rn, FPR32:$Rm)>;
}

// v1i16/v1i32 patterns only, for ops defined solely on h/s sizes.
multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTH,
                                             Instruction INSTS> {
  def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (INSTH FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
}

// Floating-point S/D patterns; result and operand types are parameterized
// separately (e.g. comparisons yield an integer result type).
multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
                                             ValueType SResTy, ValueType STy,
                                             Instruction INSTS, ValueType DResTy,
                                             ValueType DTy, Instruction INSTD> {
  def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))),
            (INSTD FPR64:$Rn, FPR64:$Rm)>;
}

// Selects the D-form compare instruction for a NEON_CMP of v1f64 operands
// with the given condition code.
class Neon_Scalar3Same_cmp_V1_D_size_patterns<CondCode CC,
                                              Instruction INSTD>
  : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)),
        (INSTD FPR64:$Rn, FPR64:$Rm)>;
4454
4455 // Scalar Three Different
4456
// Base class for a scalar "three different" instruction: the destination
// register class (FPRCD) is wider than the source class (FPRCS), e.g. a
// long multiply producing a double-width result.
class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
                             RegisterClass FPRCD, RegisterClass FPRCS>
  : NeonI_Scalar3Diff<u, size, opcode,
                      (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
                      !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                      [],
                      NoItinerary>;

// h*h->s and s*s->d widening forms.
multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
  def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
  def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
}

// Widening multiply-accumulate forms: the accumulator input $Src is tied to
// the destination $Rd.
multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
  let Constraints = "$Src = $Rd" in {
    def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
                       (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                       [],
                       NoItinerary>;
    def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
                       (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                       [],
                       NoItinerary>;
  }
}

// Patterns for the widening forms: v1i16 x v1i16 -> v1i32 and
// v1i32 x v1i32 -> v1i64.
multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTH,
                                             Instruction INSTS> {
  def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (INSTH FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
}

// Patterns for the widening multiply-accumulate forms; the first operand is
// the accumulator, matching the tied $Src input.
multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTH,
                                             Instruction INSTS> {
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
}
4502
4503 // Scalar Two Registers Miscellaneous
4504
// Base class for a scalar two-register miscellaneous instruction:
// Rd = op(Rn), with independently parameterized destination (FPRCD) and
// source (FPRCS) register classes to cover narrowing/converting forms.
class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
                             RegisterClass FPRCD, RegisterClass FPRCS>
  : NeonI_Scalar2SameMisc<u, size, opcode,
                          (outs FPRCD:$Rd), (ins FPRCS:$Rn),
                          !strconcat(asmop, "\t$Rd, $Rn"),
                          [],
                          NoItinerary>;

// Floating-point S/D forms; size_high is the top size bit, low bit selects
// S (0) vs. D (1).
multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
                                         string asmop> {
  def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
                                      FPR32>;
  def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
                                      FPR64>;
}

// Doubleword-only form.
multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
  def dd : NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
}

// All four integer element sizes; inherits the dd form from the D-size
// multiclass above.
multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
  def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
  def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
  def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
}

// FCVTXN-style narrowing: double source to single destination.
class NeonI_Scalar2SameMisc_fcvtxn_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR32, FPR64>;

// Narrowing integer forms: each destination class is one step narrower than
// its source (h->b, s->h, d->s).
multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
                                                 string asmop> {
  def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
  def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
  def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
}

// Accumulating form: $Src carries the running value and is tied to $Rd by
// the multiclass below.
class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
                                       string asmop, RegisterClass FPRC>
  : NeonI_Scalar2SameMisc<u, size, opcode,
                          (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
                          !strconcat(asmop, "\t$Rd, $Rn"),
                          [],
                          NoItinerary>;

// Accumulating forms for all four integer element sizes.
multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
                                                 string asmop> {

  let Constraints = "$Src = $Rd" in {
    def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
    def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
    def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
    def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
  }
}
4560
// f64 -> f32 narrowing conversion pattern (FCVTXN-style).
class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode,
                                                  Instruction INSTD>
  : Pat<(f32 (opnode (f64 FPR64:$Rn))),
        (INSTD FPR64:$Rn)>;

// Float -> integer conversion patterns: f32 -> v1i32 and f64 -> v1i64.
multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode,
                                                      Instruction INSTS,
                                                      Instruction INSTD> {
  def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
  def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;
}

// v1f64 -> v1i64 conversion pattern (vector-typed variant of the above).
class Neon_Scalar2SameMisc_vcvt_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD>
  : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;

// Integer -> float conversion patterns: v1i32 -> f32 and v1i64 -> f64.
multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator opnode,
                                                     Instruction INSTS,
                                                     Instruction INSTD> {
  def : Pat<(f32 (opnode (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
  def : Pat<(f64 (opnode (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;
}

// Same-type floating-point unary patterns: f32 -> f32 and f64 -> f64.
multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
                                                 Instruction INSTS,
                                                 Instruction INSTD> {
  def : Pat<(f32 (opnode (f32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
  def : Pat<(f64 (opnode (f64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;
}

// v1f64 -> v1f64 unary pattern.
class Neon_Scalar2SameMisc_V1_D_size_patterns<SDPatternOperator opnode,
                                              Instruction INSTD>
  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
        (INSTD FPR64:$Rn)>;
4602
// Integer compare-against-zero, doubleword form; the immediate operand is
// constrained to 0 (neon_uimm0) and printed as part of the asm string.
class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar2SameMisc<u, 0b11, opcode,
                          (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                          [],
                          NoItinerary>;

// Floating-point compare-against-zero, S and D forms; fpzz32 is the #0.0
// immediate operand.
multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
                                              string asmop> {
  def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
                           (outs FPR32:$Rd), (ins FPR32:$Rn, fpzz32:$FPImm),
                           !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
                           [],
                           NoItinerary>;
  def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
                           (outs FPR64:$Rd), (ins FPR64:$Rn, fpzz32:$FPImm),
                           !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
                           [],
                           NoItinerary>;
}

// Matches an integer compare of v1i64 against an all-zero vector (spelled
// as a bitcast of the v8i8 zero pattern) to the #0 immediate form.
class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
                       (v1i64 (bitconvert (v8i8 Neon_AllZero))))),
        (INSTD FPR64:$Rn, 0)>;

// Matches the generic NEON_CMPZ node (operand + 0 immediate + condition
// code) to the D-form compare-against-zero instruction.
class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<CondCode CC,
                                                   Instruction INSTD>
  : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn),
                          (i32 neon_uimm0:$Imm), CC)),
        (INSTD FPR64:$Rn, neon_uimm0:$Imm)>;

// Floating-point compare-against-#0.0 patterns for S and D, plus the
// NEON_CMPZ form for v1f64.
multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
                                                      CondCode CC,
                                                      Instruction INSTS,
                                                      Instruction INSTD> {
  def : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (f32 fpzz32:$FPImm))),
            (INSTS FPR32:$Rn, fpzz32:$FPImm)>;
  def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f32 fpzz32:$FPImm))),
            (INSTD FPR64:$Rn, fpzz32:$FPImm)>;
  def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpzz32:$FPImm), CC)),
            (INSTD FPR64:$Rn, fpzz32:$FPImm)>;
}
4647
// Select a unary v1i64 -> v1i64 operation onto the D-register instruction.
4648 multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
4649                                                 Instruction INSTD> {
4650   def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
4651             (INSTD FPR64:$Rn)>;
4652 }
4653
// Unary selection for all four scalar sizes; the v1i64/D pattern is
// inherited from Neon_Scalar2SameMisc_D_size_patterns.
4654 multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
4655                                                    Instruction INSTB,
4656                                                    Instruction INSTH,
4657                                                    Instruction INSTS,
4658                                                    Instruction INSTD>
4659   : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
4660   def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
4661             (INSTB FPR8:$Rn)>;
4662   def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
4663             (INSTH FPR16:$Rn)>;
4664   def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
4665             (INSTS FPR32:$Rn)>;
4666 }
4667
// Narrowing unary selection: each pattern produces a result element half the
// width of its source (H->B, S->H, D->S).
4668 multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
4669                                                        SDPatternOperator opnode,
4670                                                        Instruction INSTH,
4671                                                        Instruction INSTS,
4672                                                        Instruction INSTD> {
4673   def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
4674             (INSTH FPR16:$Rn)>;
4675   def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
4676             (INSTS FPR32:$Rn)>;
4677   def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
4678             (INSTD FPR64:$Rn)>;
4679 
4680 }
4681
// Binary (accumulating) selection for all four scalar sizes; $Src is the
// accumulator value and is passed as the instruction's first operand.
4682 multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
4683                                                        SDPatternOperator opnode,
4684                                                        Instruction INSTB,
4685                                                        Instruction INSTH,
4686                                                        Instruction INSTS,
4687                                                        Instruction INSTD> {
4688   def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
4689             (INSTB FPR8:$Src, FPR8:$Rn)>;
4690   def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
4691             (INSTH FPR16:$Src, FPR16:$Rn)>;
4692   def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
4693             (INSTS FPR32:$Src, FPR32:$Rn)>;
4694   def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
4695             (INSTD FPR64:$Src, FPR64:$Rn)>;
4696 }
4697
4698 // Scalar Shift By Immediate
4699
// Generic scalar shift-by-immediate instruction: source and destination use
// the same register class FPRC; the immediate operand type is ImmTy.
4700 class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
4701                                 RegisterClass FPRC, Operand ImmTy>
4702   : NeonI_ScalarShiftImm<u, opcode,
4703                          (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
4704                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4705                          [], NoItinerary>;
4706
// D-register-only right shift by immediate: immh:immb = 1xxxxxx selects the
// 64-bit element size, with the 6-bit shift amount in Inst{21-16}.
4707 multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
4708                                             string asmop> {
4709   def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
4710     bits<6> Imm;
4711     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4712     let Inst{21-16} = Imm;
4713   }
4714 }
4715
// B/H/S right-shift-by-immediate variants; the D variant is inherited.  The
// position of the leading 1 in immh:immb encodes the element size.
4716 multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
4717                                                string asmop>
4718   : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
4719   def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
4720     bits<3> Imm;
4721     let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4722     let Inst{18-16} = Imm;
4723   }
4724   def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
4725     bits<4> Imm;
4726     let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4727     let Inst{19-16} = Imm;
4728   }
4729   def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
4730     bits<5> Imm;
4731     let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4732     let Inst{20-16} = Imm;
4733   }
4734 }
4735
// D-register-only left shift by immediate: immh:immb = 1xxxxxx selects the
// 64-bit element size, with the 6-bit shift amount in Inst{21-16}.
4736 multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
4737                                             string asmop> {
4738   def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
4739     bits<6> Imm;
4740     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4741     let Inst{21-16} = Imm;
4742   }
4743 }
4744
// B/H/S left-shift-by-immediate variants; the D variant is inherited.  The
// position of the leading 1 in immh:immb encodes the element size.
4745 multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
4746                                               string asmop>
4747   : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
4748   def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
4749     bits<3> Imm;
4750     let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4751     let Inst{18-16} = Imm;
4752   }
4753   def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
4754     bits<4> Imm;
4755     let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4756     let Inst{19-16} = Imm;
4757   }
4758   def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
4759     bits<5> Imm;
4760     let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4761     let Inst{20-16} = Imm;
4762   }
4763 }
4764
// Right-shift-and-accumulate style D-register instruction: $Src is tied to
// $Rd, so the destination register is both read and written.
4765 class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
4766   : NeonI_ScalarShiftImm<u, opcode,
4767                          (outs FPR64:$Rd),
4768                          (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
4769                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4770                          [], NoItinerary> {
4771     bits<6> Imm;
4772     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4773     let Inst{21-16} = Imm;
4774     let Constraints = "$Src = $Rd";
4775 }
4776
// Left-shift-and-accumulate style D-register instruction: $Src is tied to
// $Rd, so the destination register is both read and written.
4777 class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
4778   : NeonI_ScalarShiftImm<u, opcode,
4779                          (outs FPR64:$Rd),
4780                          (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
4781                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4782                          [], NoItinerary> {
4783     bits<6> Imm;
4784     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4785     let Inst{21-16} = Imm;
4786     let Constraints = "$Src = $Rd";
4787 }
4788
// Narrowing shift-by-immediate instruction: destination (FPRCD) and source
// (FPRCS) use different register classes.
4789 class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
4790                                        RegisterClass FPRCD, RegisterClass FPRCS,
4791                                        Operand ImmTy>
4792   : NeonI_ScalarShiftImm<u, opcode,
4793                          (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
4794                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4795                          [], NoItinerary>;
4796
// Narrowing right shifts: B<-H, H<-S, S<-D.  The immediate operand range
// follows the narrow destination element width (shr_imm8/16/32).
4797 multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
4798                                                 string asmop> {
4799   def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
4800                                              shr_imm8> {
4801     bits<3> Imm;
4802     let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4803     let Inst{18-16} = Imm;
4804   }
4805   def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
4806                                              shr_imm16> {
4807     bits<4> Imm;
4808     let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4809     let Inst{19-16} = Imm;
4810   }
4811   def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
4812                                              shr_imm32> {
4813     bits<5> Imm;
4814     let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4815     let Inst{20-16} = Imm;
4816   }
4817 }
4818
// Convert with an immediate shift amount, S and D register sizes.
4819 multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> {
4820   def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
4821     bits<5> Imm;
4822     let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4823     let Inst{20-16} = Imm;
4824   }
4825   def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
4826     bits<6> Imm;
4827     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4828     let Inst{21-16} = Imm;
4829   }
4830 }
4831
// Select a v1i64 right shift by immediate onto the D-register instruction.
4832 multiclass Neon_ScalarShiftRImm_D_size_patterns<SDPatternOperator opnode,
4833                                                Instruction INSTD> {
4834   def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4835                 (INSTD FPR64:$Rn, imm:$Imm)>;
4836 }
4837
// Select a v1i64 left shift by immediate onto the D-register instruction.
4838 multiclass Neon_ScalarShiftLImm_D_size_patterns<SDPatternOperator opnode,
4839                                                Instruction INSTD> {
4840   def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))),
4841                 (INSTD FPR64:$Rn, imm:$Imm)>;
4842 }
4843
// As the D-size left-shift pattern, but with the shift amount presented as a
// vector splat (Neon_vdup of the immediate).
4844 class Neon_ScalarShiftLImm_V1_D_size_patterns<SDPatternOperator opnode,
4845                                              Instruction INSTD>
4846   : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
4847             (v1i64 (Neon_vdup (i32 shl_imm64:$Imm))))),
4848         (INSTD FPR64:$Rn, imm:$Imm)>;
4849
// As the D-size right-shift pattern, but with the shift amount presented as a
// vector splat (Neon_vdup of the immediate).
4850 class Neon_ScalarShiftRImm_V1_D_size_patterns<SDPatternOperator opnode,
4851                                              Instruction INSTD>
4852   : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
4853             (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))),
4854         (INSTD FPR64:$Rn, imm:$Imm)>;
4855
// Left-shift-by-immediate selection for all four sizes; v1i64 is inherited
// from Neon_ScalarShiftLImm_D_size_patterns.
4856 multiclass Neon_ScalarShiftLImm_BHSD_size_patterns<SDPatternOperator opnode,
4857                                                    Instruction INSTB,
4858                                                    Instruction INSTH,
4859                                                    Instruction INSTS,
4860                                                    Instruction INSTD>
4861   : Neon_ScalarShiftLImm_D_size_patterns<opnode, INSTD> {
4862   def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))),
4863                 (INSTB FPR8:$Rn, imm:$Imm)>;
4864   def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))),
4865                 (INSTH FPR16:$Rn, imm:$Imm)>;
4866   def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))),
4867                 (INSTS FPR32:$Rn, imm:$Imm)>;
4868 }
4869
// Accumulating left shift by immediate: $Src is the accumulator operand and
// is passed to the instruction first.
4870 class Neon_ScalarShiftLImm_accum_D_size_patterns<SDPatternOperator opnode,
4871                                                 Instruction INSTD>
4872   : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
4873             (i32 shl_imm64:$Imm))),
4874         (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4875
// Accumulating right shift by immediate: $Src is the accumulator operand and
// is passed to the instruction first.
4876 class Neon_ScalarShiftRImm_accum_D_size_patterns<SDPatternOperator opnode,
4877                                                 Instruction INSTD>
4878   : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
4879             (i32 shr_imm64:$Imm))),
4880         (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4881
// Narrowing shift-right selection: B<-H, H<-S, S<-D; the immediate predicate
// here follows the wide source element width (shr_imm16/32/64).
4882 multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
4883                                                        SDPatternOperator opnode,
4884                                                        Instruction INSTH,
4885                                                        Instruction INSTS,
4886                                                        Instruction INSTD> {
4887   def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))),
4888                 (INSTH FPR16:$Rn, imm:$Imm)>;
4889   def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4890                 (INSTS FPR32:$Rn, imm:$Imm)>;
4891   def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4892                 (INSTD FPR64:$Rn, imm:$Imm)>;
4893 }
4894
// Integer (fixed-point) -> floating-point convert selection with an
// immediate shift amount.
4895 multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator opnode,
4896                                                       Instruction INSTS,
4897                                                       Instruction INSTD> {
4898   def ssi : Pat<(f32 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4899                 (INSTS FPR32:$Rn, imm:$Imm)>;
4900   def ddi : Pat<(f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4901                 (INSTD FPR64:$Rn, imm:$Imm)>;
4902 }
4903
// Floating-point -> integer (fixed-point) convert selection with an
// immediate shift amount.
4904 multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator opnode,
4905                                                       Instruction INSTS,
4906                                                       Instruction INSTD> {
4907   def ssi : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4908                 (INSTS FPR32:$Rn, imm:$Imm)>;
4909   def ddi : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4910                 (INSTD FPR64:$Rn, imm:$Imm)>;
4911 }
4912
4913 // Scalar Signed Shift Right (Immediate)
4914 defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
4915 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
4916 // Pattern to match llvm.arm.* intrinsic.
4917 def : Neon_ScalarShiftRImm_V1_D_size_patterns<sra, SSHRddi>;
4918 
4919 // Scalar Unsigned Shift Right (Immediate)
4920 defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
4921 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
4922 // Pattern to match llvm.arm.* intrinsic.
4923 def : Neon_ScalarShiftRImm_V1_D_size_patterns<srl, USHRddi>;
4924 
4925 // Scalar Signed Rounding Shift Right (Immediate)
4926 defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
4927 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;
4928 
4929 // Scalar Unsigned Rounding Shift Right (Immediate)
4930 defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
4931 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;
4932 
4933 // Scalar Signed Shift Right and Accumulate (Immediate)
4934 def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
4935 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4936           <int_aarch64_neon_vsrads_n, SSRA>;
4937 
4938 // Scalar Unsigned Shift Right and Accumulate (Immediate)
4939 def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
4940 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4941           <int_aarch64_neon_vsradu_n, USRA>;
4942 
4943 // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
4944 def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
4945 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4946           <int_aarch64_neon_vrsrads_n, SRSRA>;
4947 
4948 // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
4949 def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
4950 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4951           <int_aarch64_neon_vrsradu_n, URSRA>;
4952
// Scalar left shifts by immediate (plain and saturating) and the
// shift-and-insert instructions SRI/SLI.
4953 // Scalar Shift Left (Immediate)
4954 defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
4955 defm : Neon_ScalarShiftLImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
4956 // Pattern to match llvm.arm.* intrinsic.
4957 def : Neon_ScalarShiftLImm_V1_D_size_patterns<shl, SHLddi>;
4958 
4959 // Signed Saturating Shift Left (Immediate)
4960 defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
4961 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
4962                                                SQSHLbbi, SQSHLhhi,
4963                                                SQSHLssi, SQSHLddi>;
4964 // Pattern to match llvm.arm.* intrinsic.
4965 defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;
4966 
4967 // Unsigned Saturating Shift Left (Immediate)
4968 defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
4969 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
4970                                                UQSHLbbi, UQSHLhhi,
4971                                                UQSHLssi, UQSHLddi>;
4972 // Pattern to match llvm.arm.* intrinsic.
4973 defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;
4974 
4975 // Signed Saturating Shift Left Unsigned (Immediate)
4976 defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
4977 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
4978                                                SQSHLUbbi, SQSHLUhhi,
4979                                                SQSHLUssi, SQSHLUddi>;
4980 
4981 // Shift Right And Insert (Immediate)
4982 def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
4983 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4984           <int_aarch64_neon_vsri, SRI>;
4985 
4986 // Shift Left And Insert (Immediate)
4987 def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
4988 def : Neon_ScalarShiftLImm_accum_D_size_patterns
4989           <int_aarch64_neon_vsli, SLI>;
4990
// Scalar saturating narrowing right shifts: the destination register is half
// the width of the source register.
4991 // Signed Saturating Shift Right Narrow (Immediate)
4992 defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
4993 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
4994                                                     SQSHRNbhi, SQSHRNhsi,
4995                                                     SQSHRNsdi>;
4996 
4997 // Unsigned Saturating Shift Right Narrow (Immediate)
4998 defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
4999 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
5000                                                     UQSHRNbhi, UQSHRNhsi,
5001                                                     UQSHRNsdi>;
5002 
5003 // Signed Saturating Rounded Shift Right Narrow (Immediate)
5004 defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
5005 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
5006                                                     SQRSHRNbhi, SQRSHRNhsi,
5007                                                     SQRSHRNsdi>;
5008 
5009 // Unsigned Saturating Rounded Shift Right Narrow (Immediate)
5010 defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
5011 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
5012                                                     UQRSHRNbhi, UQRSHRNhsi,
5013                                                     UQRSHRNsdi>;
5014 
5015 // Signed Saturating Shift Right Unsigned Narrow (Immediate)
5016 defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
5017 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
5018                                                     SQSHRUNbhi, SQSHRUNhsi,
5019                                                     SQSHRUNsdi>;
5020 
5021 // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
5022 defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
5023 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
5024                                                     SQRSHRUNbhi, SQRSHRUNhsi,
5025                                                     SQRSHRUNsdi>;
5026
// Scalar fixed-point <-> floating-point converts with an immediate shift
// amount (scvtf/ucvtf and fcvtzs/fcvtzu).
5027 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
5028 defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
5029 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxs2fp_n,
5030                                                   SCVTF_Nssi, SCVTF_Nddi>;
5031 
5032 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
5033 defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
5034 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxu2fp_n,
5035                                                   UCVTF_Nssi, UCVTF_Nddi>;
5036 
5037 // Scalar Floating-point Convert To Signed Fixed-point (Immediate)
5038 defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
5039 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxs_n,
5040                                                   FCVTZS_Nssi, FCVTZS_Nddi>;
5041 
5042 // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
5043 defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
5044 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxu_n,
5045                                                   FCVTZU_Nssi, FCVTZU_Nddi>;
5046
5047 // Patterns For Convert Instructions Between v1f64 and v1i64
// v1i64 (fixed-point) -> v1f64 convert with an immediate shift amount.
5048 class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
5049                                              Instruction INST>
5050     : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
5051           (INST FPR64:$Rn, imm:$Imm)>;
5052
// v1f64 -> v1i64 (fixed-point) convert with an immediate shift amount.
5053 class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
5054                                              Instruction INST>
5055     : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
5056           (INST FPR64:$Rn, imm:$Imm)>;
5057
// Map the llvm.arm.* fixed-point convert intrinsics on v1f64/v1i64 onto the
// D-register convert-with-shift instructions.
5058 def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp,
5059                                              SCVTF_Nddi>;
5060 
5061 def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp,
5062                                              UCVTF_Nddi>;
5063 
5064 def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs,
5065                                              FCVTZS_Nddi>;
5066 
5067 def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu,
5068                                              FCVTZU_Nddi>;
5069
5070 // Scalar Integer Add
5071 let isCommutable = 1 in {
5072 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
5073 }
5074 
5075 // Scalar Integer Sub
5076 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
5077 
5078 // Patterns for Scalar Integer Add and Sub with D register only
5079 defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
5080 defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
5081 
5082 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
5083 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
5084 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
5085 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
5086 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
5087 
5088 // Scalar Integer Saturating Add (Signed, Unsigned)
5089 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
5090 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
5091 
5092 // Scalar Integer Saturating Sub (Signed, Unsigned)
5093 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
5094 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
5095 
5096 
5097 // Patterns to match llvm.arm.* intrinsic for
5098 // Scalar Integer Saturating Add, Sub  (Signed, Unsigned)
5099 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
5100                                            SQADDhhh, SQADDsss, SQADDddd>;
5101 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb,
5102                                            UQADDhhh, UQADDsss, UQADDddd>;
5103 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb,
5104                                            SQSUBhhh, SQSUBsss, SQSUBddd>;
5105 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
5106                                            UQSUBhhh, UQSUBsss, UQSUBddd>;
5107
5108 // Scalar Integer Saturating Doubling Multiply Half High
5109 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
5110 
5111 // Scalar Integer Saturating Rounding Doubling Multiply Half High
5112 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
5113 
5114 // Patterns to match llvm.arm.* intrinsic for
5115 // Scalar Integer Saturating Doubling Multiply Half High and
5116 // Scalar Integer Saturating Rounding Doubling Multiply Half High
5117 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
5118                                                                SQDMULHsss>;
5119 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
5120                                                                 SQRDMULHsss>;
5121 
5122 // Scalar Floating-point Multiply Extended
5123 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
5124 
5125 // Scalar Floating-point Reciprocal Step
5126 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
5127 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrecps, f32, f32,
5128                                          FRECPSsss, f64, f64, FRECPSddd>;
5129 def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5130           (FRECPSddd FPR64:$Rn, FPR64:$Rm)>;
5131 
5132 // Scalar Floating-point Reciprocal Square Root Step
5133 defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
5134 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrsqrts, f32, f32,
5135                                          FRSQRTSsss, f64, f64, FRSQRTSddd>;
5136 def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5137           (FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>;
// v1f64 square root is selected with the scalar FSQRT instruction.
5138 def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
5139
5140 // Patterns to match llvm.aarch64.* intrinsic for
5141 // Scalar Floating-point Multiply Extended.
5142 multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
5143                                                   Instruction INSTS,
5144                                                   Instruction INSTD> {
5145   def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
5146             (INSTS FPR32:$Rn, FPR32:$Rm)>;
5147   def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
5148             (INSTD FPR64:$Rn, FPR64:$Rm)>;
5149 }
5150
5151 defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx,
5152                                               FMULXsss, FMULXddd>;
5153 def : Pat<(v1f64 (int_aarch64_neon_vmulx (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5154           (FMULXddd FPR64:$Rn, FPR64:$Rm)>;
5155 
5156 // Scalar Integer Shift Left by register (Signed, Unsigned)
5157 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
5158 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
5159 
5160 // Patterns to match llvm.arm.* intrinsic for
5161 // Scalar Integer Shift Left (Signed, Unsigned)
5162 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
5163 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
5164 
5165 // Patterns to match llvm.aarch64.* intrinsic for
5166 // Scalar Integer Shift Left (Signed, Unsigned)
5167 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
5168 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
5169
5170 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
5171 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
5172 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
5173 
5174 // Patterns to match llvm.aarch64.* intrinsic for
5175 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
5176 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
5177                                            SQSHLhhh, SQSHLsss, SQSHLddd>;
5178 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
5179                                            UQSHLhhh, UQSHLsss, UQSHLddd>;
5180 
5181 // Patterns to match llvm.arm.* intrinsic for
5182 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
5183 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
5184 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
5185 
5186 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
5187 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
5188 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
5189 
5190 // Patterns to match llvm.aarch64.* intrinsic for
5191 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
5192 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
5193 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
5194 
5195 // Patterns to match llvm.arm.* intrinsic for
5196 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
5197 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
5198 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
5199 
5200 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
5201 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
5202 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
5203 
5204 // Patterns to match llvm.aarch64.* intrinsic for
5205 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
5206 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
5207                                            SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
5208 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
5209                                            UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
5210 
5211 // Patterns to match llvm.arm.* intrinsic for
5212 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
5213 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
5214 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
5215
// Scalar signed saturating doubling multiply (long) and the scalar
// integer -> floating-point converts.
5216 // Signed Saturating Doubling Multiply-Add Long
5217 defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
5218 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
5219                                             SQDMLALshh, SQDMLALdss>;
5220 
5221 // Signed Saturating Doubling Multiply-Subtract Long
5222 defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
5223 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
5224                                             SQDMLSLshh, SQDMLSLdss>;
5225 
5226 // Signed Saturating Doubling Multiply Long
5227 defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
5228 defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
5229                                          SQDMULLshh, SQDMULLdss>;
5230 
5231 // Scalar Signed Integer Convert To Floating-point
5232 defm SCVTF  : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
5233 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fps,
5234                                                  SCVTFss, SCVTFdd>;
5235 
5236 // Scalar Unsigned Integer Convert To Floating-point
5237 defm UCVTF  : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
5238 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fpu,
5239                                                  UCVTFss, UCVTFdd>;
5240 
5241 // Scalar Floating-point Converts
5242 def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">;
5243 def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,
5244                                                   FCVTXN>;
5245
// FP-to-integer converts with directed rounding: {N,M,A,P,Z} x {S,U}.
// Each gets patterns for the AArch64 scalar intrinsic (S and D forms) plus,
// for the D-sized form only, a pattern for the shared ARM/AArch64 vector
// intrinsic (int_arm_neon_vcvt* / int_aarch64_neon_vcvtz*).
5246 defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
5247 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
5248                                                   FCVTNSss, FCVTNSdd>;
5249 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtns, FCVTNSdd>;
5250
5251 defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
5252 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
5253                                                   FCVTNUss, FCVTNUdd>;
5254 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtnu, FCVTNUdd>;
5255
5256 defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
5257 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
5258                                                   FCVTMSss, FCVTMSdd>;
5259 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtms, FCVTMSdd>;
5260
5261 defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
5262 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
5263                                                   FCVTMUss, FCVTMUdd>;
5264 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtmu, FCVTMUdd>;
5265
5266 defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
5267 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
5268                                                   FCVTASss, FCVTASdd>;
5269 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtas, FCVTASdd>;
5270
5271 defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
5272 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
5273                                                   FCVTAUss, FCVTAUdd>;
5274 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtau, FCVTAUdd>;
5275
5276 defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
5277 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
5278                                                   FCVTPSss, FCVTPSdd>;
5279 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtps, FCVTPSdd>;
5280
5281 defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
5282 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
5283                                                   FCVTPUss, FCVTPUdd>;
5284 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtpu, FCVTPUdd>;
5285
// FCVTZS/FCVTZU (round toward zero) use AArch64-specific vector intrinsics
// for the D-sized pattern, unlike the other rounding modes above.
5286 defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
5287 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
5288                                                   FCVTZSss, FCVTZSdd>;
5289 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzs,
5290                                                 FCVTZSdd>;
5291
5292 defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
5293 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
5294                                                   FCVTZUss, FCVTZUdd>;
5295 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzu,
5296                                                 FCVTZUdd>;
5297
5298 // Patterns For Convert Instructions Between v1f64 and v1i64
// Maps a generic int->fp conversion node on v1i64 to a D-register instruction.
5299 class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
5300                                               Instruction INST>
5301     : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;
5302
// Maps a generic fp->int conversion node on v1f64 to a D-register instruction.
5303 class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
5304                                               Instruction INST>
5305     : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
5306
// Select the generic ISD conversion nodes (not intrinsics) on v1 vectors.
5307 def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>;
5308 def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>;
5309
5310 def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>;
5311 def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>;
5312
5313 // Scalar Floating-point Reciprocal Estimate
// FRECPE/FRSQRTE also get a V1 D-size pattern for the generic ARM intrinsic;
// FRECPX has no ARM intrinsic counterpart so only the AArch64 patterns exist.
5314 defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
5315 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpe,
5316                                              FRECPEss, FRECPEdd>;
5317 def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrecpe,
5318                                               FRECPEdd>;
5319
5320 // Scalar Floating-point Reciprocal Exponent
5321 defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
5322 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
5323                                              FRECPXss, FRECPXdd>;
5324
5325 // Scalar Floating-point Reciprocal Square Root Estimate
5326 defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
5327 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrsqrte,
5328                                                  FRSQRTEss, FRSQRTEdd>;
5329 def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrsqrte,
5330                                               FRSQRTEdd>;
5331
5332 // Scalar Floating-point Round
// Maps a generic FP rounding node on v1f64 onto a D-register FRINT* variant,
// so v1f64 rounding is selected without going through an intrinsic.
5333 class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
5334     : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
5335
// One pattern per rounding mode; FRINT* instructions are defined elsewhere.
5336 def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>;
5337 def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>;
5338 def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>;
5339 def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>;
5340 def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>;
5341 def : Neon_ScalarFloatRound_pattern<frnd, FRINTAdd>;
5342 def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;
5343
5344 // Scalar Integer Compare
5345
5346 // Scalar Compare Bitwise Equal
5347 def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
5348 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
5349
// Matches the AArch64-specific Neon_cmp node (LHS, RHS, CondCode) on v1i64;
// the CondCode template argument selects which compare instruction to emit.
5350 class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
5351                                               Instruction INSTD,
5352                                               CondCode CC>
5353   : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
5354         (INSTD FPR64:$Rn, FPR64:$Rm)>;
5355
5356 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;
5357
5358 // Scalar Compare Signed Greater Than Or Equal
5359 def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
5360 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
5361 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>;
5362
5363 // Scalar Compare Unsigned Higher Or Same
5364 def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
5365 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
5366 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>;
5367
5368 // Scalar Compare Unsigned Higher
5369 def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
5370 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
5371 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>;
5372
5373 // Scalar Compare Signed Greater Than
5374 def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
5375 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
5376 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>;
5377
5378 // Scalar Compare Bitwise Test Bits
// CMTST shares opcode 0b10001 with CMEQ, distinguished by the u bit (0 vs 1).
// It is reachable both from the vtstd intrinsic and the Neon_tst DAG node.
5379 def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
5380 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
5381 defm : Neon_Scalar3Same_D_size_patterns<Neon_tst, CMTSTddd>;
5382
// Compares against an immediate zero (ddi forms). Each has a pattern for the
// AArch64 intrinsic and a D_V1 pattern keyed on the generic CondCode.
5383 // Scalar Compare Bitwise Equal To Zero
5384 def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
5385 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
5386                                                 CMEQddi>;
5387 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETEQ, CMEQddi>;
5388
5389 // Scalar Compare Signed Greater Than Or Equal To Zero
5390 def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
5391 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
5392                                                 CMGEddi>;
5393 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGE, CMGEddi>;
5394
5395 // Scalar Compare Signed Greater Than Zero
5396 def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
5397 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
5398                                                 CMGTddi>;
5399 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGT, CMGTddi>;
5400
5401 // Scalar Compare Signed Less Than Or Equal To Zero
5402 def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
5403 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
5404                                                 CMLEddi>;
5405 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLE, CMLEddi>;
5406
5407 // Scalar Compare Less Than Zero
5408 def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
5409 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
5410                                                 CMLTddi>;
5411 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>;
5412
5413 // Scalar Floating-point Compare
// FP compares produce integer masks: v1i32 from the sss forms (f32 inputs)
// and v1i64 from the ddd forms (f64 inputs), as the pattern types show.
5414
5415 // Scalar Floating-point Compare Mask Equal
5416 defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
5417 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fceq, v1i32, f32,
5418                                          FCMEQsss, v1i64, f64, FCMEQddd>;
5419 def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>;
5420
5421 // Scalar Floating-point Compare Mask Equal To Zero
5422 defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
5423 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fceq, SETEQ,
5424                                                   FCMEQZssi, FCMEQZddi>;
5425
5426 // Scalar Floating-point Compare Mask Greater Than Or Equal
5427 defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
5428 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcge, v1i32, f32,
5429                                          FCMGEsss, v1i64, f64, FCMGEddd>;
5430 def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>;
5431
5432 // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
5433 defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
5434 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcge, SETGE,
5435                                                   FCMGEZssi, FCMGEZddi>;
5436
5437 // Scalar Floating-point Compare Mask Greater Than
5438 defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
5439 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcgt, v1i32, f32,
5440                                          FCMGTsss, v1i64, f64, FCMGTddd>;
5441 def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>;
5442
5443 // Scalar Floating-point Compare Mask Greater Than Zero
5444 defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
5445 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcgt, SETGT,
5446                                                   FCMGTZssi, FCMGTZddi>;
5447
// LE/LT exist only as compare-with-zero forms; register-register LE/LT are
// handled by swapping the operands of GE/GT (not shown in this section).
5448 // Scalar Floating-point Compare Mask Less Than Or Equal To Zero
5449 defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
5450 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fclez, SETLE,
5451                                                   FCMLEZssi, FCMLEZddi>;
5452
5453 // Scalar Floating-point Compare Mask Less Than Zero
5454 defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
5455 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcltz, SETLT,
5456                                                   FCMLTZssi, FCMLTZddi>;
5457
5458 // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
5459 defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
5460 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcage, v1i32, f32,
5461                                          FACGEsss, v1i64, f64, FACGEddd>;
// Also select the generic ARM absolute-compare intrinsic on v1f64 operands.
5462 def : Pat<(v1i64 (int_arm_neon_vacge (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5463           (FACGEddd FPR64:$Rn, FPR64:$Rm)>;
5464
5465 // Scalar Floating-point Absolute Compare Mask Greater Than
5466 defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
5467 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcagt, v1i32, f32,
5468                                          FACGTsss, v1i64, f64, FACGTddd>;
5469 def : Pat<(v1i64 (int_arm_neon_vacgt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5470           (FACGTddd FPR64:$Rn, FPR64:$Rm)>;
5471
5472 // Scalar Floating-point Absolute Difference
// Unlike the compares above, FABD returns a floating-point result, so the
// pattern result types are f32/f64 rather than integer mask types.
5473 defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">;
5474 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vabd, f32, f32,
5475                                          FABDsss, f64, f64, FABDddd>;
5476
5477 // Scalar Absolute Value
5478 defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
5479 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;
5480
5481 // Scalar Signed Saturating Absolute Value
5482 defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
5483 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
5484                                                SQABSbb, SQABShh, SQABSss, SQABSdd>;
5485
5486 // Scalar Negate
5487 defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
5488 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;
5489
5490 // Scalar Signed Saturating Negate
5491 defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
5492 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
5493                                                SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
5494
5495 // Scalar Signed Saturating Accumulated of Unsigned Value
5496 defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
5497 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
5498                                                      SUQADDbb, SUQADDhh,
5499                                                      SUQADDss, SUQADDdd>;
5500
5501 // Scalar Unsigned Saturating Accumulated of Signed Value
5502 defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
5503 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
5504                                                      USQADDbb, USQADDhh,
5505                                                      USQADDss, USQADDdd>;
5506
// v1i64 variants of the saturating-accumulate intrinsics: $Src is the
// accumulator tied to the destination register by the instruction definition.
5507 def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src),
5508                                           (v1i64 FPR64:$Rn))),
5509           (SUQADDdd FPR64:$Src, FPR64:$Rn)>;
5510
5511 def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src),
5512                                           (v1i64 FPR64:$Rn))),
5513           (USQADDdd FPR64:$Src, FPR64:$Rn)>;
5514
// The generic ARM abs/qabs/qneg intrinsics on v1i64 also select the
// D-register instructions directly.
5515 def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))),
5516           (ABSdd FPR64:$Rn)>;
5517
5518 def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))),
5519           (SQABSdd FPR64:$Rn)>;
5520
5521 def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))),
5522           (SQNEGdd FPR64:$Rn)>;
5523
// Negation expressed as (0 - x): match a subtract from an all-zero vector
// (bitcast from the v8i8 Neon_AllZero form) and emit NEG.
5524 def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))),
5525                       (v1i64 FPR64:$Rn))),
5526           (NEGdd FPR64:$Rn)>;
5527
5528 // Scalar Signed Saturating Extract Unsigned Narrow
// Narrowing extracts: the *bh/*hs/*sd suffixes name the result/operand
// register-size pairs instantiated by the HSD multiclass.
5529 defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
5530 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
5531                                                      SQXTUNbh, SQXTUNhs,
5532                                                      SQXTUNsd>;
5533
5534 // Scalar Signed Saturating Extract Narrow
5535 defm SQXTN  : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
5536 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
5537                                                      SQXTNbh, SQXTNhs,
5538                                                      SQXTNsd>;
5539
5540 // Scalar Unsigned Saturating Extract Narrow
5541 defm UQXTN  : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
5542 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
5543                                                      UQXTNbh, UQXTNhs,
5544                                                      UQXTNsd>;
5545
5546 // Scalar Reduce Pairwise
5547
// Pairwise-reduction instruction with a 2D (128-bit) source and a 64-bit
// scalar destination. Patterns are attached separately via Pat defs below.
5548 multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
5549                                      string asmop, bit Commutable = 0> {
5550   let isCommutable = Commutable in {
5551     def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
5552                                 (outs FPR64:$Rd), (ins VPR128:$Rn),
5553                                 !strconcat(asmop, "\t$Rd, $Rn.2d"),
5554                                 [],
5555                                 NoItinerary>;
5556   }
5557 }
5558
// Extends the D-sized multiclass with a 2S (64-bit source, 32-bit scalar
// result) variant, giving both _S_2S and _D_2D instructions.
5559 multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
5560                                      string asmop, bit Commutable = 0>
5561   : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
5562   let isCommutable = Commutable in {
5563     def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
5564                                 (outs FPR32:$Rd), (ins VPR64:$Rn),
5565                                 !strconcat(asmop, "\t$Rd, $Rn.2s"),
5566                                 [],
5567                                 NoItinerary>;
5568   }
5569 }
5570
5571 // Scalar Reduce Addition Pairwise (Integer) with
5572 // Pattern to match llvm.arm.* intrinsic
5573 defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
5574
5575 // Pattern to match llvm.aarch64.* intrinsic for
5576 // Scalar Reduce Addition Pairwise (Integer)
// Both vpadd and vaddv on a v2i64 input select the same ADDP instruction.
5577 def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
5578           (ADDPvv_D_2D VPR128:$Rn)>;
5579 def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))),
5580           (ADDPvv_D_2D VPR128:$Rn)>;
5581
// Floating-point pairwise reductions; patterns are attached further below
// via Neon_ScalarPair_SD_size_patterns.
5582 // Scalar Reduce Addition Pairwise (Floating Point)
5583 defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
5584
5585 // Scalar Reduce Maximum Pairwise (Floating Point)
5586 defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
5587
5588 // Scalar Reduce Minimum Pairwise (Floating Point)
5589 defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
5590
5591 // Scalar Reduce maxNum Pairwise (Floating Point)
5592 defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
5593
5594 // Scalar Reduce minNum Pairwise (Floating Point)
5595 defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
5596
// Selects a pairwise-reduction intrinsic onto the S (v2f32 -> f32) and
// D (v2f64 -> f64) instruction variants.
5597 multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnode,
5598                                             Instruction INSTS,
5599                                             Instruction INSTD> {
5600   def : Pat<(f32 (opnode (v2f32 VPR64:$Rn))),
5601             (INSTS VPR64:$Rn)>;
5602   def : Pat<(f64 (opnode (v2f64 VPR128:$Rn))),
5603             (INSTD VPR128:$Rn)>;
5604 }
5605
5606 // Patterns to match llvm.aarch64.* intrinsic for
5607 // Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point)
5608 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
5609                                         FADDPvv_S_2S, FADDPvv_D_2D>;
5610
5611 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
5612                                         FMAXPvv_S_2S, FMAXPvv_D_2D>;
5613
5614 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
5615                                         FMINPvv_S_2S, FMINPvv_D_2D>;
5616
5617 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
5618                                         FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
5619
5620 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
5621                                         FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
5622
// 4-lane add reduction: first FADDP.4S folds the vector pairwise against
// itself, then the scalar FADDP adds the two surviving low lanes.
5623 def : Pat<(f32 (int_aarch64_neon_vpfadd (v4f32 VPR128:$Rn))),
5624           (FADDPvv_S_2S (v2f32
5625                (EXTRACT_SUBREG
5626                    (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))),
5627                    sub_64)))>;
5628
5629 // Scalar by element Arithmetic
5630
// Base class for two-operand scalar-by-element instructions: result register,
// scalar operand, vector operand indexed by $Imm. Subclasses wire the Imm/MRm
// bits into the encoding (the exact bit layout varies per element size).
5631 class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
5632                                     string rmlane, bit u, bit szhi, bit szlo,
5633                                     RegisterClass ResFPR, RegisterClass OpFPR,
5634                                     RegisterOperand OpVPR, Operand OpImm>
5635   : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
5636                              (outs ResFPR:$Rd),
5637                              (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
5638                              asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
5639                              [],
5640                              NoItinerary> {
5641   bits<3> Imm;
5642   bits<5> MRm;
5643 }
5644
// Accumulating variant: $src is the incoming accumulator, tied to $Rd via
// the Constraints string so the register allocator uses one register.
5645 class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
5646                                                     string rmlane,
5647                                                     bit u, bit szhi, bit szlo,
5648                                                     RegisterClass ResFPR,
5649                                                     RegisterClass OpFPR,
5650                                                     RegisterOperand OpVPR,
5651                                                     Operand OpImm>
5652   : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
5653                              (outs ResFPR:$Rd),
5654                              (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
5655                              asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
5656                              [],
5657                              NoItinerary> {
5658   let Constraints = "$src = $Rd";
5659   bits<3> Imm;
5660   bits<5> MRm;
5661 }
5662
5663 // Scalar Floating Point  multiply (scalar, by element)
// For .s forms the 2-bit lane index is split across Inst{11} (h) and
// Inst{21} (l); for .d forms only one index bit is needed, so l is fixed 0.
5664 def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
5665   0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5666   let Inst{11} = Imm{1}; // h
5667   let Inst{21} = Imm{0}; // l
5668   let Inst{20-16} = MRm;
5669 }
5670 def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
5671   0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
5672   let Inst{11} = Imm{0}; // h
5673   let Inst{21} = 0b0;    // l
5674   let Inst{20-16} = MRm;
5675 }
5676
5677 // Scalar Floating Point  multiply extended (scalar, by element)
// Same encoding layout as FMUL; distinguished only by the u bit (0b1).
5678 def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
5679   0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5680   let Inst{11} = Imm{1}; // h
5681   let Inst{21} = Imm{0}; // l
5682   let Inst{20-16} = MRm;
5683 }
5684 def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
5685   0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
5686   let Inst{11} = Imm{0}; // h
5687   let Inst{21} = 0b0;    // l
5688   let Inst{20-16} = MRm;
5689 }
5690
// Patterns matching (op scalar, extract(vector, imm)) onto a by-element
// instruction. Four variants: 128-bit source, 64-bit source (widened to
// 128-bit via SUBREG_TO_REG), and each with the operands commuted.
5691 multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns<
5692   SDPatternOperator opnode,
5693   Instruction INST,
5694   ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
5695   ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
5696
5697   def  : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
5698                (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))),
5699              (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
5700
// Extract from a 64-bit vector: place it in the low half of a 128-bit
// register (upper half undefined is fine since only lane $Imm is read).
5701   def  : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
5702                (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))),
5703              (ResTy (INST (ResTy FPRC:$Rn),
5704                (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
5705                OpNImm:$Imm))>;
5706
5707   // swapped operands
5708   def  : Pat<(ResTy (opnode
5709                (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
5710                (ResTy FPRC:$Rn))),
5711              (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
5712
5713   def  : Pat<(ResTy (opnode
5714                (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
5715                (ResTy FPRC:$Rn))),
5716              (ResTy (INST (ResTy FPRC:$Rn),
5717                (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
5718                OpNImm:$Imm))>;
5719 }
5720
5721 // Patterns for Scalar Floating Point  multiply (scalar, by element)
5722 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S,
5723   f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
5724 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D,
5725   f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
5726
5727 // Patterns for Scalar Floating Point  multiply extended (scalar, by element)
5728 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
5729   FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare,
5730   v2f32, v4f32, neon_uimm1_bare>;
5731 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
5732   FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare,
5733   v1f64, v2f64, neon_uimm0_bare>;
5734
5735 // Scalar Floating Point fused multiply-add (scalar, by element)
// Accumulating forms use the Constraint_Impl base so $src is tied to $Rd.
// Index-bit wiring mirrors FMUL/FMULX above (2-bit index for .s, 1 for .d).
5736 def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
5737   0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5738   let Inst{11} = Imm{1}; // h
5739   let Inst{21} = Imm{0}; // l
5740   let Inst{20-16} = MRm;
5741 }
5742 def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
5743   0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
5744   let Inst{11} = Imm{0}; // h
5745   let Inst{21} = 0b0;    // l
5746   let Inst{20-16} = MRm;
5747 }
5748
5749 // Scalar Floating Point fused multiply-subtract (scalar, by element)
5750 def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
5751   0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5752   let Inst{11} = Imm{1}; // h
5753   let Inst{21} = Imm{0}; // l
5754   let Inst{20-16} = MRm;
5755 }
5756 def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
5757   0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
5758   let Inst{11} = Imm{0}; // h
5759   let Inst{21} = 0b0;    // l
5760   let Inst{20-16} = MRm;
5761 }
5762 // We are allowed to match the fma instruction regardless of compile options.
// Matches the generic `fma` node onto FMLA, and `fma` with a negated
// multiplicand onto FMLS. Eight patterns total: {fmla, fmls} x {128-bit
// source, 64-bit source via SUBREG_TO_REG} x {operand order, swapped}.
5763 multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
5764   Instruction FMLAI, Instruction FMLSI,
5765   ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
5766   ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
5767   // fmla
5768   def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
5769                (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
5770                (ResTy FPRC:$Ra))),
5771              (ResTy (FMLAI (ResTy FPRC:$Ra),
5772                (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
5773
5774   def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
5775                (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
5776                (ResTy FPRC:$Ra))),
5777              (ResTy (FMLAI (ResTy FPRC:$Ra),
5778                (ResTy FPRC:$Rn),
5779                (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
5780                OpNImm:$Imm))>;
5781
5782   // swapped fmla operands
5783   def  : Pat<(ResTy (fma
5784                (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
5785                (ResTy FPRC:$Rn),
5786                (ResTy FPRC:$Ra))),
5787              (ResTy (FMLAI (ResTy FPRC:$Ra),
5788                (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
5789
5790   def  : Pat<(ResTy (fma
5791                (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
5792                (ResTy FPRC:$Rn),
5793                (ResTy FPRC:$Ra))),
5794              (ResTy (FMLAI (ResTy FPRC:$Ra),
5795                (ResTy FPRC:$Rn),
5796                (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
5797                OpNImm:$Imm))>;
5798
// fmls: the fneg sits on the extracted element in the source pattern; the
// emitted FMLS instruction performs the negation itself.
5799   // fmls
5800   def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
5801                (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
5802                (ResTy FPRC:$Ra))),
5803              (ResTy (FMLSI (ResTy FPRC:$Ra),
5804                (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
5805
5806   def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
5807                (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
5808                (ResTy FPRC:$Ra))),
5809              (ResTy (FMLSI (ResTy FPRC:$Ra),
5810                (ResTy FPRC:$Rn),
5811                (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
5812                OpNImm:$Imm))>;
5813
5814   // swapped fmls operands
5815   def  : Pat<(ResTy (fma
5816                (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
5817                (ResTy FPRC:$Rn),
5818                (ResTy FPRC:$Ra))),
5819              (ResTy (FMLSI (ResTy FPRC:$Ra),
5820                (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
5821
5822   def  : Pat<(ResTy (fma
5823                (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
5824                (ResTy FPRC:$Rn),
5825                (ResTy FPRC:$Ra))),
5826              (ResTy (FMLSI (ResTy FPRC:$Ra),
5827                (ResTy FPRC:$Rn),
5828                (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
5829                OpNImm:$Imm))>;
5830 }
5831
5832 // Scalar Floating Point fused multiply-add and
5833 // multiply-subtract (scalar, by element)
// One instantiation per element size: f32 against v4f32 (with v2f32 narrow
// source), f64 against v2f64 (with v1f64 narrow source).
// NOTE(review): the original file instantiated the f64 defm twice with
// identical arguments; the duplicate produced redundant (and potentially
// conflicting) anonymous Pat records, so it has been removed.
5834 defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
5835   f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
5836 defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
5837   f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
5840
5841 // Scalar Signed saturating doubling multiply long (scalar, by element)
// H-element forms index with h:l:m bits and only 4 bits of MRm (VPR*Lo
// register operands); the 4H variant has a 2-bit index so h is fixed 0.
5842 def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
5843   0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
5844   let Inst{11} = 0b0; // h
5845   let Inst{21} = Imm{1}; // l
5846   let Inst{20} = Imm{0}; // m
5847   let Inst{19-16} = MRm{3-0};
5848 }
5849 def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
5850   0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
5851   let Inst{11} = Imm{2}; // h
5852   let Inst{21} = Imm{1}; // l
5853   let Inst{20} = Imm{0}; // m
5854   let Inst{19-16} = MRm{3-0};
5855 }
// S-element forms use the full 5-bit MRm and an h:l index (1 or 2 bits).
5856 def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
5857   0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
5858   let Inst{11} = 0b0;    // h
5859   let Inst{21} = Imm{0}; // l
5860   let Inst{20-16} = MRm;
5861 }
5862 def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
5863   0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
5864   let Inst{11} = Imm{1};    // h
5865   let Inst{21} = Imm{0};    // l
5866   let Inst{20-16} = MRm;
5867 }
5868
// Maps a scalar-by-element multiply onto the indexed-element instruction
// INST.  The DAG presents the element operand as
// (scalar_to_vector (vector_extract Vm, imm)); the second pattern covers the
// same node with the operands swapped, since the opnodes instantiated below
// are commutative.
multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
  SDPatternOperator opnode,
  Instruction INST,
  ValueType ResTy, RegisterClass FPRC,
  ValueType OpVTy, ValueType OpTy,
  ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def  : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
               (OpVTy (scalar_to_vector
                 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
             (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;

  // swapped operands
  def  : Pat<(ResTy (opnode
               (OpVTy (scalar_to_vector
                 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
                 (OpVTy FPRC:$Rn))),
             (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
}
5888
5889
// Patterns for Scalar Signed saturating doubling
// multiply long (scalar, by element)
// Note the .s variants constrain Rm to VPR64Lo/VPR128Lo in the pattern even
// though the instructions accept the full register file.
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLshv_4H, v1i32, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLshv_8H, v1i32, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLdsv_2S, v1i64, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLdsv_4S, v1i64, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
5904
// Scalar Signed saturating doubling multiply-add long (scalar, by element)
// Accumulating form: uses the _Constraint_Impl base, which ties the
// destination to the accumulator input.  Lane-index encoding (h:l:m, MRm)
// matches the SQDMULL variants above.
def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}
5932
// Scalar Signed saturating doubling
// multiply-subtract long (scalar, by element)
// Mirrors the SQDMLAL definitions above; only the opcode field (0b0111)
// differs.
def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}
5961
// Maps an accumulate-over-multiply DAG, i.e.
//   opnode(Ra, coreopnode(Rn, extract(Vm, imm)))
// onto a single fused multiply-accumulate instruction INST.  The second
// pattern accepts the inner multiply with its operands swapped, since the
// coreopnode instantiated below (vqdmull) is commutative.
multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
  SDPatternOperator opnode,
  SDPatternOperator coreopnode,
  Instruction INST,
  ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
  ValueType OpTy,
  ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def  : Pat<(ResTy (opnode
               (ResTy ResFPRC:$Ra),
               (ResTy (coreopnode (OpTy FPRC:$Rn),
                 (OpTy (scalar_to_vector
                   (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
             (ResTy (INST (ResTy ResFPRC:$Ra),
               (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;

  // swapped operands
  def  : Pat<(ResTy (opnode
               (ResTy ResFPRC:$Ra),
               (ResTy (coreopnode
                 (OpTy (scalar_to_vector
                   (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
                 (OpTy FPRC:$Rn))))),
             (ResTy (INST (ResTy ResFPRC:$Ra),
               (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
}
5988
// Patterns for Scalar Signed saturating
// doubling multiply-add long (scalar, by element)
// vqadds(Ra, vqdmull(Rn, Vm[lane])) -> SQDMLAL
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;

// Patterns for Scalar Signed saturating
// doubling multiply-sub long (scalar, by element)
// vqsubs(Ra, vqdmull(Rn, Vm[lane])) -> SQDMLSL
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
6018
// Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
// Non-widening: result register class matches the source element class
// (FPR16 -> FPR16, FPR32 -> FPR32).  Lane encoding as for SQDMULL above.
def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}
6047
// Patterns for Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
6062
// Scalar Signed saturating rounding doubling multiply
// returning high half (scalar, by element)
// Rounding counterpart of SQDMULH above; only the opcode field (0b1101)
// differs.
def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}
6091
// Patterns for Scalar Signed saturating rounding doubling multiply
// returning high half (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, i32,
  VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, i32,
  VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, i32,
  VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32,
  VPR128Lo, neon_uimm2_bare>;
6104
// Scalar general arithmetic operation
// Helper pattern classes selecting v1f64 unary / binary / ternary float ops
// onto the plain scalar FP instructions operating on FPR64.
class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (INST FPR64:$Rn, FPR64:$Rm)>;

class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
              (v1f64 FPR64:$Ra))),
          (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// Binary v1f64 ops -> scalar double instructions.
def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;

// Unary v1f64 ops.
def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;

// Fused multiply-add / multiply-subtract on v1f64.
def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
6136
// Scalar Copy - DUP element to scalar
// Extracts one lane of a 128-bit vector into a scalar FP register.  The lane
// index together with a size marker is packed into Inst{20-16}: the low set
// bit's position encodes the element size (b/h/s/d) and the bits above it
// hold the index.
class NeonI_Scalar_DUP<string asmop, string asmlane,
                       RegisterClass ResRC, RegisterOperand VPRC,
                       Operand OpImm>
  : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
                     asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
                     [],
                     NoItinerary> {
  bits<4> Imm;
}

def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}
6160
// Extracting a floating-point lane: lane 0 is a free subregister read;
// other lanes use the scalar DUP instructions defined above.
def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 0)),
          (f32 (EXTRACT_SUBREG (v4f32 VPR128:$Rn), sub_32))>;
def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 1)),
          (f32 (DUPsv_S (v4f32 VPR128:$Rn), 1))>;
def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 2)),
          (f32 (DUPsv_S (v4f32 VPR128:$Rn), 2))>;
def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 3)),
          (f32 (DUPsv_S (v4f32 VPR128:$Rn), 3))>;

def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 0)),
          (f64 (EXTRACT_SUBREG (v2f64 VPR128:$Rn), sub_64))>;
def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 1)),
          (f64 (DUPdv_D (v2f64 VPR128:$Rn), 1))>;

// 64-bit sources are widened to the 128-bit register before DUP, since the
// scalar DUP instructions only read VPR128.
def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 0)),
          (f32 (EXTRACT_SUBREG (v2f32 VPR64:$Rn), sub_32))>;
def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 1)),
          (f32 (DUPsv_S (v4f32 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            1))>;

def : Pat<(f64 (vector_extract (v1f64 VPR64:$Rn), 0)),
          (f64 (EXTRACT_SUBREG (v1f64 VPR64:$Rn), sub_64))>;
6183
// Selects extract_subvector of a single element (v1ix result) as a scalar
// DUP.  64-bit sources are first widened to VPR128 via SUBREG_TO_REG, since
// the DUP instructions only read 128-bit vectors.
multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI,
  ValueType ResTy, ValueType OpTy,Operand OpLImm,
  ValueType NOpTy, ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)),
            (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>;

  def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
                OpNImm:$Imm))>;
}

// Patterns for extract subvectors of v1ix data using scalar DUP instructions.
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare,
                                        v8i8, v16i8, neon_uimm3_bare>;
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare,
                                        v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare,
                                        v2i32, v4i32, neon_uimm1_bare>;
6204
// Selects (vector_insert undef, (vector_extract V, i), 0) — i.e. building a
// one-element vector from a lane of V — as a scalar DUP of that lane.
multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy,
                                          ValueType OpTy, ValueType ElemTy,
                                          Operand OpImm, ValueType OpNTy,
                                          ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (vector_insert (ResTy undef),
              (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
              (neon_uimm0_bare:$Imm))),
            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;

  // 64-bit source: widen to VPR128 before the DUP.
  def : Pat<(ResTy (vector_insert (ResTy undef),
              (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
              (OpNImm:$Imm))),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              OpNImm:$Imm))>;
}
6222
// Same as Copy_pattern1 but for the (scalar_to_vector (vector_extract ...))
// form of building a one-element vector from a lane.
multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy,
                                          ValueType OpTy, ValueType ElemTy,
                                          Operand OpImm, ValueType OpNTy,
                                          ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (scalar_to_vector
              (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))),
            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;

  // 64-bit source: widen to VPR128 before the DUP.
  def : Pat<(ResTy (scalar_to_vector
              (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              OpNImm:$Imm))>;
}
6238
// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP
// instructions.
// Note the extracted element type is i32 for the i8/i16 cases because
// vector_extract promotes sub-32-bit elements to i32.
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
  v1i64, v2i64, i64, neon_uimm1_bare,
  v1i64, v2i64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
  v1i32, v4i32, i32, neon_uimm2_bare,
  v2i32, v4i32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H,
  v1i16, v8i16, i32, neon_uimm3_bare,
  v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B,
  v1i8, v16i8, i32, neon_uimm4_bare,
  v8i8, v16i8, neon_uimm3_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
  v1i64, v2i64, i64, neon_uimm1_bare,
  v1i64, v2i64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
  v1i32, v4i32, i32, neon_uimm2_bare,
  v2i32, v4i32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H,
  v1i16, v8i16, i32, neon_uimm3_bare,
  v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B,
  v1i8, v16i8, i32, neon_uimm4_bare,
  v8i8, v16i8, neon_uimm3_bare>;
6265
// Accepts "mov Rd, Vn.<lane>[i]" as an assembler alias for the scalar DUP
// instructions.  The trailing 0b0 marks the alias as non-preferred for
// printing (see FIXME below).
multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
                                  Instruction DUPI, Operand OpImm,
                                  RegisterClass ResRC> {
  def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"),
          (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
}

// Aliases for Scalar copy - DUP element (scalar)
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
6280
// Extracting the low or high 64-bit half of a 128-bit vector: the low half
// is a subregister read; the high half is a DUP of doubleword lane 1.
multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy,
                      ValueType OpTy> {
  def : Pat<(ResTy (GetLow VPR128:$Rn)),
            (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
  def : Pat<(ResTy (GetHigh VPR128:$Rn)),
            (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
}

defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>;
defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>;
defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>;
defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;
6295
// The following is for sext/zext from v1xx to v1xx
// Each single-step extension is implemented with the vector shift-left-long
// instruction (USHLL for zext, SSHLL for sext) with a shift amount of 0: the
// scalar is widened into a vector register, shifted long, and the extended
// element read back out of the low part of the result.
multiclass NeonI_ext<string prefix, SDNode ExtOp> {
  // v1i32 -> v1i64
  def : Pat<(v1i64 (ExtOp (v1i32 FPR32:$Rn))),
            (EXTRACT_SUBREG
              (v2i64 (!cast<Instruction>(prefix # "_2S")
                (v2i32 (SUBREG_TO_REG (i64 0), $Rn, sub_32)), 0)),
              sub_64)>;

  // v1i16 -> v1i32
  def : Pat<(v1i32 (ExtOp (v1i16 FPR16:$Rn))),
            (EXTRACT_SUBREG
              (v4i32 (!cast<Instruction>(prefix # "_4H")
                (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
              sub_32)>;

  // v1i8 -> v1i16
  def : Pat<(v1i16 (ExtOp (v1i8 FPR8:$Rn))),
            (EXTRACT_SUBREG
              (v8i16 (!cast<Instruction>(prefix # "_8B")
                (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
              sub_16)>;
}

defm NeonI_zext : NeonI_ext<"USHLLvvi", zext>;
defm NeonI_sext : NeonI_ext<"SSHLLvvi", sext>;
6322
// Multi-step extensions (skipping one or two widths).  The zext forms rely
// on DUP writing only the narrow scalar register and on
// SUBREG_TO_REG (i64 0) asserting the remaining bits are zero, so reading a
// wider subregister yields the zero-extended value.  The sext forms chain
// SSHLL-by-0 steps, one per width doubling.

// zext v1i8 -> v1i32
def : Pat<(v1i32 (zext (v1i8 FPR8:$Rn))),
          (v1i32 (EXTRACT_SUBREG
            (v1i64 (SUBREG_TO_REG (i64 0),
              (v1i8 (DUPbv_B
                (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
                0)),
              sub_8)),
            sub_32))>;

// zext v1i8 -> v1i64
def : Pat<(v1i64 (zext (v1i8 FPR8:$Rn))),
          (v1i64 (SUBREG_TO_REG (i64 0),
            (v1i8 (DUPbv_B
              (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
              0)),
            sub_8))>;

// zext v1i16 -> v1i64
def : Pat<(v1i64 (zext (v1i16 FPR16:$Rn))),
          (v1i64 (SUBREG_TO_REG (i64 0),
            (v1i16 (DUPhv_H
              (v8i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)),
              0)),
            sub_16))>;

// sext v1i8 -> v1i32 (two SSHLL steps: i8 -> i16 -> i32)
def : Pat<(v1i32 (sext (v1i8 FPR8:$Rn))),
          (EXTRACT_SUBREG
            (v4i32 (SSHLLvvi_4H
              (v4i16 (SUBREG_TO_REG (i64 0),
                (v1i16 (EXTRACT_SUBREG
                  (v8i16 (SSHLLvvi_8B
                    (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
                  sub_16)),
                sub_16)), 0)),
            sub_32)>;

// sext v1i8 -> v1i64 (three SSHLL steps: i8 -> i16 -> i32 -> i64)
def : Pat<(v1i64 (sext (v1i8 FPR8:$Rn))),
          (EXTRACT_SUBREG
            (v2i64 (SSHLLvvi_2S
              (v2i32 (SUBREG_TO_REG (i64 0),
                (v1i32 (EXTRACT_SUBREG
                  (v4i32 (SSHLLvvi_4H
                    (v4i16 (SUBREG_TO_REG (i64 0),
                      (v1i16 (EXTRACT_SUBREG
                        (v8i16 (SSHLLvvi_8B
                          (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
                        sub_16)),
                      sub_16)), 0)),
                  sub_32)),
                sub_32)), 0)),
            sub_64)>;

// sext v1i16 -> v1i64 (two SSHLL steps: i16 -> i32 -> i64)
def : Pat<(v1i64 (sext (v1i16 FPR16:$Rn))),
          (EXTRACT_SUBREG
            (v2i64 (SSHLLvvi_2S
              (v2i32 (SUBREG_TO_REG (i64 0),
                (v1i32 (EXTRACT_SUBREG
                  (v4i32 (SSHLLvvi_4H
                    (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
                  sub_32)),
                sub_32)), 0)),
            sub_64)>;
6390
6391 //===----------------------------------------------------------------------===//
6392 // Non-Instruction Patterns
6393 //===----------------------------------------------------------------------===//
6394
// 64-bit vector bitcasts...
// All 64-bit vector types share the VPR64 register file, so a bitconvert is
// a no-op re-typing of the same register.

def : Pat<(v1i64 (bitconvert (v8i8  VPR64:$src))), (v1i64 VPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  VPR64:$src))), (v2f32 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  VPR64:$src))), (v4i16 VPR64:$src)>;

def : Pat<(v1i64 (bitconvert (v4i16  VPR64:$src))), (v1i64 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16  VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16  VPR64:$src))), (v2f32 VPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16  VPR64:$src))), (v8i8 VPR64:$src)>;

def : Pat<(v1i64 (bitconvert (v2i32  VPR64:$src))), (v1i64 VPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32  VPR64:$src))), (v2f32 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32  VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32  VPR64:$src))), (v8i8 VPR64:$src)>;

def : Pat<(v1i64 (bitconvert (v2f32  VPR64:$src))), (v1i64 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32  VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32  VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32  VPR64:$src))), (v8i8 VPR64:$src)>;

def : Pat<(v2f32 (bitconvert (v1i64  VPR64:$src))), (v2f32 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;

def : Pat<(v1i64 (bitconvert (v1f64  VPR64:$src))), (v1i64 VPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64  VPR64:$src))), (v2f32 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64  VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64  VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1f64  VPR64:$src))), (v8i8 VPR64:$src)>;
def : Pat<(f64   (bitconvert (v1f64  VPR64:$src))), (f64 VPR64:$src)>;

def : Pat<(v1f64 (bitconvert (v1i64  VPR64:$src))), (v1f64 VPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32  VPR64:$src))), (v1f64 VPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2i32  VPR64:$src))), (v1f64 VPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16  VPR64:$src))), (v1f64 VPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8  VPR64:$src))), (v1f64 VPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64  VPR64:$src))), (v1f64 VPR64:$src)>;
6435
// ..and 128-bit vector bitcasts...
// Likewise, all 128-bit vector types share VPR128, so bitconvert is free.

def : Pat<(v2f64 (bitconvert (v16i8  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8  VPR128:$src))), (v8i16 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v8i16  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16  VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v4i32  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32  VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32  VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v4f32  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32  VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32  VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v2i64  VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64  VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64  VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2i64 (bitconvert (v2f64  VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64  VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64  VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64  VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64  VPR128:$src))), (v16i8 VPR128:$src)>;
6473
// ...and scalar bitcasts...
// Scalar FP <-> one-element vector casts are free (same register); crossing
// between the GPR and FPR files requires an FMOV.
def : Pat<(f16 (bitconvert (v1i16  FPR16:$src))), (f16 FPR16:$src)>;
def : Pat<(f32 (bitconvert (v1i32  FPR32:$src))), (f32 FPR32:$src)>;
def : Pat<(f64 (bitconvert (v1i64  FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64  FPR64:$src))), (f64 FPR64:$src)>;

// FPR -> GPR moves.
def : Pat<(i64 (bitconvert (v1i64  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v1f64  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v2i32  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v2f32  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v4i16  FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v8i8  FPR64:$src))), (FMOVxd $src)>;

def : Pat<(i32 (bitconvert (v1i32  FPR32:$src))), (FMOVws $src)>;

def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;

def : Pat<(f64   (bitconvert (v8i8  VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64   (bitconvert (v4i16  VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64   (bitconvert (v2i32  VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64   (bitconvert (v2f32  VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64   (bitconvert (v1i64  VPR64:$src))), (f64 VPR64:$src)>;

def : Pat<(f128  (bitconvert (v16i8  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v8i16  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v4i32  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v2i64  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v4f32  VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128  (bitconvert (v2f64  VPR128:$src))), (f128 VPR128:$src)>;

def : Pat<(v1i16 (bitconvert (f16  FPR16:$src))), (v1i16 FPR16:$src)>;
def : Pat<(v1i32 (bitconvert (f32  FPR32:$src))), (v1i32 FPR32:$src)>;
def : Pat<(v1i64 (bitconvert (f64  FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64  FPR64:$src))), (v1f64 FPR64:$src)>;

// GPR -> FPR moves.
def : Pat<(v1i64 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v1f64 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v2i32 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v2f32 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v4i16 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v8i8 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;

def : Pat<(v1i32 (bitconvert (i32  GPR32:$src))), (FMOVsw $src)>;

def : Pat<(v8i8   (bitconvert (f64   FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16  (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32  (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32  (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64  (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;

def : Pat<(v16i8  (bitconvert (f128   FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16  (bitconvert (f128   FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32  (bitconvert (f128   FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64  (bitconvert (f128   FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32  (bitconvert (f128   FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64  (bitconvert (f128   FPR128:$src))), (v2f64 FPR128:$src)>;
6532
6533 // Scalar Three Same
6534
// 3-bit unsigned immediate (0..7), used e.g. as the EXT byte index for
// 64-bit operands. Printed in hex by the asm printer.
6535 def neon_uimm3 : Operand<i64>,
6536                    ImmLeaf<i64, [{return Imm < 8;}]> {
6537   let ParserMatchClass = uimm3_asmoperand;
6538   let PrintMethod = "printUImmHexOperand";
6539 }
6540
// 4-bit unsigned immediate (0..15), used e.g. as the EXT byte index for
// 128-bit operands.
6541 def neon_uimm4 : Operand<i64>,
6542                    ImmLeaf<i64, [{return Imm < 16;}]> {
6543   let ParserMatchClass = uimm4_asmoperand;
6544   let PrintMethod = "printUImmHexOperand";
6545 }
6546
6547 // Bitwise Extract
// EXT: extracts a contiguous run of bytes from the concatenation Rn:Rm,
// starting at byte $Index. Q selects 64-bit (8b) vs 128-bit (16b) operation.
6548 class NeonI_Extract<bit q, bits<2> op2, string asmop,
6549                     string OpS, RegisterOperand OpVPR, Operand OpImm>
6550   : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
6551                      (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
6552                      asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
6553                      ", $Rm." # OpS # ", $Index",
6554                      [],
6555                      NoItinerary>{
6556   bits<4> Index;
6557 }
6558
// 64-bit EXT: only 3 index bits are meaningful, bit 14 must be zero.
6559 def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
6560                                VPR64, neon_uimm3> {
6561   let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
6562 }
6563
// 128-bit EXT: all 4 index bits are encoded.
6564 def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
6565                                VPR128, neon_uimm4> {
6566   let Inst{14-11} = Index;
6567 }
6568
// Selection pattern mapping the Neon_vextract node onto EXT for one
// element type / register width combination.
6569 class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
6570                  Operand OpImm>
6571   : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
6572                                  (i64 OpImm:$Imm))),
6573               (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;
6574
// All 64-bit types map onto the byte-wise 8b form, all 128-bit types onto
// the 16b form; the immediate is always a byte index.
6575 def : NI_Extract<v8i8,  VPR64,  EXTvvvi_8b,  neon_uimm3>;
6576 def : NI_Extract<v4i16, VPR64,  EXTvvvi_8b,  neon_uimm3>;
6577 def : NI_Extract<v2i32, VPR64,  EXTvvvi_8b,  neon_uimm3>;
6578 def : NI_Extract<v1i64, VPR64,  EXTvvvi_8b,  neon_uimm3>;
6579 def : NI_Extract<v2f32, VPR64,  EXTvvvi_8b,  neon_uimm3>;
6580 def : NI_Extract<v1f64, VPR64,  EXTvvvi_8b,  neon_uimm3>;
6581 def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
6582 def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
6583 def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
6584 def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
6585 def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
6586 def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
6587
6588 // Table lookup
// TBL: indexed table lookup; out-of-range indices yield zero. `len` encodes
// the number of table registers (1-4), OpVPR the index/result width.
6589 class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
6590              string asmop, string OpS, RegisterOperand OpVPR,
6591              RegisterOperand VecList>
6592   : NeonI_TBL<q, op2, len, op,
6593               (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
6594               asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
6595               [],
6596               NoItinerary>;
6597
6598 // The vectors in look up table are always 16b
6599 multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
6600   def _8b  : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
6601                     !cast<RegisterOperand>(List # "16B_operand")>;
6602
6603   def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
6604                     !cast<RegisterOperand>(List # "16B_operand")>;
6605 }
6606
// TBL with 1-4 consecutive table registers.
6607 defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
6608 defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
6609 defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
6610 defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
6611
6612 // Table lookup extension
// TBX: like TBL, but out-of-range indices leave the destination element
// unchanged — hence the $src input tied to $Rd.
6613 class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
6614              string asmop, string OpS, RegisterOperand OpVPR,
6615              RegisterOperand VecList>
6616   : NeonI_TBL<q, op2, len, op,
6617               (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
6618               asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
6619               [],
6620               NoItinerary> {
6621   let Constraints = "$src = $Rd";
6622 }
6623
6624 // The vectors in look up table are always 16b
6625 multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
6626   def _8b  : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
6627                     !cast<RegisterOperand>(List # "16B_operand")>;
6628
6629   def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
6630                     !cast<RegisterOperand>(List # "16B_operand")>;
6631 }
6632
// TBX with 1-4 consecutive table registers.
6633 defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
6634 defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
6635 defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
6636 defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
6637
// INS (general register): insert a GPR value into lane $Imm of a 128-bit
// vector, leaving the other lanes intact ($src tied to $Rd).
6638 class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
6639                      RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
6640   : NeonI_copy<0b1, 0b0, 0b0011,
6641                (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
6642                asmop # "\t$Rd." # Res # "[$Imm], $Rn",
6643                [(set (ResTy VPR128:$Rd),
6644                  (ResTy (vector_insert
6645                    (ResTy VPR128:$src),
6646                    (OpTy OpGPR:$Rn),
6647                    (OpImm:$Imm))))],
6648                NoItinerary> {
6649   bits<4> Imm;
6650   let Constraints = "$src = $Rd";
6651 }
6652
6653 //Insert element (vector, from main)
// Inst{20-16} encodes imm5: the lane index shifted past a size marker bit
// (b: xxxx1, h: xxx10, s: xx100, d: x1000).
6654 def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
6655                            neon_uimm4_bare> {
6656   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6657 }
6658 def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
6659                            neon_uimm3_bare> {
6660   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6661 }
6662 def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
6663                            neon_uimm2_bare> {
6664   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6665 }
6666 def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
6667                            neon_uimm1_bare> {
6668   let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
6669 }
6670
// "mov" is the preferred disassembly alias for INS (general register).
6671 def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn",
6672                     (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>;
6673 def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn",
6674                     (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>;
6675 def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn",
6676                     (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>;
6677 def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn",
6678                     (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>;
6679
// vector_insert on a 64-bit vector: widen to the 128-bit register, do the
// INS there, then take the low 64-bit subregister back.
6680 class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
6681                              RegisterClass OpGPR, ValueType OpTy,
6682                              Operand OpImm, Instruction INS>
6683   : Pat<(ResTy (vector_insert
6684               (ResTy VPR64:$src),
6685               (OpTy OpGPR:$Rn),
6686               (OpImm:$Imm))),
6687         (ResTy (EXTRACT_SUBREG
6688           (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
6689             OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
6690
// Note: the lane count halves when going 128->64 bits, so the immediate
// operand here is one bit narrower than on the instruction itself.
6691 def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
6692                                           neon_uimm3_bare, INSbw>;
6693 def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
6694                                           neon_uimm2_bare, INShw>;
6695 def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
6696                                           neon_uimm1_bare, INSsw>;
6697 def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
6698                                           neon_uimm0_bare, INSdx>;
6699
// INS (element): copy lane $Immn of $Rn into lane $Immd of $Rd, keeping the
// remaining destination lanes ($src tied to $Rd).
6700 class NeonI_INS_element<string asmop, string Res, Operand ResImm>
6701   : NeonI_insert<0b1, 0b1,
6702                  (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
6703                  ResImm:$Immd, ResImm:$Immn),
6704                  asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
6705                  [],
6706                  NoItinerary> {
6707   let Constraints = "$src = $Rd";
6708   bits<4> Immd;
6709   bits<4> Immn;
6710 }
6711
6712 //Insert element (vector, from element)
// Inst{20-16} is imm5 (destination lane + size marker, as for INS general);
// Inst{14-11} is imm4 (source lane, left-aligned, unused low bits zero).
6713 def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
6714   let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
6715   let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
6716 }
6717 def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
6718   let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
6719   let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0};
6720   // bit 11 is unspecified, but should be set to zero.
6721 }
6722 def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
6723   let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
6724   let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0};
6725   // bits 11-12 are unspecified, but should be set to zero.
6726 }
6727 def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
6728   let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
6729   let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0};
6730   // bits 11-13 are unspecified, but should be set to zero.
6731 }
6732
// "mov" disassembly aliases for INS (element).
6733 def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]",
6734                     (INSELb VPR128:$Rd, VPR128:$Rn,
6735                       neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>;
6736 def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]",
6737                     (INSELh VPR128:$Rd, VPR128:$Rn,
6738                       neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>;
6739 def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]",
6740                     (INSELs VPR128:$Rd, VPR128:$Rn,
6741                       neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>;
6742 def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]",
6743                     (INSELd VPR128:$Rd, VPR128:$Rn,
6744                       neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>;
6745
// Select insert(extract(...)) as a single INS (element). The four patterns
// cover each combination of 128-bit (StTy/ResTy) and 64-bit (NaTy) source
// and destination; 64-bit operands are widened with SUBREG_TO_REG and the
// 64-bit result is recovered with EXTRACT_SUBREG.
6746 multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
6747                                 ValueType MidTy, Operand StImm, Operand NaImm,
6748                                 Instruction INS> {
// 128-bit dest <- 128-bit source.
6749 def : Pat<(ResTy (vector_insert
6750             (ResTy VPR128:$src),
6751             (MidTy (vector_extract
6752               (ResTy VPR128:$Rn),
6753               (StImm:$Immn))),
6754             (StImm:$Immd))),
6755           (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
6756               StImm:$Immd, StImm:$Immn)>;
6757
// 128-bit dest <- 64-bit source.
6758 def : Pat <(ResTy (vector_insert
6759              (ResTy VPR128:$src),
6760              (MidTy (vector_extract
6761                (NaTy VPR64:$Rn),
6762                (NaImm:$Immn))),
6763              (StImm:$Immd))),
6764            (INS (ResTy VPR128:$src),
6765              (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
6766              StImm:$Immd, NaImm:$Immn)>;
6767
// 64-bit dest <- 128-bit source.
6768 def : Pat <(NaTy (vector_insert
6769              (NaTy VPR64:$src),
6770              (MidTy (vector_extract
6771                (ResTy VPR128:$Rn),
6772                (StImm:$Immn))),
6773              (NaImm:$Immd))),
6774            (NaTy (EXTRACT_SUBREG
6775              (ResTy (INS
6776                (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6777                (ResTy VPR128:$Rn),
6778                NaImm:$Immd, StImm:$Immn)),
6779              sub_64))>;
6780
// 64-bit dest <- 64-bit source.
6781 def : Pat <(NaTy (vector_insert
6782              (NaTy VPR64:$src),
6783              (MidTy (vector_extract
6784                (NaTy VPR64:$Rn),
6785                (NaImm:$Immn))),
6786              (NaImm:$Immd))),
6787            (NaTy (EXTRACT_SUBREG
6788              (ResTy (INS
6789                (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6790                (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
6791                NaImm:$Immd, NaImm:$Immn)),
6792              sub_64))>;
6793 }
6794
6795 defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
6796                             neon_uimm1_bare, INSELs>;
6797 defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
6798                             neon_uimm0_bare, INSELd>;
6799 defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
6800                             neon_uimm3_bare, INSELb>;
6801 defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
6802                             neon_uimm2_bare, INSELh>;
6803 defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6804                             neon_uimm1_bare, INSELs>;
6805 defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
6806                             neon_uimm0_bare, INSELd>;
6807
// Select insert of a scalar FP register as INS (element) with source lane 0
// (a scalar FP register aliases lane 0 of the vector register).
6808 multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
6809                                       ValueType MidTy,
6810                                       RegisterClass OpFPR, Operand ResImm,
6811                                       SubRegIndex SubIndex, Instruction INS> {
6812 def : Pat <(ResTy (vector_insert
6813              (ResTy VPR128:$src),
6814              (MidTy OpFPR:$Rn),
6815              (ResImm:$Imm))),
6816            (INS (ResTy VPR128:$src),
6817              (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
6818              ResImm:$Imm,
6819              (i64 0))>;
6820
6821 def : Pat <(NaTy (vector_insert
6822              (NaTy VPR64:$src),
6823              (MidTy OpFPR:$Rn),
6824              (ResImm:$Imm))),
6825            (NaTy (EXTRACT_SUBREG
6826              (ResTy (INS
6827                (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6828                (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
6829                ResImm:$Imm,
6830                (i64 0))),
6831              sub_64))>;
6832 }
6833
6834 defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
6835                                   sub_32, INSELs>;
6836 defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
6837                                   sub_64, INSELd>;
6838
// SMOV: sign-extending move of vector lane $Imm into a general register.
// Matched as sext_inreg of a vector_extract.
6839 class NeonI_SMOV<string asmop, string Res, bit Q,
6840                  ValueType OpTy, ValueType eleTy,
6841                  Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
6842   : NeonI_copy<Q, 0b0, 0b0101,
6843                (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
6844                asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
6845                [(set (ResTy ResGPR:$Rd),
6846                  (ResTy (sext_inreg
6847                    (ResTy (vector_extract
6848                      (OpTy VPR128:$Rn), (OpImm:$Imm))),
6849                    eleTy)))],
6850                NoItinerary> {
6851   bits<4> Imm;
6852 }
6853
6854 //Signed integer move (main, from element)
// Inst{20-16} is imm5 (lane index + size marker, same scheme as INS).
// w* forms target GPR32, x* forms target GPR64; there is no SMOVws since
// a 32-bit lane to a 32-bit register needs no extension (that is UMOV).
6855 def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
6856                         GPR32, i32> {
6857   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6858 }
6859 def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
6860                         GPR32, i32> {
6861   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6862 }
6863 def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
6864                         GPR64, i64> {
6865   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6866 }
6867 def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
6868                         GPR64, i64> {
6869   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6870 }
6871 def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
6872                         GPR64, i64> {
6873   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6874 }
6875
// Additional DAG shapes that still select to a 64-bit SMOV: anyext+sext_inreg
// and plain sext forms, for both 128-bit (StTy) and widened 64-bit (NaTy)
// source vectors.
6876 multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
6877                                ValueType eleTy, Operand StImm,  Operand NaImm,
6878                                Instruction SMOVI> {
6879   def : Pat<(i64 (sext_inreg
6880               (i64 (anyext
6881                 (i32 (vector_extract
6882                   (StTy VPR128:$Rn), (StImm:$Imm))))),
6883               eleTy)),
6884             (SMOVI VPR128:$Rn, StImm:$Imm)>;
6885
6886   def : Pat<(i64 (sext
6887               (i32 (vector_extract
6888                 (StTy VPR128:$Rn), (StImm:$Imm))))),
6889             (SMOVI VPR128:$Rn, StImm:$Imm)>;
6890
6891   def : Pat<(i64 (sext_inreg
6892               (i64 (vector_extract
6893                 (NaTy VPR64:$Rn), (NaImm:$Imm))),
6894               eleTy)),
6895             (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6896               NaImm:$Imm)>;
6897
6898   def : Pat<(i64 (sext_inreg
6899               (i64 (anyext
6900                 (i32 (vector_extract
6901                   (NaTy VPR64:$Rn), (NaImm:$Imm))))),
6902               eleTy)),
6903             (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6904               NaImm:$Imm)>;
6905
6906   def : Pat<(i64 (sext
6907               (i32 (vector_extract
6908                 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
6909             (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6910               NaImm:$Imm)>;
6911 }
6912
6913 defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
6914                           neon_uimm3_bare, SMOVxb>;
6915 defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
6916                           neon_uimm2_bare, SMOVxh>;
6917 defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6918                           neon_uimm1_bare, SMOVxs>;
6919
// 32-bit SMOV from a 64-bit source vector: widen to 128 bits first.
6920 class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
6921                           ValueType eleTy, Operand StImm,  Operand NaImm,
6922                           Instruction SMOVI>
6923   : Pat<(i32 (sext_inreg
6924           (i32 (vector_extract
6925             (NaTy VPR64:$Rn), (NaImm:$Imm))),
6926           eleTy)),
6927         (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6928           NaImm:$Imm)>;
6929
6930 def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
6931                          neon_uimm3_bare, SMOVwb>;
6932 def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
6933                          neon_uimm2_bare, SMOVwh>;
6934
// UMOV: zero-extending move of vector lane $Imm into a general register,
// matched as a plain vector_extract (the upper register bits are zeroed).
6935 class NeonI_UMOV<string asmop, string Res, bit Q,
6936                  ValueType OpTy, Operand OpImm,
6937                  RegisterClass ResGPR, ValueType ResTy>
6938   : NeonI_copy<Q, 0b0, 0b0111,
6939                (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
6940                asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
6941                [(set (ResTy ResGPR:$Rd),
6942                   (ResTy (vector_extract
6943                     (OpTy VPR128:$Rn), (OpImm:$Imm))))],
6944                NoItinerary> {
6945   bits<4> Imm;
6946 }
6947
6948 //Unsigned integer move (main, from element)
// Inst{20-16} is imm5 (lane index + size marker, same scheme as INS/SMOV).
6949 def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
6950                          GPR32, i32> {
6951   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6952 }
6953 def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
6954                          GPR32, i32> {
6955   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6956 }
6957 def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
6958                          GPR32, i32> {
6959   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6960 }
6961 def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
6962                          GPR64, i64> {
6963   let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
6964 }
6965
// "mov" is preferred over "umov" for the s and d forms in disassembly.
6966 def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]",
6967                     (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>;
6968 def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]",
6969                     (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>;
6970
// UMOV from a 64-bit source vector: widen to 128 bits first.
// NOTE(review): the template parameter is named SMOVI but is instantiated
// only with UMOV instructions below — naming is misleading but harmless.
6971 class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
6972                          Operand StImm,  Operand NaImm,
6973                          Instruction SMOVI>
6974   : Pat<(ResTy (vector_extract
6975           (NaTy VPR64:$Rn), NaImm:$Imm)),
6976         (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6977           NaImm:$Imm)>;
6978
6979 def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
6980                         neon_uimm3_bare, UMOVwb>;
6981 def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
6982                         neon_uimm2_bare, UMOVwh>;
6983 def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6984                         neon_uimm1_bare, UMOVws>;
6985
// extract + mask-to-element-width is exactly UMOV's semantics, so the
// explicit `and` can be folded away.
6986 def : Pat<(i32 (and
6987             (i32 (vector_extract
6988               (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
6989             255)),
6990           (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
6991
6992 def : Pat<(i32 (and
6993             (i32 (vector_extract
6994               (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
6995             65535)),
6996           (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
6997
// NOTE(review): this matches an i32 extract from a v2i64 vector (a
// truncating extract) and selects the zero-extending 64-bit UMOV —
// looks intentional for zext(trunc(lane)) but worth confirming.
6998 def : Pat<(i64 (zext
6999             (i32 (vector_extract
7000               (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
7001           (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
7002
7003 def : Pat<(i32 (and
7004             (i32 (vector_extract
7005               (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
7006             255)),
7007           (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
7008             neon_uimm3_bare:$Imm)>;
7009
7010 def : Pat<(i32 (and
7011             (i32 (vector_extract
7012               (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
7013             65535)),
7014           (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
7015             neon_uimm2_bare:$Imm)>;
7016
7017 def : Pat<(i64 (zext
7018             (i32 (vector_extract
7019               (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
7020           (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
7021             neon_uimm0_bare:$Imm)>;
7022
7023 // Additional copy patterns for scalar types
// Extract lane 0 of a single-element vector: for v1i8/v1i16 go through the
// widened vector and UMOV; for v1i32/v1i64 a plain FMOV to the GPR suffices;
// v1f64 is a no-op (same register).
7024 def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
7025           (UMOVwb (v16i8
7026             (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
7027
7028 def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
7029           (UMOVwh (v8i16
7030             (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
7031
7032 def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
7033           (FMOVws FPR32:$Rn)>;
7034
7035 def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
7036           (FMOVxd FPR64:$Rn)>;
7037
7038 def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
7039           (f64 FPR64:$Rn)>;
7040
// scalar_to_vector for single-element vectors: insert into lane 0 of the
// widened vector type, then narrow to the scalar FP subregister. v1i32 and
// v1i64 can instead use a single FMOV into the FP bank.
7041 def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
7042           (v1i8 (EXTRACT_SUBREG (v16i8
7043             (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
7044             sub_8))>;
7045
7046 def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
7047           (v1i16 (EXTRACT_SUBREG (v8i16
7048             (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
7049             sub_16))>;
7050
7051 def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
7052           (FMOVsw $src)>;
7053
7054 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
7055           (FMOVdx $src)>;
7056
// scalar_to_vector for 64-bit vectors: INS into lane 0 of the 128-bit
// form, then take the low 64-bit subregister.
7057 def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
7058           (v8i8 (EXTRACT_SUBREG (v16i8
7059             (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
7060             sub_64))>;
7061
7062 def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
7063           (v4i16 (EXTRACT_SUBREG (v8i16
7064             (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
7065             sub_64))>;
7066
// v2i32 <- i32: insert into lane 0 of an undef v4i32, then take the low
// 64-bit subregister. The intermediate cast must name INSsw's actual result
// type, v4i32 — it was previously written as v16i8, which is inconsistent
// with the sibling patterns above (v8i8 casts to v16i8, v4i16 to v8i16).
7067 def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
7068           (v2i32 (EXTRACT_SUBREG (v4i32
7069             (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
7070             sub_64))>;
7071
// scalar_to_vector for 128-bit integer vectors: a single INS into lane 0 of
// an undef register (the remaining lanes are undefined, which is allowed).
7072 def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
7073           (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))>;
7074
7075 def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
7076           (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))>;
7077
7078 def : Pat<(v4i32 (scalar_to_vector GPR32:$Rn)),
7079           (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))>;
7080
7081 def : Pat<(v2i64 (scalar_to_vector GPR64:$Rn)),
7082           (INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>;
7083
// FP scalars already live in lane 0 of the vector register, so widening is
// just a subregister operation — no data movement.
7084 def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
7085           (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>;
7086 def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
7087           (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>;
7088
7089 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
7090           (v1f64 FPR64:$Rn)>;
7091
7092 def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
7093           (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
7094                          (f64 FPR64:$src), sub_64)>;
7095
// DUP (element): broadcast lane $Imm of a 128-bit source into every lane of
// the destination. Q (in the subclass) selects 64- vs 128-bit destination.
7096 class NeonI_DUP_Elt<bit Q, string asmop, string rdlane,  string rnlane,
7097                     RegisterOperand ResVPR, Operand OpImm>
7098   : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
7099                (ins VPR128:$Rn, OpImm:$Imm),
7100                asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
7101                [],
7102                NoItinerary> {
7103   bits<4> Imm;
7104 }
7105
// Inst{20-16} is imm5 (source lane index + size marker, as for INS).
7106 def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
7107                               neon_uimm4_bare> {
7108   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
7109 }
7110
7111 def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
7112                               neon_uimm3_bare> {
7113   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
7114 }
7115
7116 def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
7117                               neon_uimm2_bare> {
7118   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
7119 }
7120
7121 def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
7122                               neon_uimm1_bare> {
7123   let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
7124 }
7125
7126 def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
7127                               neon_uimm4_bare> {
7128   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
7129 }
7130
7131 def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
7132                               neon_uimm3_bare> {
7133   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
7134 }
7135
7136 def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
7137                               neon_uimm2_bare> {
7138   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
7139 }
7140
// Select Neon_vduplane from either a 128-bit (OpTy) or a widened 64-bit
// (NaTy) source vector onto DUP (element).
7141 multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
7142                                        ValueType OpTy,ValueType NaTy,
7143                                        ValueType ExTy, Operand OpLImm,
7144                                        Operand OpNImm> {
7145 def  : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
7146         (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
7147
7148 def : Pat<(ResTy (Neon_vduplane
7149             (NaTy VPR64:$Rn), OpNImm:$Imm)),
7150           (ResTy (DUPELT
7151             (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
7152 }
7153 defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
7154                              neon_uimm4_bare, neon_uimm3_bare>;
7155 defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
7156                              neon_uimm4_bare, neon_uimm3_bare>;
7157 defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
7158                              neon_uimm3_bare, neon_uimm2_bare>;
7159 defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
7160                              neon_uimm3_bare, neon_uimm2_bare>;
7161 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
7162                              neon_uimm2_bare, neon_uimm1_bare>;
7163 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
7164                              neon_uimm2_bare, neon_uimm1_bare>;
7165 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
7166                              neon_uimm1_bare, neon_uimm0_bare>;
7167 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
7168                              neon_uimm2_bare, neon_uimm1_bare>;
7169 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
7170                              neon_uimm2_bare, neon_uimm1_bare>;
7171 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
7172                              neon_uimm1_bare, neon_uimm0_bare>;
7173
// Splat of an FP scalar: the scalar is lane 0 of its vector register, so
// widen with SUBREG_TO_REG and DUP from lane 0.
7174 def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
7175           (v2f32 (DUPELT2s
7176             (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
7177             (i64 0)))>;
7178 def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
7179           (v4f32 (DUPELT4s
7180             (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
7181             (i64 0)))>;
7182 def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
7183           (v2f64 (DUPELT2d
7184             (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
7185             (i64 0)))>;
7186
// Neon_vduplane from a single-element vector held in a scalar FP register:
// widen via the appropriate subregister index, then DUP (element).
7187 multiclass NeonI_DUP_pattern<Instruction DUPELT, ValueType ResTy,
7188                              ValueType OpTy, RegisterClass OpRC,
7189                              Operand OpNImm, SubRegIndex SubIndex> {
7190 def : Pat<(ResTy (Neon_vduplane (OpTy OpRC:$Rn), OpNImm:$Imm)),
7191           (ResTy (DUPELT
7192             (SUBREG_TO_REG (i64 0), OpRC:$Rn, SubIndex), OpNImm:$Imm))>;
7193 }
7194
7195 defm : NeonI_DUP_pattern<DUPELT4h, v4i16, v1i16, FPR16, neon_uimm2_bare,sub_16>;
7196 defm : NeonI_DUP_pattern<DUPELT4s, v4i32, v1i32, FPR32, neon_uimm2_bare,sub_32>;
7197 defm : NeonI_DUP_pattern<DUPELT8b, v8i8, v1i8, FPR8, neon_uimm3_bare, sub_8>;
7198 defm : NeonI_DUP_pattern<DUPELT8h, v8i16, v1i16, FPR16, neon_uimm3_bare,sub_16>;
7199 defm : NeonI_DUP_pattern<DUPELT16b, v16i8, v1i8, FPR8, neon_uimm4_bare, sub_8>;
7200
// DUP (general register): broadcast a GPR value into every lane; matched
// directly from the Neon_vdup node.
7201 class NeonI_DUP<bit Q, string asmop, string rdlane,
7202                 RegisterOperand ResVPR, ValueType ResTy,
7203                 RegisterClass OpGPR, ValueType OpTy>
7204   : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
7205                asmop # "\t$Rd" # rdlane # ", $Rn",
7206                [(set (ResTy ResVPR:$Rd),
7207                  (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
7208                NoItinerary>;
7209
// Inst{20-16} is imm5 holding only the element-size marker bit (no lane
// index for the general-register form).
7210 def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
7211   let Inst{20-16} = 0b00001;
7212   // bits 17-20 are unspecified, but should be set to zero.
7213 }
7214
7215 def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
7216   let Inst{20-16} = 0b00010;
7217   // bits 18-20 are unspecified, but should be set to zero.
7218 }
7219
7220 def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
7221   let Inst{20-16} = 0b00100;
7222   // bits 19-20 are unspecified, but should be set to zero.
7223 }
7224
7225 def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
7226   let Inst{20-16} = 0b01000;
7227   // bit 20 is unspecified, but should be set to zero.
7228 }
7229
7230 def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
7231   let Inst{20-16} = 0b00001;
7232   // bits 17-20 are unspecified, but should be set to zero.
7233 }
7234
7235 def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
7236   let Inst{20-16} = 0b00010;
7237   // bits 18-20 are unspecified, but should be set to zero.
7238 }
7239
7240 def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
7241   let Inst{20-16} = 0b00100;
7242   // bits 19-20 are unspecified, but should be set to zero.
7243 }
7244
// Patterns for CONCAT_VECTORS: build a 128-bit vector from two 64-bit halves.
multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
// High half undef: the implicit widening via SUBREG_TO_REG is enough.
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
          (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
// General case: widen both halves, then insert Rm's D lane into lane 1.
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
          (INSELd
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
            (i64 1),
            (i64 0))>;
// Self-concatenation is a broadcast of the 64-bit value to both D lanes.
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
          (DUPELT2d
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            (i64 0))> ;
}

defm : Concat_Vector_Pattern<v16i8, v8i8>;
defm : Concat_Vector_Pattern<v8i16, v4i16>;
defm : Concat_Vector_Pattern<v4i32, v2i32>;
defm : Concat_Vector_Pattern<v2i64, v1i64>;
defm : Concat_Vector_Pattern<v4f32, v2f32>;
defm : Concat_Vector_Pattern<v2f64, v1f64>;
7267
// CONCAT_VECTORS of two v1i32 scalars held in FPR32 registers; these are not
// covered by Concat_Vector_Pattern because the operands are 32-bit FPRs.
def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), undef)),
          (v2i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32))>;
// General case: widen both scalars to v4i32, insert Rm's S lane into lane 1,
// then take the low 64 bits of the result.
def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
          (EXTRACT_SUBREG 
            (v4i32 (INSELs
              (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)),
              (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
              (i64 1),
              (i64 0))),
            sub_64)>;
// Self-concatenation is a lane-0 broadcast of the widened scalar.
def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rn))),
          (DUPELT2s (v4i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32)), 0)>;
7280
// Patterns for EXTRACT_SUBVECTOR: the low half of a 128-bit vector is its
// sub_64 subregister, so extraction at index 0 is just a subregister copy.
def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
          (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
          (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
          (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
          (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
          (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
          (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
7294
// The following definitions are for the instruction class (3V Elem)
7296
7297 // Variant 1
7298
// Accumulating by-element format (3-operand): Rd is read and written, Rn is
// the vector operand and Re[Index] supplies the element. Selection patterns
// are attached separately (see NI_2VE_laneq / NI_2VE_lane).
class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
             string asmop, string ResS, string OpS, string EleOpS,
             Operand OpImm, RegisterOperand ResVPR,
             RegisterOperand OpVPR, RegisterOperand EleOpVPR>
  : NeonI_2VElem<q, u, size, opcode,
                 (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
                                         EleOpVPR:$Re, OpImm:$Index),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
                 ", $Re." # EleOpS # "[$Index]",
                 [],
                 NoItinerary> {
  bits<3> Index;  // lane number; encoded into Inst bits by the subclasses
  bits<5> Re;     // element register number

  // Rd is both the accumulator source and the destination.
  let Constraints = "$src = $Rd";
}
7315
// Integer accumulating by-element arrangements: 2S/4S (S elements, index in
// Inst{11,21}) and 4H/8H (H elements, index in Inst{11,21,20}).
multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                     neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                     neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15.
  def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
                     neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }

  def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
                     neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}
7349
// Integer multiply-accumulate / multiply-subtract by element.
defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
7352
// Pattern for lane in 128-bit vector: the element register already belongs
// to the 128-bit class the instruction encodes, so it is used directly.
class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                   RegisterOperand ResVPR, RegisterOperand OpVPR,
                   RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
                   ValueType EleOpTy>
  : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7361
// Pattern for lane in 64-bit vector: the instruction encodes a 128-bit
// element register, so the 64-bit operand is widened with SUBREG_TO_REG.
class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                  RegisterOperand ResVPR, RegisterOperand OpVPR,
                  RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
                  ValueType EleOpTy>
  : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST ResVPR:$src, OpVPR:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7371
// Attach accumulate-by-element selection patterns for every integer
// arrangement of the given instruction family (e.g. MLAvve/MLSvve).
multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
{
  def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
                     op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
                     op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
                     op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
                     op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
                    op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>;

  def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
                    op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
}
7394
// Selection patterns for mla/mls by element.
defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
7397
// Non-accumulating by-element format (2-operand): like NI_2VE but without
// the tied accumulator source operand.
class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
                 string asmop, string ResS, string OpS, string EleOpS,
                 Operand OpImm, RegisterOperand ResVPR,
                 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
  : NeonI_2VElem<q, u, size, opcode,
                 (outs ResVPR:$Rd), (ins OpVPR:$Rn,
                                         EleOpVPR:$Re, OpImm:$Index),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
                 ", $Re." # EleOpS # "[$Index]",
                 [],
                 NoItinerary> {
  bits<3> Index;  // lane number; encoded into Inst bits by the subclasses
  bits<5> Re;     // element register number
}
7412
// Integer non-accumulating by-element arrangements (mul/sqdmulh/sqrdmulh):
// 2S/4S with S elements and 4H/8H with H elements.
multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                         neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                         neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15.
  def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
                         neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }

  def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
                         neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}
7446
// Multiply / saturating doubling multiply-high / saturating rounding
// doubling multiply-high, all by element.
defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
7450
// Pattern for lane in 128-bit vector, non-accumulating form: the element
// register is used directly.
class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                       RegisterOperand OpVPR, RegisterOperand EleOpVPR,
                       ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
  : Pat<(ResTy (op (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7458
// Pattern for lane in 64-bit vector, non-accumulating form: the element
// register is widened with SUBREG_TO_REG to match the encoded 128-bit class.
class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                      RegisterOperand OpVPR, RegisterOperand EleOpVPR,
                      ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
  : Pat<(ResTy (op (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST OpVPR:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7467
// Attach multiply-by-element selection patterns for every integer
// arrangement of the given instruction family.
multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
                         op, VPR64, VPR128, v2i32, v2i32, v4i32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
                         op, VPR128, VPR128, v4i32, v4i32, v4i32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
                         op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
                         op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
                        op, VPR64, VPR64, v2i32, v2i32, v2i32>;

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
                        op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
}
7489
// Selection patterns for mul/sqdmulh/sqrdmulh by element.
defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
7493
7494 // Variant 2
7495
// Floating-point non-accumulating by-element arrangements: 2S/4S/2D.
// There is no 1D form in the architecture.
multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                         neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                         neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // _1d2d doesn't exist!

  def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
                         neon_uimm1_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{0}};
    let Inst{21} = 0b0;
    let Inst{20-16} = Re;
  }
}
7521
// Floating-point multiply / multiply-extended by element.
defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
7524
// Pattern for a v1f64 element combined into both 2D lanes (coreop is
// expected to be a Neon_combine_2d fragment of the same register): widen
// the element register and select lane 0.
class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
                         RegisterOperand OpVPR, RegisterOperand EleOpVPR,
                         ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
                         SDPatternOperator coreop>
  : Pat<(ResTy (op (OpTy OpVPR:$Rn),
          (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
        (INST OpVPR:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
7533
// Attach floating-point multiply-by-element selection patterns for every
// arrangement (2S/4S/2D) of the given instruction family.
multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
                         op, VPR64, VPR128, v2f32, v2f32, v4f32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
                         op, VPR128, VPR128, v4f32, v4f32, v4f32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
                         op, VPR128, VPR128, v2f64, v2f64, v2f64>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
                        op, VPR64, VPR64, v2f32, v2f32, v2f32>;

  def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
                           op, VPR128, VPR64, v2f64, v2f64, v1f64,
                           BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
}
7553
// Selection patterns for fmul/fmulx by element.
defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
7556
// fmul with a scalar FPR duplicated across all lanes: widen the scalar with
// SUBREG_TO_REG and use the by-element form with lane 0.
def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))),
                       (v2f32 VPR64:$Rn))),
          (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;

def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))),
                       (v4f32 VPR128:$Rn))),
          (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;

def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))),
                       (v2f64 VPR128:$Rn))),
          (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>;
7568
// The following patterns use fma
7570 // -ffp-contract=fast generates fma
7571
// Floating-point accumulating by-element arrangements (fmla/fmls):
// 2S/4S/2D. There is no 1D form.
multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                     neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                     neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // _1d2d doesn't exist!

  def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
                     neon_uimm1_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{0}};
    let Inst{21} = 0b0;
    let Inst{20-16} = Re;
  }
}
7597
// Floating-point fused multiply-add / multiply-subtract by element.
defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
7600
// Pattern for lane in 128-bit vector, with the dup'd operand first (the
// operand order fma produces); coreop is the dup-lane fragment to match.
class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                       RegisterOperand ResVPR, RegisterOperand OpVPR,
                       ValueType ResTy, ValueType OpTy,
                       SDPatternOperator coreop>
  : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
                   (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
        (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
7609
// Pattern for lane 0: fma with a scalar FPR32 duplicated to all lanes;
// widen the scalar and use lane 0 of the by-element form.
class NI_2VEfma_lane0<Instruction INST, SDPatternOperator op,
                      RegisterOperand ResVPR, ValueType ResTy>
  : Pat<(ResTy (op (ResTy ResVPR:$Rn),
                   (ResTy (Neon_vdup (f32 FPR32:$Re))),
                   (ResTy ResVPR:$src))),
        (INST ResVPR:$src, ResVPR:$Rn,
              (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7618
// Pattern for lane in 64-bit vector (dup'd operand first): the element
// register is widened with SUBREG_TO_REG before use.
class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                      RegisterOperand ResVPR, RegisterOperand OpVPR,
                      ValueType ResTy, ValueType OpTy,
                      SDPatternOperator coreop>
  : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
                   (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
        (INST ResVPR:$src, ResVPR:$Rn,
          (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
7628
// Pattern for a 64-bit (v1f64) element combined into both 2D lanes via
// coreop (a Neon_combine_2d-style fragment of the same register): widen the
// element register and select lane 0.
class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
                           SDPatternOperator op,
                           RegisterOperand ResVPR, RegisterOperand OpVPR,
                           ValueType ResTy, ValueType OpTy,
                           SDPatternOperator coreop>
  : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
                   (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
        (INST ResVPR:$src, ResVPR:$Rn,
          (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
7639
7640
// Attach fma-by-element selection patterns for every floating-point
// arrangement (2S/4S/2D) of the given instruction family.
multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
                         neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;

  def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_2s4s"),
                        op, VPR64, v2f32>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
                         neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;

  def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_4s4s"),
                        op, VPR128, v4f32>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
                         neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
                        neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
                        BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;

  def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
                             neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
                             BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
}
7670
// Selection patterns for fmla by element.
defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
7672
// Pattern for lane 0 of fms: fneg on the multiplicand with a scalar FPR32
// duplicated element; widen the scalar and use lane 0.
class NI_2VEfms_lane0<Instruction INST, SDPatternOperator op,
                      RegisterOperand ResVPR, ValueType ResTy>
  : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)),
                   (ResTy (Neon_vdup (f32 FPR32:$Re))),
                   (ResTy ResVPR:$src))),
        (INST ResVPR:$src, ResVPR:$Rn,
              (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7681
// Attach fmls-by-element selection patterns. The fneg may appear either
// outside the dup-lane or on the element being duplicated, so each
// arrangement needs both forms of the pattern.
multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
{
  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
                         neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
                         BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
                         neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
                         BinOpFrag<(Neon_vduplane
                                     (fneg node:$LHS), node:$RHS)>>;

  def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_2s4s"),
                        op, VPR64, v2f32>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
                         neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
                         BinOpFrag<(fneg (Neon_vduplane
                                     node:$LHS, node:$RHS))>>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
                         neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
                         BinOpFrag<(Neon_vduplane
                                     (fneg node:$LHS), node:$RHS)>>;

  def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_4s4s"),
                        op, VPR128, v4f32>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
                         neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
                         BinOpFrag<(fneg (Neon_vduplane
                                     node:$LHS, node:$RHS))>>;

  def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
                         neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
                         BinOpFrag<(Neon_vduplane
                                     (fneg node:$LHS), node:$RHS)>>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
                        neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
                        BinOpFrag<(fneg (Neon_vduplane
                                    node:$LHS, node:$RHS))>>;

  def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
                        neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
                        BinOpFrag<(Neon_vduplane
                                    (fneg node:$LHS), node:$RHS)>>;

  def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
                        neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
                        BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;

  def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
                        neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
                        BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>;

  def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
                             neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
                             BinOpFrag<(fneg (Neon_combine_2d
                                         node:$LHS, node:$RHS))>>;

  def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
                             neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
                             BinOpFrag<(Neon_combine_2d
                                         (fneg node:$LHS), (fneg node:$RHS))>>;
}
7749
// Selection patterns for fmls by element (fma with negated operand).
defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
7751
7752 // Variant 3: Long type
7753 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
7754 //      SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
7755
// Long-type accumulating by-element arrangements. The "2" (asmop # "2")
// variants consume the high half of a 128-bit source vector.
multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
                     neon_uimm2_bare, VPR128, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
                     neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15.
  def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
                     neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }

  def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
                     neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}
7789
// Long accumulating multiplies by element (signed/unsigned and saturating
// doubling variants).
defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
7796
// Long-type non-accumulating by-element arrangements; "2" variants consume
// the high half of a 128-bit source vector.
multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
                         neon_uimm2_bare, VPR128, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
                         neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15.
  def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
                         neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }

  def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
                         neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}
7830
// Long multiplies by element.
defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
7834
// scalar_to_vector of an f64 is just a register-to-register move into the
// v1f64 class.
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
          (FMOVdd $src)>;
7837
// Pattern for lane in 128-bit vector, long "2" accumulating variant: hiop
// extracts the high half of Rn before the widening operation.
class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                     RegisterOperand EleOpVPR, ValueType ResTy,
                     ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
                     SDPatternOperator hiop>
  : Pat<(ResTy (op (ResTy VPR128:$src),
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (Neon_vduplane
                      (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7848
// Pattern for lane in 64-bit vector, long "2" accumulating variant: the
// element register is widened with SUBREG_TO_REG before use.
class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                    RegisterOperand EleOpVPR, ValueType ResTy,
                    ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
                    SDPatternOperator hiop>
  : Pat<(ResTy (op (ResTy VPR128:$src),
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (Neon_vduplane
                      (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST VPR128:$src, VPR128:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7860
// Pattern for a dup'd GPR element, long "2" accumulating variant: the dup is
// materialized with DupInst and lane 0 is selected.
class NI_2VEL2_lane0<Instruction INST, SDPatternOperator op,
                     ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
                     SDPatternOperator hiop, Instruction DupInst>
  : Pat<(ResTy (op (ResTy VPR128:$src),
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
        (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>;
7868
// Attach long accumulating by-element selection patterns (low-half, high-half
// "2" variants, dup'd-GPR lane-0 forms, and 64-bit element forms).
multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
  def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
                     op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
                     op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>;

  def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
                       op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;

  def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
                       op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;

  def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
                       op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;

  def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
                       op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
                    op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;

  def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
                    op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>;

  def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
                      op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;

  def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
                      op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
}
7902
// Selection patterns for long multiply-accumulate/subtract by element.
defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
7907
// Pattern for lane in 128-bit vector
// Matches op(high-half(Rn), dup-lane(Re, Index)) for the second-part
// ("2") multiply-long instructions.  Re is already a 128-bit register,
// so it is passed to the instruction unchanged.
class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                         RegisterOperand EleOpVPR, ValueType ResTy,
                         ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
                         SDPatternOperator hiop>
  : Pat<(ResTy (op
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (Neon_vduplane
                      (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7918
// Pattern for lane in 64-bit vector
// Same shape as NI_2VEL2_mul_laneq, but the element register is only
// 64 bits wide, so it is widened to the 128-bit register the instruction
// expects with SUBREG_TO_REG before use.
class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                        RegisterOperand EleOpVPR, ValueType ResTy,
                        ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
                        SDPatternOperator hiop>
  : Pat<(ResTy (op
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (Neon_vduplane
                      (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST VPR128:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7930
// Pattern for fixed lane 0
// The scalar operand arrives in a GPR: splat it into a vector register
// with DupInst and address lane 0 of the result.
class NI_2VEL2_mul_lane0<Instruction INST, SDPatternOperator op,
                         ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
                         SDPatternOperator hiop, Instruction DupInst>
  : Pat<(ResTy (op
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
        (INST VPR128:$Rn, (DupInst $Re), 0)>;
7939
// Lane-indexed selection patterns for the widening multiply instructions
// (no accumulator operand).  Same coverage scheme as NI_2VEL_v3_pat:
// laneq, high-half "2" variants, fixed lane 0 from a GPR dup, and
// 64-bit-vector lane with halved index range.
multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
                         op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
                         op, VPR64, VPR128, v2i64, v2i32, v4i32>;

  def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
                         op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;

  def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
                           op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;

  def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_4s8h"),
                           op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;

  def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_2d4s"),
                           op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
                        op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
                        op, VPR64, VPR64, v2i64, v2i32, v2i32>;

  def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
                          op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;

  def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
                          op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
}
7973
// Instantiate the lane-indexed widening-multiply patterns for the
// signed, unsigned, and saturating-doubling multiply-long families.
defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
7977
// PatFrags that fuse a saturating add/sub with a saturating doubling
// multiply-long: op(Ra, vqdmull(Rn, Rm)).  Used below to match the
// SQDMLAL/SQDMLSL accumulate forms.
multiclass NI_qdma<SDPatternOperator op> {
  def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                    (op node:$Ra,
                      (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;

  def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                    (op node:$Ra,
                      (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
}
7987
// qdmlal = sqadd(Ra, sqdmull(Rn, Rm)); qdmlsl = sqsub(Ra, sqdmull(Rn, Rm)).
defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
7990
// Lane-indexed patterns for the saturating-doubling multiply-accumulate
// long instructions.  op names the NI_qdma PatFrag prefix ("Neon_qdmlal"
// or "Neon_qdmlsl"); the _4s/_2d suffix selects the element width of the
// fused vqdmull.  Coverage mirrors NI_2VEL_v3_pat.
multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
  def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
                     !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
                     v4i32, v4i16, v8i16>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
                     !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
                     v2i64, v2i32, v4i32>;

  def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
                       !cast<PatFrag>(op # "_4s"), VPR128Lo,
                       v4i32, v8i16, v8i16, v4i16, Neon_High8H>;

  def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
                       !cast<PatFrag>(op # "_2d"), VPR128,
                       v2i64, v4i32, v4i32, v2i32, Neon_High4S>;

  def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
                       !cast<PatFrag>(op # "_4s"),
                       v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;

  def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
                       !cast<PatFrag>(op # "_2d"),
                       v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
                    !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
                    v4i32, v4i16, v4i16>;

  def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
                    !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
                    v2i64, v2i32, v2i32>;

  def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
                      !cast<PatFrag>(op # "_4s"), VPR64Lo,
                      v4i32, v8i16, v4i16, v4i16, Neon_High8H>;

  def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
                      !cast<PatFrag>(op # "_2d"), VPR64,
                      v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
}
8034
// Instantiate lane-indexed patterns for SQDMLAL/SQDMLSL.
defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
8037
8038 // End of implementation for instruction class (3V Elem)
8039
// Common class for the REV64/REV32/REV16 element-reverse instructions:
// single-operand 2-register-misc form whose pattern maps the Neon_Rev
// DAG node directly onto the instruction.  Res is the arrangement
// specifier string (e.g. "16b") used in the assembly text.
class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
                bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
                SDPatternOperator Neon_Rev>
  : NeonI_2VMisc<Q, U, size, opcode,
               (outs ResVPR:$Rd), (ins ResVPR:$Rn),
               asmop # "\t$Rd." # Res # ", $Rn." # Res,
               [(set (ResTy ResVPR:$Rd),
                  (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
               NoItinerary> ;
8049
8050 def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
8051                           v16i8, Neon_rev64>;
8052 def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
8053                          v8i16, Neon_rev64>;
8054 def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
8055                          v4i32, Neon_rev64>;
8056 def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
8057                          v8i8, Neon_rev64>;
8058 def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
8059                          v4i16, Neon_rev64>;
8060 def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
8061                          v2i32, Neon_rev64>;
8062
8063 def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
8064 def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;
8065
8066 def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
8067                           v16i8, Neon_rev32>;
8068 def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
8069                           v8i16, Neon_rev32>;
8070 def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
8071                          v8i8, Neon_rev32>;
8072 def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
8073                          v4i16, Neon_rev32>;
8074
8075 def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
8076                           v16i8, Neon_rev16>;
8077 def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
8078                          v8i8, Neon_rev16>;
8079
// Pairwise add-long (SADDLP/UADDLP): each pattern consumes a vector and
// produces one with double-width elements and half the element count,
// as shown by the in/out types (e.g. v16i8 -> v8i16).  Neon_Padd is the
// vpaddls/vpaddlu intrinsic supplied at instantiation.
multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
                             SDPatternOperator Neon_Padd> {
  def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.8h, $Rn.16b",
                           [(set (v8i16 VPR128:$Rd),
                              (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
                           NoItinerary>;

  def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.4h, $Rn.8b",
                          [(set (v4i16 VPR64:$Rd),
                             (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
                          NoItinerary>;

  def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.4s, $Rn.8h",
                           [(set (v4i32 VPR128:$Rd),
                              (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
                           NoItinerary>;

  def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.2s, $Rn.4h",
                          [(set (v2i32 VPR64:$Rd),
                             (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
                          NoItinerary>;

  def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.2d, $Rn.4s",
                           [(set (v2i64 VPR128:$Rd),
                              (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
                           NoItinerary>;

  def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.1d, $Rn.2s",
                          [(set (v1i64 VPR64:$Rd),
                             (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
                          NoItinerary>;
}
8124
defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
                                int_arm_neon_vpaddls>;
defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
                                int_arm_neon_vpaddlu>;

// An add-long-across-vector of a 2s input has only one pair to add, so
// it can be implemented with the pairwise 2s->1d instruction.
def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))),
          (SADDLP2s1d $Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))),
          (UADDLP2s1d $Rn)>;
8134
// Pairwise add-long with accumulate (SADALP/UADALP): like
// NeonI_PairwiseAdd but the destination is also a source accumulator,
// hence the tied "$src = $Rd" constraint and the extra $src input in
// every pattern.
multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                             SDPatternOperator Neon_Padd> {
  let Constraints = "$src = $Rd" in {
    def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "\t$Rd.8h, $Rn.16b",
                             [(set (v8i16 VPR128:$Rd),
                                (v8i16 (Neon_Padd
                                  (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
                             NoItinerary>;

    def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                            (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                            asmop # "\t$Rd.4h, $Rn.8b",
                            [(set (v4i16 VPR64:$Rd),
                               (v4i16 (Neon_Padd
                                 (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
                            NoItinerary>;

    def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "\t$Rd.4s, $Rn.8h",
                            [(set (v4i32 VPR128:$Rd),
                               (v4i32 (Neon_Padd
                                 (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
                            NoItinerary>;

    def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                            (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                            asmop # "\t$Rd.2s, $Rn.4h",
                            [(set (v2i32 VPR64:$Rd),
                               (v2i32 (Neon_Padd
                                 (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
                            NoItinerary>;

    def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "\t$Rd.2d, $Rn.4s",
                            [(set (v2i64 VPR128:$Rd),
                               (v2i64 (Neon_Padd
                                 (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
                            NoItinerary>;

    def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                            (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                            asmop # "\t$Rd.1d, $Rn.2s",
                            [(set (v1i64 VPR64:$Rd),
                               (v1i64 (Neon_Padd
                                 (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
                            NoItinerary>;
  }
}
8187
// Signed/unsigned pairwise add-long-and-accumulate.
defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
                                   int_arm_neon_vpadals>;
defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
                                   int_arm_neon_vpadalu>;
8192
// One-operand 2-reg-misc instructions for every B/H/S/D arrangement.
// Instruction definitions only (empty pattern lists); the selection
// patterns are attached separately via NeonI_2VMisc_BHSD_1Arg_Pattern.
multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
  def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.16b, $Rn.16b",
                         [], NoItinerary>;

  def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.8h, $Rn.8h",
                        [], NoItinerary>;

  def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [], NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.2d, $Rn.2d",
                        [], NoItinerary>;

  def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.8b, $Rn.8b",
                         [], NoItinerary>;

  def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.4h, $Rn.4h",
                        [], NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [], NoItinerary>;
}
8229
// Saturating abs/neg and plain abs/neg instruction definitions.
defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>;
defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>;
defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>;
8234
// Attaches one-operand selection patterns to an instruction family
// defined by NeonI_2VMisc_BHSDsize_1Arg: Neon_Op on each vector type
// maps to the matching arrangement of Prefix (e.g. "SQABS" # "16b").
multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix,
                                          SDPatternOperator Neon_Op> {
  def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))),
            (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>;

  def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))),
            (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>;

  def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))),
            (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>;

  def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))),
            (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>;

  def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))),
            (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>;

  def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))),
            (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>;

  def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))),
            (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>;
}
8258
// NEG gets no intrinsic pattern here; it is matched from generic
// (sub 0, x) DAGs by the explicit patterns that follow.
defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>;
defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>;
defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>;
8262
// Select NEG for a subtraction from the all-zero vector.  Neon_AllZero
// is a v16i8/v8i8 build, so non-byte element types match it through a
// bitconvert.
def : Pat<(v16i8 (sub
            (v16i8 Neon_AllZero),
            (v16i8 VPR128:$Rn))),
          (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (sub
            (v8i8 Neon_AllZero),
            (v8i8 VPR64:$Rn))),
          (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>;
def : Pat<(v8i16 (sub
            (v8i16 (bitconvert (v16i8 Neon_AllZero))),
            (v8i16 VPR128:$Rn))),
          (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>;
def : Pat<(v4i16 (sub
            (v4i16 (bitconvert (v8i8 Neon_AllZero))),
            (v4i16 VPR64:$Rn))),
          (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>;
def : Pat<(v4i32 (sub
            (v4i32 (bitconvert (v16i8 Neon_AllZero))),
            (v4i32 VPR128:$Rn))),
          (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>;
def : Pat<(v2i32 (sub
            (v2i32 (bitconvert (v8i8 Neon_AllZero))),
            (v2i32 VPR64:$Rn))),
          (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>;
def : Pat<(v2i64 (sub
            (v2i64 (bitconvert (v16i8 Neon_AllZero))),
            (v2i64 VPR128:$Rn))),
          (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>;
8291
// Two-operand (accumulating) 2-reg-misc instructions for every B/H/S/D
// arrangement; the destination doubles as a source via "$src = $Rd".
// Patterns are attached separately via NeonI_2VMisc_BHSD_2Args_Pattern.
multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
  let Constraints = "$src = $Rd" in {
    def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                           asmop # "\t$Rd.16b, $Rn.16b",
                           [], NoItinerary>;

    def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "\t$Rd.8h, $Rn.8h",
                          [], NoItinerary>;

    def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "\t$Rd.4s, $Rn.4s",
                          [], NoItinerary>;

    def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "\t$Rd.2d, $Rn.2d",
                          [], NoItinerary>;

    def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                          asmop # "\t$Rd.8b, $Rn.8b",
                          [], NoItinerary>;

    def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4h",
                          [], NoItinerary>;

    def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2s",
                          [], NoItinerary>;
  }
}
8330
// Signed/unsigned saturating accumulate of the opposite-signedness value.
defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>;
defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>;
8333
// Attaches two-operand selection patterns to an instruction family
// defined by NeonI_2VMisc_BHSDsize_2Args: Neon_Op(src, Rn) on each
// vector type maps to the matching arrangement of Prefix.
multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix,
                                           SDPatternOperator Neon_Op> {
  def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))),
            (v16i8 (!cast<Instruction>(Prefix # 16b)
              (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>;

  def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))),
            (v8i16 (!cast<Instruction>(Prefix # 8h)
              (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>;

  def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))),
            (v4i32 (!cast<Instruction>(Prefix # 4s)
              (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>;

  def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))),
            (v2i64 (!cast<Instruction>(Prefix # 2d)
              (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>;

  def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))),
            (v8i8 (!cast<Instruction>(Prefix # 8b)
              (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>;

  def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))),
            (v4i16 (!cast<Instruction>(Prefix # 4h)
              (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>;

  def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))),
            (v2i32 (!cast<Instruction>(Prefix # 2s)
              (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>;
}
8364
// Hook SUQADD/USQADD up to their AArch64 intrinsics.
defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>;
defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>;
8367
// One-operand 2-reg-misc instructions with built-in patterns, defined
// only for B/H/S element sizes (no D variant) — used for CLS/CLZ below.
multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
                          SDPatternOperator Neon_Op> {
  def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.16b, $Rn.16b",
                         [(set (v16i8 VPR128:$Rd),
                            (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
                         NoItinerary>;

  def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.8h, $Rn.8h",
                        [(set (v8i16 VPR128:$Rd),
                           (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
                        NoItinerary>;

  def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4i32 VPR128:$Rd),
                           (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
                        NoItinerary>;

  def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.8b, $Rn.8b",
                        [(set (v8i8 VPR64:$Rd),
                           (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
                        NoItinerary>;

  def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.4h, $Rn.4h",
                        [(set (v4i16 VPR64:$Rd),
                           (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2i32 VPR64:$Rd),
                           (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
                        NoItinerary>;
}
8412
// Count-leading-sign-bits and count-leading-zeros (per element).
defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>;
8415
// Byte-only 2-reg-misc instructions (16b/8b arrangements only, no
// patterns here) — used for CNT/NOT/RBIT, which operate on bytes/bits.
multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
                              bits<5> Opcode> {
  def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.16b, $Rn.16b",
                         [], NoItinerary>;

  def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.8b, $Rn.8b",
                        [], NoItinerary>;
}
8428
8429 defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
8430 defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>;
8431 defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>;
8432
8433 def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b",
8434                     (NOT16b VPR128:$Rd, VPR128:$Rn), 0>;
8435 def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b",
8436                     (NOT8b VPR64:$Rd, VPR64:$Rn), 0>;
8437
// Generic population-count nodes select to CNT.
def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))),
          (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))),
          (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>;
8442
// Select NOT for xor with the all-ones vector.  Neon_AllOne is a
// v16i8/v8i8 build, so non-byte element types match it through a
// bitconvert; NOT is bitwise, so the same byte instruction serves
// every element width.
def : Pat<(v16i8 (xor
            (v16i8 VPR128:$Rn),
            (v16i8 Neon_AllOne))),
          (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (xor
            (v8i8 VPR64:$Rn),
            (v8i8 Neon_AllOne))),
          (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>;
def : Pat<(v8i16 (xor
            (v8i16 VPR128:$Rn),
            (v8i16 (bitconvert (v16i8 Neon_AllOne))))),
          (NOT16b VPR128:$Rn)>;
def : Pat<(v4i16 (xor
            (v4i16 VPR64:$Rn),
            (v4i16 (bitconvert (v8i8 Neon_AllOne))))),
          (NOT8b VPR64:$Rn)>;
def : Pat<(v4i32 (xor
            (v4i32 VPR128:$Rn),
            (v4i32 (bitconvert (v16i8 Neon_AllOne))))),
          (NOT16b VPR128:$Rn)>;
def : Pat<(v2i32 (xor
            (v2i32 VPR64:$Rn),
            (v2i32 (bitconvert (v8i8 Neon_AllOne))))),
          (NOT8b VPR64:$Rn)>;
def : Pat<(v2i64 (xor
            (v2i64 VPR128:$Rn),
            (v2i64 (bitconvert (v16i8 Neon_AllOne))))),
          (NOT16b VPR128:$Rn)>;
8471
// The rbit intrinsic selects to RBIT.
def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))),
          (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))),
          (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>;
8476
// One-operand floating-point 2-reg-misc instructions, for the 4s/2d/2s
// arrangements, with built-in patterns — used for FABS/FNEG below.
multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
                                SDPatternOperator Neon_Op> {
  def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4f32 VPR128:$Rd),
                           (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
                        NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.2d, $Rn.2d",
                        [(set (v2f64 VPR128:$Rd),
                           (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2f32 VPR64:$Rd),
                           (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
                        NoItinerary>;
}
8500
// Vector floating-point absolute value and negate.
defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>;
8503
// Narrowing 2-reg-misc instructions (128-bit source, 64-bit result).
// The first three defs write the low half; the "2" variants write the
// high half of a 128-bit destination, so they tie $Rd to $src to keep
// the low half intact.  No patterns here; attached separately.
multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
  def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.8b, $Rn.8h",
                          [], NoItinerary>;

  def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4s",
                          [], NoItinerary>;

  def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  let Constraints = "$Rd = $src" in {
    def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "2\t$Rd.16b, $Rn.8h",
                             [], NoItinerary>;

    def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.8h, $Rn.4s",
                            [], NoItinerary>;

    def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.4s, $Rn.2d",
                            [], NoItinerary>;
  }
}
8537
// Extract-narrow and the saturating narrow variants.
defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>;
defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>;
defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>;
defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>;
8542
// Selection patterns for the narrowing family: plain Neon_Op selects
// the low-half instruction, while concat_vectors(src, Neon_Op(Rn))
// selects the "2" high-half instruction — the existing 64-bit $src is
// widened with SUBREG_TO_REG to occupy the low half of the tied
// 128-bit destination.
multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix,
                                        SDPatternOperator Neon_Op> {
  def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))),
            (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>;

  def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
            (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;

  def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
            (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;

  def : Pat<(v16i8 (concat_vectors
              (v8i8 VPR64:$src),
              (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 8h16b)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;

  def : Pat<(v8i16 (concat_vectors
              (v4i16 VPR64:$src),
              (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 4s8h)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;

  def : Pat<(v4i32 (concat_vectors
              (v2i32 VPR64:$src),
              (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 2d4s)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;
}
8575
// XTN selects plain truncation; the others select the ARM saturating-narrow
// intrinsics (shared with the 32-bit NEON backend).
defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;
8580
// Shift-left-long instruction formats (SHLL/SHLL2).  The immediate must
// equal the source element width (uimm_exact8/16/32), and the non-standard
// immediate encoding requires a custom decoder.
multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
  let DecoderMethod = "DecodeSHLLInstruction" in {
    def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR64:$Rn, uimm_exact8:$Imm),
                            asmop # "\t$Rd.8h, $Rn.8b, $Imm",
                            [], NoItinerary>;

    def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR64:$Rn, uimm_exact16:$Imm),
                            asmop # "\t$Rd.4s, $Rn.4h, $Imm",
                            [], NoItinerary>;

    def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR64:$Rn, uimm_exact32:$Imm),
                            asmop # "\t$Rd.2d, $Rn.2s, $Imm",
                            [], NoItinerary>;

    // "2" variants read the high half of the 128-bit source register.
    def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR128:$Rn, uimm_exact8:$Imm),
                            asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
                            [], NoItinerary>;

    def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR128:$Rn, uimm_exact16:$Imm),
                            asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
                            [], NoItinerary>;

    def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR128:$Rn, uimm_exact32:$Imm),
                            asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
                            [], NoItinerary>;
  }
}
8620
// SHLL: shift left long by the element width (no separate signed variant).
defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;
8622
// Match (ext x) << elementwidth, where the shift amount is a splat of the
// exact element size, onto a low-half SHLL instruction.
class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
                          SDPatternOperator ExtOp, Operand Neon_Imm,
                          string suffix>
  : Pat<(DesTy (shl
          (DesTy (ExtOp (OpTy VPR64:$Rn))),
            (DesTy (Neon_vdup
              (i32 Neon_Imm:$Imm))))),
        (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;
8631
// As NeonI_SHLL_Patterns, but the extension source is the high half of a
// 128-bit register (extracted via GetHigh), selecting the SHLL2 forms.
class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy,
                               SDPatternOperator ExtOp, Operand Neon_Imm,
                               string suffix, PatFrag GetHigh>
  : Pat<(DesTy (shl
          (DesTy (ExtOp
            (OpTy (GetHigh VPR128:$Rn)))),
              (DesTy (Neon_vdup
                (i32 Neon_Imm:$Imm))))),
        (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>;
8641
// SHLL matches both zext and sext sources: shifting left by the full
// element width leaves no original sign/zero bits in the result, so the
// same instruction covers either extension.
def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
                               Neon_High16B>;
def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
                               Neon_High16B>;
def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
                               Neon_High8H>;
def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
                               Neon_High8H>;
def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
                               Neon_High4S>;
def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
                               Neon_High4S>;
8660
// FP narrowing formats (used for FCVTN): 4s->4h and 2d->2s, plus the
// "2" variants that merge into the high half of a tied destination.
multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
  def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4s",
                          [], NoItinerary>;

  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  // High-half writes preserve the low half, so $src is tied to $Rd.
  let Constraints = "$src = $Rd" in {
    def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.8h, $Rn.4s",
                            [], NoItinerary>;

    def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.4s, $Rn.2d",
                            [], NoItinerary>;
  }
}
8684
// FCVTN/FCVTN2: floating-point convert to lower precision, narrowing.
defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;
8686
// Selection patterns for the FP narrows: f32->f16 results are carried as
// v4i16 (half is stored in i16 lanes), f64->f32 as v2f32.  concat_vectors
// forms select the "2" variants with $src in the low half.
multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
                                       SDPatternOperator f32_to_f16_Op,
                                       SDPatternOperator f64_to_f32_Op> {

  def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
              (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;

  def : Pat<(v8i16 (concat_vectors
                (v4i16 VPR64:$src),
                (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
                  (!cast<Instruction>(prefix # "4s8h")
                    (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
                    (v4f32 VPR128:$Rn))>;

  def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
            (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;

  def : Pat<(v4f32 (concat_vectors
              (v2f32 VPR64:$src),
              (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
                (!cast<Instruction>(prefix # "2d4s")
                  (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
                  (v2f64 VPR128:$Rn))>;
}
8711
// FCVTN handles both the f32->f16 intrinsic and the generic f64->f32 round.
defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;
8713
// Double-only narrowing (FCVTXN: round-to-odd f64->f32), including its
// selection patterns for the vcvtxn intrinsic.
multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
                                 bits<5> opcode> {
  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  // "2" variant merges into the high half; low half comes from tied $src.
  def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "2\t$Rd.4s, $Rn.2d",
                          [], NoItinerary> {
    let Constraints = "$src = $Rd";
  }

  def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))),
            (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;

  def : Pat<(v4f32 (concat_vectors
              (v2f32 VPR64:$src),
              (v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "2d4s")
               (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
               VPR128:$Rn)>;
}
8738
// FCVTXN/FCVTXN2: convert f64 to f32 with round-to-odd.
defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>;
8740
// Extracts the high two floats (elements 2..3) of a v4f32 as a v2f32.
def Neon_High4Float : PatFrag<(ops node:$in),
                              (extract_subvector (v4f32 node:$in), (iPTR 2))>;
8743
// FP lengthening formats (FCVTL/FCVTL2): 4h->4s and 2s->2d, reading the
// low 64 bits or ("2" forms) the high half of a 128-bit source.
multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
  def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.4s, $Rn.4h",
                          [], NoItinerary>;

  def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.2d, $Rn.2s",
                          [], NoItinerary>;

  def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "2\t$Rd.4s, $Rn.8h",
                          [], NoItinerary>;

  def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "2\t$Rd.2d, $Rn.4s",
                          [], NoItinerary>;
}
8765
// FCVTL/FCVTL2: floating-point convert to higher precision, lengthening.
defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;
8767
// Selection patterns for FCVTL: the half->float intrinsic on low/high
// halves, and the generic fextend for f32->f64.
multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;

  // High half of an 8h register feeds the "2" form directly.
  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
              (v4i16 (Neon_High8H
                (v8i16 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;

  def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;

  def : Pat<(v2f64 (fextend
              (v2f32 (Neon_High4Float
                (v4f32 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
}
8785
// Attach the lengthening patterns to FCVTL.
defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;
8787
// Common format for same-width single/double conversions (fp<->int,
// fp->fp rounding).  Size is combined with a fixed low bit into the
// 2-bit size field: {Size,0} selects .4s/.2s, {Size,1} selects .2d.
// Result/operand types are parameterized so one multiclass serves
// fp_to_int, int_to_fp and fp_to_fp instantiations below.
multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
                                ValueType ResTy4s, ValueType OpTy4s,
                                ValueType ResTy2d, ValueType OpTy2d,
                                ValueType ResTy2s, ValueType OpTy2s,
                                SDPatternOperator Neon_Op> {

  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (ResTy4s VPR128:$Rd),
                           (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
                        NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.2d, $Rn.2d",
                        [(set (ResTy2d VPR128:$Rd),
                           (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (ResTy2s VPR64:$Rd),
                           (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
                        NoItinerary>;
}
8815
// fp -> int conversions: fix the SD_Conv types to integer results from
// floating-point operands.
multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
                                  bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
                                v2f64, v2i32, v2f32, Neon_Op>;
}
8821
// fp->int converts for each rounding mode; FCVTZS/FCVTZU select the
// generic round-toward-zero nodes, the rest use ARM intrinsics.
defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
                                     int_arm_neon_vcvtns>;
defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
                                     int_arm_neon_vcvtnu>;
defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
                                     int_arm_neon_vcvtps>;
defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
                                     int_arm_neon_vcvtpu>;
defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
                                     int_arm_neon_vcvtms>;
defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
                                     int_arm_neon_vcvtmu>;
defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
                                     int_arm_neon_vcvtas>;
defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
                                     int_arm_neon_vcvtau>;
8840
// int -> fp conversions: fix the SD_Conv types to floating-point results
// from integer operands.
multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
                                  bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
                                v2i64, v2f32, v2i32, Neon_Op>;
}
8846
// SCVTF/UCVTF: signed/unsigned integer to floating-point conversion.
defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;
8849
// fp -> fp same-width operations (rounding, reciprocal estimates, sqrt).
multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
                                 bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
                                v2f64, v2f32, v2f32, Neon_Op>;
}
8855
// FP round-to-integral family plus reciprocal/rsqrt estimates and sqrt.
// Most map onto generic ISD rounding nodes; FRINTN uses an AArch64
// intrinsic (round to nearest, ties to even).
defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
                                     int_aarch64_neon_frintn>;
defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
                                    int_arm_neon_vrecpe>;
defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
                                     int_arm_neon_vrsqrte>;
defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
8869
// 32-bit-element-only unary ops (URECPE/URSQRTE): .4s and .2s forms with
// integer in/out.
multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
                               bits<5> opcode, SDPatternOperator Neon_Op> {
  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4i32 VPR128:$Rd),
                           (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2i32 VPR64:$Rd),
                           (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
                        NoItinerary>;
}
8886
// Unsigned reciprocal / reciprocal-sqrt estimates on integer vectors.
defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
                                  int_arm_neon_vrecpe>;
defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
                                   int_arm_neon_vrsqrte>;
8891
// Crypto Class
// AES round instructions taking two vectors; the state operand is tied
// to the destination.  Requires the Crypto extension.
class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_AES<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                     asmop # "\t$Rd.16b, $Rn.16b",
                     [(set (v16i8 VPR128:$Rd),
                        (v16i8 (opnode (v16i8 VPR128:$src),
                                       (v16i8 VPR128:$Rn))))],
                     NoItinerary>{
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}
8905
// AES single-round encrypt/decrypt.
def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;
8908
// AES mix-columns instructions: single source operand, no tied state.
class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
                      string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_AES<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$Rn),
                     asmop # "\t$Rd.16b, $Rn.16b",
                     [(set (v16i8 VPR128:$Rd),
                        (v16i8 (opnode (v16i8 VPR128:$Rn))))],
                     NoItinerary>;
8917
// AES mix columns / inverse mix columns.
def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
8920
// SHA schedule-update instructions: vector+vector with tied accumulator.
class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_SHA<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                     asmop # "\t$Rd.4s, $Rn.4s",
                     [(set (v4i32 VPR128:$Rd),
                        (v4i32 (opnode (v4i32 VPR128:$src),
                                       (v4i32 VPR128:$Rn))))],
                     NoItinerary> {
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}
8933
// SHA-1 / SHA-256 schedule update, part 1 / part 0.
def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
                                 int_arm_neon_sha1su1>;
def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
                                   int_arm_neon_sha256su0>;
8938
// Scalar SHA op on a single 32-bit FP register (SHA1H).  No pattern here;
// the i32 form is selected by a separate Pat below.
class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_SHA<size, opcode,
                     (outs FPR32:$Rd), (ins FPR32:$Rn),
                     asmop # "\t$Rd, $Rn",
                     [], NoItinerary> {
  let Predicates = [HasNEON, HasCrypto];
  let hasSideEffects = 0;
}
8948
def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
// The sha1h intrinsic works on i32; shuttle through FPR32 around the
// instruction, which only accepts FP registers.
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
          (COPY_TO_REGCLASS (SHA1H (COPY_TO_REGCLASS i32:$Rn, FPR32)), GPR32)>;
8952
8953
// Three-vector SHA schedule updates with tied accumulator.
class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs VPR128:$Rd),
                       (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
                       [(set (v4i32 VPR128:$Rd),
                          (v4i32 (opnode (v4i32 VPR128:$src),
                                         (v4i32 VPR128:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}
8968
// SHA-1 / SHA-256 schedule update, part 0 / part 1.
def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
                                   int_arm_neon_sha1su0>;
def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
                                     int_arm_neon_sha256su1>;
8973
// SHA-256 hash update: two whole 128-bit FP registers plus a .4s vector,
// with the hash state tied to the destination.
class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs FPR128:$Rd),
                       (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd, $Rn, $Rm.4s",
                       [(set (v4i32 FPR128:$Rd),
                          (v4i32 (opnode (v4i32 FPR128:$src),
                                         (v4i32 FPR128:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}
8988
// SHA-256 hash update, parts 1 and 2.
def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
                                   int_arm_neon_sha256h>;
def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
                                    int_arm_neon_sha256h2>;
8993
// SHA-1 hash update: 128-bit state, scalar 32-bit hash element, and a .4s
// schedule vector.  No pattern on the instruction; the intrinsic forms
// (which take i32) are selected by the Pats below.
class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs FPR128:$Rd),
                       (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd, $Rn, $Rm.4s",
                       [], NoItinerary> {
  let Constraints = "$src = $Rd";
  let hasSideEffects = 0;
  let Predicates = [HasNEON, HasCrypto];
}
9004
// SHA-1 hash updates: choose / parity / majority.
def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c">;
def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p">;
def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m">;
9008
// The sha1c/m/p intrinsics carry the hash element as i32; copy it into
// FPR32 to satisfy the instruction's register class.
def : Pat<(int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk),
          (SHA1C v4i32:$hash_abcd,
                 (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>;
def : Pat<(int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk),
          (SHA1M v4i32:$hash_abcd,
                 (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>;
def : Pat<(int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk),
          (SHA1P v4i32:$hash_abcd,
                 (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>;
9018
// Additional patterns to match shl to USHL.
def : Pat<(v8i8 (shl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
          (USHLvvv_8B $Rn, $Rm)>;
def : Pat<(v4i16 (shl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
          (USHLvvv_4H $Rn, $Rm)>;
def : Pat<(v2i32 (shl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
          (USHLvvv_2S $Rn, $Rm)>;
def : Pat<(v1i64 (shl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
          (USHLddd $Rn, $Rm)>;
def : Pat<(v16i8 (shl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
          (USHLvvv_16B $Rn, $Rm)>;
def : Pat<(v8i16 (shl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
          (USHLvvv_8H $Rn, $Rm)>;
def : Pat<(v4i32 (shl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
          (USHLvvv_4S $Rn, $Rm)>;
def : Pat<(v2i64 (shl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
          (USHLvvv_2D $Rn, $Rm)>;

// Scalar v1i8/v1i16/v1i32 shifts: place the scalar FPR operands in the
// low lane of a 64-bit vector, shift with the vector form, and extract
// the scalar result back out.
def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
          (EXTRACT_SUBREG
              (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
                          (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
              sub_8)>;
def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
          (EXTRACT_SUBREG
              (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
                          (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
              sub_16)>;
def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
          (EXTRACT_SUBREG
              (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
                          (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
              sub_32)>;
9052
// Additional patterns to match sra, srl.
// For a vector right shift by vector, the shift amounts of SSHL/USHL are
// negative. Negate the vector of shift amount first.
def : Pat<(v8i8 (srl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
          (USHLvvv_8B $Rn, (NEG8b $Rm))>;
def : Pat<(v4i16 (srl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
          (USHLvvv_4H $Rn, (NEG4h $Rm))>;
def : Pat<(v2i32 (srl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
          (USHLvvv_2S $Rn, (NEG2s $Rm))>;
def : Pat<(v1i64 (srl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
          (USHLddd $Rn, (NEGdd $Rm))>;
def : Pat<(v16i8 (srl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
          (USHLvvv_16B $Rn, (NEG16b $Rm))>;
def : Pat<(v8i16 (srl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
          (USHLvvv_8H $Rn, (NEG8h $Rm))>;
def : Pat<(v4i32 (srl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
          (USHLvvv_4S $Rn, (NEG4s $Rm))>;
def : Pat<(v2i64 (srl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
          (USHLvvv_2D $Rn, (NEG2d $Rm))>;

// Scalar v1i8/v1i16/v1i32 logical right shifts via widen + negate + USHL.
def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
          (EXTRACT_SUBREG
              (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
                          (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))),
              sub_8)>;
def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
          (EXTRACT_SUBREG
              (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
                          (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))),
              sub_16)>;
def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
          (EXTRACT_SUBREG
              (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
                          (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))),
              sub_32)>;
9088
// Arithmetic right shifts: same scheme as srl, but through signed SSHL.
def : Pat<(v8i8 (sra (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
          (SSHLvvv_8B $Rn, (NEG8b $Rm))>;
def : Pat<(v4i16 (sra (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
          (SSHLvvv_4H $Rn, (NEG4h $Rm))>;
def : Pat<(v2i32 (sra (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
          (SSHLvvv_2S $Rn, (NEG2s $Rm))>;
def : Pat<(v1i64 (sra (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
          (SSHLddd $Rn, (NEGdd $Rm))>;
def : Pat<(v16i8 (sra (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
          (SSHLvvv_16B $Rn, (NEG16b $Rm))>;
def : Pat<(v8i16 (sra (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
          (SSHLvvv_8H $Rn, (NEG8h $Rm))>;
def : Pat<(v4i32 (sra (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
          (SSHLvvv_4S $Rn, (NEG4s $Rm))>;
def : Pat<(v2i64 (sra (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
          (SSHLvvv_2D $Rn, (NEG2d $Rm))>;

// Scalar v1i8/v1i16/v1i32 arithmetic right shifts via widen + negate + SSHL.
def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
          (EXTRACT_SUBREG
              (SSHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
                          (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))),
              sub_8)>;
def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
          (EXTRACT_SUBREG
              (SSHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
                          (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))),
              sub_16)>;
def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
          (EXTRACT_SUBREG
              (SSHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
                          (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))),
              sub_32)>;
9121
//
// Patterns for handling half-precision values
//

// Convert between f16 value and f32 value
def : Pat<(f32 (f16_to_f32 (i32 GPR32:$Rn))),
          (FCVTsh (EXTRACT_SUBREG (FMOVsw $Rn), sub_16))>;
def : Pat<(i32 (f32_to_f16 (f32 FPR32:$Rn))),
          (FMOVws (SUBREG_TO_REG (i64 0), (f16 (FCVThs $Rn)), sub_16))>;

// Convert f16 value coming in as i16 value to f32.  The mask/assertzext
// wrappers only guarantee the upper bits are zero; the conversion itself
// is identical.
def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))),
          (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))),
          (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;

// Fold an f32 -> f16 -> f32 round trip of the same value to the source.
def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 (
            f32_to_f16 (f32 FPR32:$Rn))))))),
          (f32 FPR32:$Rn)>;
9141
// Patterns for vector extract of half-precision FP value in i16 storage type.
// DUP the selected lane into an h register, then convert half -> single.
def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
            (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))),
          (FCVTsh (f16 (DUPhv_H
            (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            neon_uimm2_bare:$Imm)))>;

def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
            (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))),
          (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>;
9152
// Patterns for vector insert of half-precision FP value 0 in i16 storage type.
// A zero half is materialized by FMOV of WZR into an s register, taking its
// low 16 bits, and INSELh copies that lane into the destination.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
            (neon_uimm3_bare:$Imm))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn),
            (v8i16 (SUBREG_TO_REG (i64 0),
              (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
              sub_16)),
            neon_uimm3_bare:$Imm, 0))>;

// 64-bit destination: widen to 128 bits for INSELh, then take the low half.
def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
            (neon_uimm2_bare:$Imm))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0),
                (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
                sub_16)),
              neon_uimm2_bare:$Imm, 0)),
            sub_64))>;
9174
// Patterns for vector insert of half-precision FP value in i16 storage type.
// The i16-carried half arrives through a GPR; move it to an s register and
// insert its low 16 bits via INSELh.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint
              (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
            (neon_uimm3_bare:$Imm))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn),
            (v8i16 (SUBREG_TO_REG (i64 0),
              (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
              sub_16)),
            neon_uimm3_bare:$Imm, 0))>;

// 64-bit destination: widen to 128 bits for INSELh, then take the low half.
def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint
              (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
            (neon_uimm2_bare:$Imm))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0),
                (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
                sub_16)),
              neon_uimm2_bare:$Imm, 0)),
            sub_64))>;
9198
// Plain lane-to-lane i16 copy selects INSELh directly.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
              (neon_uimm3_bare:$Imm1))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
            neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;

// Patterns for vector copy of half-precision FP value in i16 storage type.
// An extract -> f16->f32 -> f32->f16 -> insert round trip between lanes is
// just a lane copy, so it also selects a single INSELh.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
              (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
              65535)))))))),
            (neon_uimm3_bare:$Imm1))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
            neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;

def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
              (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)),
              65535)))))))),
            (neon_uimm3_bare:$Imm1))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
              neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)),
            sub_64))>;
9225
9226