AArch64: use RegisterOperand for NEON registers.
[oota-llvm.git] / lib / Target / AArch64 / AArch64InstrNEON.td
1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file describes the AArch64 NEON instruction set.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
// Bitwise-select node: one vector result and three operands, each of which is
// constrained to have the same type as the result.
17 def Neon_bsl       : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3,
18                       [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
19                       SDTCisSameAs<0, 3>]>>;
20
21 // (outs Result), (ins Imm, OpCmode)
// Type profile shared by the move-immediate nodes below: one vector result and
// two operands. Only the first operand (Imm) is constrained to i32 here; the
// profile places no constraint on the second operand (OpCmode).
22 def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
23
24 def Neon_movi     : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
25
26 def Neon_mvni     : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
27
28 // (outs Result), (ins Imm)
// One vector result and a single i32 operand.
29 def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
30                         [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
31
32 // (outs Result), (ins LHS, RHS, CondCode)
// The result must be a vector and LHS/RHS must share a type; the profile does
// not tie the result type to the operand type, and leaves CondCode
// unconstrained.
33 def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
34                  [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;
35
36 // (outs Result), (ins LHS, 0/0.0 constant, CondCode)
// Result and LHS must both be vectors; the constant and CondCode operands are
// not constrained by the profile.
37 def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
38                  [SDTCisVec<0>,  SDTCisVec<1>]>>;
39
40 // (outs Result), (ins LHS, RHS)
// Vector result; LHS and RHS must share a type.
41 def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
42                  [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;
43
// (outs Result), (ins Imm): vector result built from a single i32 operand.
44 def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1, 
45                     [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
46
// Shift-by-immediate profile: vector result, first operand of the same type as
// the result, second operand an i32.
47 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
48                                      SDTCisVT<2, i32>]>;
// NOTE(review): the def names say "qrshl" while the ISD node names they bind to
// say "QSHL" -- confirm which spelling reflects the intended operation.
49 def Neon_sqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
50 def Neon_uqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
51
52
53 //===----------------------------------------------------------------------===//
54 // Multiclasses
55 //===----------------------------------------------------------------------===//
56
// Multiclass NeonI_3VSame_B_sizes: emits the byte-arrangement forms of a
// three-register instruction:
//   _8B  - first NeonI_3VSame bit 0b0, operands in VPR64,  pattern typed v8i8
//   _16B - first NeonI_3VSame bit 0b1, operands in VPR128, pattern typed v16i8
// u, size and opcode are forwarded unchanged to NeonI_3VSame. A separate
// pattern operator is taken for each width, and Commutable is forwarded to
// isCommutable on both defs.
57 multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size,  bits<5> opcode,
58                                 string asmop, SDPatternOperator opnode8B,
59                                 SDPatternOperator opnode16B,
60                                 bit Commutable = 0>
61 {
62   let isCommutable = Commutable in {
63     def _8B :  NeonI_3VSame<0b0, u, size, opcode,
64                (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
65                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
66                [(set (v8i8 VPR64:$Rd),
67                   (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
68                NoItinerary>;
69
70     def _16B : NeonI_3VSame<0b1, u, size, opcode,
71                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
72                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
73                [(set (v16i8 VPR128:$Rd),
74                   (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
75                NoItinerary>;
76   }
77
78 }
79
// Multiclass NeonI_3VSame_HS_sizes: emits the halfword and word arrangements
// of a three-register integer instruction using a single pattern operator:
//   _4H / _8H use size 0b01 (v4i16 in VPR64, v8i16 in VPR128)
//   _2S / _4S use size 0b10 (v2i32 in VPR64, v4i32 in VPR128)
// The first NeonI_3VSame bit is 0b0 for the VPR64 forms and 0b1 for the VPR128
// forms. Commutable is forwarded to isCommutable on every def.
80 multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
81                                   string asmop, SDPatternOperator opnode,
82                                   bit Commutable = 0>
83 {
84   let isCommutable = Commutable in {
85     def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
86               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
87               asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
88               [(set (v4i16 VPR64:$Rd),
89                  (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
90               NoItinerary>;
91
92     def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
93               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
94               asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
95               [(set (v8i16 VPR128:$Rd),
96                  (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
97               NoItinerary>;
98
99     def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
100               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
101               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
102               [(set (v2i32 VPR64:$Rd),
103                  (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
104               NoItinerary>;
105
106     def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
107               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
108               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
109               [(set (v4i32 VPR128:$Rd),
110                  (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
111               NoItinerary>;
112   }
113 }
// Multiclass NeonI_3VSame_BHS_sizes: inherits the _4H/_8H/_2S/_4S forms from
// NeonI_3VSame_HS_sizes and adds the byte arrangements _8B and _16B with
// size 0b00, using the same pattern operator for every arrangement.
114 multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
115                                   string asmop, SDPatternOperator opnode,
116                                   bit Commutable = 0>
117    : NeonI_3VSame_HS_sizes<u, opcode,  asmop, opnode, Commutable>
118 {
119   let isCommutable = Commutable in {
120     def _8B :  NeonI_3VSame<0b0, u, 0b00, opcode,
121                (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
122                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
123                [(set (v8i8 VPR64:$Rd),
124                   (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
125                NoItinerary>;
126
127     def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
128                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
129                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
130                [(set (v16i8 VPR128:$Rd),
131                   (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
132                NoItinerary>;
133   }
134 }
135
// Multiclass NeonI_3VSame_BHSD_sizes: inherits every arrangement produced by
// NeonI_3VSame_BHS_sizes and adds the doubleword form _2D (size 0b11, v2i64 in
// VPR128). Only a VPR128 doubleword form is defined here.
136 multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
137                                    string asmop, SDPatternOperator opnode,
138                                    bit Commutable = 0>
139    : NeonI_3VSame_BHS_sizes<u, opcode,  asmop, opnode, Commutable>
140 {
141   let isCommutable = Commutable in {
142     def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
143               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
144               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
145               [(set (v2i64 VPR128:$Rd),
146                  (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
147               NoItinerary>;
148   }
149 }
150
151 // Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
152 // but Result types can be integer or floating point types.
// Emits _2S (v2f32 operands, VPR64), _4S (v4f32 operands, VPR128) and _2D
// (v2f64 operands, VPR128). The 2-bit size field is built from the single
// `size` bit as its high bit; the low bit is 0 for the .2s/.4s forms and 1 for
// the .2d form. Each arrangement takes its own pattern operator and its own
// result value type.
153 multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
154                                  string asmop, SDPatternOperator opnode2S,
155                                  SDPatternOperator opnode4S,
156                                  SDPatternOperator opnode2D,
157                                  ValueType ResTy2S, ValueType ResTy4S,
158                                  ValueType ResTy2D, bit Commutable = 0>
159 {
160   let isCommutable = Commutable in {
161     def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
162               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
163               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
164               [(set (ResTy2S VPR64:$Rd),
165                  (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
166               NoItinerary>;
167
168     def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
169               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
170               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
171               [(set (ResTy4S VPR128:$Rd),
172                  (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
173               NoItinerary>;
174
175     def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
176               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
177               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
178               [(set (ResTy2D VPR128:$Rd),
179                  (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
180                NoItinerary>;
181   }
182 }
183
184 //===----------------------------------------------------------------------===//
185 // Instruction Definitions
186 //===----------------------------------------------------------------------===//
187
188 // Vector Arithmetic Instructions
189
190 // Vector Add (Integer and Floating-Point)
191
// Integer instantiations pass <u, opcode, mnemonic, operator, Commutable>.
// Floating-point instantiations pass <u, size bit, opcode, mnemonic, one
// operator per arrangement, one result type per arrangement, Commutable>.
192 defm ADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
193 defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
194                                      v2f32, v4f32, v2f64, 1>;
195
196 // Vector Sub (Integer and Floating-Point)
197
198 defm SUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
199 defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
200                                      v2f32, v4f32, v2f64, 0>;
201
202 // Vector Multiply (Integer and Floating-Point)
203
// The integer multiply uses the BHS multiclass, so no _2D form is generated.
204 defm MULvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
205 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
206                                      v2f32, v4f32, v2f64, 1>;
207
208 // Vector Multiply (Polynomial)
209
// Byte arrangements only; both widths select from int_arm_neon_vmulp.
210 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
211                                     int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
212
213 // Vector Multiply-accumulate and Multiply-subtract (Integer)
214
215 // class NeonI_3VSame_Constraint_impl: a NeonI_3VSame that is parameterised on
216 // the register class, value type and lane suffix rather than fixing them.
// It takes an extra input operand $src, which is tied to the destination $Rd
// through the "$src = $Rd" constraint, and passes $src to opnode as its first
// operand. asmlane (e.g. ".8b") is appended to each register in the assembly
// string.
217 class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
218   RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size, 
219   bits<5> opcode, SDPatternOperator opnode>
220   : NeonI_3VSame<q, u, size, opcode,
221     (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
222     asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
223     [(set (OpTy VPRC:$Rd),
224        (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
225     NoItinerary> {
226   let Constraints = "$src = $Rd";
227 }
228
// Integer multiply-accumulate fragment: Acc + (Lhs * Rhs).
229 def Neon_mla : PatFrag<(ops node:$Acc, node:$Lhs, node:$Rhs),
230                        (add node:$Acc, (mul node:$Lhs, node:$Rhs))>;
231
// Integer multiply-subtract fragment: Acc - (Lhs * Rhs).
232 def Neon_mls : PatFrag<(ops node:$Acc, node:$Lhs, node:$Rhs),
233                        (sub node:$Acc, (mul node:$Lhs, node:$Rhs))>;
234
235
// Integer multiply-accumulate (MLA) and multiply-subtract (MLS), one def per
// arrangement. The arguments after the value type are q, u, size, opcode and
// the selection fragment. MLA and MLS share the opcode and differ in the u bit
// (0b0 for MLA, 0b1 for MLS). No .2d arrangement is defined.
236 def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
237                                              0b0, 0b0, 0b00, 0b10010, Neon_mla>;
238 def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
239                                              0b1, 0b0, 0b00, 0b10010, Neon_mla>;
240 def MLAvvv_4H:  NeonI_3VSame_Constraint_impl<"mla", ".4h",  VPR64,  v4i16,
241                                              0b0, 0b0, 0b01, 0b10010, Neon_mla>;
242 def MLAvvv_8H:  NeonI_3VSame_Constraint_impl<"mla", ".8h",  VPR128, v8i16,
243                                              0b1, 0b0, 0b01, 0b10010, Neon_mla>;
244 def MLAvvv_2S:  NeonI_3VSame_Constraint_impl<"mla", ".2s",  VPR64,  v2i32,
245                                              0b0, 0b0, 0b10, 0b10010, Neon_mla>;
246 def MLAvvv_4S:  NeonI_3VSame_Constraint_impl<"mla", ".4s",  VPR128, v4i32,
247                                              0b1, 0b0, 0b10, 0b10010, Neon_mla>;
248
249 def MLSvvv_8B:  NeonI_3VSame_Constraint_impl<"mls", ".8b",  VPR64,  v8i8,
250                                              0b0, 0b1, 0b00, 0b10010, Neon_mls>;
251 def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
252                                              0b1, 0b1, 0b00, 0b10010, Neon_mls>;
253 def MLSvvv_4H:  NeonI_3VSame_Constraint_impl<"mls", ".4h",  VPR64,  v4i16,
254                                              0b0, 0b1, 0b01, 0b10010, Neon_mls>;
255 def MLSvvv_8H:  NeonI_3VSame_Constraint_impl<"mls", ".8h",  VPR128, v8i16,
256                                              0b1, 0b1, 0b01, 0b10010, Neon_mls>;
257 def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
258                                              0b0, 0b1, 0b10, 0b10010, Neon_mls>;
259 def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
260                                              0b1, 0b1, 0b10, 0b10010, Neon_mls>;
261
262 // Vector Multiply-accumulate and Multiply-subtract (Floating Point)
263
// Floating-point multiply-accumulate fragment: Acc + (Lhs * Rhs).
264 def Neon_fmla : PatFrag<(ops node:$Acc, node:$Lhs, node:$Rhs),
265                         (fadd node:$Acc, (fmul node:$Lhs, node:$Rhs))>;
266
// Floating-point multiply-subtract fragment: Acc - (Lhs * Rhs).
267 def Neon_fmls : PatFrag<(ops node:$Acc, node:$Lhs, node:$Rhs),
268                         (fsub node:$Acc, (fmul node:$Lhs, node:$Rhs))>;
269
// Floating-point fused multiply-accumulate/subtract. Every def in this block,
// together with its fadd/fsub-of-fmul selection pattern, carries the
// predicates HasNEON and UseFusedMAC. FMLA uses size 0b0x and FMLS uses size
// 0b1x; the low size bit is set only for the .2d arrangement.
270 let Predicates = [HasNEON, UseFusedMAC] in {
271 def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s",  VPR64,  v2f32,
272                                              0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
273 def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s",  VPR128, v4f32,
274                                              0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
275 def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d",  VPR128, v2f64,
276                                              0b1, 0b0, 0b01, 0b11001, Neon_fmla>;
277
278 def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s",  VPR64,  v2f32,
279                                               0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
280 def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s",  VPR128, v4f32,
281                                              0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
282 def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d",  VPR128, v2f64,
283                                              0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
284 }
285
286 // We're also allowed to match the fma instruction regardless of compile
287 // options.
// These patterns sit outside the UseFusedMAC block above. The fma node takes
// (Rn, Rm, Ra); the instruction takes the accumulator first, matching the tied
// $src operand of NeonI_3VSame_Constraint_impl.
288 def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
289           (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
290 def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
291           (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
292 def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
293           (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
294
// An fma whose first multiplicand is negated selects FMLS. Only negation of
// the first operand is matched here.
295 def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
296           (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
297 def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
298           (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
299 def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
300           (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
301
302 // Vector Divide (Floating-Point)
303
304 defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
305                                      v2f32, v4f32, v2f64, 0>;
306
307 // Vector Bitwise Operations
308
// AND, EOR and ORR share opcode 0b00011 and are told apart by the u bit and
// the size field. The multiclass only generates v8i8/v16i8 patterns; the other
// integer vector types are covered by Neon_bitwise2V_patterns further down.
309 // Vector Bitwise AND
310
311 defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;
312
313 // Vector Bitwise Exclusive OR
314
315 defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;
316
317 // Vector Bitwise OR
318
319 defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
320
321 // ORR disassembled as MOV if Vn==Vm
322
323 // Vector Move - register
324 // Alias for ORR if Vn=Vm.
325 // FIXME: This is actually the preferred syntax but TableGen can't deal with
326 // custom printing of aliases.
// Each alias passes $Rn for both source operands of ORR.
// NOTE(review): the trailing 0 is presumably the alias's "emit when printing"
// flag, which would match the FIXME above -- confirm against NeonInstAlias.
327 def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
328                     (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
329 def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
330                     (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
331
// Leaf that matches a Neon_movi node whose (Imm, OpCmode) operand pair decodes,
// via A64Imms::decodeNeonModImm, to 8-bit elements with the value 0xff.
332 def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
333   ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
334   ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
  // EltBits is an out-parameter filled in by decodeNeonModImm.
335   unsigned EltBits;
336   uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
337     OpCmodeConstVal->getZExtValue(), EltBits);
338   return (EltBits == 8 && EltVal == 0xff);
339 }]>;
340
341
// Bitwise NOT, expressed as an XOR with an all-ones byte vector that has been
// bitconverted to the operand's type (64-bit and 128-bit variants).
342 def Neon_not8B  : PatFrag<(ops node:$Val),
343                           (xor node:$Val, (bitconvert (v8i8 Neon_immAllOnes)))>;
344 def Neon_not16B : PatFrag<(ops node:$Val),
345                           (xor node:$Val, (bitconvert (v16i8 Neon_immAllOnes)))>;
346
// OR-NOT: Lhs | ~Rhs.
347 def Neon_orn8B : PatFrag<(ops node:$Lhs, node:$Rhs),
348                          (or node:$Lhs, (Neon_not8B node:$Rhs))>;
349
350 def Neon_orn16B : PatFrag<(ops node:$Lhs, node:$Rhs),
351                           (or node:$Lhs, (Neon_not16B node:$Rhs))>;
352
// Bit clear: Lhs & ~Rhs.
353 def Neon_bic8B : PatFrag<(ops node:$Lhs, node:$Rhs),
354                          (and node:$Lhs, (Neon_not8B node:$Rhs))>;
355
356 def Neon_bic16B : PatFrag<(ops node:$Lhs, node:$Rhs),
357                           (and node:$Lhs, (Neon_not16B node:$Rhs))>;
358
359
360 // Vector Bitwise OR NOT - register
361
// Not commutable: only the second operand is inverted by the fragment.
362 defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
363                                    Neon_orn8B, Neon_orn16B, 0>;
364
365 // Vector Bitwise Bit Clear (AND NOT) - register
366
// Not commutable: only the second operand is inverted by the fragment.
367 defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
368                                    Neon_bic8B, Neon_bic16B, 0>;
369
// Multiclass Neon_bitwise2V_patterns: extra selection patterns that map a
// two-operand bitwise operator on the non-byte integer vector types onto the
// byte-typed instructions. v2i32/v4i16/v1i64 select INST8B (VPR64) and
// v4i32/v8i16/v2i64 select INST16B (VPR128).
370 multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
371                                    SDPatternOperator opnode16B,
372                                    Instruction INST8B,
373                                    Instruction INST16B> {
374   def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
375             (INST8B VPR64:$Rn, VPR64:$Rm)>;
376   def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
377             (INST8B VPR64:$Rn, VPR64:$Rm)>;
378   def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
379             (INST8B VPR64:$Rn, VPR64:$Rm)>;
380   def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
381             (INST16B VPR128:$Rn, VPR128:$Rm)>;
382   def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
383             (INST16B VPR128:$Rn, VPR128:$Rm)>;
384   def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
385             (INST16B VPR128:$Rn, VPR128:$Rm)>;
386 }
387
388 // Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
// Each line supplies the 64-bit operator, the 128-bit operator, and the _8B and
// _16B instructions they select.
389 defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
390 defm : Neon_bitwise2V_patterns<or,  or,  ORRvvv_8B, ORRvvv_16B>;
391 defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
392 defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
393 defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
394
395 //   Vector Bitwise Select
// Selected from the Neon_bsl node. The first source operand ($src) is tied to
// the destination by NeonI_3VSame_Constraint_impl.
396 def BSLvvv_8B  : NeonI_3VSame_Constraint_impl<"bsl", ".8b",  VPR64, v8i8,
397                                               0b0, 0b1, 0b01, 0b00011, Neon_bsl>;
398
399 def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
400                                               0b1, 0b1, 0b01, 0b00011, Neon_bsl>;
401
// Multiclass Neon_bitwise3V_patterns: extra selection patterns for a
// three-operand bitwise instruction whose defs are typed on bytes only.
// INST8B is used for every 64-bit vector type and INST16B for every 128-bit
// vector type. Three groups of patterns are generated: opnode on the other
// integer types, the expanded and/or/not form, and the int_arm_neon_vbsl
// intrinsic.
402 multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
403                                    Instruction INST8B,
404                                    Instruction INST16B> {
405   // Select the byte-typed instructions for the other integer vector types too.
406   def : Pat<(v2i32 (opnode VPR64:$src,VPR64:$Rn, VPR64:$Rm)),
407             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
408   def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
409             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
410   def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
411             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
412   def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
413             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
414   def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
415             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
416   def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
417             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
418
419   // Also match the expanded form (Rn & Rd) | (Rm & ~Rd), with Rd as the mask.
420   def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
421                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
422           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
423   def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
424                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
425           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
426   def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
427                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
428           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
429   def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
430                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
431           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
432   def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
433                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
434           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
435   def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
436                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
437           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
438   def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
439                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
440           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
441   def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
442                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
443           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
444
445   // Match int_arm_neon_vbsl for the integer and floating-point types below.
446   def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
447                     (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
448             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
449   def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
450                     (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
451             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
452   def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
453                     (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
454             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
455   def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
456                     (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
457             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
458   def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
459                     (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
460             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
461   def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
462                     (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
463             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
464   def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
465                     (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
466             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
467   def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
468                     (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
469             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
470   def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
471                     (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
472             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
473   def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
474                     (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
475             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
476   def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
477                     (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
478             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
479 }
480
481 // Additional patterns for bitwise instruction BSL
482 defm: Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;
483
// Fragment with the shape of Neon_bsl whose predicate always returns false, so
// it never matches. It is passed as the pattern operator of the BIT and BIF
// definitions that follow.
484 def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
485                            (Neon_bsl node:$src, node:$Rn, node:$Rm),
486                            [{ (void)N; return false; }]>;
487
488 // Vector Bitwise Insert if True
489
// BIT and BIF use the same u bit and opcode as BSL and differ only in the size
// field (0b10 for BIT, 0b11 for BIF). Their pattern operator is Neon_NoBSLop,
// whose predicate always fails, so these defs are never chosen by pattern
// matching.
490 def BITvvv_8B  : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64,   v8i8,
491                    0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
492 def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
493                    0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
494
495 // Vector Bitwise Insert if False
496
497 def BIFvvv_8B  : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64,  v8i8,
498                                 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
499 def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
500                                 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
501
502 // Vector Absolute Difference and Accumulate (Signed, Unsigned)
503
// Accumulating absolute difference: Acc + vabdu(Lhs, Rhs) for the unsigned
// fragment and Acc + vabds(Lhs, Rhs) for the signed one.
504 def Neon_uaba : PatFrag<(ops node:$Acc, node:$Lhs, node:$Rhs),
505                        (add node:$Acc, (int_arm_neon_vabdu node:$Lhs, node:$Rhs))>;
506 def Neon_saba : PatFrag<(ops node:$Acc, node:$Lhs, node:$Rhs),
507                        (add node:$Acc, (int_arm_neon_vabds node:$Lhs, node:$Rhs))>;
508
509 // Vector Absolute Difference and Accumulate (Unsigned)
// UABA and SABA share opcode 0b01111 and differ in the u bit (0b1 unsigned,
// 0b0 signed). The accumulator is the tied $src operand. No .2d arrangement is
// defined.
510 def UABAvvv_8B :  NeonI_3VSame_Constraint_impl<"uaba", ".8b",  VPR64,  v8i8,
511                     0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
512 def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
513                     0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
514 def UABAvvv_4H :  NeonI_3VSame_Constraint_impl<"uaba", ".4h",  VPR64,  v4i16,
515                     0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
516 def UABAvvv_8H :  NeonI_3VSame_Constraint_impl<"uaba", ".8h",  VPR128, v8i16,
517                     0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
518 def UABAvvv_2S :  NeonI_3VSame_Constraint_impl<"uaba", ".2s",  VPR64,  v2i32,
519                     0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
520 def UABAvvv_4S :  NeonI_3VSame_Constraint_impl<"uaba", ".4s",  VPR128, v4i32,
521                     0b1, 0b1, 0b10, 0b01111, Neon_uaba>;
522
523 // Vector Absolute Difference and Accumulate (Signed)
524 def SABAvvv_8B :  NeonI_3VSame_Constraint_impl<"saba", ".8b",  VPR64,  v8i8,
525                     0b0, 0b0, 0b00, 0b01111, Neon_saba>;
526 def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
527                     0b1, 0b0, 0b00, 0b01111, Neon_saba>;
528 def SABAvvv_4H :  NeonI_3VSame_Constraint_impl<"saba", ".4h",  VPR64,  v4i16,
529                     0b0, 0b0, 0b01, 0b01111, Neon_saba>;
530 def SABAvvv_8H :  NeonI_3VSame_Constraint_impl<"saba", ".8h",  VPR128, v8i16,
531                     0b1, 0b0, 0b01, 0b01111, Neon_saba>;
532 def SABAvvv_2S :  NeonI_3VSame_Constraint_impl<"saba", ".2s",  VPR64,  v2i32,
533                     0b0, 0b0, 0b10, 0b01111, Neon_saba>;
534 def SABAvvv_4S :  NeonI_3VSame_Constraint_impl<"saba", ".4s",  VPR128, v4i32,
535                     0b1, 0b0, 0b10, 0b01111, Neon_saba>;
536
537
538 // Vector Absolute Difference (Signed, Unsigned)
// B/H/S arrangements only; both are declared non-commutable (Commutable = 0).
539 defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
540 defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;
541
542 // Vector Absolute Difference (Floating Point)
// Selected from int_arm_neon_vabds applied to floating-point vector operands.
543 defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
544                                     int_arm_neon_vabds, int_arm_neon_vabds,
545                                     int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
546
547 // Vector Reciprocal Step (Floating Point)
548 defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
549                                        int_arm_neon_vrecps, int_arm_neon_vrecps,
550                                        int_arm_neon_vrecps,
551                                        v2f32, v4f32, v2f64, 0>;
552
553 // Vector Reciprocal Square Root Step (Floating Point)
// Same u bit and opcode as FRECPS; only the size bit differs (0b1 here).
554 defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
555                                         int_arm_neon_vrsqrts,
556                                         int_arm_neon_vrsqrts,
557                                         int_arm_neon_vrsqrts,
558                                         v2f32, v4f32, v2f64, 0>;
559
560 // Vector Comparisons
561
// Register-register comparison fragments. Each one fixes the condition-code
// operand of Neon_cmp: SETEQ, SETUGE, SETGE, SETUGT and SETGT respectively.
562 def Neon_cmeq  : PatFrag<(ops node:$Rn, node:$Rm),
563                          (Neon_cmp node:$Rn, node:$Rm, SETEQ)>;
564 def Neon_cmphs : PatFrag<(ops node:$Rn, node:$Rm),
565                          (Neon_cmp node:$Rn, node:$Rm, SETUGE)>;
566 def Neon_cmge  : PatFrag<(ops node:$Rn, node:$Rm),
567                          (Neon_cmp node:$Rn, node:$Rm, SETGE)>;
568 def Neon_cmhi  : PatFrag<(ops node:$Rn, node:$Rm),
569                          (Neon_cmp node:$Rn, node:$Rm, SETUGT)>;
570 def Neon_cmgt  : PatFrag<(ops node:$Rn, node:$Rm),
571                          (Neon_cmp node:$Rn, node:$Rm, SETGT)>;
572
573 // NeonI_compare_aliases class: swaps register operands to implement
574 // comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
// The assembly string lists $Rd, $Rn, $Rm, while the target instruction is
// given $Rd, $Rm, $Rn. asmlane (e.g. ".8b") is appended to each register.
// NOTE(review): the trailing 0b0 is presumably the alias's "emit when
// printing" flag -- confirm against NeonInstAlias.
575 class NeonI_compare_aliases<string asmop, string asmlane,
576                             Instruction inst, RegisterOperand VPRC>
577   : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
578                     ", $Rm" # asmlane,
579                   (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
580
581 // Vector Comparisons (Integer)
582
583 // Vector Compare Mask Equal (Integer)
// Compare-equal is symmetric in its two source operands, so it is commutable.
// Commutability is requested through the multiclass's own Commutable argument,
// as ADDvvv and MULvvv do, rather than through an enclosing `let` that
// contradicted a Commutable argument of 0.
585 defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 1>;
587
588 // Vector Compare Mask Higher or Same (Unsigned Integer)
// In the four ordered comparisons below the unsigned forms set u = 0b1 and the
// signed forms set u = 0b0. Opcode 0b00111 is the "or equal/same" comparison
// and 0b00110 the strict one. All are declared non-commutable.
589 defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;
590
591 // Vector Compare Mask Greater Than or Equal (Integer)
592 defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;
593
594 // Vector Compare Mask Higher (Unsigned Integer)
595 defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;
596
597 // Vector Compare Mask Greater Than (Integer)
598 defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;
599
600 // Vector Compare Mask Bitwise Test (Integer)
// Shares opcode 0b10001 with CMEQ; the u bit (0b0 here) tells them apart.
601 defm CMTSTvvv:  NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
602
// The four groups below are assembler aliases built with
// NeonI_compare_aliases: each "less" mnemonic maps to the corresponding
// "greater/higher" instruction with $Rn and $Rm exchanged. One alias is
// defined per arrangement, from .8b through .2d.
603 // Vector Compare Mask Less or Same (Unsigned Integer)
604 // CMLS is alias for CMHS with operands reversed.
605 def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b",  CMHSvvv_8B,  VPR64>;
606 def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
607 def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h",  CMHSvvv_4H,  VPR64>;
608 def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h",  CMHSvvv_8H,  VPR128>;
609 def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s",  CMHSvvv_2S,  VPR64>;
610 def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s",  CMHSvvv_4S,  VPR128>;
611 def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d",  CMHSvvv_2D,  VPR128>;
612
613 // Vector Compare Mask Less Than or Equal (Integer)
614 // CMLE is alias for CMGE with operands reversed.
615 def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b",  CMGEvvv_8B,  VPR64>;
616 def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
617 def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h",  CMGEvvv_4H,  VPR64>;
618 def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h",  CMGEvvv_8H,  VPR128>;
619 def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s",  CMGEvvv_2S,  VPR64>;
620 def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s",  CMGEvvv_4S,  VPR128>;
621 def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d",  CMGEvvv_2D,  VPR128>;
622
623 // Vector Compare Mask Lower (Unsigned Integer)
624 // CMLO is alias for CMHI with operands reversed.
625 def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b",  CMHIvvv_8B,  VPR64>;
626 def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
627 def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h",  CMHIvvv_4H,  VPR64>;
628 def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h",  CMHIvvv_8H,  VPR128>;
629 def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s",  CMHIvvv_2S,  VPR64>;
630 def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s",  CMHIvvv_4S,  VPR128>;
631 def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d",  CMHIvvv_2D,  VPR128>;
632
633 // Vector Compare Mask Less Than (Integer)
634 // CMLT is alias for CMGT with operands reversed.
635 def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b",  CMGTvvv_8B,  VPR64>;
636 def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
637 def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h",  CMGTvvv_4H,  VPR64>;
638 def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h",  CMGTvvv_8H,  VPR128>;
639 def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s",  CMGTvvv_2S,  VPR64>;
640 def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s",  CMGTvvv_4S,  VPR128>;
641 def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d",  CMGTvvv_2D,  VPR128>;
642
643
// Assembly-parser operand class "UImm0": accepted via the isUImm<0>
// predicate and rendered with the generic immediate renderer.
def neon_uimm0_asmoperand : AsmOperandClass {
  let Name            = "UImm0";
  let RenderMethod    = "addImmOperands";
  let PredicateMethod = "isUImm<0>";
}
650
// i32 immediate operand that only matches the constant 0; used as the
// explicit "#0" operand of the compare-against-zero instructions.
def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
  let PrintMethod      = "printNeonUImm0Operand";
  let ParserMatchClass = neon_uimm0_asmoperand;
}
656
// Integer compare-against-zero, one def per vector arrangement.
//   u, opcode - forwarded to NeonI_2VMisc.
//   asmop     - mnemonic.
//   CC        - condition code matched as the third Neon_cmpz operand.
multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC> {
  def _8B  : NeonI_2VMisc<0b0, u, 0b00, opcode,
               (outs VPR64:$Rd),
               (ins VPR64:$Rn, neon_uimm0:$Imm),
               !strconcat(asmop, "\t$Rd.8b, $Rn.8b, $Imm"),
               [(set (v8i8 VPR64:$Rd),
                  (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
               NoItinerary>;

  def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
               (outs VPR128:$Rd),
               (ins VPR128:$Rn, neon_uimm0:$Imm),
               !strconcat(asmop, "\t$Rd.16b, $Rn.16b, $Imm"),
               [(set (v16i8 VPR128:$Rd),
                  (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
               NoItinerary>;

  def _4H  : NeonI_2VMisc<0b0, u, 0b01, opcode,
               (outs VPR64:$Rd),
               (ins VPR64:$Rn, neon_uimm0:$Imm),
               !strconcat(asmop, "\t$Rd.4h, $Rn.4h, $Imm"),
               [(set (v4i16 VPR64:$Rd),
                  (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
               NoItinerary>;

  def _8H  : NeonI_2VMisc<0b1, u, 0b01, opcode,
               (outs VPR128:$Rd),
               (ins VPR128:$Rn, neon_uimm0:$Imm),
               !strconcat(asmop, "\t$Rd.8h, $Rn.8h, $Imm"),
               [(set (v8i16 VPR128:$Rd),
                  (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
               NoItinerary>;

  def _2S  : NeonI_2VMisc<0b0, u, 0b10, opcode,
               (outs VPR64:$Rd),
               (ins VPR64:$Rn, neon_uimm0:$Imm),
               !strconcat(asmop, "\t$Rd.2s, $Rn.2s, $Imm"),
               [(set (v2i32 VPR64:$Rd),
                  (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
               NoItinerary>;

  def _4S  : NeonI_2VMisc<0b1, u, 0b10, opcode,
               (outs VPR128:$Rd),
               (ins VPR128:$Rn, neon_uimm0:$Imm),
               !strconcat(asmop, "\t$Rd.4s, $Rn.4s, $Imm"),
               [(set (v4i32 VPR128:$Rd),
                  (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
               NoItinerary>;

  def _2D  : NeonI_2VMisc<0b1, u, 0b11, opcode,
               (outs VPR128:$Rd),
               (ins VPR128:$Rn, neon_uimm0:$Imm),
               !strconcat(asmop, "\t$Rd.2d, $Rn.2d, $Imm"),
               [(set (v2i64 VPR128:$Rd),
                  (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
               NoItinerary>;
}
708
// Integer compares against zero (all comparisons are signed).

// CMEQ #0: equal to zero.
defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;

// CMGE #0: greater than or equal to zero.
defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;

// CMGT #0: greater than zero.
defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;

// CMLE #0: less than or equal to zero.
defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;

// CMLT #0: less than zero.
defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
723
// Vector Comparisons (Floating Point)
// The results are integer masks, hence the v2i32/v4i32/v2i64 result types.

// FCMEQ: equal. Defined with isCommutable set on the enclosing scope.
let isCommutable = 1 in
defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq",
                                      Neon_cmeq, Neon_cmeq, Neon_cmeq,
                                      v2i32, v4i32, v2i64, 0>;

// FCMGE: greater than or equal.
defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge",
                                      Neon_cmge, Neon_cmge, Neon_cmge,
                                      v2i32, v4i32, v2i64, 0>;

// FCMGT: greater than.
defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt",
                                      Neon_cmgt, Neon_cmgt, Neon_cmgt,
                                      v2i32, v4i32, v2i64, 0>;
742
// Vector Compare Mask Less Than Or Equal (Floating Point)
// FCMLE is an alias for FCMGE with the operands reversed.
def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;

// Vector Compare Mask Less Than (Floating Point)
// FCMLT is an alias for FCMGT with the operands reversed.
def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
754
755
// Floating-point compare-against-zero, one def per arrangement.
//   u, opcode - forwarded to NeonI_2VMisc.
//   size      - high bit of the two-bit size field; the low bit selects
//               single (0) or double (1) precision per def.
//   CC        - condition code matched as the third Neon_cmpz operand.
multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
                              string asmop, CondCode CC> {
  def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
              (outs VPR64:$Rd),
              (ins VPR64:$Rn, fpz32:$FPImm),
              !strconcat(asmop, "\t$Rd.2s, $Rn.2s, $FPImm"),
              [(set (v2i32 VPR64:$Rd),
                 (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))],
              NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
              (outs VPR128:$Rd),
              (ins VPR128:$Rn, fpz32:$FPImm),
              !strconcat(asmop, "\t$Rd.4s, $Rn.4s, $FPImm"),
              [(set (v4i32 VPR128:$Rd),
                 (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
              NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
              (outs VPR128:$Rd),
              (ins VPR128:$Rn, fpz32:$FPImm),
              !strconcat(asmop, "\t$Rd.2d, $Rn.2d, $FPImm"),
              [(set (v2i64 VPR128:$Rd),
                 (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
              NoItinerary>;
}
780
// Floating-point compares against zero.

// FCMEQ #0.0: equal to zero.
defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;

// FCMGE #0.0: greater than or equal to zero.
defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;

// FCMGT #0.0: greater than zero.
defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;

// FCMLE #0.0: less than or equal to zero.
defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;

// FCMLT #0.0: less than zero.
defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
795
// Vector Absolute Comparisons (Floating Point)

// FACGE: absolute compare, greater than or equal.
defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
                                      int_arm_neon_vacged,
                                      int_arm_neon_vacgeq,
                                      int_aarch64_neon_vacgeq,
                                      v2i32, v4i32, v2i64, 0>;

// FACGT: absolute compare, greater than.
defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
                                      int_arm_neon_vacgtd,
                                      int_arm_neon_vacgtq,
                                      int_aarch64_neon_vacgtq,
                                      v2i32, v4i32, v2i64, 0>;

// FACLE is an alias for FACGE with the operands reversed.
def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;

// FACLT is an alias for FACGT with the operands reversed.
def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
821
// Vector halving add (signed, unsigned)
defm SHADDvvv  : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
                                        int_arm_neon_vhadds, 1>;
defm UHADDvvv  : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
                                        int_arm_neon_vhaddu, 1>;

// Vector halving subtract (signed, unsigned)
defm SHSUBvvv  : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
                                        int_arm_neon_vhsubs, 0>;
defm UHSUBvvv  : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
                                        int_arm_neon_vhsubu, 0>;

// Vector rounding halving add (signed, unsigned)
defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
                                        int_arm_neon_vrhadds, 1>;
defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
                                        int_arm_neon_vrhaddu, 1>;

// Vector saturating add (signed, unsigned)
defm SQADDvvv  : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
                                         int_arm_neon_vqadds, 1>;
defm UQADDvvv  : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
                                         int_arm_neon_vqaddu, 1>;
845
// Vector Saturating sub (Integer Signed, Unsigned)
// Subtraction depends on operand order, so these must not be flagged as
// commutable; the halving subtracts (SHSUB/UHSUB) likewise pass 0.
// NOTE(review): the trailing template argument is taken to be the
// isCommutable flag of NeonI_3VSame_BHSD_sizes (defined outside this
// section) - confirm against the multiclass definition.
defm SQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
                   int_arm_neon_vqsubs, 0>;
defm UQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
                   int_arm_neon_vqsubu, 0>;
851
// Register-controlled vector shifts. The first source holds the values and
// the second the per-lane shift amounts, so the operands cannot be swapped:
// every definition below passes 0 as the trailing flag.
// NOTE(review): the trailing template argument is taken to be the
// isCommutable flag of NeonI_3VSame_BHSD_sizes (defined outside this
// section) - confirm against the multiclass definition.

// Vector Shift Left (Signed and Unsigned Integer)
defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
                 int_arm_neon_vshifts, 0>;
defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
                 int_arm_neon_vshiftu, 0>;

// Vector Saturating Shift Left (Signed and Unsigned Integer)
defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
                  int_arm_neon_vqshifts, 0>;
defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
                  int_arm_neon_vqshiftu, 0>;

// Vector Rounding Shift Left (Signed and Unsigned Integer)
defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
                  int_arm_neon_vrshifts, 0>;
defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
                  int_arm_neon_vrshiftu, 0>;

// Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
                   int_arm_neon_vqrshifts, 0>;
defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
                   int_arm_neon_vqrshiftu, 0>;
875
// Vector Maximum (Signed and Unsigned Integer)
defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax",
                                      int_arm_neon_vmaxs, 1>;
defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax",
                                      int_arm_neon_vmaxu, 1>;

// Vector Minimum (Signed and Unsigned Integer)
defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin",
                                      int_arm_neon_vmins, 1>;
defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin",
                                      int_arm_neon_vminu, 1>;

// Vector Maximum (Floating Point)
defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
                                     int_arm_neon_vmaxs,
                                     int_arm_neon_vmaxs,
                                     int_arm_neon_vmaxs,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Minimum (Floating Point)
defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
                                     int_arm_neon_vmins,
                                     int_arm_neon_vmins,
                                     int_arm_neon_vmins,
                                     v2f32, v4f32, v2f64, 1>;

// Vector maxNum (Floating Point) - prefer a number over a quiet NaN
defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
                                       int_aarch64_neon_vmaxnm,
                                       int_aarch64_neon_vmaxnm,
                                       int_aarch64_neon_vmaxnm,
                                       v2f32, v4f32, v2f64, 1>;

// Vector minNum (Floating Point) - prefer a number over a quiet NaN
defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
                                       int_aarch64_neon_vminnm,
                                       int_aarch64_neon_vminnm,
                                       int_aarch64_neon_vminnm,
                                       v2f32, v4f32, v2f64, 1>;
907
// Pairwise operations. Per the architecture description, a pairwise
// instruction reduces adjacent element pairs of the first source into the
// low half of the result and pairs of the second source into the high half;
// exchanging the sources therefore swaps the result halves. None of these
// may be flagged as commutable, so each passes 0 as the trailing flag.
// NOTE(review): the trailing template argument is taken to be the
// isCommutable flag of the NeonI_3VSame_* multiclasses (defined outside this
// section) - confirm against their definitions.

// Vector Maximum Pairwise (Signed and Unsigned Integer)
defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 0>;
defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 0>;

// Vector Minimum Pairwise (Signed and Unsigned Integer)
defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 0>;
defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 0>;

// Vector Maximum Pairwise (Floating Point)
defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
                                     int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
                                     int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 0>;

// Vector Minimum Pairwise (Floating Point)
defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
                                     int_arm_neon_vpmins, int_arm_neon_vpmins,
                                     int_arm_neon_vpmins, v2f32, v4f32, v2f64, 0>;

// Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
                                       int_aarch64_neon_vpmaxnm,
                                       int_aarch64_neon_vpmaxnm,
                                       int_aarch64_neon_vpmaxnm,
                                       v2f32, v4f32, v2f64, 0>;

// Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
                                       int_aarch64_neon_vpminnm,
                                       int_aarch64_neon_vpminnm,
                                       int_aarch64_neon_vpminnm,
                                       v2f32, v4f32, v2f64, 0>;

// Vector Addition Pairwise (Integer)
defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 0>;

// Vector Addition Pairwise (Floating Point)
defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
                                       int_arm_neon_vpadd,
                                       int_arm_neon_vpadd,
                                       int_arm_neon_vpadd,
                                       v2f32, v4f32, v2f64, 0>;
949
// Vector Saturating Doubling Multiply High
defm SQDMULHvvv  : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
                                         int_arm_neon_vqdmulh, 1>;

// Vector Saturating Rounding Doubling Multiply High
defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
                                         int_arm_neon_vqrdmulh, 1>;

// Vector Multiply Extended (Floating Point)
defm FMULXvvv    : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
                                         int_aarch64_neon_vmulx,
                                         int_aarch64_neon_vmulx,
                                         int_aarch64_neon_vmulx,
                                         v2f32, v4f32, v2f64, 1>;
964
// Vector Immediate Instructions

// Emits "<defm-name>_asmoperand": the parser operand class for one flavour of
// modified-immediate shift. PREFIX is spliced into the class name and into
// the names of its render and predicate methods.
multiclass neon_mov_imm_shift_asmoperands<string PREFIX> {
  def _asmoperand : AsmOperandClass {
    let Name            = "NeonMovImmShift" # PREFIX;
    let PredicateMethod = "isNeonMovImmShift" # PREFIX;
    let RenderMethod    = "addNeonMovImmShift" # PREFIX # "Operands";
  }
}
976
// Definition of vector immediates shift operands

// For selection, the shift information is recovered from the OpCmode value
// carried by the immediate node: this transform decodes it and yields the
// shift amount as an i32 target constant (or an empty SDValue when the
// decoder reports that there is no shift).
def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
  unsigned Shift;
  unsigned OnesIn;
  uint64_t Encoded = N->getZExtValue();
  unsigned Shifted = A64Imms::decodeNeonModShiftImm(Encoded, Shift, OnesIn);
  if (!Shifted)
    return SDValue();
  return CurDAG->getTargetConstant(Shift, MVT::i32);
}]>;
990
// Parser operand classes for the modified-immediate shift operators.
// Accepted shift amounts: 0, 8, 16, 24 for LSL; 8, 16 for MSL; and 0, 8 for
// LSLH, the halfword form of LSL.
defm neon_mov_imm_LSL  : neon_mov_imm_shift_asmoperands<"LSL">;
defm neon_mov_imm_MSL  : neon_mov_imm_shift_asmoperands<"MSL">;
defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
998
// Emits "<defm-name>_operand": an i32 operand / ImmLeaf for a shift modifier.
//   PREFIX - shift kind, used as the A64SE:: enumerator in the printer and
//            decoder template arguments.
//   HALF   - suffix appended to PREFIX when looking up the asm operand class.
//   ISHALF - "true"/"false" text passed as the second template argument.
//   pred   - C++ predicate body for the ImmLeaf.
multiclass neon_mov_imm_shift_operands<string PREFIX,
                                       string HALF, string ISHALF, code pred> {
  def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM> {
    let ParserMatchClass =
      !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
    let DecoderMethod =
      "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
    let PrintMethod =
      "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
  }
}
1012
// LSL: the encoded value has a shift and shifts zeros in.
defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
  unsigned Shift;
  unsigned OnesIn;
  unsigned Shifted = A64Imms::decodeNeonModShiftImm(Imm, Shift, OnesIn);
  return (Shifted && !OnesIn);
}]>;

// MSL: the encoded value has a shift and shifts ones in.
defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
  unsigned Shift;
  unsigned OnesIn;
  unsigned Shifted = A64Imms::decodeNeonModShiftImm(Imm, Shift, OnesIn);
  return (Shifted && OnesIn);
}]>;

// LSLH: same predicate as LSL, but bound to the halfword printer, decoder
// and parser class.
defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
  unsigned Shift;
  unsigned OnesIn;
  unsigned Shifted = A64Imms::decodeNeonModShiftImm(Imm, Shift, OnesIn);
  return (Shifted && !OnesIn);
}]>;
1036
// Parser operand class "UImm8", checked with isUImm<8>.
def neon_uimm8_asmoperand : AsmOperandClass {
  let Name            = "UImm8";
  let RenderMethod    = "addImmOperands";
  let PredicateMethod = "isUImm<8>";
}

// i32 immediate operand for the modified-immediate instructions. The ImmLeaf
// predicate accepts every value; range checking is left to the parser class.
def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
  let PrintMethod      = "printNeonUImm8Operand";
  let ParserMatchClass = neon_uimm8_asmoperand;
}
1048
// Parser operand class for the 64-bit byte-mask immediate.
def neon_uimm64_mask_asmoperand : AsmOperandClass {
  let Name            = "NeonUImm64Mask";
  let RenderMethod    = "addNeonUImm64MaskOperands";
  let PredicateMethod = "isNeonUImm64Mask";
}

// MCOperand for a 64-bit byte mask in which every byte is either 0x00 or
// 0xff; it is encoded as an unsigned 8-bit value. The ImmLeaf predicate
// accepts every value.
def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
  let PrintMethod      = "printNeonUImm64MaskOperand";
  let ParserMatchClass = neon_uimm64_mask_asmoperand;
}
1062
// Modified-immediate move with an LSL shift (zeros shifted in).
//   asmop  - mnemonic.
//   op     - forwarded to NeonI_1VModImm.
//   opnode - DAG node selected; it takes (imm, OpCmode).
// The shift operand $Simm is folded into the cmode field of each def.
multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
                                   SDPatternOperator opnode> {
  // Per word: two-bit shift selector in cmode<2:1>.
  def _2S : NeonI_1VModImm<0b0, op,
              (outs VPR64:$Rd),
              (ins neon_uimm8:$Imm, neon_mov_imm_LSL_operand:$Simm),
              asmop # " $Rd.2s, $Imm$Simm",
              [(set (v2i32 VPR64:$Rd),
                 (v2i32 (opnode (timm:$Imm),
                   (neon_mov_imm_LSL_operand:$Simm))))],
              NoItinerary> {
    bits<2> Simm;
    let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
  }

  def _4S : NeonI_1VModImm<0b1, op,
              (outs VPR128:$Rd),
              (ins neon_uimm8:$Imm, neon_mov_imm_LSL_operand:$Simm),
              asmop # " $Rd.4s, $Imm$Simm",
              [(set (v4i32 VPR128:$Rd),
                 (v4i32 (opnode (timm:$Imm),
                   (neon_mov_imm_LSL_operand:$Simm))))],
              NoItinerary> {
    bits<2> Simm;
    let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
  }

  // Per halfword: one-bit shift selector in cmode<1>.
  def _4H : NeonI_1VModImm<0b0, op,
              (outs VPR64:$Rd),
              (ins neon_uimm8:$Imm, neon_mov_imm_LSLH_operand:$Simm),
              asmop # " $Rd.4h, $Imm$Simm",
              [(set (v4i16 VPR64:$Rd),
                 (v4i16 (opnode (timm:$Imm),
                   (neon_mov_imm_LSLH_operand:$Simm))))],
              NoItinerary> {
    bit Simm;
    let cmode = {0b1, 0b0, Simm, 0b0};
  }

  def _8H : NeonI_1VModImm<0b1, op,
              (outs VPR128:$Rd),
              (ins neon_uimm8:$Imm, neon_mov_imm_LSLH_operand:$Simm),
              asmop # " $Rd.8h, $Imm$Simm",
              [(set (v8i16 VPR128:$Rd),
                 (v8i16 (opnode (timm:$Imm),
                   (neon_mov_imm_LSLH_operand:$Simm))))],
              NoItinerary> {
    bit Simm;
    let cmode = {0b1, 0b0, Simm, 0b0};
  }
}
1120
// Modified-immediate bitwise instructions that read and write the same
// register ($src is tied to $Rd).
//   asmop      - mnemonic.
//   op         - forwarded to NeonI_1VModImm.
//   opnode     - bitwise DAG node combining $src with the immediate vector.
//   neonopnode - node that materialises the immediate vector from
//                (imm, OpCmode).
// The halfword defs name neon_mov_imm_LSLH_operand in their patterns so that
// the pattern operand is the same record as the instruction operand, as in
// NeonI_mov_imm_lsl_sizes and Neon_bitwiseVi_patterns. The LSL and LSLH
// predicates are textually identical, so matching is unaffected.
multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
                                                   SDPatternOperator opnode,
                                                   SDPatternOperator neonopnode>
{
  let Constraints = "$src = $Rd" in {
    // shift zeros, per word
    def _2S  : NeonI_1VModImm<0b0, op,
                 (outs VPR64:$Rd),
                 (ins VPR64:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSL_operand:$Simm),
                 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
                 [(set (v2i32 VPR64:$Rd),
                    (v2i32 (opnode (v2i32 VPR64:$src),
                      (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
                        neon_mov_imm_LSL_operand:$Simm)))))))],
                 NoItinerary> {
      bits<2> Simm;
      let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
    }

    def _4S  : NeonI_1VModImm<0b1, op,
                 (outs VPR128:$Rd),
                 (ins VPR128:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSL_operand:$Simm),
                 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
                 [(set (v4i32 VPR128:$Rd),
                    (v4i32 (opnode (v4i32 VPR128:$src),
                      (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
                        neon_mov_imm_LSL_operand:$Simm)))))))],
                 NoItinerary> {
      bits<2> Simm;
      let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
    }

    // shift zeros, per halfword
    def _4H  : NeonI_1VModImm<0b0, op,
                 (outs VPR64:$Rd),
                 (ins VPR64:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSLH_operand:$Simm),
                 !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
                 [(set (v4i16 VPR64:$Rd),
                    (v4i16 (opnode (v4i16 VPR64:$src),
                       (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
                          neon_mov_imm_LSLH_operand:$Simm)))))))],
                 NoItinerary> {
      bit  Simm;
      let cmode = {0b1, 0b0, Simm, 0b1};
    }

    def _8H  : NeonI_1VModImm<0b1, op,
                 (outs VPR128:$Rd),
                 (ins VPR128:$src, neon_uimm8:$Imm,
                   neon_mov_imm_LSLH_operand:$Simm),
                 !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
                 [(set (v8i16 VPR128:$Rd),
                    (v8i16 (opnode (v8i16 VPR128:$src),
                      (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
                        neon_mov_imm_LSLH_operand:$Simm)))))))],
                 NoItinerary> {
      bit Simm;
      let cmode = {0b1, 0b0, Simm, 0b1};
    }
  }
}
1185
// Modified-immediate move with an MSL shift (ones shifted in), per word.
//   asmop  - mnemonic.
//   op     - forwarded to NeonI_1VModImm.
//   opnode - DAG node selected; it takes (imm, OpCmode).
// The one-bit shift selector $Simm becomes cmode<0>.
multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
                                   SDPatternOperator opnode> {
  def _2S : NeonI_1VModImm<0b0, op,
              (outs VPR64:$Rd),
              (ins neon_uimm8:$Imm, neon_mov_imm_MSL_operand:$Simm),
              asmop # " $Rd.2s, $Imm$Simm",
              [(set (v2i32 VPR64:$Rd),
                 (v2i32 (opnode (timm:$Imm),
                   (neon_mov_imm_MSL_operand:$Simm))))],
              NoItinerary> {
    bit Simm;
    let cmode = {0b1, 0b1, 0b0, Simm};
  }

  def _4S : NeonI_1VModImm<0b1, op,
              (outs VPR128:$Rd),
              (ins neon_uimm8:$Imm, neon_mov_imm_MSL_operand:$Simm),
              asmop # " $Rd.4s, $Imm$Simm",
              [(set (v4i32 VPR128:$Rd),
                 (v4i32 (opnode (timm:$Imm),
                   (neon_mov_imm_MSL_operand:$Simm))))],
              NoItinerary> {
    bit Simm;
    let cmode = {0b1, 0b1, 0b0, Simm};
  }
}
1216
// LSL-shifted modified-immediate instructions; all four families are defined
// with isReMaterializable set.
let isReMaterializable = 1 in {
  // Vector Move Immediate Shifted
  defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;

  // Vector Move Inverted Immediate Shifted
  defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;

  // Vector Bitwise Bit Clear (AND NOT) - immediate
  defm BICvi_lsl  : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
                                                            and, Neon_mvni>;

  // Vector Bitwise OR - immediate
  defm ORRvi_lsl  : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
                                                            or, Neon_movi>;
}
1239
// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// LowerBUILD_VECTOR favors lowering MOVI over MVNI.
// BIC immediate instructions selection requires additional patterns to
// transform Neon_movi operands into BIC immediate operands

// Decodes the OpCmode value and returns the *other* halfword shift: the
// byte that an AND with (0xff << s) keeps is the complement of the byte a
// BIC must clear, so the shift selector is inverted.
def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
  uint64_t OpCmode = N->getZExtValue();
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
  // LSLH restricts shift amount to  0, 8 which are encoded as 0 and 1
  // Transform encoded shift amount 0 to 1 and 1 to 0.
  return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
}]>;

// Matches an OpCmode that has a shift with zeros shifted in, and rewrites it
// with the inverting transform above.
def neon_mov_imm_LSLH_transform_operand
  : ImmLeaf<i32, [{
    unsigned ShiftImm;
    unsigned ShiftOnesIn;
    unsigned HasShift =
      A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
    return (HasShift && !ShiftOnesIn); }],
  neon_mov_imm_LSLH_transform_XFORM>;

// The BIC immediate must be 0xff: and(A, 0x00ff) == A & ~(0xff << 8).
// An immediate of 0 would clear no bits at all.
// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0xff, LSL 8)
// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0xff)
def : Pat<(v4i16 (and VPR64:$src,
            (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
          (BICvi_lsl_4H VPR64:$src, 255,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;

// Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0xff, LSL 8)
// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0xff)
def : Pat<(v8i16 (and VPR128:$src,
            (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
          (BICvi_lsl_8H VPR128:$src, 255,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;
1277
1278
// Extra selection patterns for halfword-immediate bitwise instructions when
// the other operand has a different element type: the (neonopnode imm, shift)
// vector is produced as v4i16/v8i16 and bitconverted to the operand type.
// INST4H handles the 64-bit forms, INST8H the 128-bit forms.
1279 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1280                                    SDPatternOperator neonopnode,
1281                                    Instruction INST4H,
1282                                    Instruction INST8H> {
1283   def : Pat<(v8i8 (opnode VPR64:$src,
1284               (bitconvert (v4i16 (neonopnode timm:$Imm,
1285                 neon_mov_imm_LSLH_operand:$Simm))))),
1286             (INST4H VPR64:$src, neon_uimm8:$Imm,
1287               neon_mov_imm_LSLH_operand:$Simm)>;
1288   def : Pat<(v1i64 (opnode VPR64:$src,
1289               (bitconvert (v4i16 (neonopnode timm:$Imm,
1290                 neon_mov_imm_LSLH_operand:$Simm))))),
1291             (INST4H VPR64:$src, neon_uimm8:$Imm,
1292               neon_mov_imm_LSLH_operand:$Simm)>;
1293
1294   def : Pat<(v16i8 (opnode VPR128:$src,
1295               (bitconvert (v8i16 (neonopnode timm:$Imm,
1296                 neon_mov_imm_LSLH_operand:$Simm))))),
1297             (INST8H VPR128:$src, neon_uimm8:$Imm,
1298               neon_mov_imm_LSLH_operand:$Simm)>;
1299   def : Pat<(v4i32 (opnode VPR128:$src,
1300               (bitconvert (v8i16 (neonopnode timm:$Imm,
1301                 neon_mov_imm_LSLH_operand:$Simm))))),
1302             (INST8H VPR128:$src, neon_uimm8:$Imm,
1303               neon_mov_imm_LSLH_operand:$Simm)>;
1304   def : Pat<(v2i64 (opnode VPR128:$src,
1305               (bitconvert (v8i16 (neonopnode timm:$Imm,
1306                 neon_mov_imm_LSLH_operand:$Simm))))),
1307             (INST8H VPR128:$src, neon_uimm8:$Imm,
1308               neon_mov_imm_LSLH_operand:$Simm)>;
1309 }
1310
1311 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// BIC computes src AND NOT(imm). Neon_mvni already supplies the inverted
// immediate, so the node to match is `and`; matching `or` here would select
// BIC for an OR operation.
1312 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
1313
1314 // Additional patterns for Vector Bitwise OR - immediate
// Matches (or src, (bitconvert (Neon_movi imm, shift))) onto ORRvi_lsl_4H/8H.
1315 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1316
1317
1318 // Vector Move Immediate Masked
// Expands NeonI_mov_imm_msl_sizes for "movi" with Neon_movi; every record
// produced inside the `let` is marked isReMaterializable.
1319 let isReMaterializable = 1 in {
1320 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1321 }
1322
1323 // Vector Move Inverted Immediate Masked
// Same expansion for "mvni" with Neon_mvni (second argument 0b1 instead of 0b0).
1324 let isReMaterializable = 1 in {
1325 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
1326 }
1327
// Assembly alias that omits the shift operand: the string
// "<asmop> $Rd,<asmlane>, $Imm" is mapped onto `inst` with its third operand
// fixed to 0.
//   asmop   - mnemonic text
//   asmlane - lane suffix text (e.g. ".2s")
//   inst    - instruction the alias resolves to
//   VPRC    - register operand used for $Rd
// NOTE(review): the comma between $Rd and the lane suffix and the trailing
// 0b0 argument are interpreted by NeonInstAlias, which is defined elsewhere -
// confirm their meaning there.
1328 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1329                                 Instruction inst, RegisterOperand VPRC>
1330   : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
1331                         (inst VPRC:$Rd, neon_uimm8:$Imm,  0), 0b0>;
1332
// Each group below instantiates NeonI_mov_imm_lsl_aliases for the .2s/.4s/.4h/.8h
// variants of one mnemonic; 64-bit forms use VPR64, 128-bit forms use VPR128.
1333 // Aliases for Vector Move Immediate Shifted
1334 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1335 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1336 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1337 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1338
1339 // Aliases for Vector Move Inverted Immediate Shifted
1340 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1341 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1342 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1343 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1344
1345 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1346 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1347 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1348 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1349 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1350
1351 // Aliases for Vector Bitwise OR - immediate
1352 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1353 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1354 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1355 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1356
1357 // Vector Move Immediate - per byte
// Both definitions take a neon_uimm8 immediate, select from
// (Neon_movi timm:$Imm, (i32 imm)) and set cmode to 0b1110; they differ in
// register width (VPR64 vs VPR128) and the first NeonI_1VModImm argument.
1358 let isReMaterializable = 1 in {
1359 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1360                                (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1361                                "movi\t$Rd.8b, $Imm",
1362                                [(set (v8i8 VPR64:$Rd),
1363                                   (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1364                                 NoItinerary> {
1365   let cmode = 0b1110;
1366 }
1367
1368 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1369                                 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1370                                 "movi\t$Rd.16b, $Imm",
1371                                 [(set (v16i8 VPR128:$Rd),
1372                                    (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1373                                  NoItinerary> {
1374   let cmode = 0b1110;
1375 }
1376 }
1377
1378 // Vector Move Immediate - bytemask, per double word
// Takes a neon_uimm64_mask immediate and produces a v2i64 in a VPR128
// register; cmode is 0b1110 and both leading NeonI_1VModImm bits are 1.
1379 let isReMaterializable = 1 in {
1380 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1381                                (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
1382                                "movi\t $Rd.2d, $Imm",
1383                                [(set (v2i64 VPR128:$Rd),
1384                                   (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1385                                NoItinerary> {
1386   let cmode = 0b1110;
1387 }
1388 }
1389
1390 // Vector Move Immediate - bytemask, one doubleword
1391
// Writes an FPR64 register; the pattern matches the v1i64 Neon_movi result
// bitconverted to f64.
1392 let isReMaterializable = 1 in {
1393 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1394                            (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1395                            "movi\t $Rd, $Imm",
1396                            [(set (f64 FPR64:$Rd),
1397                               (f64 (bitconvert
1398                                 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
1399                            NoItinerary> {
1400   let cmode = 0b1110;
1401 }
1402 }
1403
1404 // Vector Floating Point Move Immediate
1405
// Common shape of the vector "fmov" immediate forms.
//   asmlane   - lane suffix appended to $Rd in the assembly string
//   VPRC      - destination register operand
//   OpTy      - vector type produced by Neon_fmovi
//   immOpType - operand type of the immediate
//   q, op     - forwarded unchanged to NeonI_1VModImm
// cmode is fixed to 0b1111 for every instantiation.
1406 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1407                       Operand immOpType, bit q, bit op>
1408   : NeonI_1VModImm<q, op,
1409                    (outs VPRC:$Rd), (ins immOpType:$Imm),
1410                    "fmov\t$Rd" # asmlane # ", $Imm",
1411                    [(set (OpTy VPRC:$Rd),
1412                       (OpTy (Neon_fmovi (timm:$Imm))))],
1413                    NoItinerary> {
1414      let cmode = 0b1111;
1415    }
1416
// The three fmov immediate forms: .2s and .4s use fmov32_operand, .2d uses
// fmov64_operand and is the only one with op = 1.
1417 let isReMaterializable = 1 in {
1418 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64,  v2f32, fmov32_operand, 0b0, 0b0>;
1419 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1420 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1421 }
1422
1423 // Vector Shift (Immediate)
1424 // Immediate in [0, 63]
// i32 operand parsed through uimm6_asmoperand.
1425 def imm0_63 : Operand<i32> {
1426   let ParserMatchClass = uimm6_asmoperand;
1427 }
1428
1429 // Shift Right Immediate - A shift right immediate is encoded differently from
1430 // other shift immediates. The immh:immb field is encoded like so:
1431 //
1432 //    Offset    Encoding
1433 //     8        immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1434 //     16       immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1435 //     32       immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1436 //     64       immh:immb<6>   = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
// Assembler operand class for a shift-right immediate; OFFSET (one of the
// values in the table above) is appended to both the class name and the
// diagnostic type.
1437 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1438   let Name = "ShrImm" # OFFSET;
1439   let RenderMethod = "addImmOperands";
1440   let DiagnosticType = "ShrImm" # OFFSET;
1441 }
1442
// i32 shift-right immediate operand. OFFSET selects the per-size encoder and
// decoder methods by name, and the matching "shr_imm<OFFSET>_asmoperand"
// record is looked up with !cast, so that record must exist for each OFFSET
// this class is instantiated with.
1443 class shr_imm<string OFFSET> : Operand<i32> {
1444   let EncoderMethod = "getShiftRightImm" # OFFSET;
1445   let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1446   let ParserMatchClass = 
1447     !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
1448 }
1449
// Operand-class records for the four offsets; shr_imm<...> below resolves
// these by name.
1450 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1451 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1452 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1453 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
1454
// The shift-right immediate operands themselves.
1455 def shr_imm8 : shr_imm<"8">;
1456 def shr_imm16 : shr_imm<"16">;
1457 def shr_imm32 : shr_imm<"32">;
1458 def shr_imm64 : shr_imm<"64">;
1459
// Vector shift by immediate with one source register.
//   q, u, opcode - forwarded to NeonI_2VShiftImm
//   asmop, T     - mnemonic and lane suffix used for both $Rd and $Rn
//   VPRC, Ty     - register operand and vector type of source and result
//   ImmTy        - operand type of the shift immediate
//   OpNode       - shift node applied to $Rn and the immediate
// The pattern expects the immediate as a Neon_dupImm vector of type Ty.
1460 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1461                RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1462   : NeonI_2VShiftImm<q, u, opcode,
1463                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1464                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1465                      [(set (Ty VPRC:$Rd),
1466                         (Ty (OpNode (Ty VPRC:$Rn),
1467                           (Ty (Neon_dupImm (i32 imm:$Imm))))))],
1468                      NoItinerary>;
1469
// Shift-left-by-immediate variants built on N2VShift with the `shl` node.
// Each variant pins the leading bits of Inst{22-...} as noted on its line;
// the immediate operand type widens with the element size
// (uimm3, uimm4, uimm5, imm0_63).
1470 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1471   // 64-bit vector types.
1472   def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> {
1473     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1474   }
1475
1476   def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> {
1477     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1478   }
1479
1480   def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> {
1481     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1482   }
1483
1484   // 128-bit vector types.
1485   def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> {
1486     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1487   }
1488
1489   def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> {
1490     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1491   }
1492
1493   def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> {
1494     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1495   }
1496
1497   def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> {
1498     let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
1499   }
1500 }
1501
// Shift-right-by-immediate variants built on N2VShift with a caller-supplied
// OpNode. The immediate uses the shr_imm<N> operand matching the element
// width, and each variant pins the leading immh:immb bits.
1502 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1503   def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1504                      OpNode> {
1505     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1506   }
1507
1508   def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1509                      OpNode> {
1510     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1511   }
1512
1513   def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1514                      OpNode> {
1515     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1516   }
1517
1518   def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1519                       OpNode> {
1520     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1521   }
1522
1523   def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1524                      OpNode> {
1525     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1526   }
1527
1528   def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1529                      OpNode> {
1530     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1531   }
1532
1533   def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1534                      OpNode> {
1535     let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
1536   }
1537 }
1538
1539 // Shift left
1540 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1541
1542 // Shift right
// sshr selects from `sra`, ushr from `srl`; they share the opcode and differ
// only in the u bit.
1543 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1544 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
1545
// Fragments extracting the sub-vector that starts at the middle element
// index (8, 4 or 2) of a 128-bit vector.
1546 def Neon_top16B : PatFrag<(ops node:$in),
1547                           (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1548 def Neon_top8H : PatFrag<(ops node:$in),
1549                          (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1550 def Neon_top4S : PatFrag<(ops node:$in),
1551                          (extract_subvector (v4i32 node:$in), (iPTR 2))>;
1552
// Lengthening shift left: a VPR64 source of type SrcTy is extended with
// ExtOp to DestTy and then shifted left by a Neon_dupImm of the immediate.
// The result is written to a VPR128 register. DestT/SrcT are the lane
// suffixes printed for $Rd and $Rn.
1553 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1554                    string SrcT, ValueType DestTy, ValueType SrcTy,
1555                    Operand ImmTy, SDPatternOperator ExtOp>
1556   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1557                      (ins VPR64:$Rn, ImmTy:$Imm),
1558                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1559                      [(set (DestTy VPR128:$Rd),
1560                         (DestTy (shl
1561                           (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1562                             (DestTy (Neon_dupImm (i32 imm:$Imm))))))],
1563                      NoItinerary>;
1564
// "High" form of the lengthening shift left: the source is a VPR128
// register, `getTop` selects the part of it that is extended with ExtOp, and
// the mnemonic gets a "2" suffix.
// NOTE: StartIndex is accepted but not referenced anywhere in this class body.
1565 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1566                        string SrcT, ValueType DestTy, ValueType SrcTy,
1567                        int StartIndex, Operand ImmTy,
1568                        SDPatternOperator ExtOp, PatFrag getTop>
1569   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1570                      (ins VPR128:$Rn, ImmTy:$Imm),
1571                      asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1572                      [(set (DestTy VPR128:$Rd),
1573                         (DestTy (shl
1574                           (DestTy (ExtOp
1575                             (SrcTy (getTop VPR128:$Rn)))),
1576                               (DestTy (Neon_dupImm (i32 imm:$Imm))))))],
1577                      NoItinerary>;
1578
// Lengthening shift-left variants. `prefix` must be the same name the
// multiclass is instantiated under (see the defm lines that use it): the
// zero-shift patterns at the end look the generated instructions up by
// "<prefix>_<suffix>" through !cast.
1579 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1580                          SDNode ExtOp> {
1581   // 64-bit vector types.
1582   def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1583                          uimm3, ExtOp> {
1584     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1585   }
1586
1587   def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1588                          uimm4, ExtOp> {
1589     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1590   }
1591
1592   def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1593                          uimm5, ExtOp> {
1594     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1595   }
1596
1597   // 128-bit vector types
1598   def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b",
1599                               v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B> {
1600     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1601   }
1602
1603   def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h",
1604                              v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H> {
1605     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1606   }
1607
1608   def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s",
1609                              v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S> {
1610     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1611   }
1612
1613   // Use other patterns to match when the immediate is 0.
  // A bare extension (no shift node in the DAG) selects the same instruction
  // with a shift amount of 0.
1614   def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1615             (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1616
1617   def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1618             (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1619
1620   def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1621             (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1622
1623   def : Pat<(v8i16 (ExtOp (v8i8 (Neon_top16B VPR128:$Rn)))),
1624             (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1625
1626   def : Pat<(v4i32 (ExtOp (v4i16 (Neon_top8H VPR128:$Rn)))),
1627             (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1628
1629   def : Pat<(v2i64 (ExtOp (v2i32 (Neon_top4S VPR128:$Rn)))),
1630             (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
1631 }
1632
1633 // Shift left long
// The first template argument repeats the defm name so the multiclass can
// !cast to the instructions it generates. sshll extends with sext, ushll
// with zext.
1634 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1635 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1636
1637 // Rounding/Saturating shift
// Same operand layout as N2VShift, but the pattern hands the immediate to
// OpNode directly as an i32 instead of wrapping it in Neon_dupImm.
1638 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1639                   RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1640                   SDPatternOperator OpNode>
1641   : NeonI_2VShiftImm<q, u, opcode,
1642                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1643                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1644                      [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1645                         (i32 imm:$Imm))))],
1646                      NoItinerary>;
1647
1648 // shift right (vector by immediate)
// N2VShift_RQ variants using the shr_imm<N> operands; each variant pins the
// leading immh:immb bits for its element size.
1649 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1650                            SDPatternOperator OpNode> {
1651   def _8B  : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1652                          OpNode> {
1653     let Inst{22-19} = 0b0001;
1654   }
1655
1656   def _4H  : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1657                          OpNode> {
1658     let Inst{22-20} = 0b001;
1659   }
1660
1661   def _2S  : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1662                          OpNode> {
1663     let Inst{22-21} = 0b01;
1664   }
1665
1666   def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1667                          OpNode> {
1668     let Inst{22-19} = 0b0001;
1669   }
1670
1671   def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1672                         OpNode> {
1673     let Inst{22-20} = 0b001;
1674   }
1675
1676   def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1677                         OpNode> {
1678     let Inst{22-21} = 0b01;
1679   }
1680
1681   def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1682                         OpNode> {
1683     let Inst{22} = 0b1;
1684   }
1685 }
1686
// Shift-left counterpart of NeonI_N2VShR_RQ: N2VShift_RQ variants using the
// unsigned immediate operands uimm3/uimm4/uimm5/imm0_63.
1687 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1688                           SDPatternOperator OpNode> {
1689   // 64-bit vector types.
1690   def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1691                         OpNode> {
1692     let Inst{22-19} = 0b0001;
1693   }
1694
1695   def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1696                         OpNode> {
1697     let Inst{22-20} = 0b001;
1698   }
1699
1700   def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1701                         OpNode> {
1702     let Inst{22-21} = 0b01;
1703   }
1704
1705   // 128-bit vector types.
1706   def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1707                          OpNode> {
1708     let Inst{22-19} = 0b0001;
1709   }
1710
1711   def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1712                         OpNode> {
1713     let Inst{22-20} = 0b001;
1714   }
1715
1716   def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1717                         OpNode> {
1718     let Inst{22-21} = 0b01;
1719   }
1720
1721   def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1722                         OpNode> {
1723     let Inst{22} = 0b1;
1724   }
1725 }
1726
1727 // Rounding shift right
1728 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1729                                 int_aarch64_neon_vsrshr>;
1730 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1731                                 int_aarch64_neon_vurshr>;
1732
1733 // Saturating shift left unsigned
1734 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1735
1736 // Saturating shift left
// NOTE(review): the fragments are named Neon_sqrshlImm / Neon_uqrshlImm
// ("qrshl") while the mnemonics are the non-rounding sqshl / uqshl - the
// fragments are defined elsewhere; confirm they describe the non-rounding
// saturating shift.
1737 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1738 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
1739
// Shift-and-accumulate: the result is $src + OpNode($Rn, dupImm($Imm)).
// $src is an extra input tied to $Rd through the constraint below, so the
// destination register doubles as the accumulator.
1740 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1741                   RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1742                   SDNode OpNode>
1743   : NeonI_2VShiftImm<q, u, opcode,
1744            (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1745            asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1746            [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1747               (Ty (OpNode (Ty VPRC:$Rn),
1748                 (Ty (Neon_dupImm (i32 imm:$Imm))))))))],
1749            NoItinerary> {
1750   let Constraints = "$src = $Rd";
1751 }
1752
1753 // Shift Right accumulate
// N2VShiftAdd variants using the shr_imm<N> operands; each variant pins the
// leading immh:immb bits for its element size.
1754 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1755   def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1756                         OpNode> {
1757     let Inst{22-19} = 0b0001;
1758   }
1759
1760   def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1761                         OpNode> {
1762     let Inst{22-20} = 0b001;
1763   }
1764
1765   def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1766                         OpNode> {
1767     let Inst{22-21} = 0b01;
1768   }
1769
1770   def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1771                          OpNode> {
1772     let Inst{22-19} = 0b0001;
1773   }
1774
1775   def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1776                         OpNode> {
1777     let Inst{22-20} = 0b001;
1778   }
1779
1780   def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1781                         OpNode> {
1782     let Inst{22-21} = 0b01;
1783   }
1784
1785   def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1786                         OpNode> {
1787     let Inst{22} = 0b1;
1788   }
1789 }
1790
1791 // Shift right and accumulate
// ssra accumulates an `sra` result, usra an `srl` result.
1792 defm SSRAvvi    : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
1793 defm USRAvvi    : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
1794
1795 // Rounding shift accumulate
// Like N2VShiftAdd, but OpNode receives the immediate directly as an i32
// (no Neon_dupImm). $src is tied to $Rd and acts as the accumulator.
1796 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1797                     RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1798                     SDPatternOperator OpNode>
1799   : NeonI_2VShiftImm<q, u, opcode,
1800                      (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1801                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1802                      [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1803                         (Ty (OpNode (Ty VPRC:$Rn), (i32 imm:$Imm))))))],
1804                      NoItinerary> {
1805   let Constraints = "$src = $Rd";
1806 }
1807
// N2VShiftAdd_R variants using the shr_imm<N> operands; each variant pins
// the leading immh:immb bits for its element size.
1808 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1809                              SDPatternOperator OpNode> {
1810   def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1811                           OpNode> {
1812     let Inst{22-19} = 0b0001;
1813   }
1814
1815   def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1816                           OpNode> {
1817     let Inst{22-20} = 0b001;
1818   }
1819
1820   def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1821                           OpNode> {
1822     let Inst{22-21} = 0b01;
1823   }
1824
1825   def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1826                            OpNode> {
1827     let Inst{22-19} = 0b0001;
1828   }
1829
1830   def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1831                           OpNode> {
1832     let Inst{22-20} = 0b001;
1833   }
1834
1835   def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1836                           OpNode> {
1837     let Inst{22-21} = 0b01;
1838   }
1839
1840   def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1841                           OpNode> {
1842     let Inst{22} = 0b1;
1843   }
1844 }
1845
1846 // Rounding shift right and accumulate
// Reuses the rounding-shift intrinsics of srshr/urshr as the shifted addend.
1847 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1848 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1849
1850 // Shift insert by immediate
// OpNode takes three operands: the previous destination value ($src, tied to
// $Rd), the source vector $Rn and the i32 shift immediate.
1851 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
1852                   RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1853                   SDPatternOperator OpNode>
1854     : NeonI_2VShiftImm<q, u, opcode,
1855            (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1856            asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1857            [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
1858              (i32 imm:$Imm))))],
1859            NoItinerary> {
1860   let Constraints = "$src = $Rd";
1861 }
1862
1863 // shift left insert (vector by immediate)
// N2VShiftIns variants selecting from int_aarch64_neon_vsli, using the
// unsigned immediate operands uimm3/uimm4/uimm5/imm0_63.
1864 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
1865   def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1866                         int_aarch64_neon_vsli> {
1867     let Inst{22-19} = 0b0001;
1868   }
1869
1870   def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1871                         int_aarch64_neon_vsli> {
1872     let Inst{22-20} = 0b001;
1873   }
1874
1875   def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1876                         int_aarch64_neon_vsli> {
1877     let Inst{22-21} = 0b01;
1878   }
1879
1880     // 128-bit vector types
1881   def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1882                          int_aarch64_neon_vsli> {
1883     let Inst{22-19} = 0b0001;
1884   }
1885
1886   def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1887                         int_aarch64_neon_vsli> {
1888     let Inst{22-20} = 0b001;
1889   }
1890
1891   def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1892                         int_aarch64_neon_vsli> {
1893     let Inst{22-21} = 0b01;
1894   }
1895
1896   def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1897                         int_aarch64_neon_vsli> {
1898     let Inst{22} = 0b1;
1899   }
1900 }
1901
1902 // shift right insert (vector by immediate)
// N2VShiftIns variants selecting from int_aarch64_neon_vsri, using the
// shr_imm<N> operands.
1903 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
1904     // 64-bit vector types.
1905   def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1906                         int_aarch64_neon_vsri> {
1907     let Inst{22-19} = 0b0001;
1908   }
1909
1910   def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1911                         int_aarch64_neon_vsri> {
1912     let Inst{22-20} = 0b001;
1913   }
1914
1915   def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1916                         int_aarch64_neon_vsri> {
1917     let Inst{22-21} = 0b01;
1918   }
1919
1920     // 128-bit vector types
1921   def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1922                          int_aarch64_neon_vsri> {
1923     let Inst{22-19} = 0b0001;
1924   }
1925
1926   def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1927                         int_aarch64_neon_vsri> {
1928     let Inst{22-20} = 0b001;
1929   }
1930
1931   def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1932                         int_aarch64_neon_vsri> {
1933     let Inst{22-21} = 0b01;
1934   }
1935
1936   def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1937                         int_aarch64_neon_vsri> {
1938     let Inst{22} = 0b1;
1939   }
1940 }
1941
1942 // Shift left and insert
1943 defm SLIvvi   : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
1944
1945 // Shift right and insert
1946 defm SRIvvi   : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
1947
// Narrowing shift right: VPR128 source, VPR64 result. The class carries no
// selection pattern of its own (empty pattern list); separate Pat records
// elsewhere in the file select these instructions.
1948 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1949                     string SrcT, Operand ImmTy>
1950   : NeonI_2VShiftImm<q, u, opcode,
1951                      (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
1952                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1953                      [], NoItinerary>;
1954
// "High" narrowing shift right: the result is a VPR128 register and the
// extra input $src is tied to $Rd, so the existing destination contents are
// an input of the instruction. No selection pattern is attached here.
1955 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1956                        string SrcT, Operand ImmTy>
1957   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1958                      (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
1959                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1960                      [], NoItinerary> {
1961   let Constraints = "$src = $Rd";
1962 }
1963
1964 // Shift right narrow by immediate
// _8B/_4H/_2S write a 64-bit result; _16B/_8H/_4S are the "2" (high) forms
// built on N2VShR_Narrow_Hi, whose mnemonic is asmop with "2" appended.
1965 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
1966   def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
1967     let Inst{22-19} = 0b0001;
1968   }
1969
1970   def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
1971     let Inst{22-20} = 0b001;
1972   }
1973
1974   def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
1975     let Inst{22-21} = 0b01;
1976   }
1977
1978   // Shift Narrow High
1979   def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
1980                               shr_imm8> {
1981     let Inst{22-19} = 0b0001;
1982   }
1983
1984   def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
1985                              shr_imm16> {
1986     let Inst{22-20} = 0b001;
1987   }
1988
1989   def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
1990                              shr_imm32> {
1991     let Inst{22-21} = 0b01;
1992   }
1993 }
1994
1995 // Shift right narrow
1996 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
1997
1998 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
// Note that the record names QSHRUNvvi and QRSHRUNvvi drop the leading "S"
// of their mnemonics (sqshrun, sqrshrun); other records refer to them by
// these names.
1999 defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2000 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2001 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2002 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2003 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2004 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2005 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
2006
// Concatenation of two v1i64 values into one v2i64; $Rm is the first
// operand of concat_vectors and $Rn the second.
2007 def Neon_combine : PatFrag<(ops node:$Rm, node:$Rn),
2008                            (v2i64 (concat_vectors (v1i64 node:$Rm),
2009                                                   (v1i64 node:$Rn)))>;
2010
// Shift-right-by-immediate fragments for 128-bit vectors: "lshr" wraps srl,
// "ashr" wraps sra, and the shift amount is a Neon_dupImm of an i32. The
// names follow the scheme "Neon_" # shr # "Imm" # <type> that
// Neon_shiftNarrow_patterns uses to look them up.
2011 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2012                              (v8i16 (srl (v8i16 node:$lhs),
2013                                (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
2014 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2015                              (v4i32 (srl (v4i32 node:$lhs),
2016                                (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
2017 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2018                              (v2i64 (srl (v2i64 node:$lhs),
2019                                (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
2020 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2021                              (v8i16 (sra (v8i16 node:$lhs),
2022                                (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
2023 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2024                              (v4i32 (sra (v4i32 node:$lhs),
2025                                (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
2026 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2027                              (v2i64 (sra (v2i64 node:$lhs),
2028                                (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
2029
2030 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
// `shr` is spliced into "Neon_" # shr # "Imm<type>" to pick the shift
// fragment. The first three patterns select the 64-bit SHRN forms from
// trunc(shift). The last three select the high forms when the truncated
// value is combined (Neon_combine) after an existing 64-bit value, which is
// first placed in the low half of a 128-bit register with SUBREG_TO_REG.
2031 multiclass Neon_shiftNarrow_patterns<string shr> {
2032   def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2033               imm:$Imm))),
2034             (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2035   def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2036               imm:$Imm))),
2037             (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2038   def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2039               imm:$Imm))),
2040             (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2041
2042   def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
2043               (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2044                 VPR128:$Rn, imm:$Imm)))))),
2045             (SHRNvvi_16B (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2046                          VPR128:$Rn, imm:$Imm)>;
2047   def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
2048               (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2049                 VPR128:$Rn, imm:$Imm)))))),
2050             (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2051                         VPR128:$Rn, imm:$Imm)>;
2052   def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
2053               (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2054                 VPR128:$Rn, imm:$Imm)))))),
2055             (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2056                         VPR128:$Rn, imm:$Imm)>;
2057 }
2058
// Selection patterns for narrowing shifts expressed as a single operator
// <op> (an intrinsic in the instantiations below) rather than as srl/sra +
// trunc. <prefix> is the instruction-family name; the concrete instruction is
// found by appending the arrangement suffix and casting to Instruction.
2059 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
  // Plain narrowing: 128-bit source, 64-bit result.
2060   def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)),
2061             (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2062   def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)),
2063             (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2064   def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)),
2065             (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2066
  // Narrowed value combined with an existing 64-bit $src; $src is widened to
  // a 128-bit register with SUBREG_TO_REG and passed as the first operand.
2067   def : Pat<(Neon_combine (v1i64 VPR64:$src),
2068                 (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
2069             (!cast<Instruction>(prefix # "_16B")
2070                 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2071                 VPR128:$Rn, imm:$Imm)>;
2072   def : Pat<(Neon_combine (v1i64 VPR64:$src),
2073                 (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
2074             (!cast<Instruction>(prefix # "_8H")
2075                 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2076                 VPR128:$Rn, imm:$Imm)>;
2077   def : Pat<(Neon_combine (v1i64 VPR64:$src),
2078                 (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
2079             (!cast<Instruction>(prefix # "_4S")
2080                   (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2081                   VPR128:$Rn, imm:$Imm)>;
2082 }
2083
// Instantiate the SHRN selection patterns for both the logical ("lshr") and
// the arithmetic ("ashr") shift fragments.
2084 defm : Neon_shiftNarrow_patterns<"lshr">;
2085 defm : Neon_shiftNarrow_patterns<"ashr">;
2086
// Map each narrowing-shift intrinsic to its instruction family. The second
// argument must be the exact defm name of the family, because the multiclass
// resolves instructions by string concatenation and !cast.
2087 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2088 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2089 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2090 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2091 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2092 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2093 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2094
2095 // Convert between fixed-point and floating-point
// One vector conversion instruction taking a register and an immediate.
//   q, u, opcode : forwarded to NeonI_2VShiftImm (defined outside this excerpt)
//   asmop, T     : mnemonic and arrangement suffix used for both $Rd and $Rn
//   VPRC         : register operand class for source and destination
//   DestTy/SrcTy : result and source vector types of the selection pattern
//   ImmTy        : operand type of $Imm
//   IntOp        : operator matched as (IntOp (SrcTy $Rn), (i32 imm:$Imm))
2096 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2097                 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2098                 Operand ImmTy, SDPatternOperator IntOp>
2099   : NeonI_2VShiftImm<q, u, opcode,
2100                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2101                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2102                      [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
2103                        (i32 imm:$Imm))))],
2104                      NoItinerary>;
2105
// Integer-vector to float-vector conversions with an immediate, in the 2s, 4s
// and 2d arrangements (v2i32->v2f32, v4i32->v4f32, v2i64->v2f64).
// The 32-bit element forms fix Inst{22-21} to 0b01 and the 64-bit form fixes
// Inst{22} to 1 (presumably the leading bits of the immediate field that
// identify the element size -- confirm against NeonI_2VShiftImm).
2106 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2107                               SDPatternOperator IntOp> {
2108   def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2109                       shr_imm32, IntOp> {
2110     let Inst{22-21} = 0b01;
2111   }
2112
2113   def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2114                       shr_imm32, IntOp> {
2115     let Inst{22-21} = 0b01;
2116   }
2117
2118   def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
2119                       shr_imm64, IntOp> {
2120     let Inst{22} = 0b1;
2121   }
2122 }
2123
// Float-vector to integer-vector conversions with an immediate; identical to
// NeonI_N2VCvt_Fx2fp except that the destination and source types are swapped
// (v2f32->v2i32, v4f32->v4i32, v2f64->v2i64).
2124 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2125                               SDPatternOperator IntOp> {
2126   def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2127                       shr_imm32, IntOp> {
2128     let Inst{22-21} = 0b01;
2129   }
2130
2131   def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2132                       shr_imm32, IntOp> {
2133     let Inst{22-21} = 0b01;
2134   }
2135
2136   def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
2137                       shr_imm64, IntOp> {
2138     let Inst{22} = 0b1;
2139   }
2140 }
2141
2142 // Convert fixed-point to floating-point
// scvtf (u = 0) and ucvtf (u = 1) share opcode 0b11100 and are selected from
// the ARM NEON vcvtfxs2fp / vcvtfxu2fp intrinsics.
2143 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2144                                    int_arm_neon_vcvtfxs2fp>;
2145 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2146                                    int_arm_neon_vcvtfxu2fp>;
2147
2148 // Convert floating-point to fixed-point
// fcvtzs (u = 0) and fcvtzu (u = 1) share opcode 0b11111 and are selected
// from the ARM NEON vcvtfp2fxs / vcvtfp2fxu intrinsics.
2149 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2150                                    int_arm_neon_vcvtfp2fxs>;
2151 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2152                                    int_arm_neon_vcvtfp2fxu>;
2153
// Fragments that apply the extension node <ext> to Neon_top16B/8H/4S of a
// 128-bit vector, giving v8i16 / v4i32 / v2i64. The Neon_top* fragments are
// defined outside this excerpt (presumably they extract the upper 64 bits).
// The suffix names the type being extended (_v8i8, _v4i16, _v2i32).
2154 multiclass Neon_sshll2_0<SDNode ext>
2155 {
2156   def _v8i8  : PatFrag<(ops node:$Rn),
2157                        (v8i16 (ext (v8i8 (Neon_top16B node:$Rn))))>;
2158   def _v4i16 : PatFrag<(ops node:$Rn),
2159                        (v4i32 (ext (v4i16 (Neon_top8H node:$Rn))))>;
2160   def _v2i32 : PatFrag<(ops node:$Rn),
2161                        (v2i64 (ext (v2i32 (Neon_top4S node:$Rn))))>;
2162 }
2163
// Sign- and zero-extending variants; used as the `ext` argument of the
// long2/wide2 instruction multiclasses below.
2164 defm NI_sext_high : Neon_sshll2_0<sext>;
2165 defm NI_zext_high : Neon_sshll2_0<zext>;
2166
2167 // The following definitions are for instruction class (3V Diff)
2168
2169 // normal long/long2 pattern
// Both source operands have type OpTy in register class OpVPR; each is passed
// through `ext` to ResTy before `opnode` combines them. The result is always
// a 128-bit register. ResS/OpS are the arrangement suffixes printed for the
// destination and the sources; q, u, size and opcode go to NeonI_3VDiff.
2170 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2171                  string asmop, string ResS, string OpS,
2172                  SDPatternOperator opnode, SDPatternOperator ext,
2173                  RegisterOperand OpVPR,
2174                  ValueType ResTy, ValueType OpTy>
2175   : NeonI_3VDiff<q, u, size, opcode,
2176                  (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2177                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2178                  [(set (ResTy VPR128:$Rd),
2179                     (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2180                                    (ResTy (ext (OpTy OpVPR:$Rm))))))],
2181                  NoItinerary>;
2182
// Signed "long" forms: 64-bit sources are sign-extended (sext) before
// `opnode`. q is 0 and size steps 0b00/0b01/0b10 for 8b/4h/2s sources.
// Commutable is applied to all three records through isCommutable.
2183 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2184                         string asmop, SDPatternOperator opnode,
2185                         bit Commutable = 0>
2186 {
2187   let isCommutable = Commutable in {
2188     def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2189                            opnode, sext, VPR64, v8i16, v8i8>;
2190     def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2191                            opnode, sext, VPR64, v4i32, v4i16>;
2192     def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2193                            opnode, sext, VPR64, v2i64, v2i32>;
2194   }
2195 }
2196
// Signed "long2" forms: q is 1, sources are full 128-bit registers and the
// extension is done by the NI_sext_high_* fragments instead of plain sext.
2197 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode,
2198                          string asmop, SDPatternOperator opnode,
2199                          bit Commutable = 0>
2200 {
2201   let isCommutable = Commutable in {
2202     def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2203                             opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2204     def _4s8h  : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2205                             opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2206     def _2d4s  : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2207                             opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2208   }
2209 }
2210
// Unsigned "long" forms: same layout as NeonI_3VDL_s but the 64-bit sources
// are zero-extended (zext) before `opnode`.
2211 multiclass NeonI_3VDL_u<bit u, bits<4> opcode,
2212                           string asmop, SDPatternOperator opnode,
2213                           bit Commutable = 0>
2214 {
2215   let isCommutable = Commutable in {
2216     def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2217                            opnode, zext, VPR64, v8i16, v8i8>;
2218     def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2219                            opnode, zext, VPR64, v4i32, v4i16>;
2220     def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2221                            opnode, zext, VPR64, v2i64, v2i32>;
2222   }
2223 }
2224
// Unsigned "long2" forms: q is 1, sources are 128-bit registers and the
// extension is done by the NI_zext_high_* fragments.
2225 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode,
2226                            string asmop, SDPatternOperator opnode,
2227                            bit Commutable = 0>
2228 {
2229   let isCommutable = Commutable in {
2230     def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2231                             opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2232     def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2233                            opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2234     def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2235                            opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2236   }
2237 }
2238
// Long add/subtract. The add forms (opcode 0b0000) are marked commutable,
// the sub forms (opcode 0b0010) are not. The leading bit argument is 0 for
// the signed and 1 for the unsigned mnemonics.
2239 defm SADDLvvv :  NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2240 defm UADDLvvv :  NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2241
2242 defm SADDL2vvv :  NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2243 defm UADDL2vvv :  NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2244
2245 defm SSUBLvvv :  NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2246 defm USUBLvvv :  NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2247
2248 defm SSUBL2vvv :  NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2249 defm USUBL2vvv :  NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2250
2251 // normal wide/wide2 pattern
// $Rn is already a 128-bit ResTy value and is used as-is; only $Rm (OpTy in
// OpVPR) is passed through `ext` before `opnode`. The asm string therefore
// prints $Rn with the result suffix ResS and $Rm with OpS.
2252 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2253                  string asmop, string ResS, string OpS,
2254                  SDPatternOperator opnode, SDPatternOperator ext,
2255                  RegisterOperand OpVPR,
2256                  ValueType ResTy, ValueType OpTy>
2257   : NeonI_3VDiff<q, u, size, opcode,
2258                  (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2259                  asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2260                  [(set (ResTy VPR128:$Rd),
2261                     (ResTy (opnode (ResTy VPR128:$Rn),
2262                                    (ResTy (ext (OpTy OpVPR:$Rm))))))],
2263                  NoItinerary>;
2264
// Signed "wide" forms: the 64-bit second operand is sign-extended (sext).
// There is no Commutable parameter; the two operands have different widths.
2265 multiclass NeonI_3VDW_s<bit u, bits<4> opcode,
2266                         string asmop, SDPatternOperator opnode>
2267 {
2268   def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2269                          opnode, sext, VPR64, v8i16, v8i8>;
2270   def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2271                          opnode, sext, VPR64, v4i32, v4i16>;
2272   def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2273                          opnode, sext, VPR64, v2i64, v2i32>;
2274 }
2275
// saddw / ssubw: opcodes 0b0001 and 0b0011.
2276 defm SADDWvvv :  NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2277 defm SSUBWvvv :  NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
2278
// Signed "wide2" forms: q is 1, the second operand is a 128-bit register and
// is extended through the NI_sext_high_* fragments.
2279 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode,
2280                          string asmop, SDPatternOperator opnode>
2281 {
2282   def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2283                           opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2284   def _4s8h  : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2285                           opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2286   def _2d4s  : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2287                           opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2288 }
2289
// saddw2 / ssubw2: same opcodes as saddw / ssubw.
2290 defm SADDW2vvv :  NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2291 defm SSUBW2vvv :  NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
2292
// Unsigned "wide" forms: the 64-bit second operand is zero-extended (zext).
2293 multiclass NeonI_3VDW_u<bit u, bits<4> opcode,
2294                         string asmop, SDPatternOperator opnode>
2295 {
2296   def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2297                          opnode, zext, VPR64, v8i16, v8i8>;
2298   def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2299                          opnode, zext, VPR64, v4i32, v4i16>;
2300   def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2301                          opnode, zext, VPR64, v2i64, v2i32>;
2302 }
2303
// uaddw / usubw: opcodes 0b0001 and 0b0011 with the leading bit set to 1.
2304 defm UADDWvvv :  NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2305 defm USUBWvvv :  NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
2306
// Unsigned "wide2" forms: q is 1, the second operand is a 128-bit register
// and is extended through the NI_zext_high_* fragments.
2307 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode,
2308                            string asmop, SDPatternOperator opnode>
2309 {
2310   def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2311                           opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2312   def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2313                          opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2314   def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2315                          opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2316 }
2317
// uaddw2 / usubw2: same opcodes as uaddw / usubw.
2318 defm UADDW2vvv :  NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2319 defm USUBW2vvv :  NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2320
2321 // Get the high half part of the vector element.
// Each fragment shifts every element right (srl) by half its width -- 8, 16
// or 32 bits -- and truncates to the half-width element type. The suffix
// names the source arrangement (_8h, _4s, _2d).
2322 multiclass NeonI_get_high
2323 {
2324   def _8h : PatFrag<(ops node:$Rn),
2325                     (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2326                                              (v8i16 (Neon_dupImm 8))))))>;
2327   def _4s : PatFrag<(ops node:$Rn),
2328                     (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2329                                               (v4i32 (Neon_dupImm 16))))))>;
2330   def _2d : PatFrag<(ops node:$Rn),
2331                     (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2332                                               (v2i64 (Neon_dupImm 32))))))>;
2333 }
2334
// Produces NI_get_hi_8h, NI_get_hi_4s and NI_get_hi_2d.
2335 defm NI_get_hi : NeonI_get_high;
2336
2337 // pattern for addhn/subhn with 2 operands
// Two 128-bit sources of type OpTy are combined with `opnode`; `get_hi` then
// reduces the OpTy result to the 64-bit ResTy written to $Rd. ResS/OpS are
// the arrangement suffixes printed for the destination and the sources.
2338 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2339                            string asmop, string ResS, string OpS,
2340                            SDPatternOperator opnode, SDPatternOperator get_hi,
2341                            ValueType ResTy, ValueType OpTy>
2342   : NeonI_3VDiff<q, u, size, opcode,
2343                  (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2344                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2345                  [(set (ResTy VPR64:$Rd),
2346                     (ResTy (get_hi
2347                       (OpTy (opnode (OpTy VPR128:$Rn),
2348                                     (OpTy VPR128:$Rm))))))],
2349                  NoItinerary>;
2350
// Multiclass of the same name as the class it instantiates. Emits the three
// narrowing arrangements (8h->8b, 4s->4h, 2d->2s), pairing each with the
// matching NI_get_hi_* fragment. Commutable is applied through isCommutable.
2351 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode,
2352                                 string asmop, SDPatternOperator opnode,
2353                                 bit Commutable = 0>
2354 {
2355   let isCommutable = Commutable in {
2356     def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2357                                      opnode, NI_get_hi_8h, v8i8, v8i16>;
2358     def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2359                                      opnode, NI_get_hi_4s, v4i16, v4i32>;
2360     def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2361                                      opnode, NI_get_hi_2d, v2i32, v2i64>;
2362   }
2363 }
2364
// addhn is commutable (add); subhn is not (sub).
2365 defm ADDHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2366 defm SUBHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2367
2368 // pattern for operation with 2 operands
// Generic two-source instruction whose result and operand register classes
// and types are independent parameters, so the same class serves both
// narrowing (VPR64 result) and long (VPR128 result) users in this file.
// `opnode` is applied directly to the two OpTy operands.
2369 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2370                     string asmop, string ResS, string OpS,
2371                     SDPatternOperator opnode,
2372                     RegisterOperand ResVPR, RegisterOperand OpVPR,
2373                     ValueType ResTy, ValueType OpTy>
2374   : NeonI_3VDiff<q, u, size, opcode,
2375                  (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2376                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2377                  [(set (ResTy ResVPR:$Rd),
2378                     (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2379                  NoItinerary>;
2380
2381 // normal narrow pattern
// 128-bit sources, 64-bit result; `opnode` produces the narrow type directly.
2382 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode,
2383                           string asmop, SDPatternOperator opnode,
2384                           bit Commutable = 0>
2385 {
2386   let isCommutable = Commutable in {
2387     def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2388                               opnode, VPR64, VPR128, v8i8, v8i16>;
2389     def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2390                               opnode, VPR64, VPR128, v4i16, v4i32>;
2391     def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2392                               opnode, VPR64, VPR128, v2i32, v2i64>;
2393   }
2394 }
2395
// raddhn / rsubhn reuse the addhn / subhn opcodes with the leading bit set
// to 1 and are selected from the ARM NEON vraddhn / vrsubhn intrinsics.
2396 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2397 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2398
2399 // pattern for acle intrinsic with 3 operands
// Instruction with an extra 128-bit $src input tied to $Rd. $src does not
// appear in the asm string. The pattern list is empty, so selection patterns
// have to be written separately, and neverHasSideEffects is set explicitly
// (there is no pattern from which it could be inferred).
2400 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2401                      string asmop, string ResS, string OpS>
2402   : NeonI_3VDiff<q, u, size, opcode,
2403                  (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2404                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2405                  [], NoItinerary> {
2406   let Constraints = "$src = $Rd";
2407   let neverHasSideEffects = 1;
2408 }
2409
// The three "2" (q = 1) narrowing arrangements of NeonI_3VDN_3Op:
// 8h->16b, 4s->8h and 2d->4s.
2410 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode,
2411                              string asmop> {
2412   def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2413   def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2414   def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
2415 }
2416
// Pattern-less instruction definitions; opcodes match the non-"2" forms.
2417 defm ADDHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2418 defm SUBHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2419
2420 defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2421 defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2422
2423 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
2424 // part.
// Matches Neon_combine of an existing 64-bit $src with the (bitconverted)
// DstTy result of `coreop` on two SrcTy operands, and emits INST with $src
// widened to a 128-bit register via SUBREG_TO_REG as its first operand.
2425 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2426                         SDPatternOperator coreop>
2427   : Pat<(Neon_combine (v1i64 VPR64:$src),
2428                       (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2429                                                         (SrcTy VPR128:$Rm)))))),
2430         (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2431               VPR128:$Rn, VPR128:$Rm)>;
2432
// For addhn2/subhn2 the core operation is written inline as a BinOpFrag
// (defined outside this excerpt) wrapping NI_get_hi_* around add/sub; for
// raddhn2/rsubhn2 the intrinsic itself is the core operation.
2433 // addhn2 patterns
2434 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8,  v8i16,
2435           BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2436 def : NarrowHighHalfPat<ADDHN2vvv_8h4s,  v4i16, v4i32,
2437           BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2438 def : NarrowHighHalfPat<ADDHN2vvv_4s2d,  v2i32, v2i64,
2439           BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
2440
2441 // subhn2 patterns
2442 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8,  v8i16,
2443           BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2444 def : NarrowHighHalfPat<SUBHN2vvv_8h4s,  v4i16, v4i32,
2445           BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2446 def : NarrowHighHalfPat<SUBHN2vvv_4s2d,  v2i32, v2i64,
2447           BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
2448
2449 // raddhn2 patterns
2450 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vraddhn>;
2451 def : NarrowHighHalfPat<RADDHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vraddhn>;
2452 def : NarrowHighHalfPat<RADDHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vraddhn>;
2453
2454 // rsubhn2 patterns
2455 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vrsubhn>;
2456 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vrsubhn>;
2457 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vrsubhn>;
2458
2459 // Patterns that need to extend the result
// `opnode` is applied to two OpTy operands and yields OpSTy; that value is
// then zero-extended (zext) to the 128-bit ResTy. OpSTy is separate from OpTy
// so that the operator may return a different type than its inputs (the
// "high half" users in this file pass v16i8 inputs with a v8i8 result).
2460 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2461                      string asmop, string ResS, string OpS,
2462                      SDPatternOperator opnode,
2463                      RegisterOperand OpVPR,
2464                      ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2465   : NeonI_3VDiff<q, u, size, opcode,
2466                  (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2467                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2468                  [(set (ResTy VPR128:$Rd),
2469                     (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2470                                                 (OpTy OpVPR:$Rm))))))],
2471                  NoItinerary>;
2472
// 64-bit-source forms of NeonI_3VDL_Ext; the operator's input and output
// types are the same here (OpTy == OpSTy).
2473 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode,
2474                            string asmop, SDPatternOperator opnode,
2475                            bit Commutable = 0>
2476 {
2477   let isCommutable = Commutable in {
2478     def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2479                                opnode, VPR64, v8i16, v8i8, v8i8>;
2480     def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2481                                opnode, VPR64, v4i32, v4i16, v4i16>;
2482     def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2483                                opnode, VPR64, v2i64, v2i32, v2i32>;
2484   }
2485 }
2486
// sabdl / uabdl: the vabds / vabdu intrinsic result is zero-extended in both
// cases (the class hard-codes zext).
2487 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
2488 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
2489
// Binary fragments that apply <op> to Neon_top16B / Neon_top8H / Neon_top4S
// of both operands. The Neon_top* fragments are defined outside this excerpt
// (presumably they select the upper 64 bits of a 128-bit vector).
2490 multiclass NeonI_Op_High<SDPatternOperator op>
2491 {
2492   def _16B : PatFrag<(ops node:$Rn, node:$Rm),
2493                      (op (Neon_top16B node:$Rn), (Neon_top16B node:$Rm))>;
2494   def _8H  : PatFrag<(ops node:$Rn, node:$Rm),
2495                      (op (Neon_top8H node:$Rn), (Neon_top8H node:$Rm))>;
2496   def _4S  : PatFrag<(ops node:$Rn, node:$Rm),
2497                      (op (Neon_top4S node:$Rn), (Neon_top4S node:$Rm))>;
2498 }
2499
// The resulting names (e.g. "NI_sabdl_hi") are passed as strings to the "2"
// multiclasses below, which append _16B/_8H/_4S and !cast to PatFrag.
2500 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
2501 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
2502 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
2503 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
2504 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
2505 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
2506
// "2" (q = 1) forms of NeonI_3VDL_Ext: 128-bit sources, with the operator
// given by name (<opnode> # "_16B"/"_8H"/"_4S") and resolved through !cast.
// NOTE(review): the record suffixes are _8h8b/_4s4h/_2d2s although the
// printed source arrangements are 16b/8h/4s (NeonI_3VDL2_s uses _8h16b etc.),
// and despite the "_u" in its name the multiclass is instantiated for both
// sabdl2 and uabdl2. Renaming would change the generated instruction names,
// so this is left as-is -- confirm no users before changing.
2507 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode,
2508                             string asmop, string opnode,
2509                             bit Commutable = 0>
2510 {
2511   let isCommutable = Commutable in {
2512     def _8h8b  : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2513                                 !cast<PatFrag>(opnode # "_16B"),
2514                                 VPR128, v8i16, v16i8, v8i8>;
2515     def _4s4h  : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2516                                 !cast<PatFrag>(opnode # "_8H"),
2517                                 VPR128, v4i32, v8i16, v4i16>;
2518     def _2d2s  : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2519                                 !cast<PatFrag>(opnode # "_4S"),
2520                                 VPR128, v2i64, v4i32, v2i32>;
2521   }
2522 }
2523
2524 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
2525 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
2526
2527 // For patterns that need two chained operators.
// Accumulating form: `subop` is applied to the two OpTy sources (yielding
// OpSTy), the result is zero-extended to ResTy, and `opnode` combines it with
// the 128-bit accumulator $src. $src is tied to $Rd and is not printed in the
// asm string.
2528 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
2529                      string asmop, string ResS, string OpS, 
2530                      SDPatternOperator opnode, SDPatternOperator subop,
2531                      RegisterOperand OpVPR,
2532                      ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2533   : NeonI_3VDiff<q, u, size, opcode,
2534                  (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2535                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, 
2536                  [(set (ResTy VPR128:$Rd),
2537                     (ResTy (opnode
2538                       (ResTy VPR128:$src), 
2539                       (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
2540                                                  (OpTy OpVPR:$Rm))))))))],
2541                  NoItinerary> {
2542   let Constraints = "$src = $Rd";
2543 }
2544
// 64-bit-source accumulating forms (q = 0); `subop` is passed directly as an
// operator and its input and output types coincide.
2545 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode,
2546                              string asmop, SDPatternOperator opnode,
2547                              SDPatternOperator subop>
2548 {
2549   def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2550                              opnode, subop, VPR64, v8i16, v8i8, v8i8>;
2551   def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2552                              opnode, subop, VPR64, v4i32, v4i16, v4i16>;
2553   def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2554                              opnode, subop, VPR64, v2i64, v2i32, v2i32>;
2555 }
2556
// sabal / uabal: accumulator + zext(vabds / vabdu of the sources).
2557 defm SABALvvv :  NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
2558                                    add, int_arm_neon_vabds>;
2559 defm UABALvvv :  NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
2560                                    add, int_arm_neon_vabdu>;
2561
// 128-bit-source accumulating forms (q = 1); <subop> is a fragment-name
// prefix that is completed with _16B/_8H/_4S and resolved through !cast.
// NOTE(review): the record suffixes are _8h8b/_4s4h/_2d2s although the
// printed source arrangements are 16b/8h/4s; kept because renaming would
// change the generated instruction names.
2562 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode,
2563                               string asmop, SDPatternOperator opnode,
2564                               string subop>
2565 {
2566   def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2567                              opnode, !cast<PatFrag>(subop # "_16B"), 
2568                              VPR128, v8i16, v16i8, v8i8>;
2569   def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2570                              opnode, !cast<PatFrag>(subop # "_8H"), 
2571                              VPR128, v4i32, v8i16, v4i16>;
2572   def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2573                              opnode, !cast<PatFrag>(subop # "_4S"), 
2574                              VPR128, v2i64, v4i32, v2i32>;
2575 }
2576
2577 defm SABAL2vvv :  NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
2578                                      "NI_sabdl_hi">;
2579 defm UABAL2vvv :  NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
2580                                      "NI_uabdl_hi">;
2581
2582 // Long pattern with 2 operands
// 64-bit sources, 128-bit result; `opnode` itself produces the wide type, so
// no explicit extension node appears in the pattern.
2583 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode,
2584                           string asmop, SDPatternOperator opnode,
2585                           bit Commutable = 0>
2586 {
2587   let isCommutable = Commutable in {
2588     def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2589                               opnode, VPR128, VPR64, v8i16, v8i8>;
2590     def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2591                               opnode, VPR128, VPR64, v4i32, v4i16>;
2592     def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2593                               opnode, VPR128, VPR64, v2i64, v2i32>;
2594   }
2595 }
2596
// smull / umull, selected from the ARM NEON vmulls / vmullu intrinsics.
2597 defm SMULLvvv :  NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
2598 defm UMULLvvv :  NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
2599
// Two-source instruction with all three registers 128 bits wide; `opnode`
// maps two OpTy operands directly to ResTy. Used for the "2" multiply-long
// forms, where `opnode` is a fragment that works on the upper halves.
2600 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
2601                            string asmop, string ResS, string OpS,
2602                            SDPatternOperator opnode,
2603                            ValueType ResTy, ValueType OpTy>
2604   : NeonI_3VDiff<q, u, size, opcode,
2605                  (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2606                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2607                  [(set (ResTy VPR128:$Rd),
2608                     (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
2609                  NoItinerary>;
2610
2611
// The three q = 1 arrangements of NeonI_3VDL2_2Op_mull. <opnode> is a
// fragment-name prefix completed with _16B/_8H/_4S and resolved via !cast.
2612 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode,
2613                                    string asmop, 
2614                                    string opnode,
2615                                    bit Commutable = 0>
2616 {
2617   let isCommutable = Commutable in {
2618     def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2619                                       !cast<PatFrag>(opnode # "_16B"),
2620                                       v8i16, v16i8>;
2621     def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2622                                      !cast<PatFrag>(opnode # "_8H"),
2623                                      v4i32, v8i16>;
2624     def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2625                                      !cast<PatFrag>(opnode # "_4S"),
2626                                      v2i64, v4i32>;
2627   }
2628 }
2629
// smull2 / umull2 use the NI_smull_hi_* / NI_umull_hi_* fragments.
2630 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
2631                                          "NI_smull_hi", 1>;
2632 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
2633                                          "NI_umull_hi", 1>;
2634
2635 // Long pattern with 3 operands
// `opnode` is a three-operand operator taking the 128-bit accumulator $src
// and the two 64-bit sources, in that order. $src is tied to $Rd and is not
// printed in the asm string.
2636 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2637                      string asmop, string ResS, string OpS,
2638                      SDPatternOperator opnode,
2639                      ValueType ResTy, ValueType OpTy>
2640   : NeonI_3VDiff<q, u, size, opcode,
2641                  (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
2642                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2643                  [(set (ResTy VPR128:$Rd),
2644                     (ResTy (opnode
2645                       (ResTy VPR128:$src),
2646                       (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
2647                NoItinerary> {
2648   let Constraints = "$src = $Rd";
2649 }
2650
// The three q = 0 arrangements of NeonI_3VDL_3Op (8b->8h, 4h->4s, 2s->2d).
2651 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode,
2652                              string asmop, SDPatternOperator opnode>
2653 {
2654   def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2655                              opnode, v8i16, v8i8>;
2656   def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2657                              opnode, v4i32, v4i16>;
2658   def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2659                              opnode, v2i64, v2i32>;
2660 }
2661
// Three-operand fragments: accumulator +/- (vmulls | vmullu) of the other two
// operands. Fragment operands are positional: they are declared as
// ($Rd, $Rm, $Rn) but the multiply is written ($Rn, $Rm), so the second and
// third operands reach the intrinsic in swapped order.
// NOTE(review): harmless only if the intrinsic is treated as commutative --
// confirm.
2662 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn),
2663                          (add node:$Rd,
2664                             (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2665
2666 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn),
2667                          (add node:$Rd,
2668                             (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2669
2670 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn),
2671                          (sub node:$Rd,
2672                             (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2673
2674 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn),
2675                          (sub node:$Rd,
2676                             (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2677
2678 defm SMLALvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
2679 defm UMLALvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
2680
2681 defm SMLSLvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
2682 defm UMLSLvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
2683
// Accumulating instruction class whose pattern is built from two operators:
// subop($src, opnode($Rn, $Rm)). OpVPR is the register operand used for both
// $Rn and $Rm (users in this file pass VPR64 or VPR128); $Rd and $src are
// always VPR128.
2684 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
2685                            string asmop, string ResS, string OpS,
2686                            SDPatternOperator subop, SDPatternOperator opnode,
2687                            RegisterOperand OpVPR,
2688                            ValueType ResTy, ValueType OpTy>
2689   : NeonI_3VDiff<q, u, size, opcode,
2690                (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2691                asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2692                [(set (ResTy VPR128:$Rd),
2693                   (ResTy (subop
2694                     (ResTy VPR128:$src),
2695                     (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
2696                NoItinerary> {
       // The accumulator input is tied to the destination register.
2697   let Constraints = "$src = $Rd";
2698 }
2699
// Expands NeonI_3VDL2_3Op_mlas into _8h16b, _4s8h and _2d4s records, all with
// q = 0b1 and VPR128 sources. opnode is a string prefix: each def appends
// "_16B", "_8H" or "_4S" and resolves the name with !cast<PatFrag>.
2700 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode,
2701                                    string asmop, 
2702                                    SDPatternOperator subop,
2703                                    string opnode>
2704 {
2705   def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2706                                     subop, !cast<PatFrag>(opnode # "_16B"),
2707                                     VPR128, v8i16, v16i8>;
2708   def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2709                                    subop, !cast<PatFrag>(opnode # "_8H"), 
2710                                    VPR128, v4i32, v8i16>;
2711   def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2712                                    subop, !cast<PatFrag>(opnode # "_4S"),
2713                                    VPR128, v2i64, v4i32>;
2714 }
2715
// "smlal2"/"umlal2" combine with add, "smlsl2"/"umlsl2" with sub; the inner
// fragment family is NI_smull_hi for u = 0b0 and NI_umull_hi for u = 0b1.
2716 defm SMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
2717                                           add, "NI_smull_hi">;
2718 defm UMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
2719                                           add, "NI_umull_hi">;
2720
2721 defm SMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
2722                                           sub, "NI_smull_hi">;
2723 defm UMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
2724                                           sub, "NI_umull_hi">;
2725
// Expands NeonI_3VDL2_3Op_mlas into _4s4h and _2d2s records with q = 0b0 and
// VPR64 sources. The inner operator is fixed to int_arm_neon_vqdmull; the
// opnode parameter is passed as the outer (subop) operator. No 8h/8b record
// is defined here.
2726 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode,
2727                                     string asmop, SDPatternOperator opnode>
2728 {
2729   def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2730                                    opnode, int_arm_neon_vqdmull,
2731                                    VPR64, v4i32, v4i16>;
2732   def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2733                                    opnode, int_arm_neon_vqdmull,
2734                                    VPR64, v2i64, v2i32>;
2735 }
2736
// "sqdmlal" wraps the product in int_arm_neon_vqadds, "sqdmlsl" in
// int_arm_neon_vqsubs.
2737 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
2738                                            int_arm_neon_vqadds>;
2739 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
2740                                            int_arm_neon_vqsubs>;
2741
// Expands NeonI_3VD_2Op (defined outside this excerpt) into _4s4h and _2d2s
// records with a VPR128 result and VPR64 sources. isCommutable on both
// records is taken from the Commutable parameter, which defaults to 0.
2742 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode,
2743                          string asmop, SDPatternOperator opnode,
2744                          bit Commutable = 0>
2745 {
2746   let isCommutable = Commutable in {
2747     def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2748                               opnode, VPR128, VPR64, v4i32, v4i16>;
2749     def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2750                               opnode, VPR128, VPR64, v2i64, v2i32>;
2751   }
2752 }
2753
// "sqdmull" selects int_arm_neon_vqdmull and is marked commutable.
2754 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
2755                                 int_arm_neon_vqdmull, 1>;
2756
// Expands NeonI_3VDL2_2Op_mull (defined outside this excerpt) into _4s8h and
// _2d4s records with q = 0b1. opnode is a string prefix resolved per record
// with !cast<PatFrag>(opnode # "_8H" / "_4S"). isCommutable comes from the
// Commutable parameter, which defaults to 0.
2757 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode,
2758                                    string asmop, 
2759                                    string opnode,
2760                                    bit Commutable = 0>
2761 {
2762   let isCommutable = Commutable in {
2763     def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2764                                      !cast<PatFrag>(opnode # "_8H"),
2765                                      v4i32, v8i16>;
2766     def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2767                                      !cast<PatFrag>(opnode # "_4S"),
2768                                      v2i64, v4i32>;
2769   }
2770 }
2771
// "sqdmull2" uses the NI_qdmull_hi_8H / NI_qdmull_hi_4S fragments and is
// marked commutable.
2772 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2", 
2773                                            "NI_qdmull_hi", 1>;
2774
// Expands NeonI_3VDL2_3Op_mlas into _4s8h and _2d4s records with q = 0b1 and
// VPR128 sources. The inner operator is NI_qdmull_hi_8H / NI_qdmull_hi_4S
// (referenced directly, not through a string prefix); the opnode parameter is
// passed as the outer (subop) operator.
2775 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode,
2776                                      string asmop, 
2777                                      SDPatternOperator opnode>
2778 {
2779   def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2780                                    opnode, NI_qdmull_hi_8H,
2781                                    VPR128, v4i32, v8i16>;
2782   def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2783                                    opnode, NI_qdmull_hi_4S,
2784                                    VPR128, v2i64, v4i32>;
2785 }
2786
// "sqdmlal2" wraps the product in int_arm_neon_vqadds, "sqdmlsl2" in
// int_arm_neon_vqsubs.
2787 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
2788                                              int_arm_neon_vqadds>;
2789 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
2790                                              int_arm_neon_vqsubs>;
2791
// Expands NeonI_3VD_2Op (defined outside this excerpt) into a single _8h8b
// record: VPR128 result of type v8i16, VPR64 sources of type v8i8.
// isCommutable comes from the Commutable parameter, which defaults to 0.
2792 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode,
2793                                    string asmop, SDPatternOperator opnode,
2794                                    bit Commutable = 0>
2795 {
2796   let isCommutable = Commutable in {
2797     def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2798                               opnode, VPR128, VPR64, v8i16, v8i8>;
2799   }
2800 }
2801
// "pmull" selects int_arm_neon_vmullp and is marked commutable.
2802 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
2803
// Expands NeonI_3VDL2_2Op_mull (defined outside this excerpt) into a single
// _8h16b record with q = 0b1. opnode is a string prefix resolved with
// !cast<PatFrag>(opnode # "_16B"). isCommutable comes from the Commutable
// parameter, which defaults to 0.
2804 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode,
2805                                    string asmop, 
2806                                    string opnode,
2807                                    bit Commutable = 0>
2808 {
2809   let isCommutable = Commutable in {
2810     def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2811                                       !cast<PatFrag>(opnode # "_16B"),
2812                                       v8i16, v16i8>;
2813   }
2814 }
2815
// "pmull2" uses the NI_pmull_hi_16B fragment and is marked commutable.
2816 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2",
2817                                          "NI_pmull_hi", 1>;
2818
2819 // End of implementation for instruction class (3V Diff)
2820
2821 // Scalar Arithmetic
2822
// Scalar three-register record restricted to FPR64 operands. The second
// template argument is fixed to 0b11, the value the BHSD multiclass below
// pairs with FPR64. The pattern list is empty, so instruction selection
// relies on separately defined Pat records.
2823 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
2824   : NeonI_Scalar3Same<u, 0b11, opcode,
2825                 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
2826                 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2827                 [],
2828                 NoItinerary>;
2829
// Defines four scalar three-register records per instantiation: bbb (FPR8),
// hhh (FPR16), sss (FPR32) and ddd (FPR64), with the second template argument
// stepping 0b00..0b11. All four have empty pattern lists and take
// isCommutable from the Commutable parameter, which defaults to 0.
2830 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
2831                                         string asmop, bit Commutable = 0>
2832 {
2833   let isCommutable = Commutable in {
2834     def bbb : NeonI_Scalar3Same<u, 0b00, opcode,
2835                                 (outs FPR8:$Rd), (ins FPR8:$Rn, FPR8:$Rm),
2836                                 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2837                                 [],
2838                                 NoItinerary>;
2839     def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
2840                                 (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
2841                                 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2842                                 [],
2843                                 NoItinerary>;
2844     def sss : NeonI_Scalar3Same<u, 0b10, opcode,
2845                                 (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
2846                                 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2847                                 [],
2848                                 NoItinerary>;
2849     def ddd : NeonI_Scalar3Same<u, 0b11, opcode,
2850                                (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
2851                                !strconcat(asmop, " $Rd, $Rn, $Rm"),
2852                                [],
2853                                NoItinerary>;
2854   }
2855 }
2856
// Selection pattern for a two-operand v1i64 operator: matches
// opnode($Rn, $Rm) with both operands and the result typed v1i64, and emits
// INSTD with the same two registers.
2857 class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD>
2858   : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
2859         (INSTD VPR64:$Rn, VPR64:$Rm)>;
2860
2861 // Scalar Integer Add
     // Only ADDddd is wrapped in isCommutable = 1; SUBddd is defined outside
     // the let block.
2862 let isCommutable = 1 in {
2863 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
2864 }
2865
2866 // Scalar Integer Sub
2867 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
2868
2869 // Pattern for Scalar Integer Add and Sub with D register
     // Generic add / sub on v1i64 select ADDddd / SUBddd.
2870 def : Neon_Scalar_D_size_patterns<add, ADDddd>;
2871 def : Neon_Scalar_D_size_patterns<sub, SUBddd>;
2872
2873 // Scalar Integer Saturating Add (Signed, Unsigned)
     // Each defm yields bbb/hhh/sss/ddd records (e.g. SQADDbbb .. SQADDddd).
     // The add forms pass Commutable = 1, the sub forms pass 0.
2874 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
2875 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
2876
2877 // Scalar Integer Saturating Sub (Signed, Unsigned)
2878 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
2879 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
2880
2881 // Patterns for Scalar Integer Saturating Add, Sub with D register only
     // Only the ddd records get selection patterns here; the bbb/hhh/sss
     // records have none in this section.
2882 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
2883 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
2884 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
2885 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
2886
2887 // Scalar Integer Shift Left (Signed, Unsigned)
     // The plain and rounding shifts exist only as ddd (FPR64) records; the
     // saturating variants are expanded to bbb/hhh/sss/ddd by the multiclass.
     // None of the records in this group is marked commutable.
2888 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
2889 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
2890
2891 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
2892 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
2893 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
2894
2895 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
2896 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
2897 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
2898
2899 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
2900 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
2901 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
2902
2903 // Patterns for Scalar Integer Shift Left, Saturating Shift Left,
2904 // Rounding Shift Left, Rounding Saturating Shift Left with D register only
     // Each line binds one v1i64 operator to the ddd record that implements
     // it.
2905 def : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
2906 def : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
     // Generic shl selects SSHLddd. Do not add a second shl pattern (e.g. for
     // USHLddd): two patterns with an identical source DAG are ambiguous and
     // one of them can never be selected.
2907 def : Neon_Scalar_D_size_patterns<shl, SSHLddd>;
2909 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
2910 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
2911 def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
2912 def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
2913 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
2914 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
2915
2916
2917 //===----------------------------------------------------------------------===//
2918 // Non-Instruction Patterns
2919 //===----------------------------------------------------------------------===//
2920
2921 // 64-bit vector bitcasts...
     // Every pattern below rewrites a bitconvert between two VPR64 vector
     // types to the same $src register retyped; no instruction appears in the
     // result. Patterns are grouped by source type (v8i8, v4i16, v2i32, v2f32,
     // v1i64), one per other destination type.
2922
2923 def : Pat<(v1i64 (bitconvert (v8i8  VPR64:$src))), (v1i64 VPR64:$src)>;
2924 def : Pat<(v2f32 (bitconvert (v8i8  VPR64:$src))), (v2f32 VPR64:$src)>;
2925 def : Pat<(v2i32 (bitconvert (v8i8  VPR64:$src))), (v2i32 VPR64:$src)>;
2926 def : Pat<(v4i16 (bitconvert (v8i8  VPR64:$src))), (v4i16 VPR64:$src)>;
2927
2928 def : Pat<(v1i64 (bitconvert (v4i16  VPR64:$src))), (v1i64 VPR64:$src)>;
2929 def : Pat<(v2i32 (bitconvert (v4i16  VPR64:$src))), (v2i32 VPR64:$src)>;
2930 def : Pat<(v2f32 (bitconvert (v4i16  VPR64:$src))), (v2f32 VPR64:$src)>;
2931 def : Pat<(v8i8  (bitconvert (v4i16  VPR64:$src))), (v8i8 VPR64:$src)>;
2932
2933 def : Pat<(v1i64 (bitconvert (v2i32  VPR64:$src))), (v1i64 VPR64:$src)>;
2934 def : Pat<(v2f32 (bitconvert (v2i32  VPR64:$src))), (v2f32 VPR64:$src)>;
2935 def : Pat<(v4i16 (bitconvert (v2i32  VPR64:$src))), (v4i16 VPR64:$src)>;
2936 def : Pat<(v8i8  (bitconvert (v2i32  VPR64:$src))), (v8i8 VPR64:$src)>;
2937
2938 def : Pat<(v1i64 (bitconvert (v2f32  VPR64:$src))), (v1i64 VPR64:$src)>;
2939 def : Pat<(v2i32 (bitconvert (v2f32  VPR64:$src))), (v2i32 VPR64:$src)>;
2940 def : Pat<(v4i16 (bitconvert (v2f32  VPR64:$src))), (v4i16 VPR64:$src)>;
2941 def : Pat<(v8i8  (bitconvert (v2f32  VPR64:$src))), (v8i8 VPR64:$src)>;
2942
2943 def : Pat<(v2f32 (bitconvert (v1i64  VPR64:$src))), (v2f32 VPR64:$src)>;
2944 def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;
2945 def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
2946 def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;
2947
2948 // ..and 128-bit vector bitcasts...
     // Every pattern below rewrites a bitconvert between two VPR128 vector
     // types to the same $src register retyped; no instruction appears in the
     // result. Patterns are grouped by source type (v16i8, v8i16, v4i32,
     // v4f32, v2i64, v2f64), one per other destination type.
2949
2950 def : Pat<(v2f64 (bitconvert (v16i8  VPR128:$src))), (v2f64 VPR128:$src)>;
2951 def : Pat<(v2i64 (bitconvert (v16i8  VPR128:$src))), (v2i64 VPR128:$src)>;
2952 def : Pat<(v4f32 (bitconvert (v16i8  VPR128:$src))), (v4f32 VPR128:$src)>;
2953 def : Pat<(v4i32 (bitconvert (v16i8  VPR128:$src))), (v4i32 VPR128:$src)>;
2954 def : Pat<(v8i16 (bitconvert (v16i8  VPR128:$src))), (v8i16 VPR128:$src)>;
2955
2956 def : Pat<(v2f64 (bitconvert (v8i16  VPR128:$src))), (v2f64 VPR128:$src)>;
2957 def : Pat<(v2i64 (bitconvert (v8i16  VPR128:$src))), (v2i64 VPR128:$src)>;
2958 def : Pat<(v4i32 (bitconvert (v8i16  VPR128:$src))), (v4i32 VPR128:$src)>;
2959 def : Pat<(v4f32 (bitconvert (v8i16  VPR128:$src))), (v4f32 VPR128:$src)>;
2960 def : Pat<(v16i8 (bitconvert (v8i16  VPR128:$src))), (v16i8 VPR128:$src)>;
2961
2962 def : Pat<(v2f64 (bitconvert (v4i32  VPR128:$src))), (v2f64 VPR128:$src)>;
2963 def : Pat<(v2i64 (bitconvert (v4i32  VPR128:$src))), (v2i64 VPR128:$src)>;
2964 def : Pat<(v4f32 (bitconvert (v4i32  VPR128:$src))), (v4f32 VPR128:$src)>;
2965 def : Pat<(v8i16 (bitconvert (v4i32  VPR128:$src))), (v8i16 VPR128:$src)>;
2966 def : Pat<(v16i8 (bitconvert (v4i32  VPR128:$src))), (v16i8 VPR128:$src)>;
2967
2968 def : Pat<(v2f64 (bitconvert (v4f32  VPR128:$src))), (v2f64 VPR128:$src)>;
2969 def : Pat<(v2i64 (bitconvert (v4f32  VPR128:$src))), (v2i64 VPR128:$src)>;
2970 def : Pat<(v4i32 (bitconvert (v4f32  VPR128:$src))), (v4i32 VPR128:$src)>;
2971 def : Pat<(v8i16 (bitconvert (v4f32  VPR128:$src))), (v8i16 VPR128:$src)>;
2972 def : Pat<(v16i8 (bitconvert (v4f32  VPR128:$src))), (v16i8 VPR128:$src)>;
2973
2974 def : Pat<(v2f64 (bitconvert (v2i64  VPR128:$src))), (v2f64 VPR128:$src)>;
2975 def : Pat<(v4f32 (bitconvert (v2i64  VPR128:$src))), (v4f32 VPR128:$src)>;
2976 def : Pat<(v4i32 (bitconvert (v2i64  VPR128:$src))), (v4i32 VPR128:$src)>;
2977 def : Pat<(v8i16 (bitconvert (v2i64  VPR128:$src))), (v8i16 VPR128:$src)>;
2978 def : Pat<(v16i8 (bitconvert (v2i64  VPR128:$src))), (v16i8 VPR128:$src)>;
2979
2980 def : Pat<(v2i64 (bitconvert (v2f64  VPR128:$src))), (v2i64 VPR128:$src)>;
2981 def : Pat<(v4f32 (bitconvert (v2f64  VPR128:$src))), (v4f32 VPR128:$src)>;
2982 def : Pat<(v4i32 (bitconvert (v2f64  VPR128:$src))), (v4i32 VPR128:$src)>;
2983 def : Pat<(v8i16 (bitconvert (v2f64  VPR128:$src))), (v8i16 VPR128:$src)>;
2984 def : Pat<(v16i8 (bitconvert (v2f64  VPR128:$src))), (v16i8 VPR128:$src)>;
2985
2986
2987 // ...and scalar bitcasts...
     // Bitcasts between vector types and the scalar f64 / f128 types. Each
     // pattern rewrites the bitconvert to the same $src register retyped; no
     // instruction appears in the result.
     // The v1i64 -> v8i8 / v4i16 / v2i32 casts are defined once, together with
     // the other 64-bit vector bitcasts, and are intentionally not repeated
     // here: a second pattern with an identical source DAG is never selected.
2988
     // Vector -> scalar: the source stays a VPR64 / VPR128 operand.
2993 def : Pat<(f64   (bitconvert (v8i8  VPR64:$src))), (f64 VPR64:$src)>;
2994 def : Pat<(f64   (bitconvert (v4i16  VPR64:$src))), (f64 VPR64:$src)>;
2995 def : Pat<(f64   (bitconvert (v2i32  VPR64:$src))), (f64 VPR64:$src)>;
2996 def : Pat<(f64   (bitconvert (v2f32  VPR64:$src))), (f64 VPR64:$src)>;
2997 def : Pat<(f64   (bitconvert (v1i64  VPR64:$src))), (f64 VPR64:$src)>;
2998
2999 def : Pat<(f128  (bitconvert (v16i8  VPR128:$src))), (f128 VPR128:$src)>;
3000 def : Pat<(f128  (bitconvert (v8i16  VPR128:$src))), (f128 VPR128:$src)>;
3001 def : Pat<(f128  (bitconvert (v4i32  VPR128:$src))), (f128 VPR128:$src)>;
3002 def : Pat<(f128  (bitconvert (v2i64  VPR128:$src))), (f128 VPR128:$src)>;
3003 def : Pat<(f128  (bitconvert (v4f32  VPR128:$src))), (f128 VPR128:$src)>;
3004 def : Pat<(f128  (bitconvert (v2f64  VPR128:$src))), (f128 VPR128:$src)>;
3005
     // Scalar -> vector: the source is an FPR64 / FPR128 register.
3006 def : Pat<(v8i8   (bitconvert (f64   FPR64:$src))), (v8i8 FPR64:$src)>;
3007 def : Pat<(v4i16  (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
3008 def : Pat<(v2i32  (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
3009 def : Pat<(v2f32  (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
3010 def : Pat<(v1i64  (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;
3011
3012 def : Pat<(v16i8  (bitconvert (f128   FPR128:$src))), (v16i8 FPR128:$src)>;
3013 def : Pat<(v8i16  (bitconvert (f128   FPR128:$src))), (v8i16 FPR128:$src)>;
3014 def : Pat<(v4i32  (bitconvert (f128   FPR128:$src))), (v4i32 FPR128:$src)>;
3015 def : Pat<(v2i64  (bitconvert (f128   FPR128:$src))), (v2i64 FPR128:$src)>;
3016 def : Pat<(v4f32  (bitconvert (f128   FPR128:$src))), (v4f32 FPR128:$src)>;
3017 def : Pat<(v2f64  (bitconvert (f128   FPR128:$src))), (v2f64 FPR128:$src)>;