1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file describes the AArch64 NEON instruction set.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
17 def Neon_bsl       : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3,
18                       [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
19                       SDTCisSameAs<0, 3>]>>;
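// NEON_BSL performs a bitwise select: the first operand acts as the mask, and
// each result bit is taken from the second operand where the mask bit is set
// and from the third operand where it is clear, i.e.
// Rd = (Mask & Rn) | (~Mask & Rm).  See the BSL patterns further below.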
20
21 // (outs Result), (ins Imm, OpCmode)
22 def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
23
24 def Neon_movi     : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
25
26 def Neon_mvni     : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
27
28 // (outs Result), (ins Imm)
29 def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
30                         [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
31
32 // (outs Result), (ins LHS, RHS, CondCode)
33 def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
34                  [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;
35
36 // (outs Result), (ins LHS, 0/0.0 constant, CondCode)
37 def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
38                  [SDTCisVec<0>,  SDTCisVec<1>]>>;
39
40 // (outs Result), (ins LHS, RHS)
41 def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
42                  [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;
43
44 def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1, 
45                     [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
46
47 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
48                                      SDTCisVT<2, i32>]>;
49 def Neon_sqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
50 def Neon_uqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
51
52
53 //===----------------------------------------------------------------------===//
54 // Multiclasses
55 //===----------------------------------------------------------------------===//
56
57 multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size,  bits<5> opcode,
58                                 string asmop, SDPatternOperator opnode8B,
59                                 SDPatternOperator opnode16B,
60                                 bit Commutable = 0>
61 {
62   let isCommutable = Commutable in {
63     def _8B :  NeonI_3VSame<0b0, u, size, opcode,
64                (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
65                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
66                [(set (v8i8 VPR64:$Rd),
67                   (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
68                NoItinerary>;
69
70     def _16B : NeonI_3VSame<0b1, u, size, opcode,
71                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
72                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
73                [(set (v16i8 VPR128:$Rd),
74                   (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
75                NoItinerary>;
76   }
77
78 }
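// Instantiations of the NeonI_3VSame_* multiclasses append a lane-shape suffix
// to the given prefix, e.g. the "and" instantiation below produces ANDvvv_8B
// and ANDvvv_16B.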
79
80 multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
81                                   string asmop, SDPatternOperator opnode,
82                                   bit Commutable = 0>
83 {
84   let isCommutable = Commutable in {
85     def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
86               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
87               asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
88               [(set (v4i16 VPR64:$Rd),
89                  (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
90               NoItinerary>;
91
92     def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
93               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
94               asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
95               [(set (v8i16 VPR128:$Rd),
96                  (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
97               NoItinerary>;
98
99     def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
100               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
101               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
102               [(set (v2i32 VPR64:$Rd),
103                  (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
104               NoItinerary>;
105
106     def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
107               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
108               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
109               [(set (v4i32 VPR128:$Rd),
110                  (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
111               NoItinerary>;
112   }
113 }
114 multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
115                                   string asmop, SDPatternOperator opnode,
116                                   bit Commutable = 0>
117    : NeonI_3VSame_HS_sizes<u, opcode,  asmop, opnode, Commutable>
118 {
119   let isCommutable = Commutable in {
120     def _8B :  NeonI_3VSame<0b0, u, 0b00, opcode,
121                (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
122                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
123                [(set (v8i8 VPR64:$Rd),
124                   (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
125                NoItinerary>;
126
127     def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
128                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
129                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
130                [(set (v16i8 VPR128:$Rd),
131                   (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
132                NoItinerary>;
133   }
134 }
135
136 multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
137                                    string asmop, SDPatternOperator opnode,
138                                    bit Commutable = 0>
139    : NeonI_3VSame_BHS_sizes<u, opcode,  asmop, opnode, Commutable>
140 {
141   let isCommutable = Commutable in {
142     def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
143               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
144               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
145               [(set (v2i64 VPR128:$Rd),
146                  (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
147               NoItinerary>;
148   }
149 }
150
151 // Multiclass NeonI_3VSame_SD_sizes: operand types are floating-point types,
152 // but result types can be integer or floating-point types.
153 multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
154                                  string asmop, SDPatternOperator opnode2S,
155                                  SDPatternOperator opnode4S,
156                                  SDPatternOperator opnode2D,
157                                  ValueType ResTy2S, ValueType ResTy4S,
158                                  ValueType ResTy2D, bit Commutable = 0>
159 {
160   let isCommutable = Commutable in {
161     def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
162               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
163               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
164               [(set (ResTy2S VPR64:$Rd),
165                  (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
166               NoItinerary>;
167
168     def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
169               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
170               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
171               [(set (ResTy4S VPR128:$Rd),
172                  (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
173               NoItinerary>;
174
175     def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
176               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
177               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
178               [(set (ResTy2D VPR128:$Rd),
179                  (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
180                NoItinerary>;
181   }
182 }
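// Within the two-bit size field, {size, 0b0} selects the single-precision
// .2s/.4s forms and {size, 0b1} selects the double-precision .2d form.  The
// separate result types let the floating-point comparisons below reuse this
// multiclass with integer mask results (v2i32/v4i32/v2i64).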
183
184 //===----------------------------------------------------------------------===//
185 // Instruction Definitions
186 //===----------------------------------------------------------------------===//
187
188 // Vector Arithmetic Instructions
189
190 // Vector Add (Integer and Floating-Point)
191
192 defm ADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
193 defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
194                                      v2f32, v4f32, v2f64, 1>;
195
196 // Vector Sub (Integer and Floating-Point)
197
198 defm SUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
199 defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
200                                      v2f32, v4f32, v2f64, 0>;
201
202 // Vector Multiply (Integer and Floating-Point)
203
204 defm MULvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
205 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
206                                      v2f32, v4f32, v2f64, 1>;
207
208 // Vector Multiply (Polynomial)
209
210 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
211                                     int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
212
213 // Vector Multiply-accumulate and Multiply-subtract (Integer)
214
215 // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with the data type supplied
216 // explicitly and a constraint tying two operands ($src = $Rd).
217 class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
218   RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size, 
219   bits<5> opcode, SDPatternOperator opnode>
220   : NeonI_3VSame<q, u, size, opcode,
221     (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
222     asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
223     [(set (OpTy VPRC:$Rd),
224        (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
225     NoItinerary> {
226   let Constraints = "$src = $Rd";
227 }
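// The "$src = $Rd" constraint ties the accumulator input to the destination
// register, matching the read-modify-write behaviour of MLA/MLS, FMLA/FMLS,
// BSL/BIT/BIF and the UABA/SABA forms defined with this class.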
228
229 def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
230                        (add node:$Ra, (mul node:$Rn, node:$Rm))>;
231
232 def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
233                        (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
234
235
236 def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
237                                              0b0, 0b0, 0b00, 0b10010, Neon_mla>;
238 def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
239                                              0b1, 0b0, 0b00, 0b10010, Neon_mla>;
240 def MLAvvv_4H:  NeonI_3VSame_Constraint_impl<"mla", ".4h",  VPR64,  v4i16,
241                                              0b0, 0b0, 0b01, 0b10010, Neon_mla>;
242 def MLAvvv_8H:  NeonI_3VSame_Constraint_impl<"mla", ".8h",  VPR128, v8i16,
243                                              0b1, 0b0, 0b01, 0b10010, Neon_mla>;
244 def MLAvvv_2S:  NeonI_3VSame_Constraint_impl<"mla", ".2s",  VPR64,  v2i32,
245                                              0b0, 0b0, 0b10, 0b10010, Neon_mla>;
246 def MLAvvv_4S:  NeonI_3VSame_Constraint_impl<"mla", ".4s",  VPR128, v4i32,
247                                              0b1, 0b0, 0b10, 0b10010, Neon_mla>;
248
249 def MLSvvv_8B:  NeonI_3VSame_Constraint_impl<"mls", ".8b",  VPR64,  v8i8,
250                                              0b0, 0b1, 0b00, 0b10010, Neon_mls>;
251 def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
252                                              0b1, 0b1, 0b00, 0b10010, Neon_mls>;
253 def MLSvvv_4H:  NeonI_3VSame_Constraint_impl<"mls", ".4h",  VPR64,  v4i16,
254                                              0b0, 0b1, 0b01, 0b10010, Neon_mls>;
255 def MLSvvv_8H:  NeonI_3VSame_Constraint_impl<"mls", ".8h",  VPR128, v8i16,
256                                              0b1, 0b1, 0b01, 0b10010, Neon_mls>;
257 def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
258                                              0b0, 0b1, 0b10, 0b10010, Neon_mls>;
259 def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
260                                              0b1, 0b1, 0b10, 0b10010, Neon_mls>;
261
262 // Vector Multiply-accumulate and Multiply-subtract (Floating Point)
263
264 def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
265                         (fadd node:$Ra, (fmul node:$Rn, node:$Rm))>;
266
267 def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
268                         (fsub node:$Ra, (fmul node:$Rn, node:$Rm))>;
269
270 let Predicates = [HasNEON, UseFusedMAC] in {
271 def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s",  VPR64,  v2f32,
272                                              0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
273 def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s",  VPR128, v4f32,
274                                              0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
275 def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d",  VPR128, v2f64,
276                                              0b1, 0b0, 0b01, 0b11001, Neon_fmla>;
277
278 def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s",  VPR64,  v2f32,
279                                               0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
280 def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s",  VPR128, v4f32,
281                                              0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
282 def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d",  VPR128, v2f64,
283                                              0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
284 }
285
286 // We're also allowed to match the fma instruction regardless of compile
287 // options.
288 def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
289           (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
290 def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
291           (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
292 def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
293           (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
294
295 def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
296           (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
297 def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
298           (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
299 def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
300           (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
301
302 // Vector Divide (Floating-Point)
303
304 defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
305                                      v2f32, v4f32, v2f64, 0>;
306
307 // Vector Bitwise Operations
308
309 // Vector Bitwise AND
310
311 defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;
312
313 // Vector Bitwise Exclusive OR
314
315 defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;
316
317 // Vector Bitwise OR
318
319 defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
320
321 // ORR is disassembled as MOV when Vn == Vm.
322
323 // Vector Move - register
324 // Alias for ORR when Vn == Vm.
325 // FIXME: This is actually the preferred syntax but TableGen can't deal with
326 // custom printing of aliases.
327 def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
328                     (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
329 def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
330                     (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
331
332 def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
333   ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
334   ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
335   unsigned EltBits;
336   uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
337     OpCmodeConstVal->getZExtValue(), EltBits);
338   return (EltBits == 8 && EltVal == 0xff);
339 }]>;
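// Neon_immAllOnes matches a NEON_MOVIMM node whose decoded modified-immediate
// is the all-ones byte pattern (every byte 0xff), i.e. a vector of all ones.
// It is used below to express bitwise NOT as an XOR with all ones.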
340
341
342 def Neon_not8B  : PatFrag<(ops node:$in),
343                           (xor node:$in, (bitconvert (v8i8 Neon_immAllOnes)))>;
344 def Neon_not16B : PatFrag<(ops node:$in),
345                           (xor node:$in, (bitconvert (v16i8 Neon_immAllOnes)))>;
346
347 def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
348                          (or node:$Rn, (Neon_not8B node:$Rm))>;
349
350 def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
351                           (or node:$Rn, (Neon_not16B node:$Rm))>;
352
353 def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
354                          (and node:$Rn, (Neon_not8B node:$Rm))>;
355
356 def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
357                           (and node:$Rn, (Neon_not16B node:$Rm))>;
358
359
360 // Vector Bitwise OR NOT - register
361
362 defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
363                                    Neon_orn8B, Neon_orn16B, 0>;
364
365 // Vector Bitwise Bit Clear (AND NOT) - register
366
367 defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
368                                    Neon_bic8B, Neon_bic16B, 0>;
369
370 multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
371                                    SDPatternOperator opnode16B,
372                                    Instruction INST8B,
373                                    Instruction INST16B> {
374   def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
375             (INST8B VPR64:$Rn, VPR64:$Rm)>;
376   def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
377             (INST8B VPR64:$Rn, VPR64:$Rm)>;
378   def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
379             (INST8B VPR64:$Rn, VPR64:$Rm)>;
380   def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
381             (INST16B VPR128:$Rn, VPR128:$Rm)>;
382   def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
383             (INST16B VPR128:$Rn, VPR128:$Rm)>;
384   def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
385             (INST16B VPR128:$Rn, VPR128:$Rm)>;
386 }
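// The bitwise operations are element-size agnostic, so these patterns remap
// the remaining 64-bit and 128-bit vector types onto the byte-shaped
// (_8B/_16B) instruction definitions.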
387
388 // Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
389 defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
390 defm : Neon_bitwise2V_patterns<or,  or,  ORRvvv_8B, ORRvvv_16B>;
391 defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
392 defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
393 defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
394
395 // Vector Bitwise Select
396 def BSLvvv_8B  : NeonI_3VSame_Constraint_impl<"bsl", ".8b",  VPR64, v8i8,
397                                               0b0, 0b1, 0b01, 0b00011, Neon_bsl>;
398
399 def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
400                                               0b1, 0b1, 0b01, 0b00011, Neon_bsl>;
401
402 multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
403                                    Instruction INST8B,
404                                    Instruction INST16B> {
405   // Disassociate type from instruction definition
406   def : Pat<(v2i32 (opnode VPR64:$src,VPR64:$Rn, VPR64:$Rm)),
407             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
408   def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
409             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
410   def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
411             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
412   def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
413             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
414   def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
415             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
416   def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
417             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
418
419   // Allow matching the BSL instruction pattern with a non-constant operand
420   def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
421                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
422           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
423   def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
424                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
425           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
426   def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
427                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
428           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
429   def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
430                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
431           (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
432   def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
433                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
434           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
435   def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
436                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
437           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
438   def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
439                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
440           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
441   def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
442                      (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
443           (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
444
445   // Allow matching llvm.arm.* intrinsics.
446   def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
447                     (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
448             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
449   def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
450                     (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
451             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
452   def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
453                     (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
454             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
455   def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
456                     (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
457             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
458   def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
459                     (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
460             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
461   def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
462                     (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
463             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
464   def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
465                     (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
466             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
467   def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
468                     (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
469             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
470   def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
471                     (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
472             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
473   def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
474                     (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
475             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
476   def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
477                     (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
478             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
479 }
480
481 // Additional patterns for bitwise instruction BSL
482 defm: Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;
483
484 def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
485                            (Neon_bsl node:$src, node:$Rn, node:$Rm),
486                            [{ (void)N; return false; }]>;
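// Neon_NoBSLop never matches (its predicate always returns false), so the BIT
// and BIF definitions below carry no selectable pattern; they are provided for
// the assembler and disassembler.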
487
488 // Vector Bitwise Insert if True
489
490 def BITvvv_8B  : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64,   v8i8,
491                    0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
492 def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
493                    0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
494
495 // Vector Bitwise Insert if False
496
497 def BIFvvv_8B  : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64,  v8i8,
498                                 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
499 def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
500                                 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
501
502 // Vector Absolute Difference and Accumulate (Signed, Unsigned)
503
504 def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
505                        (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
506 def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
507                        (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;
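// That is, UABA/SABA compute Rd = Rd + |Rn - Rm| per element, reusing the
// unsigned/signed absolute-difference intrinsics for the difference.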
508
509 // Vector Absolute Difference and Accumulate (Unsigned)
510 def UABAvvv_8B :  NeonI_3VSame_Constraint_impl<"uaba", ".8b",  VPR64,  v8i8,
511                     0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
512 def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
513                     0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
514 def UABAvvv_4H :  NeonI_3VSame_Constraint_impl<"uaba", ".4h",  VPR64,  v4i16,
515                     0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
516 def UABAvvv_8H :  NeonI_3VSame_Constraint_impl<"uaba", ".8h",  VPR128, v8i16,
517                     0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
518 def UABAvvv_2S :  NeonI_3VSame_Constraint_impl<"uaba", ".2s",  VPR64,  v2i32,
519                     0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
520 def UABAvvv_4S :  NeonI_3VSame_Constraint_impl<"uaba", ".4s",  VPR128, v4i32,
521                     0b1, 0b1, 0b10, 0b01111, Neon_uaba>;
522
523 // Vector Absolute Difference and Accumulate (Signed)
524 def SABAvvv_8B :  NeonI_3VSame_Constraint_impl<"saba", ".8b",  VPR64,  v8i8,
525                     0b0, 0b0, 0b00, 0b01111, Neon_saba>;
526 def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
527                     0b1, 0b0, 0b00, 0b01111, Neon_saba>;
528 def SABAvvv_4H :  NeonI_3VSame_Constraint_impl<"saba", ".4h",  VPR64,  v4i16,
529                     0b0, 0b0, 0b01, 0b01111, Neon_saba>;
530 def SABAvvv_8H :  NeonI_3VSame_Constraint_impl<"saba", ".8h",  VPR128, v8i16,
531                     0b1, 0b0, 0b01, 0b01111, Neon_saba>;
532 def SABAvvv_2S :  NeonI_3VSame_Constraint_impl<"saba", ".2s",  VPR64,  v2i32,
533                     0b0, 0b0, 0b10, 0b01111, Neon_saba>;
534 def SABAvvv_4S :  NeonI_3VSame_Constraint_impl<"saba", ".4s",  VPR128, v4i32,
535                     0b1, 0b0, 0b10, 0b01111, Neon_saba>;
536
537
538 // Vector Absolute Difference (Signed, Unsigned)
539 defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
540 defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;
541
542 // Vector Absolute Difference (Floating Point)
543 defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
544                                     int_arm_neon_vabds, int_arm_neon_vabds,
545                                     int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
546
547 // Vector Reciprocal Step (Floating Point)
548 defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
549                                        int_arm_neon_vrecps, int_arm_neon_vrecps,
550                                        int_arm_neon_vrecps,
551                                        v2f32, v4f32, v2f64, 0>;
552
553 // Vector Reciprocal Square Root Step (Floating Point)
554 defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
555                                         int_arm_neon_vrsqrts,
556                                         int_arm_neon_vrsqrts,
557                                         int_arm_neon_vrsqrts,
558                                         v2f32, v4f32, v2f64, 0>;
559
560 // Vector Comparisons
561
562 def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
563                         (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
564 def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
565                          (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
566 def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
567                         (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
568 def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
569                         (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
570 def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
571                         (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
572
573 // NeonI_compare_aliases class: swaps register operands to implement
574 // comparison aliases, e.g., CMLE is an alias for CMGE with operands reversed.
575 class NeonI_compare_aliases<string asmop, string asmlane,
576                             Instruction inst, RegisterOperand VPRC>
577   : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
578                     ", $Rm" # asmlane,
579                   (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
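// For example, "cmls v0.8b, v1.8b, v2.8b" is accepted and encoded as
// "cmhs v0.8b, v2.8b, v1.8b".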
580
581 // Vector Comparisons (Integer)
582
583 // Vector Compare Mask Equal (Integer)
584 let isCommutable = 1 in {
585 defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
586 }
587
588 // Vector Compare Mask Higher or Same (Unsigned Integer)
589 defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;
590
591 // Vector Compare Mask Greater Than or Equal (Integer)
592 defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;
593
594 // Vector Compare Mask Higher (Unsigned Integer)
595 defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;
596
597 // Vector Compare Mask Greater Than (Integer)
598 defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;
599
600 // Vector Compare Mask Bitwise Test (Integer)
601 defm CMTSTvvv:  NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
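// CMTST sets each destination element to all ones if the bitwise AND of the
// corresponding source elements is non-zero, and to all zeros otherwise.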
602
603 // Vector Compare Mask Less or Same (Unsigned Integer)
604 // CMLS is an alias for CMHS with operands reversed.
605 def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b",  CMHSvvv_8B,  VPR64>;
606 def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
607 def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h",  CMHSvvv_4H,  VPR64>;
608 def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h",  CMHSvvv_8H,  VPR128>;
609 def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s",  CMHSvvv_2S,  VPR64>;
610 def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s",  CMHSvvv_4S,  VPR128>;
611 def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d",  CMHSvvv_2D,  VPR128>;
612
613 // Vector Compare Mask Less Than or Equal (Integer)
614 // CMLE is an alias for CMGE with operands reversed.
615 def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b",  CMGEvvv_8B,  VPR64>;
616 def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
617 def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h",  CMGEvvv_4H,  VPR64>;
618 def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h",  CMGEvvv_8H,  VPR128>;
619 def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s",  CMGEvvv_2S,  VPR64>;
620 def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s",  CMGEvvv_4S,  VPR128>;
621 def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d",  CMGEvvv_2D,  VPR128>;
622
623 // Vector Compare Mask Lower (Unsigned Integer)
624 // CMLO is an alias for CMHI with operands reversed.
625 def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b",  CMHIvvv_8B,  VPR64>;
626 def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
627 def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h",  CMHIvvv_4H,  VPR64>;
628 def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h",  CMHIvvv_8H,  VPR128>;
629 def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s",  CMHIvvv_2S,  VPR64>;
630 def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s",  CMHIvvv_4S,  VPR128>;
631 def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d",  CMHIvvv_2D,  VPR128>;
632
633 // Vector Compare Mask Less Than (Integer)
634 // CMLT is an alias for CMGT with operands reversed.
635 def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b",  CMGTvvv_8B,  VPR64>;
636 def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
637 def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h",  CMGTvvv_4H,  VPR64>;
638 def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h",  CMGTvvv_8H,  VPR128>;
639 def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s",  CMGTvvv_2S,  VPR64>;
640 def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s",  CMGTvvv_4S,  VPR128>;
641 def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d",  CMGTvvv_2D,  VPR128>;
642
643
644 def neon_uimm0_asmoperand : AsmOperandClass
645 {
646   let Name = "UImm0";
647   let PredicateMethod = "isUImm<0>";
648   let RenderMethod = "addImmOperands";
649 }
650
651 def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
652   let ParserMatchClass = neon_uimm0_asmoperand;
653   let PrintMethod = "printNeonUImm0Operand";
654
655 }
656
657 multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
658 {
659   def _8B :  NeonI_2VMisc<0b0, u, 0b00, opcode,
660              (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
661              asmop # "\t$Rd.8b, $Rn.8b, $Imm",
662              [(set (v8i8 VPR64:$Rd),
663                 (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
664              NoItinerary>;
665
666   def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
667              (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
668              asmop # "\t$Rd.16b, $Rn.16b, $Imm",
669              [(set (v16i8 VPR128:$Rd),
670                 (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
671              NoItinerary>;
672
673   def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
674             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
675             asmop # "\t$Rd.4h, $Rn.4h, $Imm",
676             [(set (v4i16 VPR64:$Rd),
677                (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
678             NoItinerary>;
679
680   def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
681             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
682             asmop # "\t$Rd.8h, $Rn.8h, $Imm",
683             [(set (v8i16 VPR128:$Rd),
684                (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
685             NoItinerary>;
686
687   def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
688             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
689             asmop # "\t$Rd.2s, $Rn.2s, $Imm",
690             [(set (v2i32 VPR64:$Rd),
691                (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
692             NoItinerary>;
693
694   def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
695             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
696             asmop # "\t$Rd.4s, $Rn.4s, $Imm",
697             [(set (v4i32 VPR128:$Rd),
698                (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
699             NoItinerary>;
700
701   def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
702             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
703             asmop # "\t$Rd.2d, $Rn.2d, $Imm",
704             [(set (v2i64 VPR128:$Rd),
705                (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
706             NoItinerary>;
707 }
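// The instantiations below compare each element against zero; the immediate
// operand is always #0, e.g. "cmeq v0.4s, v1.4s, #0".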
708
709 // Vector Compare Mask Equal to Zero (Integer)
710 defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;
711
712 // Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
713 defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;
714
715 // Vector Compare Mask Greater Than Zero (Signed Integer)
716 defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;
717
718 // Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
719 defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;
720
721 // Vector Compare Mask Less Than Zero (Signed Integer)
722 defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
723
724 // Vector Comparisons (Floating Point)
725
726 // Vector Compare Mask Equal (Floating Point)
727 let isCommutable = 1 in {
728 defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
729                                       Neon_cmeq, Neon_cmeq,
730                                       v2i32, v4i32, v2i64, 0>;
731 }
732
733 // Vector Compare Mask Greater Than Or Equal (Floating Point)
734 defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
735                                       Neon_cmge, Neon_cmge,
736                                       v2i32, v4i32, v2i64, 0>;
737
738 // Vector Compare Mask Greater Than (Floating Point)
739 defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
740                                       Neon_cmgt, Neon_cmgt,
741                                       v2i32, v4i32, v2i64, 0>;
742
743 // Vector Compare Mask Less Than Or Equal (Floating Point)
744 // FCMLE is an alias for FCMGE with operands reversed.
745 def FCMLEvvv_2S  : NeonI_compare_aliases<"fcmle", ".2s",  FCMGEvvv_2S,  VPR64>;
746 def FCMLEvvv_4S  : NeonI_compare_aliases<"fcmle", ".4s",  FCMGEvvv_4S,  VPR128>;
747 def FCMLEvvv_2D  : NeonI_compare_aliases<"fcmle", ".2d",  FCMGEvvv_2D,  VPR128>;
748
749 // Vector Compare Mask Less Than (Floating Point)
750 // FCMLT is an alias for FCMGT with operands reversed.
751 def FCMLTvvv_2S  : NeonI_compare_aliases<"fcmlt", ".2s",  FCMGTvvv_2S,  VPR64>;
752 def FCMLTvvv_4S  : NeonI_compare_aliases<"fcmlt", ".4s",  FCMGTvvv_4S,  VPR128>;
753 def FCMLTvvv_2D  : NeonI_compare_aliases<"fcmlt", ".2d",  FCMGTvvv_2D,  VPR128>;
754
755
756 multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
757                               string asmop, CondCode CC>
758 {
759   def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
760             (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
761             asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
762             [(set (v2i32 VPR64:$Rd),
763                (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))],
764             NoItinerary>;
765
766   def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
767             (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
768             asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
769             [(set (v4i32 VPR128:$Rd),
770                (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
771             NoItinerary>;
772
773   def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
774             (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
775             asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
776             [(set (v2i64 VPR128:$Rd),
777                (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
778             NoItinerary>;
779 }
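// The floating-point zero comparisons take a #0.0 immediate,
// e.g. "fcmeq v0.2s, v1.2s, #0.0".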
780
781 // Vector Compare Mask Equal to Zero (Floating Point)
782 defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
783
784 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
785 defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
786
787 // Vector Compare Mask Greater Than Zero (Floating Point)
788 defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
789
790 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
791 defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
792
793 // Vector Compare Mask Less Than Zero (Floating Point)
794 defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
795
796 // Vector Absolute Comparisons (Floating Point)
797
798 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
799 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
800                                       int_arm_neon_vacged, int_arm_neon_vacgeq,
801                                       int_aarch64_neon_vacgeq,
802                                       v2i32, v4i32, v2i64, 0>;
803
804 // Vector Absolute Compare Mask Greater Than (Floating Point)
805 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
806                                       int_arm_neon_vacgtd, int_arm_neon_vacgtq,
807                                       int_aarch64_neon_vacgtq,
808                                       v2i32, v4i32, v2i64, 0>;
809
810 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
811 // FACLE is an alias for FACGE with operands reversed.
812 def FACLEvvv_2S  : NeonI_compare_aliases<"facle", ".2s",  FACGEvvv_2S,  VPR64>;
813 def FACLEvvv_4S  : NeonI_compare_aliases<"facle", ".4s",  FACGEvvv_4S,  VPR128>;
814 def FACLEvvv_2D  : NeonI_compare_aliases<"facle", ".2d",  FACGEvvv_2D,  VPR128>;
815
816 // Vector Absolute Compare Mask Less Than (Floating Point)
817 // FACLT is an alias for FACGT with operands reversed.
818 def FACLTvvv_2S  : NeonI_compare_aliases<"faclt", ".2s",  FACGTvvv_2S,  VPR64>;
819 def FACLTvvv_4S  : NeonI_compare_aliases<"faclt", ".4s",  FACGTvvv_4S,  VPR128>;
820 def FACLTvvv_2D  : NeonI_compare_aliases<"faclt", ".2d",  FACGTvvv_2D,  VPR128>;
821
822 // Vector Halving Add (Signed and Unsigned Integer)
823 defm SHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
824                                         int_arm_neon_vhadds, 1>;
825 defm UHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
826                                         int_arm_neon_vhaddu, 1>;
827
828 // Vector Halving Sub (Signed and Unsigned Integer)
829 defm SHSUBvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
830                                         int_arm_neon_vhsubs, 0>;
831 defm UHSUBvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
832                                         int_arm_neon_vhsubu, 0>;
833
834 // Vector Rounding Halving Add (Signed and Unsigned Integer)
835 defm SRHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
836                                          int_arm_neon_vrhadds, 1>;
837 defm URHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
838                                          int_arm_neon_vrhaddu, 1>;
839
840 // Vector Saturating Add (Signed and Unsigned Integer)
841 defm SQADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
842                    int_arm_neon_vqadds, 1>;
843 defm UQADDvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
844                    int_arm_neon_vqaddu, 1>;
845
846 // Vector Saturating Sub (Signed and Unsigned Integer)
847 defm SQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
848                    int_arm_neon_vqsubs, 1>;
849 defm UQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
850                    int_arm_neon_vqsubu, 1>;
851
852 // Vector Shift Left (Signed and Unsigned Integer)
853 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
854                  int_arm_neon_vshifts, 1>;
855 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
856                  int_arm_neon_vshiftu, 1>;
857
858 // Vector Saturating Shift Left (Signed and Unsigned Integer)
859 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
860                   int_arm_neon_vqshifts, 1>;
861 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
862                   int_arm_neon_vqshiftu, 1>;
863
864 // Vector Rounding Shift Left (Signed and Unsigned Integer)
865 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
866                   int_arm_neon_vrshifts, 1>;
867 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
868                   int_arm_neon_vrshiftu, 1>;
869
870 // Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
871 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
872                    int_arm_neon_vqrshifts, 1>;
873 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
874                    int_arm_neon_vqrshiftu, 1>;
875
876 // Vector Maximum (Signed and Unsigned Integer)
877 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
878 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
879
880 // Vector Minimum (Signed and Unsigned Integer)
881 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
882 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
883
884 // Vector Maximum (Floating Point)
885 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
886                                      int_arm_neon_vmaxs, int_arm_neon_vmaxs,
887                                      int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
888
889 // Vector Minimum (Floating Point)
890 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
891                                      int_arm_neon_vmins, int_arm_neon_vmins,
892                                      int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
893
894 // Vector maxNum (Floating Point) - prefer a number over a quiet NaN
895 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
896                                        int_aarch64_neon_vmaxnm,
897                                        int_aarch64_neon_vmaxnm,
898                                        int_aarch64_neon_vmaxnm,
899                                        v2f32, v4f32, v2f64, 1>;
900
901 // Vector minNum (Floating Point) - prefer a number over a quiet NaN
902 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
903                                        int_aarch64_neon_vminnm,
904                                        int_aarch64_neon_vminnm,
905                                        int_aarch64_neon_vminnm,
906                                        v2f32, v4f32, v2f64, 1>;
907
908 // Vector Maximum Pairwise (Signed and Unsigned Integer)
909 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
910 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
911
912 // Vector Minimum Pairwise (Signed and Unsigned Integer)
913 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
914 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
915
916 // Vector Maximum Pairwise (Floating Point)
917 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
918                                      int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
919                                      int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
920
921 // Vector Minimum Pairwise (Floating Point)
922 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
923                                      int_arm_neon_vpmins, int_arm_neon_vpmins,
924                                      int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
925
926 // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
927 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
928                                        int_aarch64_neon_vpmaxnm,
929                                        int_aarch64_neon_vpmaxnm,
930                                        int_aarch64_neon_vpmaxnm,
931                                        v2f32, v4f32, v2f64, 1>;
932
933 // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
934 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
935                                        int_aarch64_neon_vpminnm,
936                                        int_aarch64_neon_vpminnm,
937                                        int_aarch64_neon_vpminnm,
938                                        v2f32, v4f32, v2f64, 1>;
939
940 // Vector Addition Pairwise (Integer)
941 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
942
943 // Vector Addition Pairwise (Floating Point)
944 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
945                                        int_arm_neon_vpadd,
946                                        int_arm_neon_vpadd,
947                                        int_arm_neon_vpadd,
948                                        v2f32, v4f32, v2f64, 1>;
949
950 // Vector Saturating Doubling Multiply High
951 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
952                     int_arm_neon_vqdmulh, 1>;
953
954 // Vector Saturating Rounding Doubling Multiply High
955 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
956                      int_arm_neon_vqrdmulh, 1>;
957
958 // Vector Multiply Extended (Floating Point)
959 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
960                                       int_aarch64_neon_vmulx,
961                                       int_aarch64_neon_vmulx,
962                                       int_aarch64_neon_vmulx,
963                                       v2f32, v4f32, v2f64, 1>;
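// FMULX behaves like FMUL except that (+/-)0.0 multiplied by (+/-)infinity
// returns (+/-)2.0 instead of a NaN.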
964
965 // Vector Immediate Instructions
966
967 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
968 {
969   def _asmoperand : AsmOperandClass
970     {
971       let Name = "NeonMovImmShift" # PREFIX;
972       let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
973       let PredicateMethod = "isNeonMovImmShift" # PREFIX;
974     }
975 }
976
977 // Definition of vector immediate shift operands
978
979 // The selectable operands below extract the shift operation information from
980 // the OpCmode field encoded in the immediate.
981 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
982   uint64_t OpCmode = N->getZExtValue();
983   unsigned ShiftImm;
984   unsigned ShiftOnesIn;
985   unsigned HasShift =
986     A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
987   if (!HasShift) return SDValue();
988   return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
989 }]>;
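// The transform above recovers the shift amount from the OpCmode encoding and
// returns it as an i32 target constant; it returns an empty SDValue (no match)
// when the immediate encodes no shift.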
990
991 // Vector immediate shift operands which accept LSL and MSL shift operators,
992 // with shift amounts of 0, 8, 16 or 24 (LSL), 0 or 8 (LSLH),
993 // or 8 or 16 (MSL).
994 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
995 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
996 // LSLH restricts the shift amount to 0 or 8 (out of 0, 8, 16, 24).
997 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
998
999 multiclass neon_mov_imm_shift_operands<string PREFIX,
1000                                        string HALF, string ISHALF, code pred>
1001 {
1002    def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1003     {
1004       let PrintMethod =
1005         "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1006       let DecoderMethod =
1007         "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1008       let ParserMatchClass =
1009         !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
1010     }
1011 }
1012
1013 defm neon_mov_imm_LSL  : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1014   unsigned ShiftImm;
1015   unsigned ShiftOnesIn;
1016   unsigned HasShift =
1017     A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1018   return (HasShift && !ShiftOnesIn);
1019 }]>;
1020
1021 defm neon_mov_imm_MSL  : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1022   unsigned ShiftImm;
1023   unsigned ShiftOnesIn;
1024   unsigned HasShift =
1025     A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1026   return (HasShift && ShiftOnesIn);
1027 }]>;
1028
1029 defm neon_mov_imm_LSLH  : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1030   unsigned ShiftImm;
1031   unsigned ShiftOnesIn;
1032   unsigned HasShift =
1033     A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1034   return (HasShift && !ShiftOnesIn);
1035 }]>;
1036
1037 def neon_uimm1_asmoperand : AsmOperandClass
1038 {
1039   let Name = "UImm1";
1040   let PredicateMethod = "isUImm<1>";
1041   let RenderMethod = "addImmOperands";
1042 }
1043
1044 def neon_uimm2_asmoperand : AsmOperandClass
1045 {
1046   let Name = "UImm2";
1047   let PredicateMethod = "isUImm<2>";
1048   let RenderMethod = "addImmOperands";
1049 }
1050
1051 def neon_uimm8_asmoperand : AsmOperandClass
1052 {
1053   let Name = "UImm8";
1054   let PredicateMethod = "isUImm<8>";
1055   let RenderMethod = "addImmOperands";
1056 }
1057
1058 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1059   let ParserMatchClass = neon_uimm8_asmoperand;
1060   let PrintMethod = "printNeonUImm8Operand";
1061 }
1062
1063 def neon_uimm64_mask_asmoperand : AsmOperandClass
1064 {
1065   let Name = "NeonUImm64Mask";
1066   let PredicateMethod = "isNeonUImm64Mask";
1067   let RenderMethod = "addNeonUImm64MaskOperands";
1068 }
1069
1070 // A 64-bit bytemask in which each byte is either 0x00 or 0xff is encoded as
1071 // an unsigned 8-bit MCOperand value.
1072 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1073   let ParserMatchClass = neon_uimm64_mask_asmoperand;
1074   let PrintMethod = "printNeonUImm64MaskOperand";
1075 }
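// For example, "movi v0.2d, #0xff00ff00ff00ff00" uses this operand, with each
// 0xff/0x00 byte of the mask represented by a single bit of the 8-bit value.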
1076
1077 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1078                                    SDPatternOperator opnode>
1079 {
1080     // shift zeros, per word
1081     def _2S  : NeonI_1VModImm<0b0, op,
1082                               (outs VPR64:$Rd),
1083                               (ins neon_uimm8:$Imm,
1084                                 neon_mov_imm_LSL_operand:$Simm),
1085                               !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1086                               [(set (v2i32 VPR64:$Rd),
1087                                  (v2i32 (opnode (timm:$Imm),
1088                                    (neon_mov_imm_LSL_operand:$Simm))))],
1089                               NoItinerary> {
1090        bits<2> Simm;
1091        let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1092      }
1093
1094     def _4S  : NeonI_1VModImm<0b1, op,
1095                               (outs VPR128:$Rd),
1096                               (ins neon_uimm8:$Imm,
1097                                 neon_mov_imm_LSL_operand:$Simm),
1098                               !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1099                               [(set (v4i32 VPR128:$Rd),
1100                                  (v4i32 (opnode (timm:$Imm),
1101                                    (neon_mov_imm_LSL_operand:$Simm))))],
1102                               NoItinerary> {
1103       bits<2> Simm;
1104       let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1105     }
1106
1107     // shift zeros, per halfword
1108     def _4H  : NeonI_1VModImm<0b0, op,
1109                               (outs VPR64:$Rd),
1110                               (ins neon_uimm8:$Imm,
1111                                 neon_mov_imm_LSLH_operand:$Simm),
1112                               !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
1113                               [(set (v4i16 VPR64:$Rd),
1114                                  (v4i16 (opnode (timm:$Imm),
1115                                    (neon_mov_imm_LSLH_operand:$Simm))))],
1116                               NoItinerary> {
1117       bit  Simm;
1118       let cmode = {0b1, 0b0, Simm, 0b0};
1119     }
1120
1121     def _8H  : NeonI_1VModImm<0b1, op,
1122                               (outs VPR128:$Rd),
1123                               (ins neon_uimm8:$Imm,
1124                                 neon_mov_imm_LSLH_operand:$Simm),
1125                               !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
1126                               [(set (v8i16 VPR128:$Rd),
1127                                  (v8i16 (opnode (timm:$Imm),
1128                                    (neon_mov_imm_LSLH_operand:$Simm))))],
1129                               NoItinerary> {
1130       bit Simm;
1131       let cmode = {0b1, 0b0, Simm, 0b0};
1132      }
1133 }
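// The asm string produced here has the form "<asmop> $Rd.<T>, $Imm$Simm",
// e.g. "movi v0.2s, #0xff, lsl #8" for a move-immediate instantiation; the
// LSL amount is carried in $Simm and folded into the cmode field.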
1134
1135 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1136                                                    SDPatternOperator opnode,
1137                                                    SDPatternOperator neonopnode>
1138 {
1139   let Constraints = "$src = $Rd" in {
1140     // shift zeros, per word
1141     def _2S  : NeonI_1VModImm<0b0, op,
1142                  (outs VPR64:$Rd),
1143                  (ins VPR64:$src, neon_uimm8:$Imm,
1144                    neon_mov_imm_LSL_operand:$Simm),
1145                  !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1146                  [(set (v2i32 VPR64:$Rd),
1147                     (v2i32 (opnode (v2i32 VPR64:$src),
1148                       (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
1149                         neon_mov_imm_LSL_operand:$Simm)))))))],
1150                  NoItinerary> {
1151       bits<2> Simm;
1152       let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1153     }
1154
1155     def _4S  : NeonI_1VModImm<0b1, op,
1156                  (outs VPR128:$Rd),
1157                  (ins VPR128:$src, neon_uimm8:$Imm,
1158                    neon_mov_imm_LSL_operand:$Simm),
1159                  !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1160                  [(set (v4i32 VPR128:$Rd),
1161                     (v4i32 (opnode (v4i32 VPR128:$src),
1162                       (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
1163                         neon_mov_imm_LSL_operand:$Simm)))))))],
1164                  NoItinerary> {
1165       bits<2> Simm;
1166       let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1167     }
1168
1169     // shift zeros, per halfword
1170     def _4H  : NeonI_1VModImm<0b0, op,
1171                  (outs VPR64:$Rd),
1172                  (ins VPR64:$src, neon_uimm8:$Imm,
1173                    neon_mov_imm_LSLH_operand:$Simm),
1174                  !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
1175                  [(set (v4i16 VPR64:$Rd),
1176                     (v4i16 (opnode (v4i16 VPR64:$src),
1177                        (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
1178                           neon_mov_imm_LSLH_operand:$Simm)))))))],
1179                  NoItinerary> {
1180       bit  Simm;
1181       let cmode = {0b1, 0b0, Simm, 0b1};
1182     }
1183
1184     def _8H  : NeonI_1VModImm<0b1, op,
1185                  (outs VPR128:$Rd),
1186                  (ins VPR128:$src, neon_uimm8:$Imm,
1187                    neon_mov_imm_LSLH_operand:$Simm),
1188                  !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
1189                  [(set (v8i16 VPR128:$Rd),
1190                     (v8i16 (opnode (v8i16 VPR128:$src),
1191                       (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
1192                         neon_mov_imm_LSLH_operand:$Simm)))))))],
1193                  NoItinerary> {
1194       bit Simm;
1195       let cmode = {0b1, 0b0, Simm, 0b1};
1196     }
1197   }
1198 }
1199
1200 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1201                                    SDPatternOperator opnode>
1202 {
1203     // shift ones, per word
1204     def _2S  : NeonI_1VModImm<0b0, op,
1205                               (outs VPR64:$Rd),
1206                               (ins neon_uimm8:$Imm,
1207                                 neon_mov_imm_MSL_operand:$Simm),
1208                               !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1209                               [(set (v2i32 VPR64:$Rd),
1210                                  (v2i32 (opnode (timm:$Imm),
1211                                    (neon_mov_imm_MSL_operand:$Simm))))],
1212                               NoItinerary> {
1213       bit Simm;
1214       let cmode = {0b1, 0b1, 0b0, Simm};
1215     }
1216
1217     def _4S  : NeonI_1VModImm<0b1, op,
1218                               (outs VPR128:$Rd),
1219                               (ins neon_uimm8:$Imm,
1220                                 neon_mov_imm_MSL_operand:$Simm),
1221                               !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1222                               [(set (v4i32 VPR128:$Rd),
1223                                  (v4i32 (opnode (timm:$Imm),
1224                                    (neon_mov_imm_MSL_operand:$Simm))))],
1225                               NoItinerary> {
1226       bit Simm;
1227       let cmode = {0b1, 0b1, 0b0, Simm};
1228     }
1229 }
1230
1231 // Vector Move Immediate Shifted
1232 let isReMaterializable = 1 in {
1233 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1234 }
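// For reference, the instructions generated above take an 8-bit immediate
// optionally shifted left by a multiple of 8, e.g.:
//   movi v0.2s, #0xab, lsl #16    // each 32-bit lane becomes 0x00ab0000
//   movi v1.4h, #0xab, lsl #8     // each 16-bit lane becomes 0xab00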
1235
1236 // Vector Move Inverted Immediate Shifted
1237 let isReMaterializable = 1 in {
1238 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1239 }
1240
1241 // Vector Bitwise Bit Clear (AND NOT) - immediate
1242 let isReMaterializable = 1 in {
1243 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1244                                                          and, Neon_mvni>;
1245 }
1246
1247 // Vector Bitwise OR - immediate
1248
1249 let isReMaterializable = 1 in {
1250 defm ORRvi_lsl   : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1251                                                            or, Neon_movi>;
1252 }
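// These immediate forms read and write $Rd ($src is tied to $Rd), e.g.:
//   bic v0.4h, #0xff, lsl #8    // clear the top byte of each 16-bit lane
//   orr v1.2s, #0x1f            // set the low five bits of each 32-bit lane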
1253
1254 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate.
1255 // LowerBUILD_VECTOR favors lowering MOVI over MVNI, so selecting the BIC
1256 // immediate instructions requires additional patterns that transform the
1257 // Neon_movi operands into BIC immediate operands.
1258
1259 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1260   uint64_t OpCmode = N->getZExtValue();
1261   unsigned ShiftImm;
1262   unsigned ShiftOnesIn;
1263   (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1264   // LSLH restricts the shift amount to 0 and 8, which are encoded as 0 and 1.
1265   // Transform encoded shift amount 0 to 1 and 1 to 0.
1266   return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
1267 }]>;
1268
1269 def neon_mov_imm_LSLH_transform_operand
1270   : ImmLeaf<i32, [{
1271     unsigned ShiftImm;
1272     unsigned ShiftOnesIn;
1273     unsigned HasShift =
1274       A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1275     return (HasShift && !ShiftOnesIn); }],
1276   neon_mov_imm_LSLH_transform_XFORM>;
1277
1278 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
1279 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
1280 def : Pat<(v4i16 (and VPR64:$src,
1281             (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1282           (BICvi_lsl_4H VPR64:$src, 0,
1283             neon_mov_imm_LSLH_transform_operand:$Simm)>;
1284
1285 // Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0x00, LSL 8)
1286 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
1287 def : Pat<(v8i16 (and VPR128:$src,
1288             (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1289           (BICvi_lsl_8H VPR128:$src, 0,
1290             neon_mov_imm_LSLH_transform_operand:$Simm)>;
1291
1292
1293 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1294                                    SDPatternOperator neonopnode,
1295                                    Instruction INST4H,
1296                                    Instruction INST8H> {
1297   def : Pat<(v8i8 (opnode VPR64:$src,
1298                     (bitconvert(v4i16 (neonopnode timm:$Imm,
1299                       neon_mov_imm_LSLH_operand:$Simm))))),
1300             (INST4H VPR64:$src, neon_uimm8:$Imm,
1301               neon_mov_imm_LSLH_operand:$Simm)>;
1302   def : Pat<(v1i64 (opnode VPR64:$src,
1303                   (bitconvert(v4i16 (neonopnode timm:$Imm,
1304                     neon_mov_imm_LSLH_operand:$Simm))))),
1305           (INST4H VPR64:$src, neon_uimm8:$Imm,
1306             neon_mov_imm_LSLH_operand:$Simm)>;
1307
1308   def : Pat<(v16i8 (opnode VPR128:$src,
1309                    (bitconvert(v8i16 (neonopnode timm:$Imm,
1310                      neon_mov_imm_LSLH_operand:$Simm))))),
1311           (INST8H VPR128:$src, neon_uimm8:$Imm,
1312             neon_mov_imm_LSLH_operand:$Simm)>;
1313   def : Pat<(v4i32 (opnode VPR128:$src,
1314                    (bitconvert(v8i16 (neonopnode timm:$Imm,
1315                      neon_mov_imm_LSLH_operand:$Simm))))),
1316           (INST8H VPR128:$src, neon_uimm8:$Imm,
1317             neon_mov_imm_LSLH_operand:$Simm)>;
1318   def : Pat<(v2i64 (opnode VPR128:$src,
1319                    (bitconvert(v8i16 (neonopnode timm:$Imm,
1320                      neon_mov_imm_LSLH_operand:$Simm))))),
1321           (INST8H VPR128:$src, neon_uimm8:$Imm,
1322             neon_mov_imm_LSLH_operand:$Simm)>;
1323 }
1324
1325 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1326 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
1327
1328 // Additional patterns for Vector Bitwise OR - immediate
1329 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1330
1331
1332 // Vector Move Immediate Masked
1333 let isReMaterializable = 1 in {
1334 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1335 }
1336
1337 // Vector Move Inverted Immediate Masked
1338 let isReMaterializable = 1 in {
1339 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
1340 }
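// MSL ("shift ones in") fills the vacated low bits with ones rather than
// zeros, e.g.:
//   movi v0.2s, #0xab, msl #8    // each 32-bit lane becomes 0x0000abff
//   mvni v1.2s, #0xab, msl #16   // each 32-bit lane becomes ~0x00abffff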
1341
1342 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1343                                 Instruction inst, RegisterOperand VPRC>
1344   : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
1345                         (inst VPRC:$Rd, neon_uimm8:$Imm,  0), 0b0>;
1346
1347 // Aliases for Vector Move Immediate Shifted
1348 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1349 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1350 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1351 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1352
1353 // Aliases for Vector Move Inverted Immediate Shifted
1354 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1355 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1356 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1357 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1358
1359 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1360 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1361 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1362 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1363 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1364
1365 // Aliases for Vector Bitwise OR - immediate
1366 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1367 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1368 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1369 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1370
1371 //  Vector Move Immediate - per byte
1372 let isReMaterializable = 1 in {
1373 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1374                                (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1375                                "movi\t$Rd.8b, $Imm",
1376                                [(set (v8i8 VPR64:$Rd),
1377                                   (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1378                                 NoItinerary> {
1379   let cmode = 0b1110;
1380 }
1381
1382 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1383                                 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1384                                 "movi\t$Rd.16b, $Imm",
1385                                 [(set (v16i8 VPR128:$Rd),
1386                                    (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1387                                  NoItinerary> {
1388   let cmode = 0b1110;
1389 }
1390 }
1391
1392 // Vector Move Immediate - bytemask, per double word
1393 let isReMaterializable = 1 in {
1394 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1395                                (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
1396                                "movi\t$Rd.2d, $Imm",
1397                                [(set (v2i64 VPR128:$Rd),
1398                                   (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1399                                NoItinerary> {
1400   let cmode = 0b1110;
1401 }
1402 }
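// The only immediates accepted here are 64-bit bytemasks, e.g.:
//   movi v0.2d, #0xff00ff00ff00ff00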
1403
1404 // Vector Move Immediate - bytemask, one doubleword
1405
1406 let isReMaterializable = 1 in {
1407 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1408                            (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1409                            "movi\t$Rd, $Imm",
1410                            [(set (f64 FPR64:$Rd),
1411                               (f64 (bitconvert
1412                                 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
1413                            NoItinerary> {
1414   let cmode = 0b1110;
1415 }
1416 }
1417
1418 // Vector Floating Point Move Immediate
1419
1420 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1421                       Operand immOpType, bit q, bit op>
1422   : NeonI_1VModImm<q, op,
1423                    (outs VPRC:$Rd), (ins immOpType:$Imm),
1424                    "fmov\t$Rd" # asmlane # ", $Imm",
1425                    [(set (OpTy VPRC:$Rd),
1426                       (OpTy (Neon_fmovi (timm:$Imm))))],
1427                    NoItinerary> {
1428      let cmode = 0b1111;
1429    }
1430
1431 let isReMaterializable = 1 in {
1432 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64,  v2f32, fmov32_operand, 0b0, 0b0>;
1433 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1434 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1435 }
1436
1437 // Vector Shift (Immediate) 
1438 // Immediate in [0, 63]
1439 def imm0_63 : Operand<i32> {
1440   let ParserMatchClass = uimm6_asmoperand;
1441 }
1442
1443 // Shift Right Immediate - A shift right immediate is encoded differently from
1444 // other shift immediates. The immh:immb field is encoded like so:
1445 //
1446 //    Offset    Encoding
1447 //     8        immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1448 //     16       immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1449 //     32       immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1450 //     64       immh:immb<6>   = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
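// The value placed in the <imm> bits works out to (Offset - <shift>); for
// example, a 16-bit element shifted right by 3 stores 16 - 3 = 13, giving
// immh:immb = 0b0011101.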
1451 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1452   let Name = "ShrImm" # OFFSET;
1453   let RenderMethod = "addImmOperands";
1454   let DiagnosticType = "ShrImm" # OFFSET;
1455 }
1456
1457 class shr_imm<string OFFSET> : Operand<i32> {
1458   let EncoderMethod = "getShiftRightImm" # OFFSET;
1459   let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1460   let ParserMatchClass = 
1461     !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
1462 }
1463
1464 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1465 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1466 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1467 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
1468
1469 def shr_imm8 : shr_imm<"8">;
1470 def shr_imm16 : shr_imm<"16">;
1471 def shr_imm32 : shr_imm<"32">;
1472 def shr_imm64 : shr_imm<"64">;
1473
1474 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1475                RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1476   : NeonI_2VShiftImm<q, u, opcode,
1477                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1478                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1479                      [(set (Ty VPRC:$Rd),
1480                         (Ty (OpNode (Ty VPRC:$Rn),
1481                           (Ty (Neon_dupImm (i32 imm:$Imm))))))],
1482                      NoItinerary>;
1483
1484 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1485   // 64-bit vector types.
1486   def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> {
1487     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1488   }
1489
1490   def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> {
1491     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1492   }
1493
1494   def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> {
1495     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1496   }
1497
1498   // 128-bit vector types.
1499   def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> {
1500     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1501   }
1502
1503   def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> {
1504     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1505   }
1506
1507   def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> {
1508     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1509   }
1510
1511   def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> {
1512     let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
1513   }
1514 }
1515
1516 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1517   def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1518                      OpNode> {
1519     let Inst{22-19} = 0b0001;
1520   }
1521
1522   def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1523                      OpNode> {
1524     let Inst{22-20} = 0b001;
1525   }
1526
1527   def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1528                      OpNode> {
1529     let Inst{22-21} = 0b01;
1530   }
1531
1532   def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1533                       OpNode> {
1534     let Inst{22-19} = 0b0001;
1535   }
1536
1537   def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1538                      OpNode> {
1539     let Inst{22-20} = 0b001;
1540   }
1541
1542   def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1543                      OpNode> {
1544     let Inst{22-21} = 0b01;
1545   }
1546
1547   def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1548                      OpNode> {
1549     let Inst{22} = 0b1;
1550   }
1551 }
1552
1553 // Shift left
1554 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1555
1556 // Shift right
1557 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1558 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
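// For example, these select directly from the generic shl/sra/srl nodes:
//   shl  v0.4h,  v1.4h,  #3    // each lane: v1 << 3
//   sshr v0.2d,  v1.2d,  #63   // each lane: arithmetic shift right by 63
//   ushr v0.16b, v1.16b, #1    // each lane: logical shift right by 1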
1559
1560 def Neon_top16B : PatFrag<(ops node:$in),
1561                           (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1562 def Neon_top8H : PatFrag<(ops node:$in),
1563                          (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1564 def Neon_top4S : PatFrag<(ops node:$in),
1565                          (extract_subvector (v4i32 node:$in), (iPTR 2))>;
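// Each of these PatFrags extracts the high 64-bit half of a 128-bit vector;
// e.g. Neon_top16B yields the v8i8 formed from lanes 8..15 of its operand.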
1566
1567 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1568                    string SrcT, ValueType DestTy, ValueType SrcTy,
1569                    Operand ImmTy, SDPatternOperator ExtOp>
1570   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1571                      (ins VPR64:$Rn, ImmTy:$Imm),
1572                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1573                      [(set (DestTy VPR128:$Rd),
1574                         (DestTy (shl
1575                           (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1576                             (DestTy (Neon_dupImm (i32 imm:$Imm))))))],
1577                      NoItinerary>;
1578
1579 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1580                        string SrcT, ValueType DestTy, ValueType SrcTy,
1581                        int StartIndex, Operand ImmTy,
1582                        SDPatternOperator ExtOp, PatFrag getTop>
1583   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1584                      (ins VPR128:$Rn, ImmTy:$Imm),
1585                      asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1586                      [(set (DestTy VPR128:$Rd),
1587                         (DestTy (shl
1588                           (DestTy (ExtOp
1589                             (SrcTy (getTop VPR128:$Rn)))),
1590                               (DestTy (Neon_dupImm (i32 imm:$Imm))))))],
1591                      NoItinerary>;
1592
1593 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1594                          SDNode ExtOp> {
1595   // 64-bit vector types.
1596   def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1597                          uimm3, ExtOp> {
1598     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1599   }
1600
1601   def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1602                          uimm4, ExtOp> {
1603     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1604   }
1605
1606   def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1607                          uimm5, ExtOp> {
1608     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1609   }
1610
1611   // 128-bit vector types
1612   def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b",
1613                               v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B> {
1614     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1615   }
1616
1617   def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h",
1618                              v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H> {
1619     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1620   }
1621
1622   def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s",
1623                              v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S> {
1624     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1625   }
1626
1627   // Use other patterns to match when the immediate is 0.
1628   def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1629             (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1630
1631   def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1632             (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1633
1634   def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1635             (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1636
1637   def : Pat<(v8i16 (ExtOp (v8i8 (Neon_top16B VPR128:$Rn)))),
1638             (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1639
1640   def : Pat<(v4i32 (ExtOp (v4i16 (Neon_top8H VPR128:$Rn)))),
1641             (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1642
1643   def : Pat<(v2i64 (ExtOp (v2i32 (Neon_top4S VPR128:$Rn)))),
1644             (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
1645 }
1646
1647 // Shift left long
1648 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1649 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
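// For example:
//   sshll  v0.8h, v1.8b, #3   // sign-extend each byte, then shift left by 3
//   ushll2 v0.4s, v1.8h, #0   // zero-extend the high four halfwords
// The shift-of-zero patterns in the multiclass above also let plain
// sext/zext select to these instructions.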
1650
1651 // Rounding/Saturating shift
1652 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1653                   RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1654                   SDPatternOperator OpNode>
1655   : NeonI_2VShiftImm<q, u, opcode,
1656                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1657                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1658                      [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1659                         (i32 imm:$Imm))))],
1660                      NoItinerary>;
1661
1662 // shift right (vector by immediate)
1663 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1664                            SDPatternOperator OpNode> {
1665   def _8B  : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1666                          OpNode> {
1667     let Inst{22-19} = 0b0001;
1668   }
1669
1670   def _4H  : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1671                          OpNode> {
1672     let Inst{22-20} = 0b001;
1673   }
1674
1675   def _2S  : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1676                          OpNode> {
1677     let Inst{22-21} = 0b01;
1678   }
1679
1680   def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1681                          OpNode> {
1682     let Inst{22-19} = 0b0001;
1683   }
1684
1685   def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1686                         OpNode> {
1687     let Inst{22-20} = 0b001;
1688   }
1689
1690   def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1691                         OpNode> {
1692     let Inst{22-21} = 0b01;
1693   }
1694
1695   def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1696                         OpNode> {
1697     let Inst{22} = 0b1;
1698   }
1699 }
1700
1701 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1702                           SDPatternOperator OpNode> {
1703   // 64-bit vector types.
1704   def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1705                         OpNode> {
1706     let Inst{22-19} = 0b0001;
1707   }
1708
1709   def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1710                         OpNode> {
1711     let Inst{22-20} = 0b001;
1712   }
1713
1714   def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1715                         OpNode> {
1716     let Inst{22-21} = 0b01;
1717   }
1718
1719   // 128-bit vector types.
1720   def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1721                          OpNode> {
1722     let Inst{22-19} = 0b0001;
1723   }
1724
1725   def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1726                         OpNode> {
1727     let Inst{22-20} = 0b001;
1728   }
1729
1730   def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1731                         OpNode> {
1732     let Inst{22-21} = 0b01;
1733   }
1734
1735   def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1736                         OpNode> {
1737     let Inst{22} = 0b1;
1738   }
1739 }
1740
1741 // Rounding shift right
1742 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1743                                 int_aarch64_neon_vsrshr>;
1744 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1745                                 int_aarch64_neon_vurshr>;
1746
1747 // Saturating shift left unsigned
1748 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1749
1750 // Saturating shift left
1751 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1752 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
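// For example, the rounding and saturating forms take the same
// shift-immediate operands:
//   srshr  v0.4s, v1.4s, #2   // rounding shift right
//   sqshlu v0.8b, v1.8b, #3   // signed-to-unsigned saturating shift left
//   uqshl  v0.2d, v1.2d, #7   // unsigned saturating shift left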
1753
1754 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1755                   RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1756                   SDNode OpNode>
1757   : NeonI_2VShiftImm<q, u, opcode,
1758            (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1759            asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1760            [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1761               (Ty (OpNode (Ty VPRC:$Rn),
1762                 (Ty (Neon_dupImm (i32 imm:$Imm))))))))],
1763            NoItinerary> {
1764   let Constraints = "$src = $Rd";
1765 }
1766
1767 // Shift Right accumulate
1768 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1769   def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1770                         OpNode> {
1771     let Inst{22-19} = 0b0001;
1772   }
1773
1774   def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1775                         OpNode> {
1776     let Inst{22-20} = 0b001;
1777   }
1778
1779   def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1780                         OpNode> {
1781     let Inst{22-21} = 0b01;
1782   }
1783
1784   def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1785                          OpNode> {
1786     let Inst{22-19} = 0b0001;
1787   }
1788
1789   def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1790                         OpNode> {
1791     let Inst{22-20} = 0b001;
1792   }
1793
1794   def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1795                         OpNode> {
1796     let Inst{22-21} = 0b01;
1797   }
1798
1799   def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1800                         OpNode> {
1801     let Inst{22} = 0b1;
1802   }
1803 }
1804
1805 // Shift right and accumulate
1806 defm SSRAvvi    : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
1807 defm USRAvvi    : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
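// For example, usra v0.2d, v1.2d, #32 computes, per lane,
//   v0 = v0 + (v1 >> 32)   (logical shift right)
// matching the (add $src, (srl $Rn, imm)) pattern above.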
1808
1809 // Rounding shift accumulate
1810 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1811                     RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1812                     SDPatternOperator OpNode>
1813   : NeonI_2VShiftImm<q, u, opcode,
1814                      (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1815                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1816                      [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1817                         (Ty (OpNode (Ty VPRC:$Rn), (i32 imm:$Imm))))))],
1818                      NoItinerary> {
1819   let Constraints = "$src = $Rd";
1820 }
1821
1822 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1823                              SDPatternOperator OpNode> {
1824   def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1825                           OpNode> {
1826     let Inst{22-19} = 0b0001;
1827   }
1828
1829   def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1830                           OpNode> {
1831     let Inst{22-20} = 0b001;
1832   }
1833
1834   def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1835                           OpNode> {
1836     let Inst{22-21} = 0b01;
1837   }
1838
1839   def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1840                            OpNode> {
1841     let Inst{22-19} = 0b0001;
1842   }
1843
1844   def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1845                           OpNode> {
1846     let Inst{22-20} = 0b001;
1847   }
1848
1849   def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1850                           OpNode> {
1851     let Inst{22-21} = 0b01;
1852   }
1853
1854   def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1855                           OpNode> {
1856     let Inst{22} = 0b1;
1857   }
1858 }
1859
1860 // Rounding shift right and accumulate
1861 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1862 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1863
1864 // Shift insert by immediate
1865 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
1866                   RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1867                   SDPatternOperator OpNode>
1868     : NeonI_2VShiftImm<q, u, opcode,
1869            (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1870            asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1871            [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
1872              (i32 imm:$Imm))))],
1873            NoItinerary> {
1874   let Constraints = "$src = $Rd";
1875 }
1876
1877 // shift left insert (vector by immediate)
1878 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
1879   def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1880                         int_aarch64_neon_vsli> {
1881     let Inst{22-19} = 0b0001;
1882   }
1883
1884   def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1885                         int_aarch64_neon_vsli> {
1886     let Inst{22-20} = 0b001;
1887   }
1888
1889   def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1890                         int_aarch64_neon_vsli> {
1891     let Inst{22-21} = 0b01;
1892   }
1893
1894     // 128-bit vector types
1895   def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1896                          int_aarch64_neon_vsli> {
1897     let Inst{22-19} = 0b0001;
1898   }
1899
1900   def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1901                         int_aarch64_neon_vsli> {
1902     let Inst{22-20} = 0b001;
1903   }
1904
1905   def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1906                         int_aarch64_neon_vsli> {
1907     let Inst{22-21} = 0b01;
1908   }
1909
1910   def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1911                         int_aarch64_neon_vsli> {
1912     let Inst{22} = 0b1;
1913   }
1914 }
1915
1916 // shift right insert (vector by immediate)
1917 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
1918     // 64-bit vector types.
1919   def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1920                         int_aarch64_neon_vsri> {
1921     let Inst{22-19} = 0b0001;
1922   }
1923
1924   def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1925                         int_aarch64_neon_vsri> {
1926     let Inst{22-20} = 0b001;
1927   }
1928
1929   def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1930                         int_aarch64_neon_vsri> {
1931     let Inst{22-21} = 0b01;
1932   }
1933
1934     // 128-bit vector types
1935   def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1936                          int_aarch64_neon_vsri> {
1937     let Inst{22-19} = 0b0001;
1938   }
1939
1940   def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1941                         int_aarch64_neon_vsri> {
1942     let Inst{22-20} = 0b001;
1943   }
1944
1945   def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1946                         int_aarch64_neon_vsri> {
1947     let Inst{22-21} = 0b01;
1948   }
1949
1950   def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1951                         int_aarch64_neon_vsri> {
1952     let Inst{22} = 0b1;
1953   }
1954 }
1955
1956 // Shift left and insert
1957 defm SLIvvi   : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
1958
1959 // Shift right and insert
1960 defm SRIvvi   : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
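// For example, per 32-bit lane:
//   sli v0.4s, v1.4s, #8   // v0 = (v1 << 8) | (v0 & 0x000000ff)
//   sri v0.4s, v1.4s, #8   // v0 = (v1 >> 8) | (v0 & 0xff000000)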
1961
1962 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1963                     string SrcT, Operand ImmTy>
1964   : NeonI_2VShiftImm<q, u, opcode,
1965                      (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
1966                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1967                      [], NoItinerary>;
1968
1969 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1970                        string SrcT, Operand ImmTy>
1971   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1972                      (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
1973                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1974                      [], NoItinerary> {
1975   let Constraints = "$src = $Rd";
1976 }
1977
1978 // shift right narrow (vector by immediate)
1979 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
1980   def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
1981     let Inst{22-19} = 0b0001;
1982   }
1983
1984   def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
1985     let Inst{22-20} = 0b001;
1986   }
1987
1988   def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
1989     let Inst{22-21} = 0b01;
1990   }
1991
1992   // Shift Narrow High
1993   def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
1994                               shr_imm8> {
1995     let Inst{22-19} = 0b0001;
1996   }
1997
1998   def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
1999                              shr_imm16> {
2000     let Inst{22-20} = 0b001;
2001   }
2002
2003   def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
2004                              shr_imm32> {
2005     let Inst{22-21} = 0b01;
2006   }
2007 }
2008
2009 // Shift right narrow
2010 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
2011
2012 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
2013 defm QSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2014 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2015 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2016 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2017 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2018 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2019 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
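// For example:
//   shrn     v0.8b,  v1.8h, #4   // narrow each 16-bit lane to a byte
//   sqrshrn2 v0.16b, v1.8h, #4   // saturating rounding narrow into the high half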
2020
2021 def Neon_combine : PatFrag<(ops node:$Rm, node:$Rn),
2022                            (v2i64 (concat_vectors (v1i64 node:$Rm),
2023                                                   (v1i64 node:$Rn)))>;
2024
2025 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2026                              (v8i16 (srl (v8i16 node:$lhs),
2027                                (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
2028 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2029                              (v4i32 (srl (v4i32 node:$lhs),
2030                                (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
2031 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2032                              (v2i64 (srl (v2i64 node:$lhs),
2033                                (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
2034 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2035                              (v8i16 (sra (v8i16 node:$lhs),
2036                                (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
2037 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2038                              (v4i32 (sra (v4i32 node:$lhs),
2039                                (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
2040 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2041                              (v2i64 (sra (v2i64 node:$lhs),
2042                                (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
2043
2044 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
2045 multiclass Neon_shiftNarrow_patterns<string shr> {
2046   def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2047               imm:$Imm))),
2048             (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2049   def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2050               imm:$Imm))),
2051             (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2052   def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2053               imm:$Imm))),
2054             (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2055
2056   def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
2057               (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2058                 VPR128:$Rn, imm:$Imm)))))),
2059             (SHRNvvi_16B (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2060                          VPR128:$Rn, imm:$Imm)>;
2061   def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
2062               (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2063                 VPR128:$Rn, imm:$Imm)))))),
2064             (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2065                         VPR128:$Rn, imm:$Imm)>;
2066   def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
2067               (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2068                 VPR128:$Rn, imm:$Imm)))))),
2069             (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2070                         VPR128:$Rn, imm:$Imm)>;
2071 }
2072
2073 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2074   def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)),
2075             (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2076   def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)),
2077             (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2078   def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)),
2079             (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2080
2081   def : Pat<(Neon_combine (v1i64 VPR64:$src),
2082                 (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
2083             (!cast<Instruction>(prefix # "_16B")
2084                 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2085                 VPR128:$Rn, imm:$Imm)>;
2086   def : Pat<(Neon_combine (v1i64 VPR64:$src),
2087                 (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
2088             (!cast<Instruction>(prefix # "_8H")
2089                 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2090                 VPR128:$Rn, imm:$Imm)>;
2091   def : Pat<(Neon_combine (v1i64 VPR64:$src),
2092                 (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
2093             (!cast<Instruction>(prefix # "_4S")
2094                   (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2095                   VPR128:$Rn, imm:$Imm)>;
2096 }
2097
2098 defm : Neon_shiftNarrow_patterns<"lshr">;
2099 defm : Neon_shiftNarrow_patterns<"ashr">;
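// As an illustration of what the patterns above are meant to match, IR along
// the lines of
//   %s = lshr <4 x i32> %a, <i32 4, i32 4, i32 4, i32 4>
//   %t = trunc <4 x i32> %s to <4 x i16>
// should select to "shrn v0.4h, v1.4s, #4".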
2100
2101 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2102 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2103 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2104 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2105 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2106 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2107 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2108
2109 // Convert between fixed-point and floating-point
2110 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2111                 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2112                 Operand ImmTy, SDPatternOperator IntOp>
2113   : NeonI_2VShiftImm<q, u, opcode,
2114                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2115                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2116                      [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
2117                        (i32 imm:$Imm))))],
2118                      NoItinerary>;
2119
2120 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2121                               SDPatternOperator IntOp> {
2122   def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2123                       shr_imm32, IntOp> {
2124     let Inst{22-21} = 0b01;
2125   }
2126
2127   def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2128                       shr_imm32, IntOp> {
2129     let Inst{22-21} = 0b01;
2130   }
2131
2132   def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
2133                       shr_imm64, IntOp> {
2134     let Inst{22} = 0b1;
2135   }
2136 }
2137
2138 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2139                               SDPatternOperator IntOp> {
2140   def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2141                       shr_imm32, IntOp> {
2142     let Inst{22-21} = 0b01;
2143   }
2144
2145   def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2146                       shr_imm32, IntOp> {
2147     let Inst{22-21} = 0b01;
2148   }
2149
2150   def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
2151                       shr_imm64, IntOp> {
2152     let Inst{22} = 0b1;
2153   }
2154 }
2155
2156 // Convert fixed-point to floating-point
2157 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2158                                    int_arm_neon_vcvtfxs2fp>;
2159 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2160                                    int_arm_neon_vcvtfxu2fp>;
2161
2162 // Convert floating-point to fixed-point
2163 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2164                                    int_arm_neon_vcvtfp2fxs>;
2165 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2166                                    int_arm_neon_vcvtfp2fxu>;
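// The immediate gives the number of fractional bits, e.g.:
//   scvtf  v0.2s, v1.2s, #8    // signed fixed-point, 8 fractional bits -> float
//   fcvtzs v0.4s, v1.4s, #16   // float -> signed fixed-point, 16 fractional bits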
2167
2168 multiclass Neon_sshll2_0<SDNode ext>
2169 {
2170   def _v8i8  : PatFrag<(ops node:$Rn),
2171                        (v8i16 (ext (v8i8 (Neon_top16B node:$Rn))))>;
2172   def _v4i16 : PatFrag<(ops node:$Rn),
2173                        (v4i32 (ext (v4i16 (Neon_top8H node:$Rn))))>;
2174   def _v2i32 : PatFrag<(ops node:$Rn),
2175                        (v2i64 (ext (v2i32 (Neon_top4S node:$Rn))))>;
2176 }
2177
2178 defm NI_sext_high : Neon_sshll2_0<sext>;
2179 defm NI_zext_high : Neon_sshll2_0<zext>;
2180
2181 // The following definitions are for the instruction class (3V Diff).
2182
2183 // normal long/long2 pattern
2184 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2185                  string asmop, string ResS, string OpS,
2186                  SDPatternOperator opnode, SDPatternOperator ext,
2187                  RegisterOperand OpVPR,
2188                  ValueType ResTy, ValueType OpTy>
2189   : NeonI_3VDiff<q, u, size, opcode,
2190                  (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2191                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2192                  [(set (ResTy VPR128:$Rd),
2193                     (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2194                                    (ResTy (ext (OpTy OpVPR:$Rm))))))],
2195                  NoItinerary>;
2196
2197 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2198                         string asmop, SDPatternOperator opnode,
2199                         bit Commutable = 0>
2200 {
2201   let isCommutable = Commutable in {
2202     def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2203                            opnode, sext, VPR64, v8i16, v8i8>;
2204     def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2205                            opnode, sext, VPR64, v4i32, v4i16>;
2206     def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2207                            opnode, sext, VPR64, v2i64, v2i32>;
2208   }
2209 }
2210
2211 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode,
2212                          string asmop, SDPatternOperator opnode,
2213                          bit Commutable = 0>
2214 {
2215   let isCommutable = Commutable in {
2216     def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2217                             opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2218     def _4s8h  : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2219                             opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2220     def _2d4s  : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2221                             opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2222   }
2223 }
2224
2225 multiclass NeonI_3VDL_u<bit u, bits<4> opcode,
2226                           string asmop, SDPatternOperator opnode,
2227                           bit Commutable = 0>
2228 {
2229   let isCommutable = Commutable in {
2230     def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2231                            opnode, zext, VPR64, v8i16, v8i8>;
2232     def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2233                            opnode, zext, VPR64, v4i32, v4i16>;
2234     def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2235                            opnode, zext, VPR64, v2i64, v2i32>;
2236   }
2237 }
2238
2239 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode,
2240                            string asmop, SDPatternOperator opnode,
2241                            bit Commutable = 0>
2242 {
2243   let isCommutable = Commutable in {
2244     def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2245                             opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2246     def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2247                            opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2248     def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2249                            opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2250   }
2251 }
2252
2253 defm SADDLvvv :  NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2254 defm UADDLvvv :  NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2255
2256 defm SADDL2vvv :  NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2257 defm UADDL2vvv :  NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2258
2259 defm SSUBLvvv :  NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2260 defm USUBLvvv :  NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2261
2262 defm SSUBL2vvv :  NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2263 defm USUBL2vvv :  NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
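// For example:
//   saddl  v0.8h, v1.8b, v2.8b   // sign-extend each byte of the 64-bit sources, then add
//   uaddl2 v0.2d, v1.4s, v2.4s   // zero-extend the high two words of each source, then add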
2264
2265 // normal wide/wide2 pattern
2266 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2267                  string asmop, string ResS, string OpS,
2268                  SDPatternOperator opnode, SDPatternOperator ext,
2269                  RegisterOperand OpVPR,
2270                  ValueType ResTy, ValueType OpTy>
2271   : NeonI_3VDiff<q, u, size, opcode,
2272                  (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2273                  asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2274                  [(set (ResTy VPR128:$Rd),
2275                     (ResTy (opnode (ResTy VPR128:$Rn),
2276                                    (ResTy (ext (OpTy OpVPR:$Rm))))))],
2277                  NoItinerary>;
2278
2279 multiclass NeonI_3VDW_s<bit u, bits<4> opcode,
2280                         string asmop, SDPatternOperator opnode>
2281 {
2282   def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2283                          opnode, sext, VPR64, v8i16, v8i8>;
2284   def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2285                          opnode, sext, VPR64, v4i32, v4i16>;
2286   def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2287                          opnode, sext, VPR64, v2i64, v2i32>;
2288 }
2289
2290 defm SADDWvvv :  NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2291 defm SSUBWvvv :  NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
2292
2293 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode,
2294                          string asmop, SDPatternOperator opnode>
2295 {
2296   def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2297                           opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2298   def _4s8h  : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2299                           opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2300   def _2d4s  : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2301                           opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2302 }
2303
2304 defm SADDW2vvv :  NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2305 defm SSUBW2vvv :  NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
2306
2307 multiclass NeonI_3VDW_u<bit u, bits<4> opcode,
2308                         string asmop, SDPatternOperator opnode>
2309 {
2310   def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2311                          opnode, zext, VPR64, v8i16, v8i8>;
2312   def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2313                          opnode, zext, VPR64, v4i32, v4i16>;
2314   def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2315                          opnode, zext, VPR64, v2i64, v2i32>;
2316 }
2317
2318 defm UADDWvvv :  NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2319 defm USUBWvvv :  NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
2320
2321 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode,
2322                            string asmop, SDPatternOperator opnode>
2323 {
2324   def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2325                           opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2326   def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2327                          opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2328   def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2329                          opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2330 }
2331
2332 defm UADDW2vvv :  NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2333 defm USUBW2vvv :  NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2334
2335 // Get the high half of each vector element.
2336 multiclass NeonI_get_high
2337 {
2338   def _8h : PatFrag<(ops node:$Rn),
2339                     (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2340                                              (v8i16 (Neon_dupImm 8))))))>;
2341   def _4s : PatFrag<(ops node:$Rn),
2342                     (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2343                                               (v4i32 (Neon_dupImm 16))))))>;
2344   def _2d : PatFrag<(ops node:$Rn),
2345                     (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2346                                               (v2i64 (Neon_dupImm 32))))))>;
2347 }
2348
2349 defm NI_get_hi : NeonI_get_high;
2350
2351 // pattern for addhn/subhn with 2 operands
2352 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2353                            string asmop, string ResS, string OpS,
2354                            SDPatternOperator opnode, SDPatternOperator get_hi,
2355                            ValueType ResTy, ValueType OpTy>
2356   : NeonI_3VDiff<q, u, size, opcode,
2357                  (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2358                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2359                  [(set (ResTy VPR64:$Rd),
2360                     (ResTy (get_hi
2361                       (OpTy (opnode (OpTy VPR128:$Rn),
2362                                     (OpTy VPR128:$Rm))))))],
2363                  NoItinerary>;
2364
2365 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode,
2366                                 string asmop, SDPatternOperator opnode,
2367                                 bit Commutable = 0>
2368 {
2369   let isCommutable = Commutable in {
2370     def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2371                                      opnode, NI_get_hi_8h, v8i8, v8i16>;
2372     def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2373                                      opnode, NI_get_hi_4s, v4i16, v4i32>;
2374     def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2375                                      opnode, NI_get_hi_2d, v2i32, v2i64>;
2376   }
2377 }
2378
2379 defm ADDHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2380 defm SUBHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
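// For example, addhn v0.8b, v1.8h, v2.8h adds the two 8h vectors and keeps
// only the high 8 bits of each 16-bit sum, matching the
// NI_get_hi_8h (add ...) pattern above.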
2381
2382 // pattern for operation with 2 operands
2383 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2384                     string asmop, string ResS, string OpS,
2385                     SDPatternOperator opnode,
2386                     RegisterOperand ResVPR, RegisterOperand OpVPR,
2387                     ValueType ResTy, ValueType OpTy>
2388   : NeonI_3VDiff<q, u, size, opcode,
2389                  (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2390                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2391                  [(set (ResTy ResVPR:$Rd),
2392                     (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2393                  NoItinerary>;
2394
2395 // normal narrow pattern
2396 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode,
2397                           string asmop, SDPatternOperator opnode,
2398                           bit Commutable = 0>
2399 {
2400   let isCommutable = Commutable in {
2401     def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2402                               opnode, VPR64, VPR128, v8i8, v8i16>;
2403     def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2404                               opnode, VPR64, VPR128, v4i16, v4i32>;
2405     def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2406                               opnode, VPR64, VPR128, v2i32, v2i64>;
2407   }
2408 }
2409
2410 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2411 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2412
2413 // pattern for ACLE intrinsics with 3 operands
2414 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2415                      string asmop, string ResS, string OpS>
2416   : NeonI_3VDiff<q, u, size, opcode,
2417                  (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2418                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2419                  [], NoItinerary> {
2420   let Constraints = "$src = $Rd";
2421   let neverHasSideEffects = 1;
2422 }
2423
2424 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode,
2425                              string asmop> {
2426   def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2427   def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2428   def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
2429 }
2430
2431 defm ADDHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2432 defm SUBHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2433
2434 defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2435 defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
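// Illustrative example (assembly only, register numbers arbitrary):
//   addhn2 v0.16b, v1.8h, v2.8h
// narrows into the upper 64 bits of v0 and leaves the lower half untouched,
// which is why the selection patterns below need the Neon_combine /
// SUBREG_TO_REG shape.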
2436
2437 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
2438 // part.
2439 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2440                         SDPatternOperator coreop>
2441   : Pat<(Neon_combine (v1i64 VPR64:$src),
2442                       (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2443                                                         (SrcTy VPR128:$Rm)))))),
2444         (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2445               VPR128:$Rn, VPR128:$Rm)>;
2446
2447 // addhn2 patterns
2448 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8,  v8i16,
2449           BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2450 def : NarrowHighHalfPat<ADDHN2vvv_8h4s,  v4i16, v4i32,
2451           BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2452 def : NarrowHighHalfPat<ADDHN2vvv_4s2d,  v2i32, v2i64,
2453           BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
2454
2455 // subhn2 patterns
2456 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8,  v8i16,
2457           BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2458 def : NarrowHighHalfPat<SUBHN2vvv_8h4s,  v4i16, v4i32,
2459           BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2460 def : NarrowHighHalfPat<SUBHN2vvv_4s2d,  v2i32, v2i64,
2461           BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
2462
2463 // raddhn2 patterns
2464 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vraddhn>;
2465 def : NarrowHighHalfPat<RADDHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vraddhn>;
2466 def : NarrowHighHalfPat<RADDHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vraddhn>;
2467
2468 // rsubhn2 patterns
2469 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vrsubhn>;
2470 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vrsubhn>;
2471 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vrsubhn>;
2472
2473 // pattern for operations that need to extend the result
2474 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2475                      string asmop, string ResS, string OpS,
2476                      SDPatternOperator opnode,
2477                      RegisterOperand OpVPR,
2478                      ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2479   : NeonI_3VDiff<q, u, size, opcode,
2480                  (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2481                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2482                  [(set (ResTy VPR128:$Rd),
2483                     (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2484                                                 (OpTy OpVPR:$Rm))))))],
2485                  NoItinerary>;
2486
2487 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode,
2488                            string asmop, SDPatternOperator opnode,
2489                            bit Commutable = 0>
2490 {
2491   let isCommutable = Commutable in {
2492     def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2493                                opnode, VPR64, v8i16, v8i8, v8i8>;
2494     def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2495                                opnode, VPR64, v4i32, v4i16, v4i16>;
2496     def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2497                                opnode, VPR64, v2i64, v2i32, v2i32>;
2498   }
2499 }
2500
2501 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
2502 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
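// Illustrative example (assembly only, register numbers arbitrary):
//   uabdl v0.8h, v1.8b, v2.8b
// computes |v1 - v2| per lane and zero-extends each 8-bit difference to
// 16 bits, matching the (zext (vabd ...)) form captured by NeonI_3VDL_Ext.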
2503
2504 multiclass NeonI_Op_High<SDPatternOperator op>
2505 {
2506   def _16B : PatFrag<(ops node:$Rn, node:$Rm),
2507                      (op (v8i8 (Neon_top16B node:$Rn)), (v8i8 (Neon_top16B node:$Rm)))>;
2508   def _8H  : PatFrag<(ops node:$Rn, node:$Rm),
2509                      (op (v4i16 (Neon_top8H node:$Rn)), (v4i16 (Neon_top8H node:$Rm)))>;
2510   def _4S  : PatFrag<(ops node:$Rn, node:$Rm),
2511                      (op (v2i32 (Neon_top4S node:$Rn)), (v2i32 (Neon_top4S node:$Rm)))>;
2512
2513 }
2514
2515 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
2516 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
2517 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
2518 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
2519 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
2520 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
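// For illustration only: NI_smull_hi_8H is a PatFrag equivalent to
//   (int_arm_neon_vmulls (Neon_top8H $Rn), (Neon_top8H $Rm))
// i.e. the widening multiply applied to the top halves of two 128-bit
// operands; the "*2" (high) instructions below are selected from these frags.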
2521
2522 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode,
2523                             string asmop, string opnode,
2524                             bit Commutable = 0>
2525 {
2526   let isCommutable = Commutable in {
2527     def _8h8b  : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2528                                 !cast<PatFrag>(opnode # "_16B"),
2529                                 VPR128, v8i16, v16i8, v8i8>;
2530     def _4s4h  : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2531                                 !cast<PatFrag>(opnode # "_8H"),
2532                                 VPR128, v4i32, v8i16, v4i16>;
2533     def _2d2s  : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2534                                 !cast<PatFrag>(opnode # "_4S"),
2535                                 VPR128, v2i64, v4i32, v2i32>;
2536   }
2537 }
2538
2539 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
2540 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
2541
2542 // For patterns that need two operators chained together.
2543 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
2544                      string asmop, string ResS, string OpS, 
2545                      SDPatternOperator opnode, SDPatternOperator subop,
2546                      RegisterOperand OpVPR,
2547                      ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2548   : NeonI_3VDiff<q, u, size, opcode,
2549                  (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2550                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, 
2551                  [(set (ResTy VPR128:$Rd),
2552                     (ResTy (opnode
2553                       (ResTy VPR128:$src), 
2554                       (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
2555                                                  (OpTy OpVPR:$Rm))))))))],
2556                  NoItinerary> {
2557   let Constraints = "$src = $Rd";
2558 }
2559
2560 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode,
2561                              string asmop, SDPatternOperator opnode,
2562                              SDPatternOperator subop>
2563 {
2564   def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2565                              opnode, subop, VPR64, v8i16, v8i8, v8i8>;
2566   def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2567                              opnode, subop, VPR64, v4i32, v4i16, v4i16>;
2568   def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2569                              opnode, subop, VPR64, v2i64, v2i32, v2i32>;
2570 }
2571
2572 defm SABALvvv :  NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
2573                                    add, int_arm_neon_vabds>;
2574 defm UABALvvv :  NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
2575                                    add, int_arm_neon_vabdu>;
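// Illustrative example (assembly only, register numbers arbitrary):
//   sabal v0.8h, v1.8b, v2.8b
// performs v0.8h += zext(|v1 - v2|) per lane, i.e. the chained
// (add $src, (zext (vabd ...))) pattern given above.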
2576
2577 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode,
2578                               string asmop, SDPatternOperator opnode,
2579                               string subop>
2580 {
2581   def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2582                              opnode, !cast<PatFrag>(subop # "_16B"), 
2583                              VPR128, v8i16, v16i8, v8i8>;
2584   def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2585                              opnode, !cast<PatFrag>(subop # "_8H"), 
2586                              VPR128, v4i32, v8i16, v4i16>;
2587   def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2588                              opnode, !cast<PatFrag>(subop # "_4S"), 
2589                              VPR128, v2i64, v4i32, v2i32>;
2590 }
2591
2592 defm SABAL2vvv :  NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
2593                                      "NI_sabdl_hi">;
2594 defm UABAL2vvv :  NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
2595                                      "NI_uabdl_hi">;
2596
2597 // Long pattern with 2 operands
2598 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode,
2599                           string asmop, SDPatternOperator opnode,
2600                           bit Commutable = 0>
2601 {
2602   let isCommutable = Commutable in {
2603     def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2604                               opnode, VPR128, VPR64, v8i16, v8i8>;
2605     def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2606                               opnode, VPR128, VPR64, v4i32, v4i16>;
2607     def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2608                               opnode, VPR128, VPR64, v2i64, v2i32>;
2609   }
2610 }
2611
2612 defm SMULLvvv :  NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
2613 defm UMULLvvv :  NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
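// Illustrative example (assembly only, register numbers arbitrary):
//   smull v0.4s, v1.4h, v2.4h
// multiplies 16-bit lanes into 32-bit results; marking the multiclass
// commutable lets the selector swap $Rn and $Rm freely.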
2614
2615 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
2616                            string asmop, string ResS, string OpS,
2617                            SDPatternOperator opnode,
2618                            ValueType ResTy, ValueType OpTy>
2619   : NeonI_3VDiff<q, u, size, opcode,
2620                  (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2621                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2622                  [(set (ResTy VPR128:$Rd),
2623                     (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
2624                  NoItinerary>;
2625
2626
2627 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode,
2628                                    string asmop, 
2629                                    string opnode,
2630                                    bit Commutable = 0>
2631 {
2632   let isCommutable = Commutable in {
2633     def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2634                                       !cast<PatFrag>(opnode # "_16B"),
2635                                       v8i16, v16i8>;
2636     def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2637                                      !cast<PatFrag>(opnode # "_8H"),
2638                                      v4i32, v8i16>;
2639     def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2640                                      !cast<PatFrag>(opnode # "_4S"),
2641                                      v2i64, v4i32>;
2642   }
2643 }
2644
2645 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
2646                                          "NI_smull_hi", 1>;
2647 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
2648                                          "NI_umull_hi", 1>;
2649
2650 // Long pattern with 3 operands
2651 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2652                      string asmop, string ResS, string OpS,
2653                      SDPatternOperator opnode,
2654                      ValueType ResTy, ValueType OpTy>
2655   : NeonI_3VDiff<q, u, size, opcode,
2656                  (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
2657                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2658                  [(set (ResTy VPR128:$Rd),
2659                     (ResTy (opnode
2660                       (ResTy VPR128:$src),
2661                       (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
2662                NoItinerary> {
2663   let Constraints = "$src = $Rd";
2664 }
2665
2666 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode,
2667                              string asmop, SDPatternOperator opnode>
2668 {
2669   def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2670                              opnode, v8i16, v8i8>;
2671   def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2672                              opnode, v4i32, v4i16>;
2673   def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2674                              opnode, v2i64, v2i32>;
2675 }
2676
2677 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn),
2678                          (add node:$Rd,
2679                             (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2680
2681 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn),
2682                          (add node:$Rd,
2683                             (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2684
2685 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn),
2686                          (sub node:$Rd,
2687                             (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2688
2689 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn),
2690                          (sub node:$Rd,
2691                             (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2692
2693 defm SMLALvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
2694 defm UMLALvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
2695
2696 defm SMLSLvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
2697 defm UMLSLvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
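// For illustration only: Neon_smlal matches
//   (add $Rd, (int_arm_neon_vmulls $Rn, $Rm))
// so e.g. "smlal v0.4s, v1.4h, v2.4h" is selected whenever a widening
// multiply feeds an add of the accumulator; smlsl/umlsl are the sub forms.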
2698
2699 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
2700                            string asmop, string ResS, string OpS,
2701                            SDPatternOperator subop, SDPatternOperator opnode,
2702                            RegisterOperand OpVPR,
2703                            ValueType ResTy, ValueType OpTy>
2704   : NeonI_3VDiff<q, u, size, opcode,
2705                (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2706                asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2707                [(set (ResTy VPR128:$Rd),
2708                   (ResTy (subop
2709                     (ResTy VPR128:$src),
2710                     (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
2711                NoItinerary> {
2712   let Constraints = "$src = $Rd";
2713 }
2714
2715 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode,
2716                                    string asmop, 
2717                                    SDPatternOperator subop,
2718                                    string opnode>
2719 {
2720   def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2721                                     subop, !cast<PatFrag>(opnode # "_16B"),
2722                                     VPR128, v8i16, v16i8>;
2723   def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2724                                    subop, !cast<PatFrag>(opnode # "_8H"), 
2725                                    VPR128, v4i32, v8i16>;
2726   def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2727                                    subop, !cast<PatFrag>(opnode # "_4S"),
2728                                    VPR128, v2i64, v4i32>;
2729 }
2730
2731 defm SMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
2732                                           add, "NI_smull_hi">;
2733 defm UMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
2734                                           add, "NI_umull_hi">;
2735
2736 defm SMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
2737                                           sub, "NI_smull_hi">;
2738 defm UMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
2739                                           sub, "NI_umull_hi">;
2740
2741 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode,
2742                                     string asmop, SDPatternOperator opnode>
2743 {
2744   def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2745                                    opnode, int_arm_neon_vqdmull,
2746                                    VPR64, v4i32, v4i16>;
2747   def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2748                                    opnode, int_arm_neon_vqdmull,
2749                                    VPR64, v2i64, v2i32>;
2750 }
2751
2752 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
2753                                            int_arm_neon_vqadds>;
2754 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
2755                                            int_arm_neon_vqsubs>;
2756
2757 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode,
2758                          string asmop, SDPatternOperator opnode,
2759                          bit Commutable = 0>
2760 {
2761   let isCommutable = Commutable in {
2762     def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2763                               opnode, VPR128, VPR64, v4i32, v4i16>;
2764     def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2765                               opnode, VPR128, VPR64, v2i64, v2i32>;
2766   }
2767 }
2768
2769 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
2770                                 int_arm_neon_vqdmull, 1>;
2771
2772 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode,
2773                                    string asmop, 
2774                                    string opnode,
2775                                    bit Commutable = 0>
2776 {
2777   let isCommutable = Commutable in {
2778     def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2779                                      !cast<PatFrag>(opnode # "_8H"),
2780                                      v4i32, v8i16>;
2781     def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2782                                      !cast<PatFrag>(opnode # "_4S"),
2783                                      v2i64, v4i32>;
2784   }
2785 }
2786
2787 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2", 
2788                                            "NI_qdmull_hi", 1>;
2789
2790 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode,
2791                                      string asmop, 
2792                                      SDPatternOperator opnode>
2793 {
2794   def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2795                                    opnode, NI_qdmull_hi_8H,
2796                                    VPR128, v4i32, v8i16>;
2797   def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2798                                    opnode, NI_qdmull_hi_4S,
2799                                    VPR128, v2i64, v4i32>;
2800 }
2801
2802 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
2803                                              int_arm_neon_vqadds>;
2804 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
2805                                              int_arm_neon_vqsubs>;
2806
2807 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode,
2808                          string asmop, SDPatternOperator opnode,
2809                          bit Commutable = 0>
2810 {
2811   let isCommutable = Commutable in {
2812     def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2813                               opnode, VPR128, VPR64, v8i16, v8i8>;
2814   }
2815 }
2816
2817 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
2818
2819 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode,
2820                                    string asmop, 
2821                                    string opnode,
2822                                    bit Commutable = 0>
2823 {
2824   let isCommutable = Commutable in {
2825     def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2826                                       !cast<PatFrag>(opnode # "_16B"),
2827                                       v8i16, v16i8>;
2828   }
2829 }
2830
2831 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2",
2832                                          "NI_pmull_hi", 1>;
2833
2834 // End of implementation for instruction class (3V Diff)
2835
2836 // Scalar Arithmetic
2837
2838 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
2839   : NeonI_Scalar3Same<u, 0b11, opcode,
2840                 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
2841                 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2842                 [],
2843                 NoItinerary>;
2844
2845 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
2846                                         string asmop, bit Commutable = 0>
2847 {
2848   let isCommutable = Commutable in {
2849     def bbb : NeonI_Scalar3Same<u, 0b00, opcode,
2850                                 (outs FPR8:$Rd), (ins FPR8:$Rn, FPR8:$Rm),
2851                                 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2852                                 [],
2853                                 NoItinerary>;
2854     def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
2855                                 (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
2856                                 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2857                                 [],
2858                                 NoItinerary>;
2859     def sss : NeonI_Scalar3Same<u, 0b10, opcode,
2860                                 (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
2861                                 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2862                                 [],
2863                                 NoItinerary>;
2864     def ddd : NeonI_Scalar3Same<u, 0b11, opcode,
2865                                (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
2866                                !strconcat(asmop, " $Rd, $Rn, $Rm"),
2867                                [],
2868                                NoItinerary>;
2869   }
2870 }
2871
2872 multiclass Neon_Scalar_D_size_patterns<SDPatternOperator opnode,
2873                                        Instruction INSTD> {
2874   def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
2875             (INSTD FPR64:$Rn, FPR64:$Rm)>;        
2876 }
2877
2878 multiclass Neon_Scalar_BHSD_size_patterns<SDPatternOperator opnode,
2879                                           Instruction INSTB, Instruction INSTH,
2880                                           Instruction INSTS, Instruction INSTD>
2881   : Neon_Scalar_D_size_patterns<opnode, INSTD> {
2882   def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
2883            (INSTB FPR8:$Rn, FPR8:$Rm)>;
2884
2885   def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
2886            (INSTH FPR16:$Rn, FPR16:$Rm)>;
2887
2888   def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
2889            (INSTS FPR32:$Rn, FPR32:$Rm)>;
2890 }
2891
2892 // Scalar Integer Add
2893 let isCommutable = 1 in {
2894 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
2895 }
2896
2897 // Scalar Integer Sub
2898 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
2899
2900 // Patterns for Scalar Integer Add and Sub with D register only
2901 defm : Neon_Scalar_D_size_patterns<add, ADDddd>;
2902 defm : Neon_Scalar_D_size_patterns<sub, SUBddd>;
2903
2904 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
2905 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
2906 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
2907 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
2908 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
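// Illustrative example (assembly only, register numbers arbitrary): both a
// plain v1i64 add and the int_aarch64_neon_vaddds intrinsic above select
//   add d0, d1, d2
// operating on the low 64 bits of the FP/SIMD registers rather than on GPRs.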
2909
2910 // Scalar Integer Saturating Add (Signed, Unsigned)
2911 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
2912 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
2913
2914 // Scalar Integer Saturating Sub (Signed, Unsigned)
2915 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
2916 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
2917
2918 // Patterns to match llvm.arm.* intrinsic for
2919 // Scalar Integer Saturating Add, Sub  (Signed, Unsigned)
2920 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
2921 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
2922 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
2923 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
2924
2925 // Patterns to match llvm.aarch64.* intrinsic for
2926 // Scalar Integer Saturating Add, Sub  (Signed, Unsigned)
2927 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb, SQADDhhh,
2928                                       SQADDsss, SQADDddd>;
2929 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb, UQADDhhh,
2930                                       UQADDsss, UQADDddd>;
2931 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb, SQSUBhhh,
2932                                       SQSUBsss, SQSUBddd>;
2933 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb, UQSUBhhh,
2934                                       UQSUBsss, UQSUBddd>;
2935
2936 // Scalar Integer Shift Left (Signed, Unsigned)
2937 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
2938 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
2939
2940 // Patterns to match llvm.arm.* intrinsic for
2941 // Scalar Integer Shift Left (Signed, Unsigned)
2942 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
2943 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
2944
2945 // Patterns to match llvm.aarch64.* intrinsic for
2946 // Scalar Integer Shift Left (Signed, Unsigned)
2947 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
2948 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
2949
2950 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
2951 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
2952 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
2953
2954 // Patterns to match llvm.aarch64.* intrinsic for
2955 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
2956 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb, SQSHLhhh,
2957                                       SQSHLsss, SQSHLddd>;
2958 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb, UQSHLhhh,
2959                                       UQSHLsss, UQSHLddd>;
2960
2961 // Patterns to match llvm.arm.* intrinsic for
2962 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
2963 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
2964 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
2965
2966 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
2967 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
2968 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
2969
2970 // Patterns to match llvm.aarch64.* intrinsic for
2971 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
2972 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
2973 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
2974
2975 // Patterns to match llvm.arm.* intrinsic for
2976 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
2977 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
2978 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
2979
2980 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
2981 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
2982 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
2983
2984 // Patterns to match llvm.aarch64.* intrinsic for
2985 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
2986 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb, SQRSHLhhh,
2987                                       SQRSHLsss, SQRSHLddd>;
2988 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb, UQRSHLhhh,
2989                                       UQRSHLsss, UQRSHLddd>;
2990
2991 // Patterns to match llvm.arm.* intrinsic for
2992 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
2993 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
2994 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
2995
2996 // Scalar Reduce Pairwise
2997
2998 multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
2999                                      string asmop, bit Commutable = 0> {
3000   let isCommutable = Commutable in {
3001     def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
3002                                 (outs FPR64:$Rd), (ins VPR128:$Rn),
3003                                 !strconcat(asmop, " $Rd, $Rn.2d"),
3004                                 [],
3005                                 NoItinerary>;
3006   }
3007 }
3008
3009 multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
3010                                      string asmop, bit Commutable = 0>
3011   : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
3012   let isCommutable = Commutable in {
3013     def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
3014                                 (outs FPR32:$Rd), (ins VPR64:$Rn),
3015                                 !strconcat(asmop, " $Rd, $Rn.2s"),
3016                                 [],
3017                                 NoItinerary>;
3018   }
3019 }
3020
3021 // Scalar Reduce Addition Pairwise (Integer)
3023 defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
3024
3025 // Pattern to match llvm.aarch64.* intrinsic for
3026 // Scalar Reduce Addition Pairwise (Integer)
3027 def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
3028           (ADDPvv_D_2D VPR128:$Rn)>;
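// Illustrative example (assembly only, register numbers arbitrary):
//   addp d0, v1.2d
// adds the two 64-bit lanes of v1 and writes the scalar sum to d0, which is
// what the int_aarch64_neon_vpadd pattern above selects.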
3029
3030 // Scalar Reduce Addition Pairwise (Floating Point)
3031 defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
3032
3033 // Scalar Reduce Maximum Pairwise (Floating Point)
3034 defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
3035
3036 // Scalar Reduce Minimum Pairwise (Floating Point)
3037 defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
3038
3039 // Scalar Reduce maxNum Pairwise (Floating Point)
3040 defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
3041
3042 // Scalar Reduce minNum Pairwise (Floating Point)
3043 defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
3044
3045 multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
3046                                             SDPatternOperator opnodeD,
3047                                             Instruction INSTS,
3048                                             Instruction INSTD> {
3049   def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
3050             (INSTS VPR64:$Rn)>;
3051   def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
3052             (INSTD VPR128:$Rn)>;
3053 }
3054
3055 // Patterns to match llvm.aarch64.* intrinsic for
3056 // Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
3057 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
3058   int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
3059
3060 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
3061   int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
3062
3063 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
3064   int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
3065
3066 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
3067   int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
3068
3069 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm, 
3070   int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
3071
3072
3073
3074 //===----------------------------------------------------------------------===//
3075 // Non-Instruction Patterns
3076 //===----------------------------------------------------------------------===//
3077
3078 // 64-bit vector bitcasts...
3079
3080 def : Pat<(v1i64 (bitconvert (v8i8  VPR64:$src))), (v1i64 VPR64:$src)>;
3081 def : Pat<(v2f32 (bitconvert (v8i8  VPR64:$src))), (v2f32 VPR64:$src)>;
3082 def : Pat<(v2i32 (bitconvert (v8i8  VPR64:$src))), (v2i32 VPR64:$src)>;
3083 def : Pat<(v4i16 (bitconvert (v8i8  VPR64:$src))), (v4i16 VPR64:$src)>;
3084
3085 def : Pat<(v1i64 (bitconvert (v4i16  VPR64:$src))), (v1i64 VPR64:$src)>;
3086 def : Pat<(v2i32 (bitconvert (v4i16  VPR64:$src))), (v2i32 VPR64:$src)>;
3087 def : Pat<(v2f32 (bitconvert (v4i16  VPR64:$src))), (v2f32 VPR64:$src)>;
3088 def : Pat<(v8i8  (bitconvert (v4i16  VPR64:$src))), (v8i8 VPR64:$src)>;
3089
3090 def : Pat<(v1i64 (bitconvert (v2i32  VPR64:$src))), (v1i64 VPR64:$src)>;
3091 def : Pat<(v2f32 (bitconvert (v2i32  VPR64:$src))), (v2f32 VPR64:$src)>;
3092 def : Pat<(v4i16 (bitconvert (v2i32  VPR64:$src))), (v4i16 VPR64:$src)>;
3093 def : Pat<(v8i8  (bitconvert (v2i32  VPR64:$src))), (v8i8 VPR64:$src)>;
3094
3095 def : Pat<(v1i64 (bitconvert (v2f32  VPR64:$src))), (v1i64 VPR64:$src)>;
3096 def : Pat<(v2i32 (bitconvert (v2f32  VPR64:$src))), (v2i32 VPR64:$src)>;
3097 def : Pat<(v4i16 (bitconvert (v2f32  VPR64:$src))), (v4i16 VPR64:$src)>;
3098 def : Pat<(v8i8  (bitconvert (v2f32  VPR64:$src))), (v8i8 VPR64:$src)>;
3099
3100 def : Pat<(v2f32 (bitconvert (v1i64  VPR64:$src))), (v2f32 VPR64:$src)>;
3101 def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;
3102 def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
3103 def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;
3104
3105 // ...and 128-bit vector bitcasts...
3106
3107 def : Pat<(v2f64 (bitconvert (v16i8  VPR128:$src))), (v2f64 VPR128:$src)>;
3108 def : Pat<(v2i64 (bitconvert (v16i8  VPR128:$src))), (v2i64 VPR128:$src)>;
3109 def : Pat<(v4f32 (bitconvert (v16i8  VPR128:$src))), (v4f32 VPR128:$src)>;
3110 def : Pat<(v4i32 (bitconvert (v16i8  VPR128:$src))), (v4i32 VPR128:$src)>;
3111 def : Pat<(v8i16 (bitconvert (v16i8  VPR128:$src))), (v8i16 VPR128:$src)>;
3112
3113 def : Pat<(v2f64 (bitconvert (v8i16  VPR128:$src))), (v2f64 VPR128:$src)>;
3114 def : Pat<(v2i64 (bitconvert (v8i16  VPR128:$src))), (v2i64 VPR128:$src)>;
3115 def : Pat<(v4i32 (bitconvert (v8i16  VPR128:$src))), (v4i32 VPR128:$src)>;
3116 def : Pat<(v4f32 (bitconvert (v8i16  VPR128:$src))), (v4f32 VPR128:$src)>;
3117 def : Pat<(v16i8 (bitconvert (v8i16  VPR128:$src))), (v16i8 VPR128:$src)>;
3118
3119 def : Pat<(v2f64 (bitconvert (v4i32  VPR128:$src))), (v2f64 VPR128:$src)>;
3120 def : Pat<(v2i64 (bitconvert (v4i32  VPR128:$src))), (v2i64 VPR128:$src)>;
3121 def : Pat<(v4f32 (bitconvert (v4i32  VPR128:$src))), (v4f32 VPR128:$src)>;
3122 def : Pat<(v8i16 (bitconvert (v4i32  VPR128:$src))), (v8i16 VPR128:$src)>;
3123 def : Pat<(v16i8 (bitconvert (v4i32  VPR128:$src))), (v16i8 VPR128:$src)>;
3124
3125 def : Pat<(v2f64 (bitconvert (v4f32  VPR128:$src))), (v2f64 VPR128:$src)>;
3126 def : Pat<(v2i64 (bitconvert (v4f32  VPR128:$src))), (v2i64 VPR128:$src)>;
3127 def : Pat<(v4i32 (bitconvert (v4f32  VPR128:$src))), (v4i32 VPR128:$src)>;
3128 def : Pat<(v8i16 (bitconvert (v4f32  VPR128:$src))), (v8i16 VPR128:$src)>;
3129 def : Pat<(v16i8 (bitconvert (v4f32  VPR128:$src))), (v16i8 VPR128:$src)>;
3130
3131 def : Pat<(v2f64 (bitconvert (v2i64  VPR128:$src))), (v2f64 VPR128:$src)>;
3132 def : Pat<(v4f32 (bitconvert (v2i64  VPR128:$src))), (v4f32 VPR128:$src)>;
3133 def : Pat<(v4i32 (bitconvert (v2i64  VPR128:$src))), (v4i32 VPR128:$src)>;
3134 def : Pat<(v8i16 (bitconvert (v2i64  VPR128:$src))), (v8i16 VPR128:$src)>;
3135 def : Pat<(v16i8 (bitconvert (v2i64  VPR128:$src))), (v16i8 VPR128:$src)>;
3136
3137 def : Pat<(v2i64 (bitconvert (v2f64  VPR128:$src))), (v2i64 VPR128:$src)>;
3138 def : Pat<(v4f32 (bitconvert (v2f64  VPR128:$src))), (v4f32 VPR128:$src)>;
3139 def : Pat<(v4i32 (bitconvert (v2f64  VPR128:$src))), (v4i32 VPR128:$src)>;
3140 def : Pat<(v8i16 (bitconvert (v2f64  VPR128:$src))), (v8i16 VPR128:$src)>;
3141 def : Pat<(v16i8 (bitconvert (v2f64  VPR128:$src))), (v16i8 VPR128:$src)>;
3142
3143
3144 // ...and scalar bitcasts...
3145 def : Pat<(f16 (bitconvert (v1i16  FPR16:$src))), (f16 FPR16:$src)>;
3146 def : Pat<(f32 (bitconvert (v1i32  FPR32:$src))), (f32 FPR32:$src)>;
3147 def : Pat<(f64 (bitconvert (v1i64  FPR64:$src))), (f64 FPR64:$src)>;
3148 def : Pat<(f32 (bitconvert (v1f32  FPR32:$src))), (f32 FPR32:$src)>;
3149 def : Pat<(f64 (bitconvert (v1f64  FPR64:$src))), (f64 FPR64:$src)>;
3150
3151 def : Pat<(i64 (bitconvert (v1i64  FPR64:$src))), (FMOVxd $src)>;
3152 def : Pat<(i32 (bitconvert (v1i32  FPR32:$src))), (FMOVws $src)>;
3153
3154 def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;
3155 def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
3156 def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;
3157
3158 def : Pat<(f64   (bitconvert (v8i8  VPR64:$src))), (f64 VPR64:$src)>;
3159 def : Pat<(f64   (bitconvert (v4i16  VPR64:$src))), (f64 VPR64:$src)>;
3160 def : Pat<(f64   (bitconvert (v2i32  VPR64:$src))), (f64 VPR64:$src)>;
3161 def : Pat<(f64   (bitconvert (v2f32  VPR64:$src))), (f64 VPR64:$src)>;
3162 def : Pat<(f64   (bitconvert (v1i64  VPR64:$src))), (f64 VPR64:$src)>;
3163
3164 def : Pat<(f128  (bitconvert (v16i8  VPR128:$src))), (f128 VPR128:$src)>;
3165 def : Pat<(f128  (bitconvert (v8i16  VPR128:$src))), (f128 VPR128:$src)>;
3166 def : Pat<(f128  (bitconvert (v4i32  VPR128:$src))), (f128 VPR128:$src)>;
3167 def : Pat<(f128  (bitconvert (v2i64  VPR128:$src))), (f128 VPR128:$src)>;
3168 def : Pat<(f128  (bitconvert (v4f32  VPR128:$src))), (f128 VPR128:$src)>;
3169 def : Pat<(f128  (bitconvert (v2f64  VPR128:$src))), (f128 VPR128:$src)>;
3170
3171 def : Pat<(v1i16 (bitconvert (f16  FPR16:$src))), (v1i16 FPR16:$src)>;
3172 def : Pat<(v1i32 (bitconvert (f32  FPR32:$src))), (v1i32 FPR32:$src)>;
3173 def : Pat<(v1i64 (bitconvert (f64  FPR64:$src))), (v1i64 FPR64:$src)>;
3174 def : Pat<(v1f32 (bitconvert (f32  FPR32:$src))), (v1f32 FPR32:$src)>;
3175 def : Pat<(v1f64 (bitconvert (f64  FPR64:$src))), (v1f64 FPR64:$src)>;
3176
3177 def : Pat<(v1i64 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
3178 def : Pat<(v1i32 (bitconvert (i32  GPR32:$src))), (FMOVsw $src)>;
3179
3180 def : Pat<(v8i8   (bitconvert (f64   FPR64:$src))), (v8i8 FPR64:$src)>;
3181 def : Pat<(v4i16  (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
3182 def : Pat<(v2i32  (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
3183 def : Pat<(v2f32  (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
3184 def : Pat<(v1i64  (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;
3185
3186 def : Pat<(v16i8  (bitconvert (f128   FPR128:$src))), (v16i8 FPR128:$src)>;
3187 def : Pat<(v8i16  (bitconvert (f128   FPR128:$src))), (v8i16 FPR128:$src)>;
3188 def : Pat<(v4i32  (bitconvert (f128   FPR128:$src))), (v4i32 FPR128:$src)>;
3189 def : Pat<(v2i64  (bitconvert (f128   FPR128:$src))), (v2i64 FPR128:$src)>;
3190 def : Pat<(v4f32  (bitconvert (f128   FPR128:$src))), (v4f32 FPR128:$src)>;
3191 def : Pat<(v2f64  (bitconvert (f128   FPR128:$src))), (v2f64 FPR128:$src)>;
3192
3193 def neon_uimm0_bare : Operand<i64>,
3194                         ImmLeaf<i64, [{return Imm == 0;}]> {
3195   let ParserMatchClass = neon_uimm0_asmoperand;
3196   let PrintMethod = "printNeonUImm8OperandBare";
3197 }
3198
3199 def neon_uimm1_bare : Operand<i64>,
3200                         ImmLeaf<i64, [{(void)Imm; return true;}]> {
3201   let ParserMatchClass = neon_uimm1_asmoperand;
3202   let PrintMethod = "printNeonUImm8OperandBare";
3203 }
3204
3205 def neon_uimm2_bare : Operand<i64>,
3206                         ImmLeaf<i64, [{(void)Imm; return true;}]> {
3207   let ParserMatchClass = neon_uimm2_asmoperand;
3208   let PrintMethod = "printNeonUImm8OperandBare";
3209 }
3210
3211 def neon_uimm3_bare : Operand<i64>,
3212                         ImmLeaf<i64, [{(void)Imm; return true;}]> {
3213   let ParserMatchClass = uimm3_asmoperand;
3214   let PrintMethod = "printNeonUImm8OperandBare";
3215 }
3216
3217 def neon_uimm4_bare : Operand<i64>,
3218                         ImmLeaf<i64, [{(void)Imm; return true;}]> {
3219   let ParserMatchClass = uimm4_asmoperand;
3220   let PrintMethod = "printNeonUImm8OperandBare";
3221 }
3222
3223 class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
3224                      RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
3225   : NeonI_copy<0b1, 0b0, 0b0011,
3226                (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
3227                asmop # "\t$Rd." # Res # "[$Imm], $Rn",
3228                [(set (ResTy VPR128:$Rd),
3229                  (ResTy (vector_insert
3230                    (ResTy VPR128:$src),
3231                    (OpTy OpGPR:$Rn),
3232                    (OpImm:$Imm))))],
3233                NoItinerary> {
3234   bits<4> Imm;
3235   let Constraints = "$src = $Rd";
3236 }
3237
3238
3239 // Insert element (vector, from main)
3240 def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
3241                            neon_uimm4_bare> {
3242   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
3243 }
3244 def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
3245                            neon_uimm3_bare> {
3246   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
3247 }
3248 def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
3249                            neon_uimm2_bare> {
3250   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
3251 }
3252 def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
3253                            neon_uimm1_bare> {
3254   let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
3255 }
3256
3257 class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
3258                              RegisterClass OpGPR, ValueType OpTy, 
3259                              Operand OpImm, Instruction INS> 
3260   : Pat<(ResTy (vector_insert
3261               (ResTy VPR64:$src),
3262               (OpTy OpGPR:$Rn),
3263               (OpImm:$Imm))),
3264         (ResTy (EXTRACT_SUBREG 
3265           (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
3266             OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
3267
3268 def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
3269                                           neon_uimm3_bare, INSbw>;
3270 def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
3271                                           neon_uimm2_bare, INShw>;
3272 def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
3273                                           neon_uimm1_bare, INSsw>;
3274 def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
3275                                           neon_uimm0_bare, INSdx>;
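// For illustration only: inserting into a 64-bit vector has no separate
// encoding, so the pattern above widens $src with SUBREG_TO_REG, performs the
// 128-bit "ins" (e.g. "ins v0.h[2], w1"), and takes the low half back with
// EXTRACT_SUBREG; the subregister copies are normally coalesced away.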
3276
3277 class NeonI_INS_element<string asmop, string Res, ValueType ResTy,
3278                         Operand ResImm, ValueType MidTy>
3279   : NeonI_insert<0b1, 0b1,
3280                  (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, 
3281                  ResImm:$Immd, ResImm:$Immn),
3282                  asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
3283                  [(set (ResTy VPR128:$Rd),
3284                     (ResTy (vector_insert
3285                       (ResTy VPR128:$src),
3286                       (MidTy (vector_extract
3287                         (ResTy VPR128:$Rn),
3288                         (ResImm:$Immn))),
3289                       (ResImm:$Immd))))],
3290                  NoItinerary> {
3291   let Constraints = "$src = $Rd";
3292   bits<4> Immd;
3293   bits<4> Immn;
3294 }
3295
3296 // Insert element (vector, from element)
3297 def INSELb : NeonI_INS_element<"ins", "b", v16i8, neon_uimm4_bare, i32> {
3298   let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
3299   let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
3300 }
3301 def INSELh : NeonI_INS_element<"ins", "h", v8i16, neon_uimm3_bare, i32> {
3302   let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
3303   let Inst{14-12} = {Immn{2}, Immn{1}, Immn{0}};
3304   // bit 11 is unspecified.
3305 }
3306 def INSELs : NeonI_INS_element<"ins", "s", v4i32, neon_uimm2_bare, i32> {
3307   let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
3308   let Inst{14-13} = {Immn{1}, Immn{0}};
3309   // bits 11-12 are unspecified.
3310 }
3311 def INSELd : NeonI_INS_element<"ins", "d", v2i64, neon_uimm1_bare, i64> {
3312   let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
3313   let Inst{14} = Immn{0};
3314   // bits 11-13 are unspecified.
3315 }
3316
3317 multiclass Neon_INS_elt_pattern <ValueType NaTy, Operand NaImm,
3318                                 ValueType MidTy, ValueType StTy,
3319                                 Operand StImm, Instruction INS> { 
3320 def : Pat<(NaTy (vector_insert
3321             (NaTy VPR64:$src),
3322             (MidTy (vector_extract
3323               (StTy VPR128:$Rn),
3324               (StImm:$Immn))),
3325             (NaImm:$Immd))),
3326           (NaTy (EXTRACT_SUBREG
3327             (StTy (INS 
3328               (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
3329               (StTy VPR128:$Rn),
3330               NaImm:$Immd,
3331               StImm:$Immn)),
3332           sub_64))>;
3333
3334 def : Pat<(StTy (vector_insert
3335             (StTy VPR128:$src),
3336             (MidTy (vector_extract
3337               (NaTy VPR64:$Rn),
3338               (NaImm:$Immn))),
3339             (StImm:$Immd))),
3340           (StTy (INS 
3341             (StTy VPR128:$src),
3342             (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
3343             StImm:$Immd,
3344             NaImm:$Immn))>;
3345
3346 def : Pat<(NaTy (vector_insert
3347             (NaTy VPR64:$src),
3348             (MidTy (vector_extract
3349               (NaTy VPR64:$Rn),
3350               (NaImm:$Immn))),
3351             (NaImm:$Immd))),
3352           (NaTy (EXTRACT_SUBREG
3353             (StTy (INS 
3354               (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
3355               (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
3356               NaImm:$Immd,
3357               NaImm:$Immn)),
3358           sub_64))>;
3359 }
3360
3361 defm INSb_pattern : Neon_INS_elt_pattern<v8i8, neon_uimm3_bare, i32,
3362                                          v16i8, neon_uimm4_bare, INSELb>;
3363 defm INSh_pattern : Neon_INS_elt_pattern<v4i16, neon_uimm2_bare, i32,
3364                                          v8i16, neon_uimm3_bare, INSELh>;
3365 defm INSs_pattern : Neon_INS_elt_pattern<v2i32, neon_uimm1_bare, i32,
3366                                          v4i32, neon_uimm2_bare, INSELs>;
3367 defm INSd_pattern : Neon_INS_elt_pattern<v1i64, neon_uimm0_bare, i64,
3368                                          v2i64, neon_uimm1_bare, INSELd>;
3369
3370 class NeonI_SMOV<string asmop, string Res, bit Q,
3371                  ValueType OpTy, ValueType eleTy,
3372                  Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
3373   : NeonI_copy<Q, 0b0, 0b0101,
3374                (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
3375                asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
3376                [(set (ResTy ResGPR:$Rd),
3377                  (ResTy (sext_inreg
3378                    (ResTy (vector_extract
3379                      (OpTy VPR128:$Rn), (OpImm:$Imm))),
3380                    eleTy)))],
3381                NoItinerary> {
3382   bits<4> Imm;
3383 }
3384
3385 // Signed integer move (main, from element)
3386 def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
3387                         GPR32, i32> {
3388   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
3389 }
3390 def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
3391                         GPR32, i32> {
3392   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
3393 }
3394 def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
3395                         GPR64, i64> {
3396   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
3397 }
3398 def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
3399                         GPR64, i64> {
3400   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
3401 }
3402 def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
3403                         GPR64, i64> {
3404   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
3405 }
3406
3407 multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
3408                                ValueType eleTy, Operand StImm,  Operand NaImm,
3409                                Instruction SMOVI> {
3410   def : Pat<(i64 (sext_inreg
3411               (i64 (anyext
3412                 (i32 (vector_extract
3413                   (StTy VPR128:$Rn), (StImm:$Imm))))),
3414               eleTy)),
3415             (SMOVI VPR128:$Rn, StImm:$Imm)>;
3416   
3417   def : Pat<(i64 (sext
3418               (i32 (vector_extract
3419                 (StTy VPR128:$Rn), (StImm:$Imm))))),
3420             (SMOVI VPR128:$Rn, StImm:$Imm)>;
3421   
3422   def : Pat<(i64 (sext_inreg
3423               (i64 (vector_extract
3424                 (NaTy VPR64:$Rn), (NaImm:$Imm))),
3425               eleTy)),
3426             (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
3427               NaImm:$Imm)>;
3428   
3429   def : Pat<(i64 (sext_inreg
3430               (i64 (anyext
3431                 (i32 (vector_extract
3432                   (NaTy VPR64:$Rn), (NaImm:$Imm))))),
3433               eleTy)),
3434             (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
3435               NaImm:$Imm)>;
3436   
3437   def : Pat<(i64 (sext
3438               (i32 (vector_extract
3439                 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
3440             (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
3441               NaImm:$Imm)>; 
3442 }
3443
3444 defm SMOVxb_pattern : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
3445                                           neon_uimm3_bare, SMOVxb>;
3446 defm SMOVxh_pattern : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
3447                                           neon_uimm2_bare, SMOVxh>;
3448 defm SMOVxs_pattern : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
3449                                           neon_uimm1_bare, SMOVxs>;
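// Illustrative example (assembly only, register numbers arbitrary):
//   smov x0, v1.h[3]
// sign-extends halfword lane 3 into x0, covering both the direct sext and
// the sext_inreg-of-anyext forms matched above.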
3450
3451 class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
3452                           ValueType eleTy, Operand StImm,  Operand NaImm,
3453                           Instruction SMOVI>
3454   : Pat<(i32 (sext_inreg
3455           (i32 (vector_extract
3456             (NaTy VPR64:$Rn), (NaImm:$Imm))),
3457           eleTy)),
3458         (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
3459           NaImm:$Imm)>;
3460
3461 def SMOVwb_pattern : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
3462                                           neon_uimm3_bare, SMOVwb>;
3463 def SMOVwh_pattern : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
3464                                           neon_uimm2_bare, SMOVwh>;
3465
3466
3467 class NeonI_UMOV<string asmop, string Res, bit Q,
3468                  ValueType OpTy, Operand OpImm,
3469                  RegisterClass ResGPR, ValueType ResTy>
3470   : NeonI_copy<Q, 0b0, 0b0111,
3471                (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
3472                asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
3473                [(set (ResTy ResGPR:$Rd),
3474                   (ResTy (vector_extract
3475                     (OpTy VPR128:$Rn), (OpImm:$Imm))))],
3476                NoItinerary> {
3477   bits<4> Imm;
3478 }
3479
3480 // Unsigned integer move (main, from element)
3481 def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
3482                          GPR32, i32> {
3483   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
3484 }
3485 def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
3486                          GPR32, i32> {
3487   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
3488 }
3489 def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
3490                          GPR32, i32> {
3491   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
3492 }
3493 def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
3494                          GPR64, i64> {
3495   let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
3496 }
3497
3498 class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
3499                          Operand StImm,  Operand NaImm,
3500                          Instruction UMOVI>
3501   : Pat<(ResTy (vector_extract
3502           (NaTy VPR64:$Rn), NaImm:$Imm)),
3503         (UMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
3504           NaImm:$Imm)>;
3505
3506 def UMOVwb_pattern : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
3507                                        neon_uimm3_bare, UMOVwb>;
3508 def UMOVwh_pattern : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
3509                                        neon_uimm2_bare, UMOVwh>; 
3510 def UMOVws_pattern : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
3511                                        neon_uimm1_bare, UMOVws>;
3512
3513 def : Pat<(i32 (and
3514             (i32 (vector_extract
3515               (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
3516             255)),
3517           (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
3518
3519 def : Pat<(i32 (and
3520             (i32 (vector_extract
3521               (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
3522             65535)),
3523           (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
3524
3525 def : Pat<(i64 (zext
3526             (i32 (vector_extract
3527               (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
3528           (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
3529
3530 def : Pat<(i32 (and
3531             (i32 (vector_extract
3532               (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
3533             255)),
3534           (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
3535             neon_uimm3_bare:$Imm)>;
3536
3537 def : Pat<(i32 (and
3538             (i32 (vector_extract
3539               (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
3540             65535)),
3541           (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
3542             neon_uimm2_bare:$Imm)>;
3543
3544 def : Pat<(i64 (zext
3545             (i32 (vector_extract
3546               (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
3547           (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
3548             neon_uimm0_bare:$Imm)>;
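// For illustration only: since "umov w0, v1.b[3]" already zeroes the upper
// bits of w0, an explicit mask such as (and (vector_extract ...), 255) or a
// zext of the extracted lane folds into the plain umov, which is what the
// patterns above encode.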
3549
3550 // Additional copy patterns for scalar types
3551 def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
3552           (UMOVwb (v16i8
3553             (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
3554
3555 def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
3556           (UMOVwh (v8i16
3557             (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
3558
3559 def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
3560           (FMOVws FPR32:$Rn)>;
3561
3562 def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
3563           (FMOVxd FPR64:$Rn)>;
3564                
3565 def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
3566           (f64 FPR64:$Rn)>;
3567
3568 def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
3569           (f32 FPR32:$Rn)>;
3570
3571 def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
3572           (v1i8 (EXTRACT_SUBREG (v16i8
3573             (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
3574             sub_8))>;
3575
3576 def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
3577           (v1i16 (EXTRACT_SUBREG (v8i16
3578             (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
3579             sub_16))>;
3580
3581 def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
3582           (FMOVsw $src)>;
3583
3584 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
3585           (FMOVdx $src)>;
3586