[ARM] Add support for ARMV6K subtarget (LLVM)
[oota-llvm.git] / lib / Target / ARM / ARMScheduleSwift.td
1 //=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the Swift processor.
11 //
12 //===----------------------------------------------------------------------===//
13
14 // ===---------------------------------------------------------------------===//
15 // This section contains legacy support for itineraries. This is
16 // required until SD and PostRA schedulers are replaced by MachineScheduler.
17
18 def SW_DIS0 : FuncUnit; // SW_DIS0-2: every visible itinerary entry starts with SW_DIS* stage(s).
19 def SW_DIS1 : FuncUnit; // Multi-uop entries (e.g. IIC_iMUL64) list SW_DIS0/1/2 as separate stages.
20 def SW_DIS2 : FuncUnit;
21
22 def SW_ALU0 : FuncUnit; // Sole ALU listed for IIC_iMUL16/32, IIC_iMAC16/32, IIC_iDIV, fpUNA/fpCMP/fpALU.
23 def SW_ALU1 : FuncUnit; // Sole ALU listed for the IIC_fpCVT*, fpMUL, fpMAC, fpFMAC, fpDIV, fpSQRT entries.
24 def SW_LS   : FuncUnit; // Listed by the integer load/store entries (presumably the load/store pipe).
25 def SW_IDIV : FuncUnit; // In the visible entries, used only as InstrStage<14, [SW_IDIV]> in IIC_iDIV.
26 def SW_FDIV : FuncUnit; // Used as InstrStage<15, ...> / <30, ...> in IIC_fpDIV32/64 and IIC_fpSQRT32/64.
27
28 // FIXME: Need bypasses.
29 // FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
30 //        IIC_iMOVix2ld better.
31 // FIXME: Model the special immediate shifts that are not microcoded.
32 // FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
33 //        to issue on pipe 1?
34 // FIXME: Model the pipelined behavior of CMP / TST instructions.
35 // FIXME: Better model the microcode stages of multiply instructions, especially
36 //        conditional variants.
37 // FIXME: Add preload instruction when it is documented.
38 // FIXME: Model non-pipelined nature of FP div / sqrt unit.
39
40 def SwiftItineraries : ProcessorItineraries<
41   [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
42   //
43   // Move instructions, unconditional
44   InstrItinData<IIC_iMOVi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
45                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
46                               [1]>,
47   InstrItinData<IIC_iMOVr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
48                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
49                               [1]>,
50   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
51                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
52                               [1]>,
53   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
54                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
55                               [1]>,
56   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
57                                InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
58                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
59                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
60                               [2]>,
61   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
62                                   InstrStage<1, [SW_ALU0, SW_ALU1]>,
63                                   InstrStage<1, [SW_ALU0, SW_ALU1]>,
64                                   InstrStage<1, [SW_ALU0, SW_ALU1]>],
65                                  [3]>,
66   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
67                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
68                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
69                                InstrStage<1, [SW_LS]>],
70                               [5]>,
71   //
72   // MVN instructions
73   InstrItinData<IIC_iMVNi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
74                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
75                               [1]>,
76   InstrItinData<IIC_iMVNr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
77                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
78                               [1]>,
79   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
80                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
81                               [1]>,
82   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
83                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
84                               [1]>,
85   //
86   // No operand cycles
87   InstrItinData<IIC_iALUx   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
88                                InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
89   //
90   // Binary Instructions that produce a result
91   InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
92                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
93                             [1, 1]>,
94   InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
95                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
96                             [1, 1, 1]>,
97   InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
98                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
99                             [2, 1, 1]>,
100   InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
101                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
102                             [2, 1, 1]>,
103   InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
104                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
105                             [2, 1, 1, 1]>,
106   //
107   // Bitwise Instructions that produce a result
108   InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
109                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
110                             [1, 1]>,
111   InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
112                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
113                             [1, 1, 1]>,
114   InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
115                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
116                             [2, 1, 1]>,
117   InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
118                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
119                             [2, 1, 1, 1]>,
120   //
121   // Unary Instructions that produce a result
122
123   // CLZ, RBIT, etc.
124   InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
125                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
126                             [1, 1]>,
127
128   // BFC, BFI, UBFX, SBFX
129   InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
130                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
131                             [2, 1]>,
132
133   //
134   // Zero and sign extension instructions
135   InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
136                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
137                             [1, 1]>,
138   InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
139                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
140                             [1, 1, 1]>,
141   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
142                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
143                             [1, 1, 1, 1]>,
144   //
145   // Compare instructions
146   InstrItinData<IIC_iCMPi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
147                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
148                               [1]>,
149   InstrItinData<IIC_iCMPr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
150                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
151                               [1, 1]>,
152   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
153                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
154                               [1, 1]>,
155   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
156                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
157                               [1, 1, 1]>,
158   //
159   // Test instructions
160   InstrItinData<IIC_iTSTi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
161                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
162                               [1]>,
163   InstrItinData<IIC_iTSTr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
164                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
165                               [1, 1]>,
166   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
167                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
168                               [1, 1]>,
169   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
170                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
171                               [1, 1, 1]>,
172   //
173   // Move instructions, conditional
174   // FIXME: Correctly model the extra input dep on the destination.
175   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
176                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
177                               [1]>,
178   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
179                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
180                               [1, 1]>,
181   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
182                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
183                               [1, 1]>,
184   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
185                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
186                               [2, 1, 1]>,
187   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
188                                InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
189                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
190                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
191                               [2]>,
192
193   // Integer multiply pipeline
194   //
195   InstrItinData<IIC_iMUL16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
196                                InstrStage<1, [SW_ALU0]>],
197                               [3, 1, 1]>,
198   InstrItinData<IIC_iMAC16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
199                                InstrStage<1, [SW_ALU0]>],
200                               [3, 1, 1, 1]>,
201   InstrItinData<IIC_iMUL32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
202                                InstrStage<1, [SW_ALU0]>],
203                               [4, 1, 1]>,
204   InstrItinData<IIC_iMAC32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
205                                InstrStage<1, [SW_ALU0]>],
206                               [4, 1, 1, 1]>,
207   InstrItinData<IIC_iMUL64  , [InstrStage<1, [SW_DIS0], 0>,
208                                InstrStage<1, [SW_DIS1], 0>,
209                                InstrStage<1, [SW_DIS2], 0>,
210                                InstrStage<1, [SW_ALU0], 1>,
211                                InstrStage<1, [SW_ALU0], 3>,
212                                InstrStage<1, [SW_ALU0]>],
213                               [5, 5, 1, 1]>,
214   InstrItinData<IIC_iMAC64  , [InstrStage<1, [SW_DIS0], 0>,
215                                InstrStage<1, [SW_DIS1], 0>,
216                                InstrStage<1, [SW_DIS2], 0>,
217                                InstrStage<1, [SW_ALU0], 1>,
218                                InstrStage<1, [SW_ALU0], 1>,
219                                InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
220                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
221                               [5, 6, 1, 1]>,
222   //
223   // Integer divide
224   InstrItinData<IIC_iDIV  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
225                              InstrStage<1, [SW_ALU0], 0>,
226                              InstrStage<14, [SW_IDIV]>],
227                             [14, 1, 1]>,
228
229   // Integer load pipeline
230   // FIXME: The timings are some rough approximations
231   //
232   // Immediate offset
233   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
234                                  InstrStage<1, [SW_LS]>],
235                                 [3, 1]>,
236   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
237                                  InstrStage<1, [SW_LS]>],
238                                 [3, 1]>,
239   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
240                                  InstrStage<1, [SW_DIS1], 0>,
241                                  InstrStage<1, [SW_LS], 1>,
242                                  InstrStage<1, [SW_LS]>],
243                                 [3, 4, 1]>,
244   //
245   // Register offset
246   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
247                                  InstrStage<1, [SW_LS]>],
248                                 [3, 1, 1]>,
249   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
250                                  InstrStage<1, [SW_LS]>],
251                                 [3, 1, 1]>,
252   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
253                                  InstrStage<1, [SW_DIS1], 0>,
254                                  InstrStage<1, [SW_DIS2], 0>,
255                                  InstrStage<1, [SW_LS], 1>,
256                                  InstrStage<1, [SW_LS], 3>,
257                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
258                                 [3, 4, 1, 1]>,
259   //
260   // Scaled register offset
261   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [SW_DIS0], 0>,
262                                  InstrStage<1, [SW_DIS1], 0>,
263                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
264                                  InstrStage<1, [SW_LS]>],
265                                 [5, 1, 1]>,
266   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
267                                  InstrStage<1, [SW_DIS1], 0>,
268                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
269                                  InstrStage<1, [SW_LS]>],
270                                 [5, 1, 1]>,
271   //
272   // Immediate offset with update
273   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [SW_DIS0], 0>,
274                                  InstrStage<1, [SW_DIS1], 0>,
275                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
276                                  InstrStage<1, [SW_LS]>],
277                                 [3, 1, 1]>,
278   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
279                                  InstrStage<1, [SW_DIS1], 0>,
280                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
281                                  InstrStage<1, [SW_LS]>],
282                                 [3, 1, 1]>,
283   //
284   // Register offset with update
285   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [SW_DIS0], 0>,
286                                  InstrStage<1, [SW_DIS1], 0>,
287                                  InstrStage<1, [SW_ALU0], 1>,
288                                  InstrStage<1, [SW_LS]>],
289                                 [3, 1, 1, 1]>,
290   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
291                                  InstrStage<1, [SW_DIS1], 0>,
292                                  InstrStage<1, [SW_ALU0], 1>,
293                                  InstrStage<1, [SW_LS]>],
294                                 [3, 1, 1, 1]>,
295   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
296                                  InstrStage<1, [SW_DIS1], 0>,
297                                  InstrStage<1, [SW_DIS2], 0>,
298                                  InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
299                                  InstrStage<1, [SW_LS], 3>,
300                                  InstrStage<1, [SW_LS], 0>,
301                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
302                                 [3, 4, 1, 1]>,
303   //
304   // Scaled register offset with update
305   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
306                                  InstrStage<1, [SW_DIS1], 0>,
307                                  InstrStage<1, [SW_DIS2], 0>,
308                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
309                                  InstrStage<1, [SW_LS], 3>,
310                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
311                                 [5, 3, 1, 1]>,
312   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
313                                   InstrStage<1, [SW_DIS1], 0>,
314                                   InstrStage<1, [SW_DIS2], 0>,
315                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
316                                   InstrStage<1, [SW_LS], 0>,
317                                   InstrStage<1, [SW_ALU0, SW_ALU1]>],
318                                 [5, 3, 1, 1]>,
319   //
320   // Load multiple, def is the 5th operand.
321   // FIXME: This assumes 3 to 4 registers.
322   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [SW_DIS0], 0>,
323                                 InstrStage<1, [SW_DIS1], 0>,
324                                 InstrStage<1, [SW_DIS2], 0>,
325                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
326                                 InstrStage<1, [SW_LS]>],
327                                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
328
329   //
330   // Load multiple + update, defs are the 1st and 5th operands.
331   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
332                                 InstrStage<1, [SW_DIS1], 0>,
333                                 InstrStage<1, [SW_DIS2], 0>,
334                                 InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
335                                 InstrStage<1, [SW_LS], 3>,
336                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
337                                [2, 1, 1, 1, 3], [], -1>, // dynamic uops
338   //
339   // Load multiple plus branch
340   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
341                                 InstrStage<1, [SW_DIS1], 0>,
342                                 InstrStage<1, [SW_DIS2], 0>,
343                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
344                                 InstrStage<1, [SW_LS]>],
345                                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
346   //
347   // Pop, def is the 3rd operand.
348   InstrItinData<IIC_iPop  ,    [InstrStage<1, [SW_DIS0], 0>,
349                                 InstrStage<1, [SW_DIS1], 0>,
350                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
351                                 InstrStage<1, [SW_LS]>],
352                                [1, 1, 3], [], -1>, // dynamic uops
353   //
354   // Pop + branch, def is the 3rd operand.
355   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [SW_DIS0], 0>,
356                                 InstrStage<1, [SW_DIS1], 0>,
357                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
358                                 InstrStage<1, [SW_LS]>],
359                                [1, 1, 3], [], -1>, // dynamic uops
360
361   //
362   // iLoadi + iALUr for t2LDRpci_pic.
363   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
364                                 InstrStage<1, [SW_LS], 3>,
365                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
366                                [4, 1]>,
367
368   // Integer store pipeline
369   //
370   // Immediate offset
371   InstrItinData<IIC_iStore_i  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
372                                  InstrStage<1, [SW_LS]>],
373                                 [1, 1]>,
374   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
375                                  InstrStage<1, [SW_LS]>],
376                                 [1, 1]>,
377   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
378                                  InstrStage<1, [SW_DIS1], 0>,
379                                  InstrStage<1, [SW_DIS2], 0>,
380                                  InstrStage<1, [SW_LS], 0>,
381                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
382                                  InstrStage<1, [SW_LS]>],
383                                 [1, 1]>,
384   //
385   // Register offset
386   InstrItinData<IIC_iStore_r  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
387                                  InstrStage<1, [SW_LS]>],
388                                 [1, 1, 1]>,
389   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
390                                  InstrStage<1, [SW_LS]>],
391                                 [1, 1, 1]>,
392   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
393                                  InstrStage<1, [SW_DIS1], 0>,
394                                  InstrStage<1, [SW_DIS2], 0>,
395                                  InstrStage<1, [SW_LS], 0>,
396                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
397                                  InstrStage<1, [SW_LS]>],
398                                 [1, 1, 1]>,
399   //
400   // Scaled register offset
401   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [SW_DIS0], 0>,
402                                   InstrStage<1, [SW_DIS1], 0>,
403                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
404                                   InstrStage<1, [SW_LS]>],
405                                  [1, 1, 1]>,
406   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
407                                   InstrStage<1, [SW_DIS1], 0>,
408                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
409                                   InstrStage<1, [SW_LS]>],
410                                  [1, 1, 1]>,
411   //
412   // Immediate offset with update
413   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [SW_DIS0], 0>,
414                                   InstrStage<1, [SW_DIS1], 0>,
415                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
416                                   InstrStage<1, [SW_LS]>],
417                                  [1, 1, 1]>,
418   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
419                                   InstrStage<1, [SW_DIS1], 0>,
420                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
421                                   InstrStage<1, [SW_LS]>],
422                                  [1, 1, 1]>,
423   //
424   // Register offset with update
425   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [SW_DIS0], 0>,
426                                   InstrStage<1, [SW_DIS1], 0>,
427                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
428                                   InstrStage<1, [SW_LS]>],
429                                  [1, 1, 1, 1]>,
430   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
431                                   InstrStage<1, [SW_DIS1], 0>,
432                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
433                                   InstrStage<1, [SW_LS]>],
434                                  [1, 1, 1, 1]>,
435   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
436                                   InstrStage<1, [SW_DIS1], 0>,
437                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
438                                   InstrStage<1, [SW_LS]>],
439                                  [1, 1, 1, 1]>,
440   //
441   // Scaled register offset with update
442   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [SW_DIS0], 0>,
443                                     InstrStage<1, [SW_DIS1], 0>,
444                                     InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
445                                     InstrStage<1, [SW_LS], 0>,
446                                     InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
447                                    [3, 1, 1, 1]>,
448   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
449                                     InstrStage<1, [SW_DIS1], 0>,
450                                     InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
451                                     InstrStage<1, [SW_LS], 0>,
452                                     InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
453                                    [3, 1, 1, 1]>,
454   //
455   // Store multiple
456   InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
457                                 InstrStage<1, [SW_DIS1], 0>,
458                                 InstrStage<1, [SW_DIS2], 0>,
459                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
460                                 InstrStage<1, [SW_LS], 1>,
461                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
462                                 InstrStage<1, [SW_LS], 1>,
463                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
464                                 InstrStage<1, [SW_LS]>],
465                                 [], [], -1>, // dynamic uops
466   //
467   // Store multiple + update
468   InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
469                                 InstrStage<1, [SW_DIS1], 0>,
470                                 InstrStage<1, [SW_DIS2], 0>,
471                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
472                                 InstrStage<1, [SW_LS], 1>,
473                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
474                                 InstrStage<1, [SW_LS], 1>,
475                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
476                                 InstrStage<1, [SW_LS]>],
477                                [2], [], -1>, // dynamic uops
478
479   //
480   // Preload
481   InstrItinData<IIC_Preload,   [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
482
483   // Branch
484   //
485   // no delay slots, so the latency of a branch is unimportant
486   InstrItinData<IIC_Br       , [InstrStage<1, [SW_DIS0], 0>]>,
487
488   // FP Special Register to Integer Register File Move
489   InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
490                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
491                              [1]>,
492   //
493   // Single-precision FP Unary
494   //
495   // Most floating-point moves get issued on ALU0.
496   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
497                                InstrStage<1, [SW_ALU0]>],
498                               [2, 1]>,
499   //
500   // Double-precision FP Unary
501   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
502                                InstrStage<1, [SW_ALU0]>],
503                               [2, 1]>,
504
505   //
506   // Single-precision FP Compare
507   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
508                                InstrStage<1, [SW_ALU0]>],
509                               [1, 1]>,
510   //
511   // Double-precision FP Compare
512   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
513                                InstrStage<1, [SW_ALU0]>],
514                               [1, 1]>,
515   //
516   // Single to Double FP Convert
517   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
518                                InstrStage<1, [SW_ALU1]>],
519                               [4, 1]>,
520   //
521   // Double to Single FP Convert
522   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
523                                InstrStage<1, [SW_ALU1]>],
524                               [4, 1]>,
525
526   //
527   // Single to Half FP Convert
528   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
529                                InstrStage<1, [SW_DIS1], 0>,
530                                InstrStage<1, [SW_ALU1], 4>,
531                                InstrStage<1, [SW_ALU1]>],
532                               [6, 1]>,
533   //
534   // Half to Single FP Convert
535   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
536                                InstrStage<1, [SW_ALU1]>],
537                               [4, 1]>,
538
539   //
540   // Single-Precision FP to Integer Convert
541   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
542                                InstrStage<1, [SW_ALU1]>],
543                               [4, 1]>,
544   //
545   // Double-Precision FP to Integer Convert
546   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
547                                InstrStage<1, [SW_ALU1]>],
548                               [4, 1]>,
549   //
550   // Integer to Single-Precision FP Convert
551   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
552                                InstrStage<1, [SW_ALU1]>],
553                               [4, 1]>,
554   //
555   // Integer to Double-Precision FP Convert
556   InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
557                                InstrStage<1, [SW_ALU1]>],
558                               [4, 1]>,
559   //
560   // Single-precision FP ALU
561   InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
562                                InstrStage<1, [SW_ALU0]>],
563                               [2, 1, 1]>,
564   //
565   // Double-precision FP ALU
566   InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
567                                InstrStage<1, [SW_ALU0]>],
568                               [2, 1, 1]>,
569   //
570   // Single-precision FP Multiply
571   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
572                                InstrStage<1, [SW_ALU1]>],
573                               [4, 1, 1]>,
574   //
575   // Double-precision FP Multiply
576   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
577                                InstrStage<1, [SW_ALU1]>],
578                               [6, 1, 1]>,
579   //
580   // Single-precision FP MAC
581   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
582                                InstrStage<1, [SW_ALU1]>],
583                               [8, 1, 1]>,
584   //
585   // Double-precision FP MAC
586   InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
587                                InstrStage<1, [SW_ALU1]>],
588                               [12, 1, 1]>,
589   //
590   // Single-precision Fused FP MAC
591   InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
592                                InstrStage<1, [SW_ALU1]>],
593                               [8, 1, 1]>,
594   //
595   // Double-precision Fused FP MAC
596   InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
597                                InstrStage<1, [SW_ALU1]>],
598                               [12, 1, 1]>,
599   //
600   // Single-precision FP DIV
601   InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
602                                InstrStage<1, [SW_ALU1], 0>,
603                                InstrStage<15, [SW_FDIV]>],
604                               [17, 1, 1]>,
605   //
606   // Double-precision FP DIV
607   InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
608                                InstrStage<1, [SW_ALU1], 0>,
609                                InstrStage<30, [SW_FDIV]>],
610                               [32, 1, 1]>,
611   //
612   // Single-precision FP SQRT
613   InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
614                                InstrStage<1, [SW_ALU1], 0>,
615                                InstrStage<15, [SW_FDIV]>],
616                               [17, 1]>,
617   //
618   // Double-precision FP SQRT
619   InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
620                                InstrStage<1, [SW_ALU1], 0>,
621                                InstrStage<30, [SW_FDIV]>],
622                               [32, 1, 1]>,
623
624   //
625   // Integer to Single-precision Move
626   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [SW_DIS0], 0>,
627                                InstrStage<1, [SW_DIS1], 0>,
628                                InstrStage<1, [SW_LS], 4>,
629                                InstrStage<1, [SW_ALU0]>],
630                               [6, 1]>,
631   //
632   // Integer to Double-precision Move
633   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
634                                InstrStage<1, [SW_LS]>],
635                               [4, 1]>,
636   //
637   // Single-precision to Integer Move
638   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
639                                InstrStage<1, [SW_LS]>],
640                               [3, 1]>,
641   //
642   // Double-precision to Integer Move
643   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [SW_DIS0], 0>,
644                                InstrStage<1, [SW_DIS1], 0>,
645                                InstrStage<1, [SW_LS], 3>,
646                                InstrStage<1, [SW_LS]>],
647                               [3, 4, 1]>,
648   //
649   // Single-precision FP Load
650   InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
651                                InstrStage<1, [SW_LS]>],
652                               [4, 1]>,
653   //
654   // Double-precision FP Load
655   InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
656                                InstrStage<1, [SW_LS]>],
657                               [4, 1]>,
658   //
659   // FP Load Multiple
660   // FIXME: Assumes a single Q register.
661   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
662                                InstrStage<1, [SW_LS]>],
663                               [1, 1, 1, 4], [], -1>, // dynamic uops
664   //
665   // FP Load Multiple + update
666   // FIXME: Assumes a single Q register.
667   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
668                                InstrStage<1, [SW_DIS1], 0>,
669                                InstrStage<1, [SW_LS], 4>,
670                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
671                               [2, 1, 1, 1, 4], [], -1>, // dynamic uops
672   //
673   // Single-precision FP Store
674   InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
675                                InstrStage<1, [SW_LS]>],
676                               [1, 1]>,
677   //
678   // Double-precision FP Store
679   InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
680                                InstrStage<1, [SW_LS]>],
681                               [1, 1]>,
682   //
683   // FP Store Multiple
684   // FIXME: Assumes a single Q register.
685   InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
686                                InstrStage<1, [SW_LS]>],
687                               [1, 1, 1], [], -1>, // dynamic uops
688   //
689   // FP Store Multiple + update
690   // FIXME: Assumes a single Q register.
691   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
692                                 InstrStage<1, [SW_DIS1], 0>,
693                                 InstrStage<1, [SW_LS], 4>,
694                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
695                                [2, 1, 1, 1], [], -1>, // dynamic uops
696   // NEON
697   //
698   // Double-register Integer Unary
699   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
700                                InstrStage<1, [SW_ALU0]>],
701                               [4, 1]>,
702   //
703   // Quad-register Integer Unary
704   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
705                                InstrStage<1, [SW_ALU0]>],
706                               [4, 1]>,
707   //
708   // Double-register Integer Q-Unary
709   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
710                                InstrStage<1, [SW_ALU0]>],
711                               [4, 1]>,
712   //
713   // Quad-register Integer Q-Unary
714   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
715                                InstrStage<1, [SW_ALU0]>],
716                               [4, 1]>,
717   //
718   // Double-register Integer Binary
719   InstrItinData<IIC_VBINiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
720                                InstrStage<1, [SW_ALU0]>],
721                               [2, 1, 1]>,
722   //
723   // Quad-register Integer Binary
724   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
725                                InstrStage<1, [SW_ALU0]>],
726                               [2, 1, 1]>,
727   //
728   // Double-register Integer Subtract
729   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
730                                InstrStage<1, [SW_ALU0]>],
731                               [2, 1, 1]>,
732   //
733   // Quad-register Integer Subtract
734   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
735                                InstrStage<1, [SW_ALU0]>],
736                               [2, 1, 1]>,
737   //
738   // Double-register Integer Shift
739   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
740                                InstrStage<1, [SW_ALU0]>],
741                               [2, 1, 1]>,
742   //
743   // Quad-register Integer Shift
744   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
745                                InstrStage<1, [SW_ALU0]>],
746                               [2, 1, 1]>,
747   //
748   // Double-register Integer Shift (4 cycle)
749   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
750                                InstrStage<1, [SW_ALU0]>],
751                               [4, 1, 1]>,
752   //
753   // Quad-register Integer Shift (4 cycle)
754   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
755                                InstrStage<1, [SW_ALU0]>],
756                               [4, 1, 1]>,
757   //
758   // Double-register Integer Binary (4 cycle)
759   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
760                                InstrStage<1, [SW_ALU0]>],
761                               [4, 1, 1]>,
762   //
763   // Quad-register Integer Binary (4 cycle)
764   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
765                                InstrStage<1, [SW_ALU0]>],
766                               [4, 1, 1]>,
767   //
768   // Double-register Integer Subtract (4 cycle)
769   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
770                                InstrStage<1, [SW_ALU0]>],
771                               [4, 1, 1]>,
772   //
773   // Quad-register Integer Subtract (4 cycle)
774   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
775                                InstrStage<1, [SW_ALU0]>],
776                               [4, 1, 1]>,
777
778   //
779   // Double-register Integer Count
780   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
781                                InstrStage<1, [SW_ALU0]>],
782                               [2, 1, 1]>,
783   //
784   // Quad-register Integer Count
785   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
786                                InstrStage<1, [SW_ALU0]>],
787                               [2, 1, 1]>,
788   //
789   // Double-register Absolute Difference and Accumulate
790   InstrItinData<IIC_VABAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
791                                InstrStage<1, [SW_ALU0]>],
792                               [4, 1, 1, 1]>,
793   //
794   // Quad-register Absolute Difference and Accumulate
795   InstrItinData<IIC_VABAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
796                                InstrStage<1, [SW_ALU0]>],
797                               [4, 1, 1, 1]>,
798   //
799   // Double-register Integer Pair Add Long
800   InstrItinData<IIC_VPALiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
801                                InstrStage<1, [SW_ALU0]>],
802                               [4, 1, 1]>,
803   //
804   // Quad-register Integer Pair Add Long
805   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
806                                InstrStage<1, [SW_ALU0]>],
807                               [4, 1, 1]>,
808
809   //
810   // Double-register Integer Multiply (.8, .16)
811   InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
812                                InstrStage<1, [SW_ALU1]>],
813                               [4, 1, 1]>,
814   //
815   // Quad-register Integer Multiply (.8, .16)
816   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
817                                InstrStage<1, [SW_ALU1]>],
818                               [4, 1, 1]>,
819
820   //
821   // Double-register Integer Multiply (.32)
822   InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
823                                InstrStage<1, [SW_ALU1]>],
824                               [4, 1, 1]>,
825   //
826   // Quad-register Integer Multiply (.32)
827   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
828                                InstrStage<1, [SW_ALU1]>],
829                               [4, 1, 1]>,
830   //
831   // Double-register Integer Multiply-Accumulate (.8, .16)
832   InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
833                                InstrStage<1, [SW_ALU1]>],
834                               [4, 1, 1, 1]>,
835   //
836   // Double-register Integer Multiply-Accumulate (.32)
837   InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
838                                InstrStage<1, [SW_ALU1]>],
839                               [4, 1, 1, 1]>,
840   //
841   // Quad-register Integer Multiply-Accumulate (.8, .16)
842   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
843                                InstrStage<1, [SW_ALU1]>],
844                               [4, 1, 1, 1]>,
845   //
846   // Quad-register Integer Multiply-Accumulate (.32)
847   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
848                                InstrStage<1, [SW_ALU1]>],
849                               [4, 1, 1, 1]>,
850
851   //
852   // Move
853   InstrItinData<IIC_VMOV,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
854                                InstrStage<1, [SW_ALU0]>],
855                               [2, 1]>,
856   //
857   // Move Immediate
858   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
859                                InstrStage<1, [SW_ALU0]>],
860                               [2]>,
861   //
862   // Double-register Permute Move
863   InstrItinData<IIC_VMOVD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
864                                InstrStage<1, [SW_ALU1]>],
865                               [2, 1]>,
866   //
867   // Quad-register Permute Move
868   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
869                                InstrStage<1, [SW_ALU1]>],
870                               [2, 1]>,
871   //
872   // Integer to Single-precision Move
873   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [SW_DIS0], 0>,
874                                InstrStage<1, [SW_DIS1], 0>,
875                                InstrStage<1, [SW_LS], 4>,
876                                InstrStage<1, [SW_ALU0]>],
877                               [6, 1]>,
878   //
879   // Integer to Double-precision Move
880   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
881                                InstrStage<1, [SW_LS]>],
882                               [4, 1, 1]>,
883   //
884   // Single-precision to Integer Move
885   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
886                                InstrStage<1, [SW_LS]>],
887                               [3, 1]>,
888   //
889   // Double-precision to Integer Move
890   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [SW_DIS0], 0>,
891                                InstrStage<1, [SW_DIS1], 0>,
892                                InstrStage<1, [SW_LS], 3>,
893                                InstrStage<1, [SW_LS]>],
894                               [3, 4, 1]>,
895   //
896   // Integer to Lane Move
897   // FIXME: I think this is correct, but it is not clear from the tuning guide.
898   InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
899                                InstrStage<1, [SW_DIS1], 0>,
900                                InstrStage<1, [SW_LS], 4>,
901                                InstrStage<1, [SW_ALU0]>],
902                               [6, 1]>,
903
904   //
905   // Vector narrow move
906   InstrItinData<IIC_VMOVN,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
907                                InstrStage<1, [SW_ALU1]>],
908                               [2, 1]>,
909   //
910   // Double-register FP Unary
911   // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
912   //        and they issue on a different pipeline.
913   InstrItinData<IIC_VUNAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
914                                InstrStage<1, [SW_ALU0]>],
915                               [2, 1]>,
916   //
917   // Quad-register FP Unary
918   // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
919   //        and they issue on a different pipeline.
920   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
921                                InstrStage<1, [SW_ALU0]>],
922                               [2, 1]>,
923   //
924   // Double-register FP Binary
925   // FIXME: We're using this itin for many instructions.
926   InstrItinData<IIC_VBIND,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
927                                InstrStage<1, [SW_ALU0]>],
928                               [4, 1, 1]>,
929
930   //
931   // VPADD, etc.
932   InstrItinData<IIC_VPBIND,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
933                                InstrStage<1, [SW_ALU0]>],
934                               [4, 1, 1]>,
935   //
936   // Double-register FP VMUL
937   InstrItinData<IIC_VFMULD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
938                                InstrStage<1, [SW_ALU1]>],
939                               [4, 1, 1]>,
940   //
941   // Quad-register FP Binary
942   InstrItinData<IIC_VBINQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
943                                InstrStage<1, [SW_ALU0]>],
944                               [4, 1, 1]>,
945   //
946   // Quad-register FP VMUL
947   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
948                                InstrStage<1, [SW_ALU1]>],
949                               [4, 1, 1]>,
950   //
951   // Double-register FP Multiply-Accumulate
952   InstrItinData<IIC_VMACD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
953                                InstrStage<1, [SW_ALU1]>],
954                               [8, 1, 1]>,
955   //
956   // Quad-register FP Multiply-Accumulate
957   InstrItinData<IIC_VMACQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
958                                InstrStage<1, [SW_ALU1]>],
959                               [8, 1, 1]>,
960   //
961   // Double-register Fused FP Multiply-Accumulate
962   InstrItinData<IIC_VFMACD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
963                                InstrStage<1, [SW_ALU1]>],
964                               [8, 1, 1]>,
965   //
966   // Quad-register Fused FP Multiply-Accumulate
967   InstrItinData<IIC_VFMACQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
968                                InstrStage<1, [SW_ALU1]>],
969                               [8, 1, 1]>,
970   //
971   // Double-register Reciprocal Step
972   InstrItinData<IIC_VRECSD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
973                                InstrStage<1, [SW_ALU1]>],
974                               [8, 1, 1]>,
975   //
976   // Quad-register Reciprocal Step
977   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
978                                InstrStage<1, [SW_ALU1]>],
979                               [8, 1, 1]>,
980   //
981   // Double-register Permute
982   // FIXME: The latencies are unclear from the documentation.
983   InstrItinData<IIC_VPERMD,   [InstrStage<1, [SW_DIS0], 0>,
984                                InstrStage<1, [SW_DIS1], 0>,
985                                InstrStage<1, [SW_DIS2], 0>,
986                                InstrStage<1, [SW_ALU1], 2>,
987                                InstrStage<1, [SW_ALU1], 2>,
988                                InstrStage<1, [SW_ALU1]>],
989                               [3, 4, 3, 4]>,
990   //
991   // Quad-register Permute
992   // FIXME: The latencies are unclear from the documentation.
993   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [SW_DIS0], 0>,
994                                InstrStage<1, [SW_DIS1], 0>,
995                                InstrStage<1, [SW_DIS2], 0>,
996                                InstrStage<1, [SW_ALU1], 2>,
997                                InstrStage<1, [SW_ALU1], 2>,
998                                InstrStage<1, [SW_ALU1]>],
999                               [3, 4, 3, 4]>,
1000   //
1001   // Quad-register Permute (3 cycle issue on A9)
1002   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [SW_DIS0], 0>,
1003                                InstrStage<1, [SW_DIS1], 0>,
1004                                InstrStage<1, [SW_DIS2], 0>,
1005                                InstrStage<1, [SW_ALU1], 2>,
1006                                InstrStage<1, [SW_ALU1], 2>,
1007                                InstrStage<1, [SW_ALU1]>],
1008                               [3, 4, 3, 4]>,
1009
1010   //
1011   // Double-register VEXT
1012   InstrItinData<IIC_VEXTD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1013                                InstrStage<1, [SW_ALU1]>],
1014                               [2, 1, 1]>,
1015   //
1016   // Quad-register VEXT
1017   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1018                                InstrStage<1, [SW_ALU1]>],
1019                               [2, 1, 1]>,
1020   //
1021   // VTB
1022   InstrItinData<IIC_VTB1,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1023                                InstrStage<1, [SW_ALU1]>],
1024                               [2, 1, 1]>,
1025   InstrItinData<IIC_VTB2,     [InstrStage<1, [SW_DIS0], 0>,
1026                                InstrStage<1, [SW_DIS1], 0>,
1027                                InstrStage<1, [SW_ALU1], 2>,
1028                                InstrStage<1, [SW_ALU1]>],
1029                               [4, 1, 3, 3]>,
1030   InstrItinData<IIC_VTB3,     [InstrStage<1, [SW_DIS0], 0>,
1031                                InstrStage<1, [SW_DIS1], 0>,
1032                                InstrStage<1, [SW_DIS2], 0>,
1033                                InstrStage<1, [SW_ALU1], 2>,
1034                                InstrStage<1, [SW_ALU1], 2>,
1035                                InstrStage<1, [SW_ALU1]>],
1036                               [6, 1, 3, 5, 5]>,
1037   InstrItinData<IIC_VTB4,     [InstrStage<1, [SW_DIS0], 0>,
1038                                InstrStage<1, [SW_DIS1], 0>,
1039                                InstrStage<1, [SW_DIS2], 0>,
1040                                InstrStage<1, [SW_ALU1], 2>,
1041                                InstrStage<1, [SW_ALU1], 2>,
1042                                InstrStage<1, [SW_ALU1], 2>,
1043                                InstrStage<1, [SW_ALU1]>],
1044                               [8, 1, 3, 5, 7, 7]>,
1045   //
1046   // VTBX
1047   InstrItinData<IIC_VTBX1,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1048                                InstrStage<1, [SW_ALU1]>],
1049                               [2, 1, 1]>,
1050   InstrItinData<IIC_VTBX2,    [InstrStage<1, [SW_DIS0], 0>,
1051                                InstrStage<1, [SW_DIS1], 0>,
1052                                InstrStage<1, [SW_ALU1], 2>,
1053                                InstrStage<1, [SW_ALU1]>],
1054                               [4, 1, 3, 3]>,
1055   InstrItinData<IIC_VTBX3,    [InstrStage<1, [SW_DIS0], 0>,
1056                                InstrStage<1, [SW_DIS1], 0>,
1057                                InstrStage<1, [SW_DIS2], 0>,
1058                                InstrStage<1, [SW_ALU1], 2>,
1059                                InstrStage<1, [SW_ALU1], 2>,
1060                                InstrStage<1, [SW_ALU1]>],
1061                               [6, 1, 3, 5, 5]>,
1062   InstrItinData<IIC_VTBX4,    [InstrStage<1, [SW_DIS0], 0>,
1063                                InstrStage<1, [SW_DIS1], 0>,
1064                                InstrStage<1, [SW_DIS2], 0>,
1065                                InstrStage<1, [SW_ALU1], 2>,
1066                                InstrStage<1, [SW_ALU1], 2>,
1067                                InstrStage<1, [SW_ALU1], 2>,
1068                                InstrStage<1, [SW_ALU1]>],
1069                               [8, 1, 3, 5, 7, 7]>
1070 ]>;
1071
1072 // ===---------------------------------------------------------------------===//
1073 // The following definitions describe the simple machine model which
1074 // will replace itineraries.
1075
1076 // Swift machine model for scheduling and other instruction cost heuristics.
1077 def SwiftModel : SchedMachineModel {
1078   let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
1079   let MicroOpBufferSize = 45; // Based on NEON renamed registers.
1080   let LoadLatency = 3; // Same value as SwiftWriteP2ThreeCycle (plain load).
1081   let MispredictPenalty = 14; // A branch direction mispredict.
1082
1083   let Itineraries = SwiftItineraries; // Legacy itinerary data defined above.
1084 }
1085
1086 // Swift predicates. The check is delegated to TII->isSwiftFastImmShift(MI).
1087 def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
1088
1089 // Swift resource mapping.
1090 let SchedModel = SwiftModel in {
1091   // Processor resources. P0 and P1 each name SwiftUnitP01 as their Super.
1092   def SwiftUnitP01 : ProcResource<2>; // ALU unit.
1093   def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit.
1094   def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit.
1095   def SwiftUnitP2 : ProcResource<1>; // LS unit.
1096   def SwiftUnitDiv : ProcResource<1>; // Presumably the divide unit -- TODO confirm.
1097
1098   // Generic resource requirements, named SwiftWrite<unit(s)><latency>.
1099   def SwiftWriteP0OneCycle : SchedWriteRes<[SwiftUnitP0]>;
1100   def SwiftWriteP0TwoCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 2; }
1101   def SwiftWriteP0FourCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 4; }
1102   def SwiftWriteP0SixCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 6; }
1103   def SwiftWriteP0P1FourCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
1104     let Latency = 4; // Lists both P0 and P1 as resources.
1105   }
1106   def SwiftWriteP0P1SixCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
1107     let Latency = 6; // Lists both P0 and P1 as resources.
1108   }
1109   def SwiftWriteP01OneCycle : SchedWriteRes<[SwiftUnitP01]>;
1110   def SwiftWriteP1TwoCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 2; }
1111   def SwiftWriteP1FourCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 4; }
1112   def SwiftWriteP1SixCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 6; }
1113   def SwiftWriteP1EightCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 8; }
1114   def SwiftWriteP1TwelveCyc : SchedWriteRes<[SwiftUnitP1]> { let Latency = 12; }
1115   def SwiftWriteP01OneCycle2x : WriteSequence<[SwiftWriteP01OneCycle], 2>; // x2
1116   def SwiftWriteP01OneCycle3x : WriteSequence<[SwiftWriteP01OneCycle], 3>; // x3
1117   def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
1118   def SwiftWriteP01ThreeCycleTwoUops : SchedWriteRes<[SwiftUnitP01,
1119                                                       SwiftUnitP01]> {
1120     let Latency = 3;
1121     let NumMicroOps = 2; // One SwiftUnitP01 entry per micro-op.
1122   }
1123   def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
1124     let Latency = 3;
1125     let NumMicroOps = 3;
1126     let ResourceCycles = [3]; // The single P0 entry is charged 3 cycles.
1127   }
1128   // Plain load without writeback.
1129   def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> {
1130     let Latency = 3;
1131   }
1132   def SwiftWriteP2FourCycle : SchedWriteRes<[SwiftUnitP2]> {
1133     let Latency = 4;
1134   }
1135   // A store does not write to a register.
1136   def SwiftWriteP2 : SchedWriteRes<[SwiftUnitP2]> {
1137     let Latency = 0; // No register result, hence zero latency.
1138   }
1139   foreach Num = 1-4 in { // Defines SwiftWrite1xP2 through SwiftWrite4xP2.
1140     def SwiftWrite#Num#xP2 : WriteSequence<[SwiftWriteP2], Num>;
1141   }
1142   def SwiftWriteP01OneCycle2x_load : WriteSequence<[SwiftWriteP01OneCycle,
1143                                                     SwiftWriteP01OneCycle,
1144                                                     SwiftWriteP2ThreeCycle]>; // 2 P01 writes, then a load.
1145   // 4.2.4 Arithmetic and Logical.
1146   // ALU operation register shifted by immediate variant.
1147   def SwiftWriteALUsi : SchedWriteVariant<[
1148     // lsl #2, lsl #1, or lsr #1. NOTE(review): fast shifts get the 2-cycle write here; confirm.
1149     SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
1150     SchedVar<NoSchedPred,             [WriteALU]>
1151   ]>;
1152   def SwiftWriteALUsr : SchedWriteVariant<[
1153     SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>, // Predicated.
1154     SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]> // Unpredicated.
1155   ]>;
1156   def SwiftWriteALUSsr : SchedWriteVariant<[
1157     SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>, // Predicated.
1158     SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]> // Unpredicated.
1159   ]>;
1160   def SwiftReadAdvanceALUsr : SchedReadVariant<[
1161     SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>, // Predicated: advance of 2.
1162     SchedVar<NoSchedPred,      [NoReadAdvance]>
1163   ]>;
1164   // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
1165   // AND,BIC,EOR,ORN,ORR
1166   // CLZ,RBIT,REV,REV16,REVSH,PKH
1167   def : WriteRes<WriteALU, [SwiftUnitP01]>;
1168   def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
1169   def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
1170   def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
1171   def : ReadAdvance<ReadALU, 0>;
1172   def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
1173
1174
1175   def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
1176     SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01OneCycle]>, // Fast shift.
1177     SchedVar<NoSchedPred,             [SwiftWriteP01TwoCycle]> // Any other shift.
1178   ]>;
1179
1180   // 4.2.5 Integer comparison
1181   def : WriteRes<WriteCMP, [SwiftUnitP01]>;
1182   def : SchedAlias<WriteCMPsi, SwiftChooseShiftKindP01OneOrTwoCycle>;
1183   def : SchedAlias<WriteCMPsr, SwiftWriteP01TwoCycle>;
1184
1185   // 4.2.6 Shift, Move
1186   // Shift
1187   //  ASR,LSL,ROR,RRX
1188   //  MOV(register-shifted register)  MVN(register-shifted register)
1189   // Move
1190   //  MOV,MVN
1191   //  MOVT
1192   // Sign/Zero extension: a single one-cycle P01 write.
1193   def : InstRW<[SwiftWriteP01OneCycle],
1194                (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
1195                           "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH",
1196                           "t2UXTB16")>;
1197   // Pseudo instructions: sequences of one-cycle P01 writes, plus a load for *_ldr.
1198   def : InstRW<[SwiftWriteP01OneCycle2x],
1199         (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi32imm",
1200                    "t2MOVi32imm", "t2MOV_ga_dyn")>;
1201   def : InstRW<[SwiftWriteP01OneCycle3x],
1202         (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel", "t2MOVi16_ga_pcrel")>;
1203   def : InstRW<[SwiftWriteP01OneCycle2x_load],
1204         (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
1205
1206   def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
1207
1208   def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
1209     SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>, // Predicated.
1210     SchedVar<NoSchedPred,     [ SwiftWriteP0OneCycle ]> // Unpredicated.
1211   ]>;
1212
1213   // 4.2.7 Select
1214   // SEL: two one-cycle P0 writes when predicated, one otherwise.
1215   def : InstRW<[SwiftPredP0OneOrTwoCycle], (instregex "SEL", "t2SEL")>;
1216
1217   // 4.2.8 Bitfield
1218   // BFI, BFC, SBFX, UBFX (plain and t/t2-prefixed names): 2-cycle P01 write.
1219   def : InstRW< [SwiftWriteP01TwoCycle],
1220         (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
1221         "(t|t2)UBFX", "(t|t2)SBFX")>;
1222
1223   // 4.2.9 Saturating arithmetic. NOTE(review): no "t2USAT16" entry below -- confirm.
1224   def : InstRW< [SwiftWriteP01TwoCycle],
1225         (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
1226         "USAT16", "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
1227         "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
1228         "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
1229         "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
1230         "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;
1231
1232   // 4.2.10 Parallel Arithmetic
1233   // Not flag setting. Uses SwiftWriteALUsr, which varies with predication.
1234   def : InstRW< [SwiftWriteALUsr],
1235         (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
1236         "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
1237         "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
1238         "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
1239   // Flag setting. Fixed 2-cycle P01 write.
1240   def : InstRW< [SwiftWriteP01TwoCycle],
1241        (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
1242        "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
1243        "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
1244        "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
1245        "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
1246        "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
1247
1248   // 4.2.11 Sum of Absolute Difference
       // USAD8 only gets the write SwiftWriteP0P1FourCycle. USADA8 uses the same
       // write and additionally lists three reads: ReadALU, ReadALU and a
       // SchedReadAdvance<2>.
       // NOTE(review): the advanced third read is presumably the accumulator
       // operand -- confirm against the instruction's operand order.
1249   def : InstRW< [SwiftWriteP0P1FourCycle], (instregex "USAD8") >;
1250   def : InstRW<[SwiftWriteP0P1FourCycle, ReadALU, ReadALU, SchedReadAdvance<2>],
1251         (instregex "USADA8")>;
1252
1253   // 4.2.12 Integer Multiply (32-bit result)
1254   // Two sources.
       // All listed multiply opcodes (and t2-prefixed names) are mapped to the
       // single write SwiftWriteP0FourCycle.
1255   def : InstRW< [SwiftWriteP0FourCycle],
1256         (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
1257         "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
1258         "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
1259         "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
1260
1261   def SwiftWriteP0P01FiveCycleTwoUops :
1262       SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]>  {
         // Write that uses SwiftUnitP0 and SwiftUnitP01 with a latency of 5.
         // NumMicroOps is not set here, so it keeps the class default.
1263     let Latency = 5;
1264   }
1265
1266   def SwiftPredP0P01FourFiveCycle : SchedWriteVariant<[
         // Write variant: SwiftWriteP0P01FiveCycleTwoUops when IsPredicatedPred
         // holds, SwiftWriteP0FourCycle otherwise.
1267     SchedVar<IsPredicatedPred, [ SwiftWriteP0P01FiveCycleTwoUops ]>,
1268     SchedVar<NoSchedPred,      [ SwiftWriteP0FourCycle ]>
1269   ]>;
1270
1271   def SwiftReadAdvanceFourCyclesPred : SchedReadVariant<[
          // Read variant: a SchedReadAdvance<4> when IsPredicatedPred holds, the
          // plain ReadALU otherwise.
1272      SchedVar<IsPredicatedPred, [SchedReadAdvance<4>]>,
1273      SchedVar<NoSchedPred,      [ReadALU]>
1274   ]>;
1275
1276   // Multiply accumulate, three sources
       // The write is the predicate-dependent SwiftPredP0P01FourFiveCycle; the
       // reads are ReadALU, ReadALU and SwiftReadAdvanceFourCyclesPred, in that
       // order.
       // NOTE(review): the third read presumably corresponds to the accumulator
       // operand -- confirm against the instruction's operand order.
1277   def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
1278                  SwiftReadAdvanceFourCyclesPred],
1279         (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
1280         "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS",
1281         "t2SMMLSR")>;
1282
1283   // 4.2.13 Integer Multiply (32-bit result, Q flag)
       // Three mappings follow:
       //  - SMUAD/SMUADX: plain SwiftWriteP0FourCycle.
       //  - SMLAxy/SMLSD/SMLAWy: predicate-dependent write plus three reads
       //    (ReadALU, ReadALU, SwiftReadAdvanceFourCyclesPred).
       //  - SMLAD/SMLADX: the same predicate-dependent write, but no reads are
       //    listed for them here.
1284   def : InstRW< [SwiftWriteP0FourCycle],
1285         (instregex "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX")>;
1286   def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
1287                  SwiftReadAdvanceFourCyclesPred],
1288         (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
1289         "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
1290         "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT")>;
1291   def : InstRW< [SwiftPredP0P01FourFiveCycle],
1292         (instregex "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX")>;
1293
1294   def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
         // Three micro-ops with latency 5. ResourceCycles lines up with the unit
         // list above: 2 cycles on SwiftUnitP0 and 1 cycle on SwiftUnitP01.
1295     let Latency = 5;
1296     let NumMicroOps = 3;
1297     let ResourceCycles = [2, 1];
1298   }
       // The next three writes carry a latency only: their resource list is empty
       // and NumMicroOps is 0.
1299   def SwiftWrite1Cycle : SchedWriteRes<[]> {
1300     let Latency = 1;
1301     let NumMicroOps = 0;
1302   }
1303   def SwiftWrite5Cycle : SchedWriteRes<[]> {
1304     let Latency = 5;
1305     let NumMicroOps = 0;
1306   }
1307   def SwiftWrite6Cycle : SchedWriteRes<[]> {
1308     let Latency = 6;
1309     let NumMicroOps = 0;
1310   }
1311
1312   // 4.2.14 Integer Multiply, Long
       // Two writes are listed: SwiftP0P0P01FiveCycle, which carries the
       // resources, and the latency-only SwiftWrite5Cycle. The patterns end in
       // '$' so that longer opcode names with the same prefix are excluded.
1313   def : InstRW< [SwiftP0P0P01FiveCycle, SwiftWrite5Cycle],
1314         (instregex "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$")>;
1315
1316   def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
         // Five micro-ops: 2 cycles on SwiftUnitP0 and 3 cycles on SwiftUnitP01.
         // NOTE(review): the record name says "FiveCycle" but Latency is 7 --
         // confirm which of the two is intended.
1317     let Latency = 7;
1318     let NumMicroOps = 5;
1319     let ResourceCycles = [2, 3];
1320   }
1321
1322   // 4.2.15 Integer Multiply Accumulate, Long
1323   // 4.2.16 Integer Multiply Accumulate, Dual
1324   // 4.2.17 Integer Multiply Accumulate Accumulate, Long
1325   // We are being a bit inaccurate here.
       // Two writes are listed: the latency-only SwiftWrite5Cycle followed by
       // Swift2P03P01FiveCycle, which carries the resources. Four reads follow:
       // ReadALU, ReadALU, SchedReadAdvance<4> and SchedReadAdvance<3>.
       // The halfword patterns are spelled "SMLALBB" / "t2SMLALBB" to agree with
       // the neighbouring BT/TB/TT entries; "MLALBB" names no opcode.
1326   def : InstRW< [SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU,
1327                  SchedReadAdvance<4>, SchedReadAdvance<3>],
1328         (instregex "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
1329         "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
1330         "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB", "t2SMLALBT",
1331         "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX",
1332         "t2UMAAL")>;
1333
1334   def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
         // One micro-op with latency 14: 1 cycle on SwiftUnitP0 and 14 cycles on
         // SwiftUnitDiv.
1335     let NumMicroOps = 1;
1336     let Latency = 14;
1337     let ResourceCycles = [1, 14];
1338   }
1339   // 4.2.18 Integer Divide
       // The generic WriteDiv is given a resource mapping (marked as a
       // workaround by the original author); the divide opcodes themselves are
       // mapped to SwiftDiv through the InstRW below.
1340   def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
1341   def : InstRW <[SwiftDiv],
1342         (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
1343
1344   // 4.2.19 Integer Load Single Element
1345   // 4.2.20 Integer Load Signextended
       // Write resources used by the load mappings. Each record name spells out
       // the units it lists and its latency; NumMicroOps equals the number of
       // listed units.
1346   def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
1347     let Latency = 3;
1348     let NumMicroOps = 2;
1349   }
1350   def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
1351     let Latency = 4;
1352     let NumMicroOps = 2;
1353   }
1354   def SwiftWriteP2P01P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01,
1355                                                    SwiftUnitP01]> {
1356     let Latency = 4;
1357     let NumMicroOps = 3;
1358   }
1359   def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> {
1360     let Latency = 3;
1361     let NumMicroOps = 2;
1362   }
1363   def SwiftWriteP2P2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2,
1364                                                    SwiftUnitP01]> {
1365     let Latency = 3;
1366     let NumMicroOps = 3;
1367   }
       // Latency-only writes (no resources, zero micro-ops): a 1-cycle write used
       // below as the extra write of the pre/post-indexed load mappings, and a
       // 4-cycle write used as the second write of the dual-load mappings.
1368   def SwiftWrBackOne : SchedWriteRes<[]> {
1369     let Latency = 1;
1370     let NumMicroOps = 0;
1371   }
1372   def SwiftWriteLdFour : SchedWriteRes<[]> {
1373     let Latency = 4;
1374     let NumMicroOps = 0;
1375   }
1376    // Not accurate.
       // Plain loads: a single SwiftWriteP2ThreeCycle write.
1377   def : InstRW<[SwiftWriteP2ThreeCycle],
1378         (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
1379         "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "tLDR[BH](r|i|spi|pci|pciASM)",
1380         "tLDR(r|i|spi|pci|pciASM)")>;
1381   def : InstRW<[SwiftWriteP2ThreeCycle],
1382         (instregex "LDRH$",  "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>;
       // Sign-extending loads (and t2LDRpci_pic): SwiftWriteP2P01FourCyle.
1383   def : InstRW<[SwiftWriteP2P01FourCyle],
1384         (instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
1385         "t2LDRpci_pic", "tLDRS(B|H)")>;
       // Pre/post-indexed and T-variant loads: the load write is followed by the
       // latency-only SwiftWrBackOne as a second write.
1386   def : InstRW<[SwiftWriteP2P01ThreeCycle,  SwiftWrBackOne],
1387         (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
1388         "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
1389         "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T")>;
       // Sign-extending pre/post-indexed and T-variant loads.
1390   def : InstRW<[SwiftWriteP2P01P01FourCycle, SwiftWrBackOne],
1391         (instregex "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
1392         "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T")>;
1393
1394   // 4.2.21 Integer Dual Load
1395   // Not accurate.
       // LDRD lists two writes: a resource-carrying 3-cycle write and the
       // latency-only SwiftWriteLdFour. The pre/post-indexed forms add
       // SwiftWrBackOne as a third write.
1396   def : InstRW<[SwiftWriteP2P2ThreeCycle, SwiftWriteLdFour],
1397         (instregex "t2LDRDi8", "LDRD$")>;
1398   def : InstRW<[SwiftWriteP2P2P01ThreeCycle, SwiftWriteLdFour, SwiftWrBackOne],
1399         (instregex "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
1400
1401   // 4.2.22 Integer Load, Multiple
1402   // NumReg = 1 .. 16
       // For every latency Lat in 3..25 this defines two writes:
       //   SwiftWriteLM<Lat>Cy   - uses SwiftUnitP2 with latency Lat.
       //   SwiftWriteLM<Lat>CyNo - latency Lat only: no resources, zero micro-ops.
1403   foreach Lat = 3-25 in {
1404     def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> {
1405       let Latency = Lat;
1406     }
1407     def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> {
1408       let Latency = Lat;
1409       let NumMicroOps = 0;
1410     }
1411   }
1412   // Predicate.
       // SwiftLMAddr<N>Pred (N = 1..16) holds when TII->getNumLDMAddresses(MI)
       // equals N.
1413   foreach NumAddr = 1-16 in {
1414     def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
1415   }
       // Writes for the address operand: the NoWB one uses SwiftUnitP01 once with
       // latency 0; the WB one lists SwiftUnitP01 twice and keeps the default
       // latency.
1416   def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> { let Latency = 0; }
1417   def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>;
1418   def SwiftWriteLM : SchedWriteVariant<[
         // Variadic write variant for load-multiple, keyed on SwiftLMAddr<N>Pred.
         // For N = 2..16 the entry lists N writes, SwiftWriteLM3Cy up to
         // SwiftWriteLM<N+2>Cy, so the latency grows by one cycle per listed
         // write. There is no entry for SwiftLMAddr1Pred; that case ends up in
         // the NoSchedPred entry at the bottom.
1419     SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy]>,
1420     SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1421                                 SwiftWriteLM5Cy]>,
1422     SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1423                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy]>,
1424     SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1425                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1426                                 SwiftWriteLM7Cy]>,
1427     SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1428                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1429                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy]>,
1430     SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1431                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1432                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1433                                 SwiftWriteLM9Cy]>,
1434     SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1435                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1436                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1437                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy]>,
1438     SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1439                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1440                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1441                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1442                                 SwiftWriteLM11Cy]>,
1443     SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1444                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1445                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1446                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1447                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy]>,
1448     SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1449                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1450                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1451                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1452                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1453                                 SwiftWriteLM13Cy]>,
1454     SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1455                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1456                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1457                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1458                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1459                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy]>,
1460     SchedVar<SwiftLMAddr13Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1461                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1462                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1463                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1464                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1465                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1466                                 SwiftWriteLM15Cy]>,
1467     SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1468                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1469                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1470                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1471                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1472                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1473                                 SwiftWriteLM15Cy, SwiftWriteLM16Cy]>,
1474     SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1475                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1476                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1477                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1478                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1479                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1480                                 SwiftWriteLM15Cy, SwiftWriteLM16Cy,
1481                                 SwiftWriteLM17Cy]>,
1482     SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1483                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1484                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1485                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1486                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1487                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1488                                 SwiftWriteLM15Cy, SwiftWriteLM16Cy,
1489                                 SwiftWriteLM17Cy, SwiftWriteLM18Cy]>,
1490     // Unknown number of registers, just use resources for two registers.
         // Only the first two writes carry resources; the remaining fourteen are
         // the latency-only "CyNo" writes, which still supply latencies 5..18.
1491     SchedVar<NoSchedPred,      [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1492                                 SwiftWriteLM5CyNo, SwiftWriteLM6CyNo,
1493                                 SwiftWriteLM7CyNo, SwiftWriteLM8CyNo,
1494                                 SwiftWriteLM9CyNo, SwiftWriteLM10CyNo,
1495                                 SwiftWriteLM11CyNo, SwiftWriteLM12CyNo,
1496                                 SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
1497                                 SwiftWriteLM15CyNo, SwiftWriteLM16CyNo,
1498                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo]>
1499
1500   ]> { let Variadic=1; }
1501
1502   def : InstRW<[SwiftWriteLM, SwiftWriteLDMAddrNoWB],
         // Load-multiple without base update: the variadic SwiftWriteLM comes
         // first, followed by the no-writeback address write.
1503         (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
1504         "(t|sys)LDM(IA|DA|DB|IB)$")>;
       // _UPD forms: the writeback address write is listed first, then
       // SwiftWriteLM. The *_RET patterns are commented out here because they are
       // covered by the next mapping.
1505   def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM],
1506         (instregex /*"t2LDMIA_RET", "tLDMIA_RET", "LDMIA_RET",*/
1507         "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
       // Return and pop forms additionally list SwiftWriteP1TwoCycle.
1508   def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM, SwiftWriteP1TwoCycle],
1509         (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
1510   // 4.2.23 Integer Store, Single Element
       // Plain stores use only SwiftWriteP2.
1511   def : InstRW<[SwiftWriteP2],
1512         (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX",
1513         "t2STR(i12|i8|s)$", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
1514
       // Pre/post-indexed and T-variant stores list SwiftWriteP01OneCycle first,
       // then SwiftWriteP2.
1515   def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2],
1516         (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
1517         "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
1518         "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
1519         "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
1520
1521   // 4.2.24 Integer Store, Dual
       // STRD lists SwiftWriteP2 twice followed by SwiftWriteP01OneCycle; the
       // pre/post-indexed forms put an additional SwiftWriteP01OneCycle in front.
1522   def : InstRW<[SwiftWriteP2, SwiftWriteP2, SwiftWriteP01OneCycle],
1523         (instregex "STRD$", "t2STRDi8")>;
1524   def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2, SwiftWriteP2,
1525                 SwiftWriteP01OneCycle],
1526         (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
1527
1528   // 4.2.25 Integer Store, Multiple
       // Per-address building block: two micro-ops on SwiftUnitP2 and
       // SwiftUnitP01 with latency 0.
1529   def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
1530     let Latency = 0;
1531     let NumMicroOps = 2;
1532   }
       // SwiftWriteSTM<N> (N = 1..16) is SwiftWriteStIncAddr repeated N times.
1533   foreach NumAddr = 1-16 in {
1534      def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>;
1535   }
       // Picks SwiftWriteSTM<N> according to SwiftLMAddr<N>Pred for N = 2..16.
       // There is no entry for N = 1; that case uses the NoSchedPred entry.
1536   def SwiftWriteSTM : SchedWriteVariant<[
1537     SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM2]>,
1538     SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM3]>,
1539     SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM4]>,
1540     SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM5]>,
1541     SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM6]>,
1542     SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM7]>,
1543     SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM8]>,
1544     SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM9]>,
1545     SchedVar<SwiftLMAddr10Pred,[SwiftWriteSTM10]>,
1546     SchedVar<SwiftLMAddr11Pred,[SwiftWriteSTM11]>,
1547     SchedVar<SwiftLMAddr12Pred,[SwiftWriteSTM12]>,
1548     SchedVar<SwiftLMAddr13Pred,[SwiftWriteSTM13]>,
1549     SchedVar<SwiftLMAddr14Pred,[SwiftWriteSTM14]>,
1550     SchedVar<SwiftLMAddr15Pred,[SwiftWriteSTM15]>,
1551     SchedVar<SwiftLMAddr16Pred,[SwiftWriteSTM16]>,
1552     // Unknown number of registers, just use resources for two registers.
1553     SchedVar<NoSchedPred,      [SwiftWriteSTM2]>
1554   ]>;
       // Store-multiple without base update uses SwiftWriteSTM alone; the _UPD
       // and push forms put SwiftWriteP01OneCycle in front of it.
1555   def : InstRW<[SwiftWriteSTM],
1556         (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
1557   def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteSTM],
1558         (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
1559         "PUSH", "tPUSH")>;
1560
1561   // 4.2.26 Branch
       // Resource mappings for the generic branch writes: WriteBr and WriteBrL
       // use SwiftUnitP1 (latency 0 and 2); WriteBrTbl uses SwiftUnitP1 and
       // SwiftUnitP2 with latency 0.
1562   def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; }
1563   def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; }
1564   def : WriteRes<WriteBrTbl, [SwiftUnitP1, SwiftUnitP2]> { let Latency = 0; }
1565
1566   // 4.2.27 Not issued
       // WriteNoop uses no resources, has latency 0 and zero micro-ops; IT and
       // NOP opcodes are mapped to it.
1567   def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
1568   def : InstRW<[WriteNoop], (instregex "t2IT", "IT", "NOP")>;
1569
1570   // 4.2.28 Advanced SIMD, Integer, 2 cycle
       // The arithmetic, logical, shift and count opcodes below are mapped to
       // SwiftWriteP0TwoCycle; VEXT and the VREV family are mapped to
       // SwiftWriteP1TwoCycle.
1571   def : InstRW<[SwiftWriteP0TwoCycle],
1572         (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
1573                    "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
1574                    "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
1575                    "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL", "VQSHLU", "VBIF",
1576                    "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
1577
1578   def : InstRW<[SwiftWriteP1TwoCycle],
1579         (instregex "VEXT", "VREV16", "VREV32", "VREV64")>;
1580
1581   // 4.2.29 Advanced SIMD, Integer, 4 cycle
1582   // 4.2.30 Advanced SIMD, Integer with Accumulate
       // Accumulate, compare, rounding-shift and saturating opcodes are mapped to
       // SwiftWriteP0FourCycle; the estimate opcodes VRECPE/VRSQRTE are mapped to
       // SwiftWriteP1FourCycle.
1583   def : InstRW<[SwiftWriteP0FourCycle],
1584         (instregex "VABA", "VABAL", "VPADAL", "VRSRA", "VSRA", "VACGE", "VACGT",
1585         "VACLE", "VACLT", "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT", "VRSHL",
1586         "VQRSHL", "VRSHR(u|s)", "VABS(f|v)", "VQABS", "VQNEG", "VQADD",
1587         "VQSUB")>;
1588   def : InstRW<[SwiftWriteP1FourCycle],
1589         (instregex "VRECPE", "VRSQRTE")>;
1590
1591   // 4.2.31 Advanced SIMD, Add and Shift with Narrow
       // The plain narrowing forms are mapped to SwiftWriteP0P1FourCycle; the
       // rounding and saturating narrowing forms to SwiftWriteP0P1SixCycle.
1592   def : InstRW<[SwiftWriteP0P1FourCycle],
1593         (instregex "VADDHN", "VSUBHN", "VSHRN")>;
1594   def : InstRW<[SwiftWriteP0P1SixCycle],
1595         (instregex "VRADDHN", "VRSUBHN", "VRSHRN", "VQSHRN", "VQSHRUN",
1596                    "VQRSHRN", "VQRSHRUN")>;
1597
1598   // 4.2.32 Advanced SIMD, Vector Table Lookup
       // SwiftWrite<Num>xP1TwoCycle (Num = 1..4) is SwiftWriteP1TwoCycle repeated
       // Num times; VTBL<k>/VTBX<k> is mapped to the k-fold sequence.
1599   foreach Num = 1-4 in {
1600     def SwiftWrite#Num#xP1TwoCycle : WriteSequence<[SwiftWriteP1TwoCycle], Num>;
1601   }
1602   def : InstRW<[SwiftWrite1xP1TwoCycle],
1603         (instregex "VTB(L|X)1")>;
1604   def : InstRW<[SwiftWrite2xP1TwoCycle],
1605         (instregex "VTB(L|X)2")>;
1606   def : InstRW<[SwiftWrite3xP1TwoCycle],
1607         (instregex "VTB(L|X)3")>;
1608   def : InstRW<[SwiftWrite4xP1TwoCycle],
1609         (instregex "VTB(L|X)4")>;
1610
1611   // 4.2.33 Advanced SIMD, Transpose
       // Two SwiftWriteP1FourCycle writes, then a SwiftWriteP1TwoCycle that the
       // inline comment marks as resource-only, then one SchedReadAdvance<2>.
1612   def : InstRW<[SwiftWriteP1FourCycle, SwiftWriteP1FourCycle,
1613                 SwiftWriteP1TwoCycle/*RsrcOnly*/, SchedReadAdvance<2>],
1614         (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
1615
1616   // 4.2.34 Advanced SIMD and VFP, Floating Point
       // Mappings by opcode group: VABS/VNEG (S|D) -> SwiftWriteP0TwoCycle;
       // compares and the VADD(S|f)/VSUB(S|f)/VABD/min/max group ->
       // SwiftWriteP0FourCycle; VADDD/VSUBD -> SwiftWriteP0SixCycle;
       // VRECPS/VRSQRTS -> SwiftWriteP1EightCycle.
1617   def : InstRW<[SwiftWriteP0TwoCycle], (instregex "VABS(S|D)$", "VNEG(S|D)$")>;
1618   def : InstRW<[SwiftWriteP0FourCycle],
1619         (instregex "VCMP(D|S|ZD|ZS)$", "VCMPE(D|S|ZD|ZS)")>;
1620   def : InstRW<[SwiftWriteP0FourCycle],
1621         (instregex "VADD(S|f)", "VSUB(S|f)", "VABD", "VPADDf", "VMAX", "VMIN", "VPMAX",
1622                    "VPMIN")>;
1623   def : InstRW<[SwiftWriteP0SixCycle], (instregex "VADDD$", "VSUBD$")>;
1624   def : InstRW<[SwiftWriteP1EightCycle], (instregex "VRECPS", "VRSQRTS")>;
1625
1626   // 4.2.35 Advanced SIMD and VFP, Multiply
       // Multiplies and multiply-accumulates are mapped to SwiftWriteP1FourCycle,
       // with these exceptions: VMULD/VNMULD use SwiftWriteP1SixCycle, VFMAfd and
       // VFMSfd use SwiftWriteP1EightCycle, VFMAfq and VFMSfq use
       // SwiftWriteP1TwelveCyc.
1627   def : InstRW<[SwiftWriteP1FourCycle],
1628         (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
1629                    "VMULL", "VQDMULL")>;
1630   def : InstRW<[SwiftWriteP1SixCycle],
1631         (instregex "VMULD", "VNMULD")>;
1632   def : InstRW<[SwiftWriteP1FourCycle],
1633         (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
1634         "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
1635   def : InstRW<[SwiftWriteP1EightCycle], (instregex "VFMAfd", "VFMSfd")>;
1636   def : InstRW<[SwiftWriteP1TwelveCyc], (instregex "VFMAfq", "VFMSfq")>;
1637
1638   // 4.2.36 Advanced SIMD and VFP, Convert
       // Conversion opcodes are mapped to SwiftWriteP1FourCycle.
1639   def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
1640   // Fixpoint conversions.
       // The generic WriteCvtFP gets SwiftUnitP1 with latency 4.
1641   def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
1642
1643   // 4.2.37 Advanced SIMD and VFP, Move
       // Register-to-register moves, immediates and conditional moves use
       // SwiftWriteP0TwoCycle.
1644   def : InstRW<[SwiftWriteP0TwoCycle],
1645         (instregex "VMOVv", "VMOV(S|D)$", "VMOV(S|D)cc",
1646                    "VMVNv", "VMVN(d|q)", "VMVN(S|D)cc",
1647                    "FCONST(D|S)")>;
1648   def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VMOVN", "VMOVL")>;
       // Several mappings below use an inline WriteSequence, i.e. the listed
       // writes chained into a single write.
1649   def : InstRW<[WriteSequence<[SwiftWriteP0FourCycle, SwiftWriteP1TwoCycle]>],
1650         (instregex "VQMOVN")>;
1651   def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VDUPLN", "VDUPf")>;
1652   def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>],
1653         (instregex "VDUP(8|16|32)")>;
1654   def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "VMOVRS$")>;
1655   def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP0TwoCycle]>],
1656         (instregex "VMOVSR$", "VSETLN")>;
1657   def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2FourCycle],
1658         (instregex "VMOVRR(D|S)$")>;
1659   def : InstRW<[SwiftWriteP2FourCycle], (instregex "VMOVDRR$")>;
       // VMOVSRR lists two sequences; the second is the first one prefixed with
       // the latency-only SwiftWrite1Cycle.
1660   def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>,
1661                 WriteSequence<[SwiftWrite1Cycle, SwiftWriteP2FourCycle,
1662                                SwiftWriteP1TwoCycle]>],
1663                 (instregex "VMOVSRR$")>;
1664   def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle]>],
1665         (instregex "VGETLN(u|i)")>;
       // The VGETLNs sequence has an extra SwiftWriteP01OneCycle at the end.
1666   def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle,
1667                                SwiftWriteP01OneCycle]>],
1668         (instregex "VGETLNs")>;
1669
1670   // 4.2.38 Advanced SIMD and VFP, Move FPSCR
1671   // Serializing instructions.
       // Each SwiftWaitP<k>For15Cy write has latency 15 and holds its unit for 15
       // cycles. VMRS and VMSR list all three, so SwiftUnitP0, SwiftUnitP1 and
       // SwiftUnitP2 are each reserved for 15 cycles.
1672   def SwiftWaitP0For15Cy : SchedWriteRes<[SwiftUnitP0]> {
1673     let Latency = 15;
1674     let ResourceCycles = [15];
1675   }
1676   def SwiftWaitP1For15Cy : SchedWriteRes<[SwiftUnitP1]> {
1677     let Latency = 15;
1678     let ResourceCycles = [15];
1679   }
1680   def SwiftWaitP2For15Cy : SchedWriteRes<[SwiftUnitP2]> {
1681     let Latency = 15;
1682     let ResourceCycles = [15];
1683   }
1684   def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
1685         (instregex "VMRS")>;
1686   def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
1687         (instregex "VMSR")>;
1688   // Not serializing.
1689   def : InstRW<[SwiftWriteP0TwoCycle], (instregex "FMSTAT")>;
1690
1691   // 4.2.39 Advanced SIMD and VFP, Load Single Element
       // VLDRD/VLDRS use SwiftWriteLM4Cy, the write defined for load-multiple
       // (SwiftUnitP2, latency 4).
1692   def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDRD$", "VLDRS$")>;
1693
1694   // 4.2.40 Advanced SIMD and VFP, Store Single Element
       // The stores VSTRD/VSTRS are mapped to that same load write.
1695   def : InstRW<[SwiftWriteLM4Cy], (instregex "VSTRD$", "VSTRS$")>;
1696
1697   // 4.2.41 Advanced SIMD and VFP, Load Multiple
1698   // 4.2.42 Advanced SIMD and VFP, Store Multiple
1699
1700   // Resource requirement for permuting, just reserves the resources.
       // SwiftVLDMPerm<Num> (Num = 1..28) has latency 0, Num micro-ops and holds
       // SwiftUnitP1 for Num cycles.
1701   foreach Num = 1-28 in {
1702     def SwiftVLDMPerm#Num : SchedWriteRes<[SwiftUnitP1]> {
1703       let Latency = 0;
1704       let NumMicroOps = Num;
1705       let ResourceCycles = [Num];
1706     }
1707   }
1708
1709   // Pre RA pseudos - load/store to a Q register as a D register pair.
       // Both the load and the store pseudo use SwiftWriteLM4Cy.
1710   def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDMQIA$", "VSTMQIA$")>;
1711
1712   // Post RA not modelled accurately. We assume that register use of width 64
1713   // bit maps to a D register, 128 maps to a Q register. Not all different kinds
1714   // are accurately represented.
       // Variadic write variant keyed on SwiftLMAddr<N>Pred (N = 1..16). From
       // N = 3 on, every entry except N = 4 ends with SwiftWriteP01OneCycle and
       // a SwiftVLDMPerm<k> write after its per-register SwiftWriteLM* writes.
1715   def SwiftWriteVLDM : SchedWriteVariant<[
1716     // Load of one S register.
1717     SchedVar<SwiftLMAddr1Pred, [SwiftWriteLM4Cy]>,
1718     // Load of one D register.
1719     SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo]>,
1720     // Load of 3 S registers.
1721     SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1722                                 SwiftWriteLM13CyNo, SwiftWriteP01OneCycle,
1723                                 SwiftVLDMPerm3]>,
1724     // Load of a Q register (not necessarily true). We should not be mapping to
1725     // 4 S registers, either.
1726     SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo,
1727                                 SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>,
1728     // Load of 5 S registers.
1729     SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1730                                 SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
1731                                 SwiftWriteLM17CyNo,  SwiftWriteP01OneCycle,
1732                                 SwiftVLDMPerm5]>,
1733     // Load of 3 D registers. (Must also be able to handle s register list -
1734     // though, not accurate)
1735     SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1736                                 SwiftWriteLM10Cy, SwiftWriteLM14CyNo,
1737                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
1738                                 SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
1739     // Load of 7 S registers.
1740     SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1741                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
1742                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
1743                                 SwiftWriteLM21CyNo, SwiftWriteP01OneCycle,
1744                                 SwiftVLDMPerm7]>,
1745     // Load of two Q registers.
1746     SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1747                                 SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
1748                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1749                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1750                                 SwiftWriteP01OneCycle,  SwiftVLDMPerm2]>,
1751     // Load of 9 S registers.
1752     SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1753                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
1754                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
1755                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1756                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
1757                                 SwiftVLDMPerm9]>,
1758     // Load of 5 D registers.
1759     SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1760                                 SwiftWriteLM10Cy, SwiftWriteLM14Cy,
1761                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
1762                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
1763                                 SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
1764                                 SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
1765     // Inaccurate: reuse description from 9 S registers.
1766     SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1767                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
1768                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
1769                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1770                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1771                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
1772                                 SwiftVLDMPerm9]>,
1773     // Load of three Q registers.
1774     SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1775                                 SwiftWriteLM11Cy, SwiftWriteLM11Cy,
1776                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
1777                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
1778                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
1779                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
1780                                 SwiftWriteP01OneCycle, SwiftVLDMPerm3]>,
1781     // Inaccurate: reuse description from 9 S registers.
1782     SchedVar<SwiftLMAddr13Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1783                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
1784                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
1785                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1786                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1787                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1788                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
1789                                 SwiftVLDMPerm9]>,
1790     // Load of 7 D registers inaccurate.
1791     SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1792                                 SwiftWriteLM10Cy, SwiftWriteLM14Cy,
1793                                 SwiftWriteLM14Cy, SwiftWriteLM14CyNo,
1794                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
1795                                 SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
1796                                 SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
1797                                 SwiftWriteP01OneCycle, SwiftVLDMPerm7]>,
1798     SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1799                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1800                                 SwiftWriteLM17Cy, SwiftWriteLM18CyNo,
1801                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1802                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1803                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1804                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1805                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
1806                                 SwiftVLDMPerm9]>,
1807     // Load of 4 Q registers.
1808     SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM7Cy, SwiftWriteLM10Cy,
1809                                 SwiftWriteLM11Cy, SwiftWriteLM14Cy,
1810                                 SwiftWriteLM15Cy, SwiftWriteLM18CyNo,
1811                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1812                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1813                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1814                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1815                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1816                                 SwiftWriteP01OneCycle, SwiftVLDMPerm4]>,
1817     // Unknown number of registers, just use resources for two registers.
         // The resource-carrying writes and the trailing pair match the
         // "two Q registers" entry; the list is padded with latency-only
         // SwiftWriteLM13CyNo writes.
1818     SchedVar<NoSchedPred,      [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1819                                 SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
1820                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1821                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1822                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1823                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1824                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1825                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1826                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1827                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1828                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1829                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1830                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1831                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1832                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1833                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1834                                 SwiftWriteP01OneCycle,  SwiftVLDMPerm2]>
1835   ]> { let Variadic = 1; }
1836
1837   def : InstRW<[SwiftWriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
1838
       // The base-updating (_UPD) forms list SwiftWriteP01OneCycle2x ahead of
       // the same SwiftWriteVLDM variant that the plain forms use.
1839   def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVLDM],
1840         (instregex "VLDM[SD](IA|DB)_UPD$")>;
1841
1842   def SwiftWriteVSTM : SchedWriteVariant<[
         // Each SchedVar pairs a SwiftLMAddr<N>Pred predicate with the
         // SwiftWriteSTM<k> write to use for it; the NoSchedPred entry at the
         // end is the fallback. Variadic is set on the whole variant below.
1843     // One S register.
1844     SchedVar<SwiftLMAddr1Pred, [SwiftWriteSTM1]>,
1845     // One D register.
1846     SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM1]>,
1847     // Three S registers.
1848     SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM4]>,
1849     // Assume one Q register.
1850     SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM1]>,
1851     SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM6]>,
1852     // Assume three D registers.
1853     SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM4]>,
1854     SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM8]>,
1855     // Assume two Q registers.
1856     SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM3]>,
1857     SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM10]>,
1858     // Assume 5 D registers.
1859     SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>,
1860     SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>,
1861     // Assume three Q registers.
1862     SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>,
1863     SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>,
1864     // Assume 7 D registers.
1865     SchedVar<SwiftLMAddr14Pred, [SwiftWriteSTM8]>,
1866     SchedVar<SwiftLMAddr15Pred, [SwiftWriteSTM16]>,
1867     // Assume four Q registers.
1868     SchedVar<SwiftLMAddr16Pred, [SwiftWriteSTM5]>,
1869     // Fallback when no predicate above applies: assume two Q registers.
1870     SchedVar<NoSchedPred, [SwiftWriteSTM3]>
1871   ]> { let Variadic = 1; }
1872
1873   def : InstRW<[SwiftWriteVSTM], (instregex "VSTM[SD](IA|DB)$")>;
1874
       // As with VLDM, the base-updating (_UPD) forms add
       // SwiftWriteP01OneCycle2x in front of the variant write.
       // NOTE(review): unlike the VLDM _UPD pattern, this regex has no trailing
       // '$' -- confirm that is intended.
1875   def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVSTM],
1876         (instregex "VSTM[SD](IA|DB)_UPD")>;
1877
1878   // 4.2.43 Advanced SIMD, Element or Structure Load and Store
       // Two writes on SwiftUnitP2 with a latency of 4; they differ only in how
       // long they hold the unit (ResourceCycles of 2 and 3 respectively).
1879   def SwiftWrite2xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
1880       let Latency = 4;
1881       let ResourceCycles = [2];
1882   }
1883   def SwiftWrite3xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
1884       let Latency = 4;
1885       let ResourceCycles = [3];
1886   }
1887   foreach Num = 1-2 in {
         // Defines SwiftExt1xP0 and SwiftExt2xP0: zero-latency writes with
         // Num micro-ops that hold SwiftUnitP0 for Num cycles.
1888     def SwiftExt#Num#xP0 : SchedWriteRes<[SwiftUnitP0]> {
1889       let Latency = 0;
1890       let NumMicroOps = Num;
1891       let ResourceCycles = [Num];
1892     }
1893   }
1894   // VLDx
1895   // Multiple structures.
1896   // Single element structure loads.
1897   // We assume aligned.
       // The writeback (wb) forms add SwiftWriteP01OneCycle after the load write.
1898   // Single/two register.
1899   def : InstRW<[SwiftWriteLM4Cy], (instregex "VLD1(d|q)(8|16|32|64)$")>;
1900   def : InstRW<[SwiftWriteLM4Cy, SwiftWriteP01OneCycle],
1901         (instregex "VLD1(d|q)(8|16|32|64)wb")>;
1902   // Three register.
1903   def : InstRW<[SwiftWrite3xP2FourCy],
1904         (instregex "VLD1(d|q)(8|16|32|64)T$", "VLD1d64TPseudo")>;
1905   def : InstRW<[SwiftWrite3xP2FourCy, SwiftWriteP01OneCycle],
1906         (instregex "VLD1(d|q)(8|16|32|64)Twb")>;
1907   // Four register.
1908   def : InstRW<[SwiftWrite2xP2FourCy],
1909         (instregex "VLD1(d|q)(8|16|32|64)Q$", "VLD1d64QPseudo")>;
1910   def : InstRW<[SwiftWrite2xP2FourCy, SwiftWriteP01OneCycle],
1911         (instregex "VLD1(d|q)(8|16|32|64)Qwb")>;
1912   // Two element structure loads.
1913   // Two/four register.
       // Writeback forms list SwiftWriteP01OneCycle directly after the load write.
1914   def : InstRW<[SwiftWriteLM9Cy, SwiftExt2xP0, SwiftVLDMPerm2],
1915         (instregex "VLD2(d|q|b)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
1916   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
1917                 SwiftVLDMPerm2],
1918         (instregex "VLD2(d|q|b)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;
1919   // Three element structure.
       // The non-pseudo forms list three load writes (SwiftWriteLM9Cy followed by
       // two SwiftWriteLM9CyNo); the pseudo forms list SwiftWriteLM9Cy only.
1920   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
1921                 SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
1922         (instregex "VLD3(d|q)(8|16|32)$")>;
1923   def : InstRW<[SwiftWriteLM9Cy, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
1924         (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
1925
       // Base-updating (_UPD) forms: SwiftWriteP01OneCycle follows the load
       // writes.
1926   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
1927                 SwiftWriteP01OneCycle, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
1928         (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
1929   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm3,
1930                 SwiftWrite3xP2FourCy],
1931         (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
1932   // Four element structure loads.
       // The non-pseudo forms list four SwiftWriteLM11Cy writes, the pseudo forms
       // one; the _UPD forms add SwiftWriteP01OneCycle after the load writes.
1933   def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
1934                 SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4,
1935                 SwiftWrite3xP2FourCy],
1936         (instregex "VLD4(d|q)(8|16|32)$")>;
1937   def : InstRW<[SwiftWriteLM11Cy,  SwiftExt2xP0, SwiftVLDMPerm4,
1938                 SwiftWrite3xP2FourCy],
1939         (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
1940   def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
1941                 SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
1942                 SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
1943         (instregex "VLD4(d|q)(8|16|32)_UPD")>;
1944   def : InstRW<[SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
1945                 SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
1946         (instregex  "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
1947
1948   // Single all/lane loads.
1949   // One element structure.
       // The first entry covers the plain forms, the second the writeback forms
       // (wb, _UPD, Pseudo_UPD), which add SwiftWriteP01OneCycle.
1950   def : InstRW<[SwiftWriteLM6Cy, SwiftVLDMPerm2],
1951         (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
1952   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm2],
1953         (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
1954                   "VLD1LNq(8|16|32)Pseudo_UPD")>;
1955   // Two element structure.
       // SwiftWriteP01OneCycle comes after both load writes for VLD2LN*_UPD, but
       // directly after the first load write for the VLD2DUPd*wb and
       // VLD2LN*Pseudo_UPD patterns.
1956   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftExt1xP0, SwiftVLDMPerm2],
1957         (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
1958                    "VLD2LN(d|q)(8|16|32)Pseudo$")>;
1959   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftWriteP01OneCycle,
1960                 SwiftExt1xP0, SwiftVLDMPerm2],
1961         (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
1962   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
1963                 SwiftExt1xP0, SwiftVLDMPerm2],
1964         (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb")>;
1965   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
1966                 SwiftExt1xP0, SwiftVLDMPerm2],
1967         (instregex "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
1968   // Three element structure.
       // SwiftWriteP01OneCycle comes after the three load writes in the _UPD
       // entry, but directly after the first load write in the Pseudo_UPD entry.
1969   def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, SwiftExt1xP0,
1970                 SwiftVLDMPerm3],
1971         (instregex "VLD3(DUP|LN)(d|q)(8|16|32)$",
1972                    "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
1973   def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy,
1974                 SwiftWriteP01OneCycle, SwiftExt1xP0, SwiftVLDMPerm3],
1975         (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
1976   def : InstRW<[SwiftWriteLM7Cy, SwiftWriteP01OneCycle, SwiftWriteLM8Cy,
1977                 SwiftWriteLM8Cy, SwiftExt1xP0, SwiftVLDMPerm3],
1978         (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
1979   // Four element structure.
       // SwiftWriteP01OneCycle comes after the four load writes in the _UPD
       // entry, but directly after the first load write in the Pseudo_UPD entry.
1980   def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
1981                 SwiftWriteLM10CyNo, SwiftExt1xP0, SwiftVLDMPerm5],
1982         (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$",
1983                    "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
1984   def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
1985                 SwiftWriteLM10CyNo, SwiftWriteP01OneCycle, SwiftExt1xP0,
1986                 SwiftVLDMPerm5],
1987         (instregex "VLD4(DUP|LN)(d|q)(8|16|32)_UPD")>;
1988   def : InstRW<[SwiftWriteLM8Cy, SwiftWriteP01OneCycle, SwiftWriteLM9Cy,
1989                 SwiftWriteLM10CyNo, SwiftWriteLM10CyNo, SwiftExt1xP0,
1990                 SwiftVLDMPerm5],
1991         (instregex "VLD4(DUP|LN)(d|q)(8|16|32)Pseudo_UPD")>;
1992   // VSTx
1993   // Multiple structures.
1994   // Single element structure store.
       // Entries use SwiftWrite1xP2 through SwiftWrite4xP2; the writeback forms
       // (wb / WB) list SwiftWriteP01OneCycle ahead of the store write.
1995   def : InstRW<[SwiftWrite1xP2], (instregex "VST1d(8|16|32|64)$")>;
1996   def : InstRW<[SwiftWrite2xP2], (instregex "VST1q(8|16|32|64)$")>;
1997   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2],
1998         (instregex "VST1d(8|16|32|64)wb")>;
1999   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2],
2000         (instregex "VST1q(8|16|32|64)wb")>;
2001   def : InstRW<[SwiftWrite3xP2],
2002         (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
2003   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite3xP2],
2004         (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
2005   def : InstRW<[SwiftWrite4xP2],
2006         (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
2007   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2],
2008         (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
2009   // Two element structure store.
       // The VST2(d|b) patterns; the writeback form lists SwiftWriteP01OneCycle
       // first.
2010   def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
2011         (instregex "VST2(d|b)(8|16|32)$")>;
2012   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
2013         (instregex "VST2(b|d)(8|16|32)wb")>;
2014   def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
2015         (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
       // Writeback forms: like every other writeback store in this section, list
       // SwiftWriteP01OneCycle first, ahead of the store and permute writes.
2016   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
2017         (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
2018   // Three element structure store.
       // Real and pseudo forms share one entry each; the _UPD entry lists
       // SwiftWriteP01OneCycle first.
2019   def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
2020         (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
2021   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
2022         (instregex "VST3(d|q)(8|16|32)_UPD",
2023                    "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
2024   // Four element structure store.
       // NOTE(review): the plain entry uses SwiftVLDMPerm2 while the _UPD entry
       // uses SwiftVLDMPerm4 -- confirm which one is intended.
2025   def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
2026         (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
2027   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm4],
2028         (instregex "VST4(d|q)(8|16|32)_UPD",
2029                    "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
2030   // Single/all lane store.
       // Each pair below is the plain entry followed by the _UPD entry, which
       // lists SwiftWriteP01OneCycle first and otherwise repeats the same writes.
2031   // One element structure.
2032   def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
2033         (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
2034   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
2035         (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
2036   // Two element structure.
2037   def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm2],
2038         (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
2039   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm2],
2040         (instregex "VST2LN(d|q)(8|16|32)_UPD",
2041                    "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
2042   // Three element structure.
2043   def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
2044         (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
2045   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
2046         (instregex "VST3LN(d|q)(8|16|32)_UPD",
2047                    "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
2048   // Four element structure.
2049   def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
2050         (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
2051   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
2052         (instregex "VST4LN(d|q)(8|16|32)_UPD",
2053                    "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
2054
2055   // 4.2.44 VFP, Divide and Square Root
       // Both writes are a single micro-op that holds SwiftUnitP0 for one cycle
       // and SwiftUnitDiv for 15 (SwiftDiv17) or 30 (SwiftDiv32) cycles.
2056   def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
2057     let NumMicroOps = 1;
2058     let Latency = 17;
2059     let ResourceCycles = [1, 15];
2060   }
2061   def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
2062     let NumMicroOps = 1;
2063     let Latency = 32;
2064     let ResourceCycles = [1, 30];
2065   }
       // The VDIVS/VSQRTS patterns use SwiftDiv17; VDIVD/VSQRTD use SwiftDiv32.
2066   def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
2067   def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
2068
2069   // Not specified.
2070   def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
2071   // Preload: WritePreLd is mapped to SwiftUnitP2 with zero latency and a
       // ResourceCycles entry of 0.
2072   def : WriteRes<WritePreLd, [SwiftUnitP2]> { let Latency = 0;
2073     let ResourceCycles = [0];
2074   }
2075
2076 }