ARM sched model: Use the right resources for DIV
[oota-llvm.git] / lib / Target / ARM / ARMScheduleSwift.td
1 //=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the Swift processor.
11 //
12 //===----------------------------------------------------------------------===//
13
14 // ===---------------------------------------------------------------------===//
15 // This section contains legacy support for itineraries. This is
16 // required until SD and PostRA schedulers are replaced by MachineScheduler.
17
18 def SW_DIS0 : FuncUnit; // Issue-stage units (presumably dispatch slots 0-2 -- TODO confirm).
19 def SW_DIS1 : FuncUnit; // Each itinerary visible below opens with zero-advance stage(s) on
20 def SW_DIS2 : FuncUnit; // these units; IIC_Br and IIC_Preload name SW_DIS0 only.
21
22 def SW_ALU0 : FuncUnit; // Named alone (no SW_ALU1) by e.g. integer multiply and FP unary/compare/ALU.
23 def SW_ALU1 : FuncUnit; // Named alone (no SW_ALU0) by e.g. FP convert, multiply and MAC itineraries.
24 def SW_LS   : FuncUnit; // Claimed by e.g. the load and store itineraries (presumably the load/store pipe).
25 def SW_IDIV : FuncUnit; // Held for 14 cycles by IIC_iDIV.
26 def SW_FDIV : FuncUnit; // Held for 15 or 30 cycles by the FP DIV / SQRT itineraries.
27
28 // FIXME: Need bypasses.
29 // FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
30 //        IIC_iMOVix2ld better.
31 // FIXME: Model the special immediate shifts that are not microcoded.
32 // FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
33 //        to issue on pipe 1?
34 // FIXME: Model the pipelined behavior of CMP / TST instructions.
35 // FIXME: Better model the microcode stages of multiply instructions, especially
36 //        conditional variants.
37 // FIXME: Add preload instruction when it is documented.
38 // FIXME: Model non-pipelined nature of FP div / sqrt unit.
39
40 def SwiftItineraries : ProcessorItineraries<
41   [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
42   //
43   // Move instructions, unconditional
44   InstrItinData<IIC_iMOVi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
45                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
46                               [1]>,
47   InstrItinData<IIC_iMOVr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
48                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
49                               [1]>,
50   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
51                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
52                               [1]>,
53   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
54                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
55                               [1]>,
56   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
57                                InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
58                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
59                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
60                               [2]>,
61   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
62                                   InstrStage<1, [SW_ALU0, SW_ALU1]>,
63                                   InstrStage<1, [SW_ALU0, SW_ALU1]>,
64                                   InstrStage<1, [SW_ALU0, SW_ALU1]>],
65                                  [3]>,
66   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
67                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
68                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
69                                InstrStage<1, [SW_LS]>],
70                               [5]>,
71   //
72   // MVN instructions
73   InstrItinData<IIC_iMVNi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
74                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
75                               [1]>,
76   InstrItinData<IIC_iMVNr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
77                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
78                               [1]>,
79   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
80                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
81                               [1]>,
82   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
83                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
84                               [1]>,
85   //
86   // No operand cycles
87   InstrItinData<IIC_iALUx   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
88                                InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
89   //
90   // Binary Instructions that produce a result
91   InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
92                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
93                             [1, 1]>,
94   InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
95                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
96                             [1, 1, 1]>,
97   InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
98                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
99                             [2, 1, 1]>,
100   InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
101                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
102                             [2, 1, 1]>,
103   InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
104                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
105                             [2, 1, 1, 1]>,
106   //
107   // Bitwise Instructions that produce a result
108   InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
109                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
110                             [1, 1]>,
111   InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
112                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
113                             [1, 1, 1]>,
114   InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
115                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
116                             [2, 1, 1]>,
117   InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
118                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
119                             [2, 1, 1, 1]>,
120   //
121   // Unary Instructions that produce a result
122
123   // CLZ, RBIT, etc.
124   InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
125                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
126                             [1, 1]>,
127
128   // BFC, BFI, UBFX, SBFX
129   InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
130                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
131                             [2, 1]>,
132
133   //
134   // Zero and sign extension instructions
135   InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
136                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
137                             [1, 1]>,
138   InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
139                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
140                             [1, 1, 1]>,
141   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
142                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
143                             [1, 1, 1, 1]>,
144   //
145   // Compare instructions
146   InstrItinData<IIC_iCMPi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
147                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
148                               [1]>,
149   InstrItinData<IIC_iCMPr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
150                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
151                               [1, 1]>,
152   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
153                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
154                               [1, 1]>,
155   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
156                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
157                               [1, 1, 1]>,
158   //
159   // Test instructions
160   InstrItinData<IIC_iTSTi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
161                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
162                               [1]>,
163   InstrItinData<IIC_iTSTr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
164                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
165                               [1, 1]>,
166   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
167                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
168                               [1, 1]>,
169   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
170                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
171                               [1, 1, 1]>,
172   //
173   // Move instructions, conditional
174   // FIXME: Correctly model the extra input dep on the destination.
175   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
176                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
177                               [1]>,
178   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
179                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
180                               [1, 1]>,
181   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
182                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
183                               [1, 1]>,
184   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
185                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
186                               [2, 1, 1]>,
187   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
188                                InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
189                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
190                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
191                               [2]>,
192
193   // Integer multiply pipeline
194   //
195   InstrItinData<IIC_iMUL16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
196                                InstrStage<1, [SW_ALU0]>],
197                               [3, 1, 1]>,
198   InstrItinData<IIC_iMAC16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
199                                InstrStage<1, [SW_ALU0]>],
200                               [3, 1, 1, 1]>,
201   InstrItinData<IIC_iMUL32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
202                                InstrStage<1, [SW_ALU0]>],
203                               [4, 1, 1]>,
204   InstrItinData<IIC_iMAC32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
205                                InstrStage<1, [SW_ALU0]>],
206                               [4, 1, 1, 1]>,
207   InstrItinData<IIC_iMUL64  , [InstrStage<1, [SW_DIS0], 0>,
208                                InstrStage<1, [SW_DIS1], 0>,
209                                InstrStage<1, [SW_DIS2], 0>,
210                                InstrStage<1, [SW_ALU0], 1>,
211                                InstrStage<1, [SW_ALU0], 3>,
212                                InstrStage<1, [SW_ALU0]>],
213                               [5, 5, 1, 1]>,
214   InstrItinData<IIC_iMAC64  , [InstrStage<1, [SW_DIS0], 0>,
215                                InstrStage<1, [SW_DIS1], 0>,
216                                InstrStage<1, [SW_DIS2], 0>,
217                                InstrStage<1, [SW_ALU0], 1>,
218                                InstrStage<1, [SW_ALU0], 1>,
219                                InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
220                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
221                               [5, 6, 1, 1]>,
222   //
223   // Integer divide
224   InstrItinData<IIC_iDIV  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
225                              InstrStage<1, [SW_ALU0], 0>,
226                              InstrStage<14, [SW_IDIV]>],
227                             [14, 1, 1]>,
228
229   // Integer load pipeline
230   // FIXME: The timings are some rough approximations
231   //
232   // Immediate offset
233   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
234                                  InstrStage<1, [SW_LS]>],
235                                 [3, 1]>,
236   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
237                                  InstrStage<1, [SW_LS]>],
238                                 [3, 1]>,
239   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
240                                  InstrStage<1, [SW_DIS1], 0>,
241                                  InstrStage<1, [SW_LS], 1>,
242                                  InstrStage<1, [SW_LS]>],
243                                 [3, 4, 1]>,
244   //
245   // Register offset
246   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
247                                  InstrStage<1, [SW_LS]>],
248                                 [3, 1, 1]>,
249   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
250                                  InstrStage<1, [SW_LS]>],
251                                 [3, 1, 1]>,
252   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
253                                  InstrStage<1, [SW_DIS1], 0>,
254                                  InstrStage<1, [SW_DIS2], 0>,
255                                  InstrStage<1, [SW_LS], 1>,
256                                  InstrStage<1, [SW_LS], 3>,
257                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
258                                 [3, 4, 1, 1]>,
259   //
260   // Scaled register offset
261   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [SW_DIS0], 0>,
262                                  InstrStage<1, [SW_DIS1], 0>,
263                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
264                                  InstrStage<1, [SW_LS]>],
265                                 [5, 1, 1]>,
266   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
267                                  InstrStage<1, [SW_DIS1], 0>,
268                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
269                                  InstrStage<1, [SW_LS]>],
270                                 [5, 1, 1]>,
271   //
272   // Immediate offset with update
273   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [SW_DIS0], 0>,
274                                  InstrStage<1, [SW_DIS1], 0>,
275                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
276                                  InstrStage<1, [SW_LS]>],
277                                 [3, 1, 1]>,
278   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
279                                  InstrStage<1, [SW_DIS1], 0>,
280                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
281                                  InstrStage<1, [SW_LS]>],
282                                 [3, 1, 1]>,
283   //
284   // Register offset with update
285   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [SW_DIS0], 0>,
286                                  InstrStage<1, [SW_DIS1], 0>,
287                                  InstrStage<1, [SW_ALU0], 1>,
288                                  InstrStage<1, [SW_LS]>],
289                                 [3, 1, 1, 1]>,
290   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
291                                  InstrStage<1, [SW_DIS1], 0>,
292                                  InstrStage<1, [SW_ALU0], 1>,
293                                  InstrStage<1, [SW_LS]>],
294                                 [3, 1, 1, 1]>,
295   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
296                                  InstrStage<1, [SW_DIS1], 0>,
297                                  InstrStage<1, [SW_DIS2], 0>,
298                                  InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
299                                  InstrStage<1, [SW_LS], 3>,
300                                  InstrStage<1, [SW_LS], 0>,
301                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
302                                 [3, 4, 1, 1]>,
303   //
304   // Scaled register offset with update
305   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
306                                  InstrStage<1, [SW_DIS1], 0>,
307                                  InstrStage<1, [SW_DIS2], 0>,
308                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
309                                  InstrStage<1, [SW_LS], 3>,
310                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
311                                 [5, 3, 1, 1]>,
312   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
313                                   InstrStage<1, [SW_DIS1], 0>,
314                                   InstrStage<1, [SW_DIS2], 0>,
315                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
316                                   InstrStage<1, [SW_LS], 0>,
317                                   InstrStage<1, [SW_ALU0, SW_ALU1]>],
318                                 [5, 3, 1, 1]>,
319   //
320   // Load multiple, def is the 5th operand.
321   // FIXME: This assumes 3 to 4 registers.
322   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [SW_DIS0], 0>,
323                                 InstrStage<1, [SW_DIS1], 0>,
324                                 InstrStage<1, [SW_DIS2], 0>,
325                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
326                                 InstrStage<1, [SW_LS]>],
327                                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
328
329   //
330   // Load multiple + update, defs are the 1st and 5th operands.
331   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
332                                 InstrStage<1, [SW_DIS1], 0>,
333                                 InstrStage<1, [SW_DIS2], 0>,
334                                 InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
335                                 InstrStage<1, [SW_LS], 3>,
336                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
337                                [2, 1, 1, 1, 3], [], -1>, // dynamic uops
338   //
339   // Load multiple plus branch
340   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
341                                 InstrStage<1, [SW_DIS1], 0>,
342                                 InstrStage<1, [SW_DIS2], 0>,
343                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
344                                 InstrStage<1, [SW_LS]>],
345                                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
346   //
347   // Pop, def is the 3rd operand.
348   InstrItinData<IIC_iPop  ,    [InstrStage<1, [SW_DIS0], 0>,
349                                 InstrStage<1, [SW_DIS1], 0>,
350                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
351                                 InstrStage<1, [SW_LS]>],
352                                [1, 1, 3], [], -1>, // dynamic uops
353   //
354   // Pop + branch, def is the 3rd operand.
355   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [SW_DIS0], 0>,
356                                 InstrStage<1, [SW_DIS1], 0>,
357                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
358                                 InstrStage<1, [SW_LS]>],
359                                [1, 1, 3], [], -1>, // dynamic uops
360
361   //
362   // iLoadi + iALUr for t2LDRpci_pic.
363   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
364                                 InstrStage<1, [SW_LS], 3>,
365                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
366                                [4, 1]>,
367
368   // Integer store pipeline
369   //
370   // Immediate offset
371   InstrItinData<IIC_iStore_i  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
372                                  InstrStage<1, [SW_LS]>],
373                                 [1, 1]>,
374   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
375                                  InstrStage<1, [SW_LS]>],
376                                 [1, 1]>,
377   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
378                                  InstrStage<1, [SW_DIS1], 0>,
379                                  InstrStage<1, [SW_DIS2], 0>,
380                                  InstrStage<1, [SW_LS], 0>,
381                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
382                                  InstrStage<1, [SW_LS]>],
383                                 [1, 1]>,
384   //
385   // Register offset
386   InstrItinData<IIC_iStore_r  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
387                                  InstrStage<1, [SW_LS]>],
388                                 [1, 1, 1]>,
389   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
390                                  InstrStage<1, [SW_LS]>],
391                                 [1, 1, 1]>,
392   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
393                                  InstrStage<1, [SW_DIS1], 0>,
394                                  InstrStage<1, [SW_DIS2], 0>,
395                                  InstrStage<1, [SW_LS], 0>,
396                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
397                                  InstrStage<1, [SW_LS]>],
398                                 [1, 1, 1]>,
399   //
400   // Scaled register offset
401   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [SW_DIS0], 0>,
402                                   InstrStage<1, [SW_DIS1], 0>,
403                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
404                                   InstrStage<1, [SW_LS]>],
405                                  [1, 1, 1]>,
406   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
407                                   InstrStage<1, [SW_DIS1], 0>,
408                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
409                                   InstrStage<1, [SW_LS]>],
410                                  [1, 1, 1]>,
411   //
412   // Immediate offset with update
413   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [SW_DIS0], 0>,
414                                   InstrStage<1, [SW_DIS1], 0>,
415                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
416                                   InstrStage<1, [SW_LS]>],
417                                  [1, 1, 1]>,
418   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
419                                   InstrStage<1, [SW_DIS1], 0>,
420                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
421                                   InstrStage<1, [SW_LS]>],
422                                  [1, 1, 1]>,
423   //
424   // Register offset with update
425   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [SW_DIS0], 0>,
426                                   InstrStage<1, [SW_DIS1], 0>,
427                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
428                                   InstrStage<1, [SW_LS]>],
429                                  [1, 1, 1, 1]>,
430   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
431                                   InstrStage<1, [SW_DIS1], 0>,
432                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
433                                   InstrStage<1, [SW_LS]>],
434                                  [1, 1, 1, 1]>,
435   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
436                                   InstrStage<1, [SW_DIS1], 0>,
437                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
438                                   InstrStage<1, [SW_LS]>],
439                                  [1, 1, 1, 1]>,
440   //
441   // Scaled register offset with update
442   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [SW_DIS0], 0>,
443                                     InstrStage<1, [SW_DIS1], 0>,
444                                     InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
445                                     InstrStage<1, [SW_LS], 0>,
446                                     InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
447                                    [3, 1, 1, 1]>,
448   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
449                                     InstrStage<1, [SW_DIS1], 0>,
450                                     InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
451                                     InstrStage<1, [SW_LS], 0>,
452                                     InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
453                                    [3, 1, 1, 1]>,
454   //
455   // Store multiple
456   InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
457                                 InstrStage<1, [SW_DIS1], 0>,
458                                 InstrStage<1, [SW_DIS2], 0>,
459                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
460                                 InstrStage<1, [SW_LS], 1>,
461                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
462                                 InstrStage<1, [SW_LS], 1>,
463                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
464                                 InstrStage<1, [SW_LS]>],
465                                 [], [], -1>, // dynamic uops
466   //
467   // Store multiple + update
468   InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
469                                 InstrStage<1, [SW_DIS1], 0>,
470                                 InstrStage<1, [SW_DIS2], 0>,
471                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
472                                 InstrStage<1, [SW_LS], 1>,
473                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
474                                 InstrStage<1, [SW_LS], 1>,
475                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
476                                 InstrStage<1, [SW_LS]>],
477                                [2], [], -1>, // dynamic uops
478
479   //
480   // Preload
481   InstrItinData<IIC_Preload,   [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
482
483   // Branch
484   //
485   // no delay slots, so the latency of a branch is unimportant
486   InstrItinData<IIC_Br       , [InstrStage<1, [SW_DIS0], 0>]>,
487
488   // FP Special Register to Integer Register File Move
489   InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
490                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
491                              [1]>,
492   //
493   // Single-precision FP Unary
494   //
495   // Most floating-point moves get issued on ALU0.
496   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
497                                InstrStage<1, [SW_ALU0]>],
498                               [2, 1]>,
499   //
500   // Double-precision FP Unary
501   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
502                                InstrStage<1, [SW_ALU0]>],
503                               [2, 1]>,
504
505   //
506   // Single-precision FP Compare
507   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
508                                InstrStage<1, [SW_ALU0]>],
509                               [1, 1]>,
510   //
511   // Double-precision FP Compare
512   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
513                                InstrStage<1, [SW_ALU0]>],
514                               [1, 1]>,
515   //
516   // Single to Double FP Convert
517   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
518                                InstrStage<1, [SW_ALU1]>],
519                               [4, 1]>,
520   //
521   // Double to Single FP Convert
522   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
523                                InstrStage<1, [SW_ALU1]>],
524                               [4, 1]>,
525
526   //
527   // Single to Half FP Convert
528   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
529                                InstrStage<1, [SW_DIS1], 0>,
530                                InstrStage<1, [SW_ALU1], 4>,
531                                InstrStage<1, [SW_ALU1]>],
532                               [6, 1]>,
533   //
534   // Half to Single FP Convert
535   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
536                                InstrStage<1, [SW_ALU1]>],
537                               [4, 1]>,
538
539   //
540   // Single-Precision FP to Integer Convert
541   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
542                                InstrStage<1, [SW_ALU1]>],
543                               [4, 1]>,
544   //
545   // Double-Precision FP to Integer Convert
546   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
547                                InstrStage<1, [SW_ALU1]>],
548                               [4, 1]>,
549   //
550   // Integer to Single-Precision FP Convert
551   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
552                                InstrStage<1, [SW_ALU1]>],
553                               [4, 1]>,
554   //
555   // Integer to Double-Precision FP Convert
556   InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
557                                InstrStage<1, [SW_ALU1]>],
558                               [4, 1]>,
559   //
560   // Single-precision FP ALU
561   InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
562                                InstrStage<1, [SW_ALU0]>],
563                               [2, 1, 1]>,
564   //
565   // Double-precision FP ALU
566   InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
567                                InstrStage<1, [SW_ALU0]>],
568                               [2, 1, 1]>,
569   //
570   // Single-precision FP Multiply
571   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
572                                InstrStage<1, [SW_ALU1]>],
573                               [4, 1, 1]>,
574   //
575   // Double-precision FP Multiply
576   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
577                                InstrStage<1, [SW_ALU1]>],
578                               [6, 1, 1]>,
579   //
580   // Single-precision FP MAC
581   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
582                                InstrStage<1, [SW_ALU1]>],
583                               [8, 1, 1]>,
584   //
585   // Double-precision FP MAC
586   InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
587                                InstrStage<1, [SW_ALU1]>],
588                               [12, 1, 1]>,
589   //
590   // Single-precision Fused FP MAC
591   InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
592                                InstrStage<1, [SW_ALU1]>],
593                               [8, 1, 1]>,
594   //
595   // Double-precision Fused FP MAC
596   InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
597                                InstrStage<1, [SW_ALU1]>],
598                               [12, 1, 1]>,
599   //
600   // Single-precision FP DIV
601   InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
602                                InstrStage<1, [SW_ALU1], 0>,
603                                InstrStage<15, [SW_FDIV]>],
604                               [17, 1, 1]>,
605   //
606   // Double-precision FP DIV
607   InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
608                                InstrStage<1, [SW_ALU1], 0>,
609                                InstrStage<30, [SW_FDIV]>],
610                               [32, 1, 1]>,
611   //
612   // Single-precision FP SQRT
613   InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
614                                InstrStage<1, [SW_ALU1], 0>,
615                                InstrStage<15, [SW_FDIV]>],
616                               [17, 1]>,
617   //
618   // Double-precision FP SQRT
619   InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
620                                InstrStage<1, [SW_ALU1], 0>,
621                                InstrStage<30, [SW_FDIV]>],
622                               [32, 1, 1]>,
623
624   //
625   // Integer to Single-precision Move
626   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [SW_DIS0], 0>,
627                                InstrStage<1, [SW_DIS1], 0>,
628                                InstrStage<1, [SW_LS], 4>,
629                                InstrStage<1, [SW_ALU0]>],
630                               [6, 1]>,
631   //
632   // Integer to Double-precision Move
633   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
634                                InstrStage<1, [SW_LS]>],
635                               [4, 1]>,
636   //
637   // Single-precision to Integer Move
638   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
639                                InstrStage<1, [SW_LS]>],
640                               [3, 1]>,
641   //
642   // Double-precision to Integer Move
643   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [SW_DIS0], 0>,
644                                InstrStage<1, [SW_DIS1], 0>,
645                                InstrStage<1, [SW_LS], 3>,
646                                InstrStage<1, [SW_LS]>],
647                               [3, 4, 1]>,
648   //
649   // Single-precision FP Load
650   InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
651                                InstrStage<1, [SW_LS]>],
652                               [4, 1]>,
653   //
654   // Double-precision FP Load
655   InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
656                                InstrStage<1, [SW_LS]>],
657                               [4, 1]>,
658   //
659   // FP Load Multiple
660   // FIXME: Assumes a single Q register.
661   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
662                                InstrStage<1, [SW_LS]>],
663                               [1, 1, 1, 4], [], -1>, // dynamic uops
664   //
665   // FP Load Multiple + update
666   // FIXME: Assumes a single Q register.
667   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
668                                InstrStage<1, [SW_DIS1], 0>,
669                                InstrStage<1, [SW_LS], 4>,
670                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
671                               [2, 1, 1, 1, 4], [], -1>, // dynamic uops
672   //
673   // Single-precision FP Store
674   InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
675                                InstrStage<1, [SW_LS]>],
676                               [1, 1]>,
677   //
678   // Double-precision FP Store
679   InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
680                                InstrStage<1, [SW_LS]>],
681                               [1, 1]>,
682   //
683   // FP Store Multiple
684   // FIXME: Assumes a single Q register.
685   InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
686                                InstrStage<1, [SW_LS]>],
687                               [1, 1, 1], [], -1>, // dynamic uops
688   //
689   // FP Store Multiple + update
690   // FIXME: Assumes a single Q register.
691   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
692                                 InstrStage<1, [SW_DIS1], 0>,
693                                 InstrStage<1, [SW_LS], 4>,
694                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
695                                [2, 1, 1, 1], [], -1>, // dynamic uops
696   // NEON
697   //
698   // Double-register Integer Unary
699   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
700                                InstrStage<1, [SW_ALU0]>],
701                               [4, 1]>,
702   //
703   // Quad-register Integer Unary
704   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
705                                InstrStage<1, [SW_ALU0]>],
706                               [4, 1]>,
707   //
708   // Double-register Integer Q-Unary
709   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
710                                InstrStage<1, [SW_ALU0]>],
711                               [4, 1]>,
712   //
713   // Quad-register Integer Q-Unary
714   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
715                                InstrStage<1, [SW_ALU0]>],
716                               [4, 1]>,
717   //
718   // Double-register Integer Binary
719   InstrItinData<IIC_VBINiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
720                                InstrStage<1, [SW_ALU0]>],
721                               [2, 1, 1]>,
722   //
723   // Quad-register Integer Binary
724   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
725                                InstrStage<1, [SW_ALU0]>],
726                               [2, 1, 1]>,
727   //
728   // Double-register Integer Subtract
729   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
730                                InstrStage<1, [SW_ALU0]>],
731                               [2, 1, 1]>,
732   //
733   // Quad-register Integer Subtract
734   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
735                                InstrStage<1, [SW_ALU0]>],
736                               [2, 1, 1]>,
737   //
738   // Double-register Integer Shift
739   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
740                                InstrStage<1, [SW_ALU0]>],
741                               [2, 1, 1]>,
742   //
743   // Quad-register Integer Shift
744   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
745                                InstrStage<1, [SW_ALU0]>],
746                               [2, 1, 1]>,
747   //
748   // Double-register Integer Shift (4 cycle)
749   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
750                                InstrStage<1, [SW_ALU0]>],
751                               [4, 1, 1]>,
752   //
753   // Quad-register Integer Shift (4 cycle)
754   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
755                                InstrStage<1, [SW_ALU0]>],
756                               [4, 1, 1]>,
757   //
758   // Double-register Integer Binary (4 cycle)
759   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
760                                InstrStage<1, [SW_ALU0]>],
761                               [4, 1, 1]>,
762   //
763   // Quad-register Integer Binary (4 cycle)
764   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
765                                InstrStage<1, [SW_ALU0]>],
766                               [4, 1, 1]>,
767   //
768   // Double-register Integer Subtract (4 cycle)
769   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
770                                InstrStage<1, [SW_ALU0]>],
771                               [4, 1, 1]>,
772   //
773   // Quad-register Integer Subtract (4 cycle)
774   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
775                                InstrStage<1, [SW_ALU0]>],
776                               [4, 1, 1]>,
777
778   //
779   // Double-register Integer Count
780   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
781                                InstrStage<1, [SW_ALU0]>],
782                               [2, 1, 1]>,
783   //
784   // Quad-register Integer Count
785   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
786                                InstrStage<1, [SW_ALU0]>],
787                               [2, 1, 1]>,
788   //
789   // Double-register Absolute Difference and Accumulate
790   InstrItinData<IIC_VABAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
791                                InstrStage<1, [SW_ALU0]>],
792                               [4, 1, 1, 1]>,
793   //
794   // Quad-register Absolute Difference and Accumulate
795   InstrItinData<IIC_VABAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
796                                InstrStage<1, [SW_ALU0]>],
797                               [4, 1, 1, 1]>,
798   //
799   // Double-register Integer Pair Add Long
800   InstrItinData<IIC_VPALiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
801                                InstrStage<1, [SW_ALU0]>],
802                               [4, 1, 1]>,
803   //
804   // Quad-register Integer Pair Add Long
805   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
806                                InstrStage<1, [SW_ALU0]>],
807                               [4, 1, 1]>,
808
809   //
810   // Double-register Integer Multiply (.8, .16)
811   InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
812                                InstrStage<1, [SW_ALU1]>],
813                               [4, 1, 1]>,
814   //
815   // Quad-register Integer Multiply (.8, .16)
816   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
817                                InstrStage<1, [SW_ALU1]>],
818                               [4, 1, 1]>,
819
820   //
821   // Double-register Integer Multiply (.32)
822   InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
823                                InstrStage<1, [SW_ALU1]>],
824                               [4, 1, 1]>,
825   //
826   // Quad-register Integer Multiply (.32)
827   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
828                                InstrStage<1, [SW_ALU1]>],
829                               [4, 1, 1]>,
830   //
831   // Double-register Integer Multiply-Accumulate (.8, .16)
832   InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
833                                InstrStage<1, [SW_ALU1]>],
834                               [4, 1, 1, 1]>,
835   //
836   // Double-register Integer Multiply-Accumulate (.32)
837   InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
838                                InstrStage<1, [SW_ALU1]>],
839                               [4, 1, 1, 1]>,
840   //
841   // Quad-register Integer Multiply-Accumulate (.8, .16)
842   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
843                                InstrStage<1, [SW_ALU1]>],
844                               [4, 1, 1, 1]>,
845   //
846   // Quad-register Integer Multiply-Accumulate (.32)
847   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
848                                InstrStage<1, [SW_ALU1]>],
849                               [4, 1, 1, 1]>,
850
851   //
852   // Move
853   InstrItinData<IIC_VMOV,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
854                                InstrStage<1, [SW_ALU0]>],
855                               [2, 1]>,
856   //
857   // Move Immediate
858   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
859                                InstrStage<1, [SW_ALU0]>],
860                               [2]>,
861   //
862   // Double-register Permute Move
863   InstrItinData<IIC_VMOVD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
864                                InstrStage<1, [SW_ALU1]>],
865                               [2, 1]>,
866   //
867   // Quad-register Permute Move
868   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
869                                InstrStage<1, [SW_ALU1]>],
870                               [2, 1]>,
871   //
872   // Integer to Single-precision Move
873   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [SW_DIS0], 0>,
874                                InstrStage<1, [SW_DIS1], 0>,
875                                InstrStage<1, [SW_LS], 4>,
876                                InstrStage<1, [SW_ALU0]>],
877                               [6, 1]>,
878   //
879   // Integer to Double-precision Move
880   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
881                                InstrStage<1, [SW_LS]>],
882                               [4, 1, 1]>,
883   //
884   // Single-precision to Integer Move
885   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
886                                InstrStage<1, [SW_LS]>],
887                               [3, 1]>,
888   //
889   // Double-precision to Integer Move
890   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [SW_DIS0], 0>,
891                                InstrStage<1, [SW_DIS1], 0>,
892                                InstrStage<1, [SW_LS], 3>,
893                                InstrStage<1, [SW_LS]>],
894                               [3, 4, 1]>,
895   //
896   // Integer to Lane Move
897   // FIXME: I think this is correct, but it is not clear from the tuning guide.
898   InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
899                                InstrStage<1, [SW_DIS1], 0>,
900                                InstrStage<1, [SW_LS], 4>,
901                                InstrStage<1, [SW_ALU0]>],
902                               [6, 1]>,
903
904   //
905   // Vector narrow move
906   InstrItinData<IIC_VMOVN,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
907                                InstrStage<1, [SW_ALU1]>],
908                               [2, 1]>,
909   //
910   // Double-register FP Unary
911   // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
912   //        and they issue on a different pipeline.
913   InstrItinData<IIC_VUNAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
914                                InstrStage<1, [SW_ALU0]>],
915                               [2, 1]>,
916   //
917   // Quad-register FP Unary
918   // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
919   //        and they issue on a different pipeline.
920   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
921                                InstrStage<1, [SW_ALU0]>],
922                               [2, 1]>,
923   //
924   // Double-register FP Binary
925   // FIXME: We're using this itin for many instructions.
926   InstrItinData<IIC_VBIND,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
927                                InstrStage<1, [SW_ALU0]>],
928                               [4, 1, 1]>,
929
930   //
931   // VPADD, etc.
932   InstrItinData<IIC_VPBIND,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
933                                InstrStage<1, [SW_ALU0]>],
934                               [4, 1, 1]>,
935   //
936   // Double-register FP VMUL
937   InstrItinData<IIC_VFMULD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
938                                InstrStage<1, [SW_ALU1]>],
939                               [4, 1, 1]>,
940   //
941   // Quad-register FP Binary
942   InstrItinData<IIC_VBINQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
943                                InstrStage<1, [SW_ALU0]>],
944                               [4, 1, 1]>,
945   //
946   // Quad-register FP VMUL
947   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
948                                InstrStage<1, [SW_ALU1]>],
949                               [4, 1, 1]>,
950   //
951   // Double-register FP Multiply-Accumulate
952   InstrItinData<IIC_VMACD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
953                                InstrStage<1, [SW_ALU1]>],
954                               [8, 1, 1]>,
955   //
956   // Quad-register FP Multiply-Accumulate
957   InstrItinData<IIC_VMACQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
958                                InstrStage<1, [SW_ALU1]>],
959                               [8, 1, 1]>,
960   //
961   // Double-register Fused FP Multiply-Accumulate
962   InstrItinData<IIC_VFMACD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
963                                InstrStage<1, [SW_ALU1]>],
964                               [8, 1, 1]>,
965   //
966   // Quad-register Fused FP Multiply-Accumulate
967   InstrItinData<IIC_VFMACQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
968                                InstrStage<1, [SW_ALU1]>],
969                               [8, 1, 1]>,
970   //
971   // Double-register Reciprocal Step
972   InstrItinData<IIC_VRECSD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
973                                InstrStage<1, [SW_ALU1]>],
974                               [8, 1, 1]>,
975   //
976   // Quad-register Reciprocal Step
977   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
978                                InstrStage<1, [SW_ALU1]>],
979                               [8, 1, 1]>,
980   //
981   // Double-register Permute
982   // FIXME: The latencies are unclear from the documentation.
983   InstrItinData<IIC_VPERMD,   [InstrStage<1, [SW_DIS0], 0>,
984                                InstrStage<1, [SW_DIS1], 0>,
985                                InstrStage<1, [SW_DIS2], 0>,
986                                InstrStage<1, [SW_ALU1], 2>,
987                                InstrStage<1, [SW_ALU1], 2>,
988                                InstrStage<1, [SW_ALU1]>],
989                               [3, 4, 3, 4]>,
990   //
991   // Quad-register Permute
992   // FIXME: The latencies are unclear from the documentation.
993   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [SW_DIS0], 0>,
994                                InstrStage<1, [SW_DIS1], 0>,
995                                InstrStage<1, [SW_DIS2], 0>,
996                                InstrStage<1, [SW_ALU1], 2>,
997                                InstrStage<1, [SW_ALU1], 2>,
998                                InstrStage<1, [SW_ALU1]>],
999                               [3, 4, 3, 4]>,
1000   //
1001   // Quad-register Permute (3 cycle issue on A9)
1002   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [SW_DIS0], 0>,
1003                                InstrStage<1, [SW_DIS1], 0>,
1004                                InstrStage<1, [SW_DIS2], 0>,
1005                                InstrStage<1, [SW_ALU1], 2>,
1006                                InstrStage<1, [SW_ALU1], 2>,
1007                                InstrStage<1, [SW_ALU1]>],
1008                               [3, 4, 3, 4]>,
1009
1010   //
1011   // Double-register VEXT
1012   InstrItinData<IIC_VEXTD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1013                                InstrStage<1, [SW_ALU1]>],
1014                               [2, 1, 1]>,
1015   //
1016   // Quad-register VEXT
1017   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1018                                InstrStage<1, [SW_ALU1]>],
1019                               [2, 1, 1]>,
1020   //
1021   // VTB
1022   InstrItinData<IIC_VTB1,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1023                                InstrStage<1, [SW_ALU1]>],
1024                               [2, 1, 1]>,
1025   InstrItinData<IIC_VTB2,     [InstrStage<1, [SW_DIS0], 0>,
1026                                InstrStage<1, [SW_DIS1], 0>,
1027                                InstrStage<1, [SW_ALU1], 2>,
1028                                InstrStage<1, [SW_ALU1]>],
1029                               [4, 1, 3, 3]>,
1030   InstrItinData<IIC_VTB3,     [InstrStage<1, [SW_DIS0], 0>,
1031                                InstrStage<1, [SW_DIS1], 0>,
1032                                InstrStage<1, [SW_DIS2], 0>,
1033                                InstrStage<1, [SW_ALU1], 2>,
1034                                InstrStage<1, [SW_ALU1], 2>,
1035                                InstrStage<1, [SW_ALU1]>],
1036                               [6, 1, 3, 5, 5]>,
1037   InstrItinData<IIC_VTB4,     [InstrStage<1, [SW_DIS0], 0>,
1038                                InstrStage<1, [SW_DIS1], 0>,
1039                                InstrStage<1, [SW_DIS2], 0>,
1040                                InstrStage<1, [SW_ALU1], 2>,
1041                                InstrStage<1, [SW_ALU1], 2>,
1042                                InstrStage<1, [SW_ALU1], 2>,
1043                                InstrStage<1, [SW_ALU1]>],
1044                               [8, 1, 3, 5, 7, 7]>,
1045   //
1046   // VTBX
1047   InstrItinData<IIC_VTBX1,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1048                                InstrStage<1, [SW_ALU1]>],
1049                               [2, 1, 1]>,
1050   InstrItinData<IIC_VTBX2,    [InstrStage<1, [SW_DIS0], 0>,
1051                                InstrStage<1, [SW_DIS1], 0>,
1052                                InstrStage<1, [SW_ALU1], 2>,
1053                                InstrStage<1, [SW_ALU1]>],
1054                               [4, 1, 3, 3]>,
1055   InstrItinData<IIC_VTBX3,    [InstrStage<1, [SW_DIS0], 0>,
1056                                InstrStage<1, [SW_DIS1], 0>,
1057                                InstrStage<1, [SW_DIS2], 0>,
1058                                InstrStage<1, [SW_ALU1], 2>,
1059                                InstrStage<1, [SW_ALU1], 2>,
1060                                InstrStage<1, [SW_ALU1]>],
1061                               [6, 1, 3, 5, 5]>,
1062   InstrItinData<IIC_VTBX4,    [InstrStage<1, [SW_DIS0], 0>,
1063                                InstrStage<1, [SW_DIS1], 0>,
1064                                InstrStage<1, [SW_DIS2], 0>,
1065                                InstrStage<1, [SW_ALU1], 2>,
1066                                InstrStage<1, [SW_ALU1], 2>,
1067                                InstrStage<1, [SW_ALU1], 2>,
1068                                InstrStage<1, [SW_ALU1]>],
1069                               [8, 1, 3, 5, 7, 7]>
1070 ]>;
1071
1072 // ===---------------------------------------------------------------------===//
1073 // The following definitions describe the simple machine model which
1074 // will replace itineraries.
1075
1076 // Swift machine model for scheduling and other instruction cost heuristics.
1077 def SwiftModel : SchedMachineModel {
1078   let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
1079   let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
1080   let LoadLatency = 3; // Same value as SwiftWriteP2ThreeCycle (plain load).
1081   let MispredictPenalty = 14; // A branch direction mispredict.
1082
1083   let Itineraries = SwiftItineraries; // Legacy itineraries for SD / PostRA.
1084 }
1085
1086 // Swift predicates.
1087 def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
1088
1089 // Swift resource mapping.
1090 let SchedModel = SwiftModel in {
1091   // Processor resources. P0 and P1 name SwiftUnitP01 as their Super resource.
1092   def SwiftUnitP01 : ProcResource<2>; // ALU unit (2 units).
1093   def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit.
1094   def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit.
1095   def SwiftUnitP2 : ProcResource<1>; // LS unit.
1096   def SwiftUnitDiv : ProcResource<1>; // NOTE(review): presumably divide unit.
1097
1098   // Generic resource requirements.
1099   def SwiftWriteP0OneCycle : SchedWriteRes<[SwiftUnitP0]>;
1100   def SwiftWriteP0TwoCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 2; }
1101   def SwiftWriteP0FourCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 4; }
1102   def SwiftWriteP0SixCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 6; }
1103   def SwiftWriteP0P1FourCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
1104     let Latency = 4;
1105   }
1106   def SwiftWriteP0P1SixCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
1107     let Latency = 6;
1108   }
1109   def SwiftWriteP01OneCycle : SchedWriteRes<[SwiftUnitP01]>;
1110   def SwiftWriteP1TwoCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 2; }
1111   def SwiftWriteP1FourCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 4; }
1112   def SwiftWriteP1SixCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 6; }
1113   def SwiftWriteP1EightCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 8; }
1114   def SwiftWriteP1TwelveCyc : SchedWriteRes<[SwiftUnitP1]> { let Latency = 12; }
1115   def SwiftWriteP01OneCycle2x : WriteSequence<[SwiftWriteP01OneCycle], 2>;
1116   def SwiftWriteP01OneCycle3x : WriteSequence<[SwiftWriteP01OneCycle], 3>;
1117   def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
1118   def SwiftWriteP01ThreeCycleTwoUops : SchedWriteRes<[SwiftUnitP01,
1119                                                       SwiftUnitP01]> {
1120     let Latency = 3;
1121     let NumMicroOps = 2;
1122   }
1123   def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
1124     let Latency = 3; // Selected by SwiftWriteALUSsr when predicated.
1125     let NumMicroOps = 3;
1126     let ResourceCycles = [3]; // SwiftUnitP0 is consumed for 3 cycles.
1127   }
1128   // Plain load without writeback.
1129   def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> {
1130     let Latency = 3;
1131   }
1132   def SwiftWriteP2FourCycle : SchedWriteRes<[SwiftUnitP2]> {
1133     let Latency = 4;
1134   }
1135   // A store does not write to a register.
1136   def SwiftWriteP2 : SchedWriteRes<[SwiftUnitP2]> {
1137     let Latency = 0;
1138   }
1139   foreach Num = 1-4 in {
1140     def SwiftWrite#Num#xP2 : WriteSequence<[SwiftWriteP2], Num>;
1141   }
1142   def SwiftWriteP01OneCycle2x_load : WriteSequence<[SwiftWriteP01OneCycle,
1143                                                     SwiftWriteP01OneCycle,
1144                                                     SwiftWriteP2ThreeCycle]>;
1145   // 4.2.4 Arithmetic and Logical.
1146   // ALU operation register shifted by immediate variant.
1147   def SwiftWriteALUsi : SchedWriteVariant<[
1148     // lsl #2, lsl #1, or lsr #1.
1149     SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
1150     SchedVar<NoSchedPred,             [WriteALU]>
1151   ]>;
1152   def SwiftWriteALUsr : SchedWriteVariant<[
1153     SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
1154     SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
1155   ]>;
1156   def SwiftWriteALUSsr : SchedWriteVariant<[
1157     SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
1158     SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
1159   ]>;
1160   def SwiftReadAdvanceALUsr : SchedReadVariant<[
1161     SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>,
1162     SchedVar<NoSchedPred,      [NoReadAdvance]>
1163   ]>;
1164   // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
1165   // AND,BIC,EOR,ORN,ORR
1166   // CLZ,RBIT,REV,REV16,REVSH,PKH
1167   def : WriteRes<WriteALU, [SwiftUnitP01]>;
1168   def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
1169   def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
1170   def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
1171   def : ReadAdvance<ReadALU, 0>;
1172   def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
1173
1174
1175   def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
1176     SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01OneCycle]>,
1177     SchedVar<NoSchedPred,             [SwiftWriteP01TwoCycle]>
1178   ]>;
1179
1180   // 4.2.5 Integer comparison
1181   def : WriteRes<WriteCMP, [SwiftUnitP01]>;
1182   def : SchedAlias<WriteCMPsi, SwiftChooseShiftKindP01OneOrTwoCycle>;
1183   def : SchedAlias<WriteCMPsr, SwiftWriteP01TwoCycle>;
1184
1185   // 4.2.6 Shift, Move
1186   // Shift
1187   //  ASR,LSL,ROR,RRX
1188   //  MOV(register-shiftedregister)  MVN(register-shiftedregister)
1189   // Move
1190   //  MOV,MVN
1191   //  MOVT
1192   // Sign/Zero extension
1193   def : InstRW<[SwiftWriteP01OneCycle],
1194                (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
1195                           "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH",
1196                           "t2UXTB16")>;
1197   // Pseudo instructions.
1198   def : InstRW<[SwiftWriteP01OneCycle2x],
1199         (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi32imm",
1200                    "t2MOVi32imm", "t2MOV_ga_dyn")>;
1201   def : InstRW<[SwiftWriteP01OneCycle3x],
1202         (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel", "t2MOVi16_ga_pcrel")>;
1203   def : InstRW<[SwiftWriteP01OneCycle2x_load],
1204         (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
1205
1206   def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
1207
1208   def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
1209     SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>,
1210     SchedVar<NoSchedPred,     [ SwiftWriteP0OneCycle ]>
1211   ]>;
1212
1213   // 4.2.7 Select
1214   // SEL
1215   def : InstRW<[SwiftPredP0OneOrTwoCycle], (instregex "SEL", "t2SEL")>;
1216
1217   // 4.2.8 Bitfield
1218   // BFI,BFC, SBFX,UBFX
1219   def : InstRW< [SwiftWriteP01TwoCycle],
1220         (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
1221         "(t|t2)UBFX", "(t|t2)SBFX")>;
1222
1223   // 4.2.9 Saturating arithmetic
1224   def : InstRW< [SwiftWriteP01TwoCycle],
1225         (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
1226         "USAT16", "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
1227         "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
1228         "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
1229         "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
1230         "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;
1231
1232   // 4.2.10 Parallel Arithmetic
1233   // Not flag setting. NOTE(review): this label and the "Flag setting." label below look swapped -- architecturally the SADD8/UADD8 family updates APSR.GE while the halving/extend-add ops do not; confirm against the Swift documentation.
1234   def : InstRW< [SwiftWriteALUsr],
1235         (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
1236         "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
1237         "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
1238         "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
1239   // Flag setting. (Halving add/sub and extend-and-add opcodes; all use SwiftWriteP01TwoCycle.)
1240   def : InstRW< [SwiftWriteP01TwoCycle],
1241        (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
1242        "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
1243        "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
1244        "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
1245        "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
1246        "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
1247
1248   // 4.2.11 Sum of Absolute Difference
1249   def : InstRW< [SwiftWriteP0P1FourCycle], (instregex "USAD8") >;
1250   def : InstRW<[SwiftWriteP0P1FourCycle, ReadALU, ReadALU, SchedReadAdvance<2>], // Third read is advanced by 2 cycles (presumably the accumulator -- confirm).
1251         (instregex "USADA8")>;
1252
1253   // 4.2.12 Integer Multiply (32-bit result)
1254   // Two sources: all opcode patterns below use SwiftWriteP0FourCycle.
1255   def : InstRW< [SwiftWriteP0FourCycle],
1256         (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
1257         "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
1258         "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
1259         "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
1260
1261   def SwiftWriteP0P01FiveCycleTwoUops : // Latency-5 write that uses SwiftUnitP0 and SwiftUnitP01.
1262       SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]>  {
1263     let Latency = 5;
1264   } // NOTE(review): the name says "TwoUops" but NumMicroOps is not set here -- confirm the default is intended.
1265
1266   def SwiftPredP0P01FourFiveCycle : SchedWriteVariant<[ // Multiply write resolved per instruction by predicate.
1267     SchedVar<IsPredicatedPred, [ SwiftWriteP0P01FiveCycleTwoUops ]>, // Used when IsPredicatedPred holds: latency 5.
1268     SchedVar<NoSchedPred,      [ SwiftWriteP0FourCycle ]> // Default entry (NoSchedPred).
1269   ]>;
1270
1271   def SwiftReadAdvanceFourCyclesPred : SchedReadVariant<[ // Read resolved per instruction by predicate.
1272      SchedVar<IsPredicatedPred, [SchedReadAdvance<4>]>, // Used when IsPredicatedPred holds: read advanced by 4 cycles.
1273      SchedVar<NoSchedPred,      [ReadALU]> // Default entry: plain ReadALU.
1274   ]>;
1275
1276   // Multiply accumulate, three sources: two ReadALU reads, then the predicate-dependent read.
1277   def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
1278                  SwiftReadAdvanceFourCyclesPred],
1279         (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
1280         "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS",
1281         "t2SMMLSR")>;
1282
1283   // 4.2.13 Integer Multiply (32-bit result, Q flag)
1284   def : InstRW< [SwiftWriteP0FourCycle],
1285         (instregex "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX")>;
1286   def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
1287                  SwiftReadAdvanceFourCyclesPred],
1288         (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
1289         "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
1290         "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT")>;
1291   def : InstRW< [SwiftPredP0P01FourFiveCycle], // No explicit reads are listed for these opcodes.
1292         (instregex "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX")>;
1293
1294   def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> { // Latency-5 write of 3 micro-ops.
1295     let Latency = 5;
1296     let NumMicroOps = 3;
1297     let ResourceCycles = [2, 1]; // 2 cycles on SwiftUnitP0, 1 cycle on SwiftUnitP01.
1298   }
1299   def SwiftWrite1Cycle : SchedWriteRes<[]> { // Latency-only write: no resources, zero micro-ops.
1300     let Latency = 1;
1301     let NumMicroOps = 0;
1302   }
1303   def SwiftWrite5Cycle : SchedWriteRes<[]> { // Latency-only write: no resources, zero micro-ops.
1304     let Latency = 5;
1305     let NumMicroOps = 0;
1306   }
1307   def SwiftWrite6Cycle : SchedWriteRes<[]> { // Latency-only write: no resources, zero micro-ops.
1308     let Latency = 6;
1309     let NumMicroOps = 0;
1310   }
1311
1312   // 4.2.14 Integer Multiply, Long
1311
1312   // 4.2.14 Integer Multiply, Long
1313   def : InstRW< [SwiftP0P0P01FiveCycle, SwiftWrite5Cycle], // First write carries the resources; second is latency-only.
1314         (instregex "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$")>;
1315
1316   def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> { // Write of 5 micro-ops on SwiftUnitP0 and SwiftUnitP01.
1317     let Latency = 7; // NOTE(review): the def name says "FiveCycle" but the latency is 7 -- confirm which is intended.
1318     let NumMicroOps = 5;
1319     let ResourceCycles = [2, 3]; // 2 cycles on SwiftUnitP0, 3 cycles on SwiftUnitP01.
1320   }
1321
1322   // 4.2.15 Integer Multiply Accumulate, Long
1323   // 4.2.16 Integer Multiply Accumulate, Dual
1324   // 4.2.17 Integer Multiply Accumulate Accumulate, Long
1325   // We are being a bit inaccurate here: one write/read list is shared by all three sections.
1326   def : InstRW< [SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU,
1327                  SchedReadAdvance<4>, SchedReadAdvance<3>], // Last two reads are advanced by 4 and 3 cycles.
1328         (instregex "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT", // Halfword forms use the full mnemonic (SMLALxy).
1329         "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
1330         "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB", "t2SMLALBT",
1331         "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX",
1332         "t2UMAAL")>;
1333
1334   def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> { // Integer divide write: one micro-op, latency 14.
1335     let NumMicroOps = 1;
1336     let Latency = 14;
1337     let ResourceCycles = [1, 14]; // 1 cycle on SwiftUnitP0, 14 cycles on SwiftUnitDiv.
1338   }
1339   // 4.2.18 Integer Divide
1340   def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
1341   def : InstRW <[SwiftDiv], // The divide opcodes below are given SwiftDiv explicitly.
1342         (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
1343
1344   // 4.2.19 Integer Load Single Element
1345   // 4.2.20 Integer Load Signextended
1346   def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { // Latency 3 on P2 + P01.
1347     let Latency = 3;
1348   }
1349   def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { // Latency 4 on P2 + P01 (name spelled "Cyle"; referenced as such below).
1350     let Latency = 4;
1351   }
1352   def SwiftWriteP2P01P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01, // Latency 4 on P2 + two P01 entries.
1353                                                    SwiftUnitP01]> {
1354     let Latency = 4;
1355   }
1356   def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> { // Latency 3 on two P2 entries.
1357     let Latency = 3;
1358   }
1359   def SwiftWriteP2P2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2, // Latency 3 on two P2 entries + P01.
1360                                                     SwiftUnitP01]> {
1361     let Latency = 3;
1362   }
1363   def SwiftWrBackOne : SchedWriteRes<[]> { // Latency-only write (1 cycle): no resources, zero micro-ops.
1364     let Latency = 1;
1365     let NumMicroOps = 0;
1366   }
1367   def SwiftWriteLdFour : SchedWriteRes<[]> { // Latency-only write (4 cycles): no resources, zero micro-ops.
1368     let Latency = 4;
1369     let NumMicroOps = 0;
1370   }
1371    // Not accurate. Plain loads use SwiftWriteP2ThreeCycle; sign-extending and writeback forms use the P2+P01 writes.
1372   def : InstRW<[SwiftWriteP2ThreeCycle],
1373         (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
1374         "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "tLDR[BH](r|i|spi|pci|pciASM)",
1375         "tLDR(r|i|spi|pci|pciASM)")>;
1376   def : InstRW<[SwiftWriteP2ThreeCycle],
1377         (instregex "LDRH$",  "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>;
1378   def : InstRW<[SwiftWriteP2P01FourCyle],
1379         (instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
1380         "t2LDRpci_pic", "tLDRS(B|H)")>;
1381   def : InstRW<[SwiftWriteP2P01ThreeCycle,  SwiftWrBackOne], // Second write is the latency-only SwiftWrBackOne.
1382         (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
1383         "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
1384         "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T")>;
1385   def : InstRW<[SwiftWriteP2P01P01FourCycle, SwiftWrBackOne], // Second write is the latency-only SwiftWrBackOne.
1386         (instregex "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
1387         "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T")>;
1388
1389   // 4.2.21 Integer Dual Load
1390   // Not accurate. Two writes are listed per load; the _PRE/_POST forms add SwiftWrBackOne as a third.
1391   def : InstRW<[SwiftWriteP2P2ThreeCycle, SwiftWriteLdFour],
1392         (instregex "t2LDRDi8", "LDRD$")>;
1393   def : InstRW<[SwiftWriteP2P2P01ThreeCycle, SwiftWriteLdFour, SwiftWrBackOne],
1394         (instregex "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
1395
1396   // 4.2.22 Integer Load, Multiple
1397   // NumReg = 1 .. 16
1398   foreach Lat = 3-25 in { // Defines SwiftWriteLM<Lat>Cy and SwiftWriteLM<Lat>CyNo for each latency 3..25.
1399     def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> { // Uses SwiftUnitP2.
1400       let Latency = Lat;
1401     }
1402     def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> { let Latency = Lat; let NumMicroOps = 0; } // Latency-only: no resources, zero micro-ops, like the other resource-less writes in this file.
1403   }
1404   // Predicate: SwiftLMAddr<N>Pred holds when TII->getNumLDMAddresses(MI) == N, for N = 1..16.
1405   foreach NumAddr = 1-16 in {
1406     def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
1407   }
1408   def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> { let Latency = 0; } // Address write for forms without writeback: SwiftUnitP01, latency 0.
1409   def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>; // Address write for writeback forms: two SwiftUnitP01 entries, default latency.
1410   def SwiftWriteLM : SchedWriteVariant<[ // Variadic load-multiple write: the entry for N addresses lists N writes with latencies 3 .. N+2.
1411     SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy]>,
1412     SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1413                                 SwiftWriteLM5Cy]>,
1414     SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1415                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy]>,
1416     SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1417                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1418                                 SwiftWriteLM7Cy]>,
1419     SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1420                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1421                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy]>,
1422     SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1423                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1424                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1425                                 SwiftWriteLM9Cy]>,
1426     SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1427                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1428                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1429                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy]>,
1430     SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1431                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1432                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1433                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1434                                 SwiftWriteLM11Cy]>,
1435     SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1436                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1437                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1438                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1439                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy]>,
1440     SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1441                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1442                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1443                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1444                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1445                                 SwiftWriteLM13Cy]>,
1446     SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1447                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1448                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1449                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1450                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1451                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy]>,
1452     SchedVar<SwiftLMAddr13Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1453                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1454                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1455                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1456                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1457                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1458                                 SwiftWriteLM15Cy]>,
1459     SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1460                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1461                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1462                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1463                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1464                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1465                                 SwiftWriteLM15Cy, SwiftWriteLM16Cy]>,
1466     SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1467                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1468                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1469                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1470                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1471                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1472                                 SwiftWriteLM15Cy, SwiftWriteLM16Cy,
1473                                 SwiftWriteLM17Cy]>,
1474     SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1475                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1476                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1477                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1478                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1479                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1480                                 SwiftWriteLM15Cy, SwiftWriteLM16Cy,
1481                                 SwiftWriteLM17Cy, SwiftWriteLM18Cy]>,
1482     // Unknown number of registers, just use resources for two registers: only the first two writes use SwiftUnitP2, the rest are resource-less CyNo writes.
1483     SchedVar<NoSchedPred,      [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1484                                 SwiftWriteLM5CyNo, SwiftWriteLM6CyNo,
1485                                 SwiftWriteLM7CyNo, SwiftWriteLM8CyNo,
1486                                 SwiftWriteLM9CyNo, SwiftWriteLM10CyNo,
1487                                 SwiftWriteLM11CyNo, SwiftWriteLM12CyNo,
1488                                 SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
1489                                 SwiftWriteLM15CyNo, SwiftWriteLM16CyNo,
1490                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo]>
1491
1492   ]> { let Variadic=1; }
1493
1494   def : InstRW<[SwiftWriteLM, SwiftWriteLDMAddrNoWB], // Forms without _UPD: variadic write first, then the address write.
1495         (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
1496         "(t|sys)LDM(IA|DA|DB|IB)$")>;
1497   def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM], // _UPD forms: address write first, then the variadic write.
1498         (instregex /*"t2LDMIA_RET", "tLDMIA_RET", "LDMIA_RET",*/
1499         "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
1500   def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM, SwiftWriteP1TwoCycle], // Return/pop forms add SwiftWriteP1TwoCycle.
1501         (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
1502   // 4.2.23 Integer Store, Single Element: plain forms use SwiftWriteP2 only.
1503   def : InstRW<[SwiftWriteP2],
1504         (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX",
1505         "t2STR(i12|i8|s)$", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
1506
1507   def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2], // Pre/post-indexed and T forms: SwiftWriteP01OneCycle listed before SwiftWriteP2.
1508         (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
1509         "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
1510         "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
1511         "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
1512
1513   // 4.2.24 Integer Store, Dual: two SwiftWriteP2 writes plus SwiftWriteP01OneCycle; _POST/_PRE forms list one more SwiftWriteP01OneCycle first.
1514   def : InstRW<[SwiftWriteP2, SwiftWriteP2, SwiftWriteP01OneCycle],
1515         (instregex "STRD$", "t2STRDi8")>;
1516   def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2, SwiftWriteP2,
1517                 SwiftWriteP01OneCycle],
1518         (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
1519
1520   // 4.2.25 Integer Store, Multiple
1521   def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { // Per-address store step: SwiftUnitP2 + SwiftUnitP01, latency 0.
1522     let Latency = 0;
1523   }
1524   foreach NumAddr = 1-16 in { // SwiftWriteSTM<N> = SwiftWriteStIncAddr repeated N times, for N = 1..16.
1525      def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>;
1526   }
1527   def SwiftWriteSTM : SchedWriteVariant<[ // Store-multiple write: SwiftLMAddr<N>Pred selects SwiftWriteSTM<N> for N = 2..16.
1528     SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM2]>,
1529     SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM3]>,
1530     SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM4]>,
1531     SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM5]>,
1532     SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM6]>,
1533     SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM7]>,
1534     SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM8]>,
1535     SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM9]>,
1536     SchedVar<SwiftLMAddr10Pred,[SwiftWriteSTM10]>,
1537     SchedVar<SwiftLMAddr11Pred,[SwiftWriteSTM11]>,
1538     SchedVar<SwiftLMAddr12Pred,[SwiftWriteSTM12]>,
1539     SchedVar<SwiftLMAddr13Pred,[SwiftWriteSTM13]>,
1540     SchedVar<SwiftLMAddr14Pred,[SwiftWriteSTM14]>,
1541     SchedVar<SwiftLMAddr15Pred,[SwiftWriteSTM15]>,
1542     SchedVar<SwiftLMAddr16Pred,[SwiftWriteSTM16]>,
1543     // Unknown number of registers, just use resources for two registers.
1544     SchedVar<NoSchedPred,      [SwiftWriteSTM2]>
1545   ]>;
1546   def : InstRW<[SwiftWriteSTM], // Forms without _UPD.
1547         (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
1548   def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteSTM], // _UPD and push forms: SwiftWriteP01OneCycle listed first.
1549         (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
1550         "PUSH", "tPUSH")>;
1551
1552   // 4.2.26 Branch: all branch writes use SwiftUnitP1; WriteBrTbl also uses SwiftUnitP2.
1553   def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; }
1554   def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; }
1555   def : WriteRes<WriteBrTbl, [SwiftUnitP1, SwiftUnitP2]> { let Latency = 0; }
1556
1557   // 4.2.27 Not issued: WriteNoop has no resources, zero latency and zero micro-ops.
1558   def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
1559   def : InstRW<[WriteNoop], (instregex "t2IT", "IT", "NOP")>;
1560
1561   // 4.2.28 Advanced SIMD, Integer, 2 cycle
1562   def : InstRW<[SwiftWriteP0TwoCycle],
1563         (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
1564                    "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
1565                    "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
1566                    "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL", "VQSHLU", "VBIF",
1567                    "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
1568
1569   def : InstRW<[SwiftWriteP1TwoCycle], // Extract/reverse opcodes use the P1 two-cycle write instead.
1570         (instregex "VEXT", "VREV16", "VREV32", "VREV64")>;
1571
1572   // 4.2.29 Advanced SIMD, Integer, 4 cycle
1573   // 4.2.30 Advanced SIMD, Integer with Accumulate
1574   def : InstRW<[SwiftWriteP0FourCycle],
1575         (instregex "VABA", "VABAL", "VPADAL", "VRSRA", "VSRA", "VACGE", "VACGT",
1576         "VACLE", "VACLT", "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT", "VRSHL",
1577         "VQRSHL", "VRSHR(u|s)", "VABS(f|v)", "VQABS", "VQNEG", "VQADD",
1578         "VQSUB")>;
1579   def : InstRW<[SwiftWriteP1FourCycle], // Estimate opcodes use the P1 four-cycle write.
1580         (instregex "VRECPE", "VRSQRTE")>;
1581
1582   // 4.2.31 Advanced SIMD, Add and Shift with Narrow
1583   def : InstRW<[SwiftWriteP0P1FourCycle],
1584         (instregex "VADDHN", "VSUBHN", "VSHRN")>;
1585   def : InstRW<[SwiftWriteP0P1SixCycle], // Rounding and saturating narrow forms use the six-cycle write.
1586         (instregex "VRADDHN", "VRSUBHN", "VRSHRN", "VQSHRN", "VQSHRUN",
1587                    "VQRSHRN", "VQRSHRUN")>;
1588
1589   // 4.2.32 Advanced SIMD, Vector Table Lookup
1590   foreach Num = 1-4 in { // SwiftWrite<N>xP1TwoCycle = SwiftWriteP1TwoCycle repeated N times, for N = 1..4.
1591     def SwiftWrite#Num#xP1TwoCycle : WriteSequence<[SwiftWriteP1TwoCycle], Num>;
1592   }
1593   def : InstRW<[SwiftWrite1xP1TwoCycle], // VTBL<N>/VTBX<N> use the N-times sequence.
1594         (instregex "VTB(L|X)1")>;
1595   def : InstRW<[SwiftWrite2xP1TwoCycle],
1596         (instregex "VTB(L|X)2")>;
1597   def : InstRW<[SwiftWrite3xP1TwoCycle],
1598         (instregex "VTB(L|X)3")>;
1599   def : InstRW<[SwiftWrite4xP1TwoCycle],
1600         (instregex "VTB(L|X)4")>;
1601
1602   // 4.2.33 Advanced SIMD, Transpose: two four-cycle P1 writes, a third P1 write marked RsrcOnly, and a read advanced by 2 cycles.
1603   def : InstRW<[SwiftWriteP1FourCycle, SwiftWriteP1FourCycle,
1604                 SwiftWriteP1TwoCycle/*RsrcOnly*/, SchedReadAdvance<2>],
1605         (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
1606
1607   // 4.2.34 Advanced SIMD and VFP, Floating Point
1608   def : InstRW<[SwiftWriteP0TwoCycle], (instregex "VABS(S|D)$", "VNEG(S|D)$")>;
1609   def : InstRW<[SwiftWriteP0FourCycle],
1610         (instregex "VCMP(D|S|ZD|ZS)$", "VCMPE(D|S|ZD|ZS)")>;
1611   def : InstRW<[SwiftWriteP0FourCycle],
1612         (instregex "VADD(S|f)", "VSUB(S|f)", "VABD", "VPADDf", "VMAX", "VMIN", "VPMAX",
1613                    "VPMIN")>;
1614   def : InstRW<[SwiftWriteP0SixCycle], (instregex "VADDD$", "VSUBD$")>; // The D-suffixed add/sub opcodes get the six-cycle write.
1615   def : InstRW<[SwiftWriteP1EightCycle], (instregex "VRECPS", "VRSQRTS")>;
1616
1617   // 4.2.35 Advanced SIMD and VFP, Multiply: all on P1 writes, four to twelve cycles depending on the opcode.
1618   def : InstRW<[SwiftWriteP1FourCycle],
1619         (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
1620                    "VMULL", "VQDMULL")>;
1621   def : InstRW<[SwiftWriteP1SixCycle],
1622         (instregex "VMULD", "VNMULD")>;
1623   def : InstRW<[SwiftWriteP1FourCycle],
1624         (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
1625         "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
1626   def : InstRW<[SwiftWriteP1EightCycle], (instregex "VFMAfd", "VFMSfd")>;
1627   def : InstRW<[SwiftWriteP1TwelveCyc], (instregex "VFMAfq", "VFMSfq")>;
1628
1629   // 4.2.36 Advanced SIMD and VFP, Convert
1630   def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
1631   // Fixpoint conversions: WriteCvtFP uses SwiftUnitP1 with latency 4.
1632   def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
1633
1634   // 4.2.37 Advanced SIMD and VFP, Move: several opcodes below chain two or three writes with WriteSequence.
1635   def : InstRW<[SwiftWriteP0TwoCycle],
1636         (instregex "VMOVv", "VMOV(S|D)$", "VMOV(S|D)cc",
1637                    "VMVNv", "VMVN(d|q)", "VMVN(S|D)cc",
1638                    "FCONST(D|S)")>;
1639   def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VMOVN", "VMOVL")>;
1640   def : InstRW<[WriteSequence<[SwiftWriteP0FourCycle, SwiftWriteP1TwoCycle]>],
1641         (instregex "VQMOVN")>;
1642   def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VDUPLN", "VDUPf")>;
1643   def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>],
1644         (instregex "VDUP(8|16|32)")>;
1645   def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "VMOVRS$")>;
1646   def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP0TwoCycle]>],
1647         (instregex "VMOVSR$", "VSETLN")>;
1648   def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2FourCycle], // Two writes are listed for these opcodes.
1649         (instregex "VMOVRR(D|S)$")>;
1650   def : InstRW<[SwiftWriteP2FourCycle], (instregex "VMOVDRR$")>;
1651   def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>,
1652                 WriteSequence<[SwiftWrite1Cycle, SwiftWriteP2FourCycle, // Second sequence starts with the latency-only SwiftWrite1Cycle.
1653                                SwiftWriteP1TwoCycle]>],
1654                 (instregex "VMOVSRR$")>;
1655   def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle]>],
1656         (instregex "VGETLN(u|i)")>;
1657   def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle, // The "s" form appends SwiftWriteP01OneCycle.
1658                                SwiftWriteP01OneCycle]>],
1659         (instregex "VGETLNs")>;
1660
1661   // 4.2.38 Advanced SIMD and VFP, Move FPSCR
1662   // Serializing instructions: each write below holds one unit (P0, P1 or P2) for 15 cycles.
1663   def SwiftWaitP0For15Cy : SchedWriteRes<[SwiftUnitP0]> {
1664     let Latency = 15;
1665     let ResourceCycles = [15];
1666   }
1667   def SwiftWaitP1For15Cy : SchedWriteRes<[SwiftUnitP1]> {
1668     let Latency = 15;
1669     let ResourceCycles = [15];
1670   }
1671   def SwiftWaitP2For15Cy : SchedWriteRes<[SwiftUnitP2]> {
1672     let Latency = 15;
1673     let ResourceCycles = [15];
1674   }
1675   def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy], // All three wait writes are listed.
1676         (instregex "VMRS")>;
1677   def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy], // All three wait writes are listed.
1678         (instregex "VMSR")>;
1679   // Not serializing.
1680   def : InstRW<[SwiftWriteP0TwoCycle], (instregex "FMSTAT")>;
1681
1682   // 4.2.39 Advanced SIMD and VFP, Load Single Element
1683   def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDRD$", "VLDRS$")>;
1684
1685   // 4.2.40 Advanced SIMD and VFP, Store Single Element
1686   def : InstRW<[SwiftWriteLM4Cy], (instregex "VSTRD$", "VSTRS$")>; // NOTE(review): stores reuse the load write SwiftWriteLM4Cy -- confirm this is intended.
1687
1688   // 4.2.41 Advanced SIMD and VFP, Load Multiple
1689   // 4.2.42 Advanced SIMD and VFP, Store Multiple
1690
1691   // Resource requirement for permuting, just reserves the resources: SwiftVLDMPerm<N> holds SwiftUnitP1 for N cycles as N micro-ops, latency 0.
1692   foreach Num = 1-28 in {
1693     def SwiftVLDMPerm#Num : SchedWriteRes<[SwiftUnitP1]> {
1694       let Latency = 0;
1695       let NumMicroOps = Num;
1696       let ResourceCycles = [Num];
1697     }
1698   }
1699
1700   // Pre RA pseudos - load/store to a Q register as a D register pair.
1701   def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDMQIA$", "VSTMQIA$")>;
1702
1703   // Post RA not modelled accurately. We assume that register use of width 64
1704   // bit maps to a D register, 128 maps to a Q register. Not all different kinds
1705   // are accurately represented.
1706   def SwiftWriteVLDM : SchedWriteVariant<[ // Variadic VLDM write selected by the SwiftLMAddr<N>Pred predicates.
1707     // Load of one S register.
1708     SchedVar<SwiftLMAddr1Pred, [SwiftWriteLM4Cy]>,
1709     // Load of one D register.
1710     SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo]>,
1711     // Load of 3 S register.
1712     SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1713                                 SwiftWriteLM13CyNo, SwiftWriteP01OneCycle,
1714                                 SwiftVLDMPerm3]>,
1715     // Load of a Q register (not necessarily true). We should not be mapping to
1716     // 4 S registers, either.
1717     SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo,
1718                                 SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>,
1719     // Load of 5 S registers.
1720     SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1721                                 SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
1722                                 SwiftWriteLM17CyNo,  SwiftWriteP01OneCycle,
1723                                 SwiftVLDMPerm5]>,
1724     // Load of 3 D registers. (Must also be able to handle s register list -
1725     // though, not accurate)
1726     SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1727                                 SwiftWriteLM10Cy, SwiftWriteLM14CyNo,
1728                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
1729                                 SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
1730     // Load of 7 S registers.
1731     SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1732                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
1733                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
1734                                 SwiftWriteLM21CyNo, SwiftWriteP01OneCycle,
1735                                 SwiftVLDMPerm7]>,
1736     // Load of two Q registers.
1737     SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1738                                 SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
1739                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1740                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1741                                 SwiftWriteP01OneCycle,  SwiftVLDMPerm2]>,
1742     // Load of 9 S registers.
1743     SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1744                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
1745                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
1746                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1747                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
1748                                 SwiftVLDMPerm9]>,
1749     // Load of 5 D registers.
1750     SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1751                                 SwiftWriteLM10Cy, SwiftWriteLM14Cy,
1752                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
1753                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
1754                                 SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
1755                                 SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
1756     // Inaccurate: reuse description from 9 S registers.
1757     SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1758                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
1759                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
1760                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1761                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1762                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
1763                                 SwiftVLDMPerm9]>,
1764     // Load of three Q registers.
1765     SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1766                                 SwiftWriteLM11Cy, SwiftWriteLM11Cy,
1767                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
1768                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
1769                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
1770                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
1771                                 SwiftWriteP01OneCycle, SwiftVLDMPerm3]>,
1772     // Inaccurate: reuse description from 9 S registers.
1773     SchedVar<SwiftLMAddr13Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1774                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
1775                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
1776                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1777                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1778                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1779                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
1780                                 SwiftVLDMPerm9]>,
1781     // Load of 7 D registers inaccurate.
1782     SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1783                                 SwiftWriteLM10Cy, SwiftWriteLM14Cy,
1784                                 SwiftWriteLM14Cy, SwiftWriteLM14CyNo,
1785                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
1786                                 SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
1787                                 SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
1788                                 SwiftWriteP01OneCycle, SwiftVLDMPerm7]>,
1789     SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy, // 15 addresses: no dedicated description comment in the original.
1790                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1791                                 SwiftWriteLM17Cy, SwiftWriteLM18CyNo,
1792                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1793                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1794                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1795                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1796                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
1797                                 SwiftVLDMPerm9]>,
1798     // Load of 4 Q registers.
1799     SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM7Cy, SwiftWriteLM10Cy,
1800                                 SwiftWriteLM11Cy, SwiftWriteLM14Cy,
1801                                 SwiftWriteLM15Cy, SwiftWriteLM18CyNo,
1802                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1803                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1804                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1805                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1806                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1807                                 SwiftWriteP01OneCycle, SwiftVLDMPerm4]>,
1808     // Unknown number of registers, just use resources for two registers.
1809     SchedVar<NoSchedPred,      [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1810                                 SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
1811                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1812                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1813                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1814                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1815                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1816                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1817                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1818                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1819                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1820                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1821                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1822                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1823                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1824                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1825                                 SwiftWriteP01OneCycle,  SwiftVLDMPerm2]>
1826   ]> { let Variadic = 1; }
1827
1828   def : InstRW<[SwiftWriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
1829   // Writeback (_UPD) forms list SwiftWriteP01OneCycle2x before the VLDM write.
1830   def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVLDM],
1831         (instregex "VLDM[SD](IA|DB)_UPD$")>;
1832
1833   def SwiftWriteVSTM : SchedWriteVariant<[  // VSTM write picked by predicate.
1834     // One S register.
1835     SchedVar<SwiftLMAddr1Pred, [SwiftWriteSTM1]>,
1836     // One D register.
1837     SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM1]>,
1838     // Three S registers.
1839     SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM4]>,
1840     // Assume one Q register.
1841     SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM1]>,
1842     SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM6]>,
1843     // Assume three D registers.
1844     SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM4]>,
1845     SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM8]>,
1846     // Assume two Q registers.
1847     SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM3]>,
1848     SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM10]>,
1849     // Assume five D registers.
1850     SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>,
1851     SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>,
1852     // Assume three Q registers.
1853     SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>,
1854     SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>,
1855     // Assume seven D registers.
1856     SchedVar<SwiftLMAddr14Pred, [SwiftWriteSTM8]>,
1857     SchedVar<SwiftLMAddr15Pred, [SwiftWriteSTM16]>,
1858     // Assume four Q registers.
1859     SchedVar<SwiftLMAddr16Pred, [SwiftWriteSTM5]>,
1860     // Fallback (NoSchedPred): assume two Q registers.
1861     SchedVar<NoSchedPred, [SwiftWriteSTM3]>
1862   ]> { let Variadic = 1; }
1863
1864   def : InstRW<[SwiftWriteVSTM], (instregex "VSTM[SD](IA|DB)$")>;
1865   // Writeback forms. NOTE(review): no trailing "$" here, unlike VLDM's _UPD regex.
1866   def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVSTM],
1867         (instregex "VSTM[SD](IA|DB)_UPD")>;
1868
1869   // 4.2.43 Advanced SIMD, Element or Structure Load and Store
1870   def SwiftWrite2xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
1871       let ResourceCycles = [2];  // SwiftUnitP2 is held for two cycles.
1872       let Latency = 4;           // Result latency: four cycles.
1873   }
1874   def SwiftWrite3xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
1875       let ResourceCycles = [3];  // SwiftUnitP2 is held for three cycles.
1876       let Latency = 4;           // Result latency: four cycles.
1877   }
1878   foreach Num = 1-2 in {  // Defines SwiftExt1xP0 and SwiftExt2xP0.
1879     def SwiftExt#Num#xP0 : SchedWriteRes<[SwiftUnitP0]> {
1880       let NumMicroOps = Num;      // One micro-op per Num.
1881       let ResourceCycles = [Num]; // SwiftUnitP0 is held for Num cycles.
1882       let Latency = 0;
1883     }
1884   }
1885   // VLDx
1886   // Multiple structures.
1887   // Single element structure loads (VLD1).
1888   // We assume aligned accesses.
1889   // One/two registers; the "wb" forms also list SwiftWriteP01OneCycle.
1890   def : InstRW<[SwiftWriteLM4Cy], (instregex "VLD1(d|q)(8|16|32|64)$")>;
1891   def : InstRW<[SwiftWriteLM4Cy, SwiftWriteP01OneCycle],
1892         (instregex "VLD1(d|q)(8|16|32|64)wb")>;
1893   // Three registers.
1894   def : InstRW<[SwiftWrite3xP2FourCy],
1895         (instregex "VLD1(d|q)(8|16|32|64)T$", "VLD1d64TPseudo")>;
1896   def : InstRW<[SwiftWrite3xP2FourCy, SwiftWriteP01OneCycle],
1897         (instregex "VLD1(d|q)(8|16|32|64)Twb")>;
1898   // Four registers. NOTE(review): uses the 2x P2 write, not 4x -- confirm.
1899   def : InstRW<[SwiftWrite2xP2FourCy],
1900         (instregex "VLD1(d|q)(8|16|32|64)Q$", "VLD1d64QPseudo")>;
1901   def : InstRW<[SwiftWrite2xP2FourCy, SwiftWriteP01OneCycle],
1902         (instregex "VLD1(d|q)(8|16|32|64)Qwb")>;
1903   // Two element structure loads (VLD2).
1904   // Two/four registers; "wb"/"PseudoWB" forms add SwiftWriteP01OneCycle.
1905   def : InstRW<[SwiftWriteLM9Cy, SwiftExt2xP0, SwiftVLDMPerm2],
1906         (instregex "VLD2(d|q|b)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
1907   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
1908                 SwiftVLDMPerm2],
1909         (instregex "VLD2(d|q|b)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;
1910   // Three element structure loads (VLD3); pseudo forms list one load write.
1911   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
1912                 SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
1913         (instregex "VLD3(d|q)(8|16|32)$")>;
1914   def : InstRW<[SwiftWriteLM9Cy, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
1915         (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
1916   // Writeback (_UPD) forms add SwiftWriteP01OneCycle after the load writes.
1917   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
1918                 SwiftWriteP01OneCycle, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
1919         (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
1920   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm3,
1921                 SwiftWrite3xP2FourCy],
1922         (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
1923   // Four element structure loads (VLD4); _UPD forms add SwiftWriteP01OneCycle.
1924   def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
1925                 SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4,
1926                 SwiftWrite3xP2FourCy],
1927         (instregex "VLD4(d|q)(8|16|32)$")>;
1928   def : InstRW<[SwiftWriteLM11Cy,  SwiftExt2xP0, SwiftVLDMPerm4,
1929                 SwiftWrite3xP2FourCy],
1930         (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
1931   def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
1932                 SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
1933                 SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
1934         (instregex "VLD4(d|q)(8|16|32)_UPD")>;
1935   def : InstRW<[SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
1936                 SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
1937         (instregex  "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
1938
1939   // Single all/lane loads.
1940   // One element structure (VLD1LN / VLD1DUP).
1941   def : InstRW<[SwiftWriteLM6Cy, SwiftVLDMPerm2],
1942         (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
1943   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm2],
1944         (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
1945                   "VLD1LNq(8|16|32)Pseudo_UPD")>;
1946   // Two element structure (VLD2LN / VLD2DUP).
1947   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftExt1xP0, SwiftVLDMPerm2],
1948         (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
1949                    "VLD2LN(d|q)(8|16|32)Pseudo$")>;
1950   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftWriteP01OneCycle,
1951                 SwiftExt1xP0, SwiftVLDMPerm2],
1952         (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;  // P01 write is third here.
1953   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
1954                 SwiftExt1xP0, SwiftVLDMPerm2],
1955         (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb")>;  // P01 write is second.
1956   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
1957                 SwiftExt1xP0, SwiftVLDMPerm2],
1958         (instregex "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;  // P01 write is second.
1959   // Three element structure (VLD3LN / VLD3DUP).
1960   def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, SwiftExt1xP0,
1961                 SwiftVLDMPerm3],
1962         (instregex "VLD3(DUP|LN)(d|q)(8|16|32)$",
1963                    "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
1964   def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy,
1965                 SwiftWriteP01OneCycle, SwiftExt1xP0, SwiftVLDMPerm3],
1966         (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;  // P01 write is fourth.
1967   def : InstRW<[SwiftWriteLM7Cy, SwiftWriteP01OneCycle, SwiftWriteLM8Cy,
1968                 SwiftWriteLM8Cy, SwiftExt1xP0, SwiftVLDMPerm3],
1969         (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;  // P01 write is second.
1970   // Four element structure (VLD4LN / VLD4DUP).
1971   def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
1972                 SwiftWriteLM10CyNo, SwiftExt1xP0, SwiftVLDMPerm5],
1973         (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$",
1974                    "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
1975   def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
1976                 SwiftWriteLM10CyNo, SwiftWriteP01OneCycle, SwiftExt1xP0,
1977                 SwiftVLDMPerm5],
1978         (instregex "VLD4(DUP|LN)(d|q)(8|16|32)_UPD")>;  // P01 write is fifth.
1979   def : InstRW<[SwiftWriteLM8Cy, SwiftWriteP01OneCycle, SwiftWriteLM9Cy,
1980                 SwiftWriteLM10CyNo, SwiftWriteLM10CyNo, SwiftExt1xP0,
1981                 SwiftVLDMPerm5],
1982         (instregex "VLD4(DUP|LN)(d|q)(8|16|32)Pseudo_UPD")>;  // P01 write is second.
1983   // VSTx
1984   // Multiple structures.
1985   // Single element structure store (VST1); writeback forms lead with a P01 write.
1986   def : InstRW<[SwiftWrite1xP2], (instregex "VST1d(8|16|32|64)$")>;
1987   def : InstRW<[SwiftWrite2xP2], (instregex "VST1q(8|16|32|64)$")>;
1988   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2],
1989         (instregex "VST1d(8|16|32|64)wb")>;
1990   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2],
1991         (instregex "VST1q(8|16|32|64)wb")>;
1992   def : InstRW<[SwiftWrite3xP2],
1993         (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;  // "T" forms.
1994   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite3xP2],
1995         (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
1996   def : InstRW<[SwiftWrite4xP2],
1997         (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;  // "Q" forms.
1998   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2],
1999         (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
2000   // Two element structure store (VST2); writeback forms lead with a P01 write.
2001   def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
2002         (instregex "VST2(d|b)(8|16|32)$")>;
2003   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
2004         (instregex "VST2(b|d)(8|16|32)wb")>;
2005   def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
2006         (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
2007   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
2008         (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
2009   // Three element structure store (VST3); _UPD forms lead with a P01 write.
2010   def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
2011         (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
2012   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
2013         (instregex "VST3(d|q)(8|16|32)_UPD",
2014                    "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
2015   // Four element structure store (VST4). NOTE(review): Perm2 vs. Perm4 below -- confirm.
2016   def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
2017         (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
2018   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm4],
2019         (instregex "VST4(d|q)(8|16|32)_UPD",
2020                    "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
2021   // Single/all lane store.
2022   // One element structure (VST1LN); _UPD forms lead with a P01 write.
2023   def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
2024         (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
2025   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
2026         (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
2027   // Two element structure (VST2LN); _UPD forms lead with a P01 write.
2028   def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm2],
2029         (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
2030   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm2],
2031         (instregex "VST2LN(d|q)(8|16|32)_UPD",
2032                    "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
2033   // Three element structure (VST3LN); _UPD forms lead with a P01 write.
2034   def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
2035         (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
2036   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
2037         (instregex "VST3LN(d|q)(8|16|32)_UPD",
2038                    "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
2039   // Four element structure (VST4LN). NOTE(review): 2x P2 here vs. 4x for VST3LN -- confirm.
2040   def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
2041         (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
2042   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
2043         (instregex "VST4LN(d|q)(8|16|32)_UPD",
2044                    "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
2045
2046   // 4.2.44 VFP, Divide and Square Root
2047   def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
2048     let Latency = 17;
2049     let ResourceCycles = [1, 15];  // 1 cycle on P0, 15 on the divide unit.
2050     let NumMicroOps = 1;
2051   }
2052   def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
2053     let Latency = 32;
2054     let ResourceCycles = [1, 30];  // 1 cycle on P0, 30 on the divide unit.
2055     let NumMicroOps = 1;
2056   }
2057   def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;  // S forms: 17 cycles.
2058   def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;  // D forms: 32 cycles.
2059
2060   // Not specified; opcodes matched by the "ABS" regex use SwiftWriteP01OneCycle2x.
2061   def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
2062   // Preload: WritePreLd maps to SwiftUnitP2 with zero latency and zero cycles.
2063   def : WriteRes<WritePreLd, [SwiftUnitP2]> {
2064     let ResourceCycles = [0]; let Latency = 0;
2065   }
2066
2067 }