ARM sched model: Add integer arithmetic instructions on Swift
[oota-llvm.git] / lib / Target / ARM / ARMScheduleSwift.td
1 //=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the Swift processor.
11 //
12 //===----------------------------------------------------------------------===//
13
14 // ===---------------------------------------------------------------------===//
15 // This section contains legacy support for itineraries. This is
16 // required until SD and PostRA schedulers are replaced by MachineScheduler.
17
18 def SW_DIS0 : FuncUnit; // SW_DIS0-2: reserved by the leading stage(s) of every itinerary below (presumably dispatch slots -- confirm).
19 def SW_DIS1 : FuncUnit; // Single-slot itineraries may take any one of the three; multi-slot ones claim them in order.
20 def SW_DIS2 : FuncUnit; // Only claimed explicitly by itineraries that occupy all three slots (e.g. IIC_iMUL64).
21
22 def SW_ALU0 : FuncUnit; // ALU pipe 0; used alone by integer multiply/divide issue and by fpUNA/fpCMP/fpALU itineraries.
23 def SW_ALU1 : FuncUnit; // ALU pipe 1; used alone by FP convert, multiply, MAC, divide and sqrt itineraries.
24 def SW_LS   : FuncUnit; // Reserved by the memory-access stage of the load and store itineraries.
25 def SW_IDIV : FuncUnit; // Held for 14 cycles by IIC_iDIV.
26 def SW_FDIV : FuncUnit; // Held for 15 (32-bit) or 30 (64-bit) cycles by the FP divide/sqrt itineraries.
27
28 // FIXME: Need bypasses.
29 // FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
30 //        IIC_iMOVix2ld better.
31 // FIXME: Model the special immediate shifts that are not microcoded.
32 // FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
33 //        to issue on pipe 1?
34 // FIXME: Model the pipelined behavior of CMP / TST instructions.
35 // FIXME: Better model the microcode stages of multiply instructions, especially
36 //        conditional variants.
37 // FIXME: Add preload instruction when it is documented.
38 // FIXME: Model non-pipelined nature of FP div / sqrt unit.
39
40 def SwiftItineraries : ProcessorItineraries<
41   [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
42   //
43   // Move instructions, unconditional
44   InstrItinData<IIC_iMOVi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
45                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
46                               [1]>,
47   InstrItinData<IIC_iMOVr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
48                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
49                               [1]>,
50   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
51                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
52                               [1]>,
53   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
54                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
55                               [1]>,
56   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
57                                InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
58                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
59                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
60                               [2]>,
61   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
62                                   InstrStage<1, [SW_ALU0, SW_ALU1]>,
63                                   InstrStage<1, [SW_ALU0, SW_ALU1]>,
64                                   InstrStage<1, [SW_ALU0, SW_ALU1]>],
65                                  [3]>,
66   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
67                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
68                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
69                                InstrStage<1, [SW_LS]>],
70                               [5]>,
71   //
72   // MVN instructions
73   InstrItinData<IIC_iMVNi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
74                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
75                               [1]>,
76   InstrItinData<IIC_iMVNr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
77                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
78                               [1]>,
79   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
80                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
81                               [1]>,
82   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
83                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
84                               [1]>,
85   //
86   // No operand cycles
87   InstrItinData<IIC_iALUx   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
88                                InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
89   //
90   // Binary Instructions that produce a result
91   InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
92                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
93                             [1, 1]>,
94   InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
95                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
96                             [1, 1, 1]>,
97   InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
98                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
99                             [2, 1, 1]>,
100   InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
101                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
102                             [2, 1, 1]>,
103   InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
104                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
105                             [2, 1, 1, 1]>,
106   //
107   // Bitwise Instructions that produce a result
108   InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
109                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
110                             [1, 1]>,
111   InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
112                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
113                             [1, 1, 1]>,
114   InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
115                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
116                             [2, 1, 1]>,
117   InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
118                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
119                             [2, 1, 1, 1]>,
120   //
121   // Unary Instructions that produce a result
122
123   // CLZ, RBIT, etc.
124   InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
125                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
126                             [1, 1]>,
127
128   // BFC, BFI, UBFX, SBFX
129   InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
130                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
131                             [2, 1]>,
132
133   //
134   // Zero and sign extension instructions
135   InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
136                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
137                             [1, 1]>,
138   InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
139                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
140                             [1, 1, 1]>,
141   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
142                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
143                             [1, 1, 1, 1]>,
144   //
145   // Compare instructions
146   InstrItinData<IIC_iCMPi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
147                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
148                               [1]>,
149   InstrItinData<IIC_iCMPr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
150                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
151                               [1, 1]>,
152   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
153                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
154                               [1, 1]>,
155   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
156                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
157                               [1, 1, 1]>,
158   //
159   // Test instructions
160   InstrItinData<IIC_iTSTi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
161                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
162                               [1]>,
163   InstrItinData<IIC_iTSTr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
164                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
165                               [1, 1]>,
166   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
167                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
168                               [1, 1]>,
169   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
170                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
171                               [1, 1, 1]>,
172   //
173   // Move instructions, conditional
174   // FIXME: Correctly model the extra input dep on the destination.
175   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
176                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
177                               [1]>,
178   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
179                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
180                               [1, 1]>,
181   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
182                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
183                               [1, 1]>,
184   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
185                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
186                               [2, 1, 1]>,
187   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
188                                InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
189                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
190                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
191                               [2]>,
192
193   // Integer multiply pipeline
194   //
195   InstrItinData<IIC_iMUL16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
196                                InstrStage<1, [SW_ALU0]>],
197                               [3, 1, 1]>,
198   InstrItinData<IIC_iMAC16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
199                                InstrStage<1, [SW_ALU0]>],
200                               [3, 1, 1, 1]>,
201   InstrItinData<IIC_iMUL32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
202                                InstrStage<1, [SW_ALU0]>],
203                               [4, 1, 1]>,
204   InstrItinData<IIC_iMAC32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
205                                InstrStage<1, [SW_ALU0]>],
206                               [4, 1, 1, 1]>,
207   InstrItinData<IIC_iMUL64  , [InstrStage<1, [SW_DIS0], 0>,
208                                InstrStage<1, [SW_DIS1], 0>,
209                                InstrStage<1, [SW_DIS2], 0>,
210                                InstrStage<1, [SW_ALU0], 1>,
211                                InstrStage<1, [SW_ALU0], 3>,
212                                InstrStage<1, [SW_ALU0]>],
213                               [5, 5, 1, 1]>,
214   InstrItinData<IIC_iMAC64  , [InstrStage<1, [SW_DIS0], 0>,
215                                InstrStage<1, [SW_DIS1], 0>,
216                                InstrStage<1, [SW_DIS2], 0>,
217                                InstrStage<1, [SW_ALU0], 1>,
218                                InstrStage<1, [SW_ALU0], 1>,
219                                InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
220                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
221                               [5, 6, 1, 1]>,
222   //
223   // Integer divide
224   InstrItinData<IIC_iDIV  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
225                              InstrStage<1, [SW_ALU0], 0>,
226                              InstrStage<14, [SW_IDIV]>],
227                             [14, 1, 1]>,
228
229   // Integer load pipeline
230   // FIXME: The timings are rough approximations.
231   //
232   // Immediate offset
233   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
234                                  InstrStage<1, [SW_LS]>],
235                                 [3, 1]>,
236   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
237                                  InstrStage<1, [SW_LS]>],
238                                 [3, 1]>,
239   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
240                                  InstrStage<1, [SW_DIS1], 0>,
241                                  InstrStage<1, [SW_LS], 1>,
242                                  InstrStage<1, [SW_LS]>],
243                                 [3, 4, 1]>,
244   //
245   // Register offset
246   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
247                                  InstrStage<1, [SW_LS]>],
248                                 [3, 1, 1]>,
249   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
250                                  InstrStage<1, [SW_LS]>],
251                                 [3, 1, 1]>,
252   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
253                                  InstrStage<1, [SW_DIS1], 0>,
254                                  InstrStage<1, [SW_DIS2], 0>,
255                                  InstrStage<1, [SW_LS], 1>,
256                                  InstrStage<1, [SW_LS], 3>,
257                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
258                                 [3, 4, 1, 1]>,
259   //
260   // Scaled register offset
261   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [SW_DIS0], 0>,
262                                  InstrStage<1, [SW_DIS1], 0>,
263                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
264                                  InstrStage<1, [SW_LS]>],
265                                 [5, 1, 1]>,
266   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
267                                  InstrStage<1, [SW_DIS1], 0>,
268                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
269                                  InstrStage<1, [SW_LS]>],
270                                 [5, 1, 1]>,
271   //
272   // Immediate offset with update
273   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [SW_DIS0], 0>,
274                                  InstrStage<1, [SW_DIS1], 0>,
275                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
276                                  InstrStage<1, [SW_LS]>],
277                                 [3, 1, 1]>,
278   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
279                                  InstrStage<1, [SW_DIS1], 0>,
280                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
281                                  InstrStage<1, [SW_LS]>],
282                                 [3, 1, 1]>,
283   //
284   // Register offset with update
285   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [SW_DIS0], 0>,
286                                  InstrStage<1, [SW_DIS1], 0>,
287                                  InstrStage<1, [SW_ALU0], 1>,
288                                  InstrStage<1, [SW_LS]>],
289                                 [3, 1, 1, 1]>,
290   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
291                                  InstrStage<1, [SW_DIS1], 0>,
292                                  InstrStage<1, [SW_ALU0], 1>,
293                                  InstrStage<1, [SW_LS]>],
294                                 [3, 1, 1, 1]>,
295   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
296                                  InstrStage<1, [SW_DIS1], 0>,
297                                  InstrStage<1, [SW_DIS2], 0>,
298                                  InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
299                                  InstrStage<1, [SW_LS], 3>,
300                                  InstrStage<1, [SW_LS], 0>,
301                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
302                                 [3, 4, 1, 1]>,
303   //
304   // Scaled register offset with update
305   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
306                                  InstrStage<1, [SW_DIS1], 0>,
307                                  InstrStage<1, [SW_DIS2], 0>,
308                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
309                                  InstrStage<1, [SW_LS], 3>,
310                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
311                                 [5, 3, 1, 1]>,
312   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
313                                   InstrStage<1, [SW_DIS1], 0>,
314                                   InstrStage<1, [SW_DIS2], 0>,
315                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
316                                   InstrStage<1, [SW_LS], 0>,
317                                   InstrStage<1, [SW_ALU0, SW_ALU1]>],
318                                 [5, 3, 1, 1]>,
319   //
320   // Load multiple, def is the 5th operand.
321   // FIXME: This assumes 3 to 4 registers.
322   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [SW_DIS0], 0>,
323                                 InstrStage<1, [SW_DIS1], 0>,
324                                 InstrStage<1, [SW_DIS2], 0>,
325                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
326                                 InstrStage<1, [SW_LS]>],
327                                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
328
329   //
330   // Load multiple + update, defs are the 1st and 5th operands.
331   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
332                                 InstrStage<1, [SW_DIS1], 0>,
333                                 InstrStage<1, [SW_DIS2], 0>,
334                                 InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
335                                 InstrStage<1, [SW_LS], 3>,
336                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
337                                [2, 1, 1, 1, 3], [], -1>, // dynamic uops
338   //
339   // Load multiple plus branch
340   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
341                                 InstrStage<1, [SW_DIS1], 0>,
342                                 InstrStage<1, [SW_DIS2], 0>,
343                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
344                                 InstrStage<1, [SW_LS]>],
345                                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
346   //
347   // Pop, def is the 3rd operand.
348   InstrItinData<IIC_iPop  ,    [InstrStage<1, [SW_DIS0], 0>,
349                                 InstrStage<1, [SW_DIS1], 0>,
350                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
351                                 InstrStage<1, [SW_LS]>],
352                                [1, 1, 3], [], -1>, // dynamic uops
353   //
354   // Pop + branch, def is the 3rd operand.
355   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [SW_DIS0], 0>,
356                                 InstrStage<1, [SW_DIS1], 0>,
357                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
358                                 InstrStage<1, [SW_LS]>],
359                                [1, 1, 3], [], -1>, // dynamic uops
360
361   //
362   // iLoadi + iALUr for t2LDRpci_pic.
363   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
364                                 InstrStage<1, [SW_LS], 3>,
365                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
366                                [4, 1]>,
367
368   // Integer store pipeline
369   //
370   // Immediate offset
371   InstrItinData<IIC_iStore_i  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
372                                  InstrStage<1, [SW_LS]>],
373                                 [1, 1]>,
374   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
375                                  InstrStage<1, [SW_LS]>],
376                                 [1, 1]>,
377   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
378                                  InstrStage<1, [SW_DIS1], 0>,
379                                  InstrStage<1, [SW_DIS2], 0>,
380                                  InstrStage<1, [SW_LS], 0>,
381                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
382                                  InstrStage<1, [SW_LS]>],
383                                 [1, 1]>,
384   //
385   // Register offset
386   InstrItinData<IIC_iStore_r  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
387                                  InstrStage<1, [SW_LS]>],
388                                 [1, 1, 1]>,
389   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
390                                  InstrStage<1, [SW_LS]>],
391                                 [1, 1, 1]>,
392   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
393                                  InstrStage<1, [SW_DIS1], 0>,
394                                  InstrStage<1, [SW_DIS2], 0>,
395                                  InstrStage<1, [SW_LS], 0>,
396                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
397                                  InstrStage<1, [SW_LS]>],
398                                 [1, 1, 1]>,
399   //
400   // Scaled register offset
401   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [SW_DIS0], 0>,
402                                   InstrStage<1, [SW_DIS1], 0>,
403                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
404                                   InstrStage<1, [SW_LS]>],
405                                  [1, 1, 1]>,
406   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
407                                   InstrStage<1, [SW_DIS1], 0>,
408                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
409                                   InstrStage<1, [SW_LS]>],
410                                  [1, 1, 1]>,
411   //
412   // Immediate offset with update
413   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [SW_DIS0], 0>,
414                                   InstrStage<1, [SW_DIS1], 0>,
415                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
416                                   InstrStage<1, [SW_LS]>],
417                                  [1, 1, 1]>,
418   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
419                                   InstrStage<1, [SW_DIS1], 0>,
420                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
421                                   InstrStage<1, [SW_LS]>],
422                                  [1, 1, 1]>,
423   //
424   // Register offset with update
425   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [SW_DIS0], 0>,
426                                   InstrStage<1, [SW_DIS1], 0>,
427                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
428                                   InstrStage<1, [SW_LS]>],
429                                  [1, 1, 1, 1]>,
430   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
431                                   InstrStage<1, [SW_DIS1], 0>,
432                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
433                                   InstrStage<1, [SW_LS]>],
434                                  [1, 1, 1, 1]>,
435   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
436                                   InstrStage<1, [SW_DIS1], 0>,
437                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
438                                   InstrStage<1, [SW_LS]>],
439                                  [1, 1, 1, 1]>,
440   //
441   // Scaled register offset with update
442   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [SW_DIS0], 0>,
443                                     InstrStage<1, [SW_DIS1], 0>,
444                                     InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
445                                     InstrStage<1, [SW_LS], 0>,
446                                     InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
447                                    [3, 1, 1, 1]>,
448   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
449                                     InstrStage<1, [SW_DIS1], 0>,
450                                     InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
451                                     InstrStage<1, [SW_LS], 0>,
452                                     InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
453                                    [3, 1, 1, 1]>,
454   //
455   // Store multiple
456   InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
457                                 InstrStage<1, [SW_DIS1], 0>,
458                                 InstrStage<1, [SW_DIS2], 0>,
459                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
460                                 InstrStage<1, [SW_LS], 1>,
461                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
462                                 InstrStage<1, [SW_LS], 1>,
463                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
464                                 InstrStage<1, [SW_LS]>],
465                                 [], [], -1>, // dynamic uops
466   //
467   // Store multiple + update
468   InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
469                                 InstrStage<1, [SW_DIS1], 0>,
470                                 InstrStage<1, [SW_DIS2], 0>,
471                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
472                                 InstrStage<1, [SW_LS], 1>,
473                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
474                                 InstrStage<1, [SW_LS], 1>,
475                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
476                                 InstrStage<1, [SW_LS]>],
477                                [2], [], -1>, // dynamic uops
478
479   //
480   // Preload
481   InstrItinData<IIC_Preload,   [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
482
483   // Branch
484   //
485   // no delay slots, so the latency of a branch is unimportant
486   InstrItinData<IIC_Br       , [InstrStage<1, [SW_DIS0], 0>]>,
487
488   // FP Special Register to Integer Register File Move
489   InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
490                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
491                              [1]>,
492   //
493   // Single-precision FP Unary
494   //
495   // Most floating-point moves get issued on ALU0.
496   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
497                                InstrStage<1, [SW_ALU0]>],
498                               [2, 1]>,
499   //
500   // Double-precision FP Unary
501   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
502                                InstrStage<1, [SW_ALU0]>],
503                               [2, 1]>,
504
505   //
506   // Single-precision FP Compare
507   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
508                                InstrStage<1, [SW_ALU0]>],
509                               [1, 1]>,
510   //
511   // Double-precision FP Compare
512   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
513                                InstrStage<1, [SW_ALU0]>],
514                               [1, 1]>,
515   //
516   // Single to Double FP Convert
517   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
518                                InstrStage<1, [SW_ALU1]>],
519                               [4, 1]>,
520   //
521   // Double to Single FP Convert
522   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
523                                InstrStage<1, [SW_ALU1]>],
524                               [4, 1]>,
525
526   //
527   // Single to Half FP Convert
528   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
529                                InstrStage<1, [SW_DIS1], 0>,
530                                InstrStage<1, [SW_ALU1], 4>,
531                                InstrStage<1, [SW_ALU1]>],
532                               [6, 1]>,
533   //
534   // Half to Single FP Convert
535   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
536                                InstrStage<1, [SW_ALU1]>],
537                               [4, 1]>,
538
539   //
540   // Single-Precision FP to Integer Convert
541   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
542                                InstrStage<1, [SW_ALU1]>],
543                               [4, 1]>,
544   //
545   // Double-Precision FP to Integer Convert
546   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
547                                InstrStage<1, [SW_ALU1]>],
548                               [4, 1]>,
549   //
550   // Integer to Single-Precision FP Convert
551   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
552                                InstrStage<1, [SW_ALU1]>],
553                               [4, 1]>,
554   //
555   // Integer to Double-Precision FP Convert
556   InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
557                                InstrStage<1, [SW_ALU1]>],
558                               [4, 1]>,
559   //
560   // Single-precision FP ALU
561   InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
562                                InstrStage<1, [SW_ALU0]>],
563                               [2, 1, 1]>,
564   //
565   // Double-precision FP ALU
566   InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
567                                InstrStage<1, [SW_ALU0]>],
568                               [2, 1, 1]>,
569   //
570   // Single-precision FP Multiply
571   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
572                                InstrStage<1, [SW_ALU1]>],
573                               [4, 1, 1]>,
574   //
575   // Double-precision FP Multiply
576   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
577                                InstrStage<1, [SW_ALU1]>],
578                               [6, 1, 1]>,
579   //
580   // Single-precision FP MAC
581   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
582                                InstrStage<1, [SW_ALU1]>],
583                               [8, 1, 1]>,
584   //
585   // Double-precision FP MAC
586   InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
587                                InstrStage<1, [SW_ALU1]>],
588                               [12, 1, 1]>,
589   //
590   // Single-precision Fused FP MAC
591   InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
592                                InstrStage<1, [SW_ALU1]>],
593                               [8, 1, 1]>,
594   //
595   // Double-precision Fused FP MAC
596   InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
597                                InstrStage<1, [SW_ALU1]>],
598                               [12, 1, 1]>,
599   //
600   // Single-precision FP DIV
601   InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
602                                InstrStage<1, [SW_ALU1], 0>,
603                                InstrStage<15, [SW_FDIV]>],
604                               [17, 1, 1]>,
605   //
606   // Double-precision FP DIV
607   InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
608                                InstrStage<1, [SW_ALU1], 0>,
609                                InstrStage<30, [SW_FDIV]>],
610                               [32, 1, 1]>,
611   //
612   // Single-precision FP SQRT
613   InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
614                                InstrStage<1, [SW_ALU1], 0>,
615                                InstrStage<15, [SW_FDIV]>],
616                               [17, 1]>,
617   //
618   // Double-precision FP SQRT
619   InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
620                                InstrStage<1, [SW_ALU1], 0>,
621                                InstrStage<30, [SW_FDIV]>],
622                               [32, 1, 1]>,
623
624   //
625   // Integer to Single-precision Move
626   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [SW_DIS0], 0>,
627                                InstrStage<1, [SW_DIS1], 0>,
628                                InstrStage<1, [SW_LS], 4>,
629                                InstrStage<1, [SW_ALU0]>],
630                               [6, 1]>,
631   //
632   // Integer to Double-precision Move
633   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
634                                InstrStage<1, [SW_LS]>],
635                               [4, 1]>,
636   //
637   // Single-precision to Integer Move
638   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
639                                InstrStage<1, [SW_LS]>],
640                               [3, 1]>,
641   //
642   // Double-precision to Integer Move
643   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [SW_DIS0], 0>,
644                                InstrStage<1, [SW_DIS1], 0>,
645                                InstrStage<1, [SW_LS], 3>,
646                                InstrStage<1, [SW_LS]>],
647                               [3, 4, 1]>,
648   //
649   // Single-precision FP Load
650   InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
651                                InstrStage<1, [SW_LS]>],
652                               [4, 1]>,
653   //
654   // Double-precision FP Load
655   InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
656                                InstrStage<1, [SW_LS]>],
657                               [4, 1]>,
658   //
659   // FP Load Multiple
660   // FIXME: Assumes a single Q register.
661   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
662                                InstrStage<1, [SW_LS]>],
663                               [1, 1, 1, 4], [], -1>, // dynamic uops
664   //
665   // FP Load Multiple + update
666   // FIXME: Assumes a single Q register.
667   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
668                                InstrStage<1, [SW_DIS1], 0>,
669                                InstrStage<1, [SW_LS], 4>,
670                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
671                               [2, 1, 1, 1, 4], [], -1>, // dynamic uops
672   //
673   // Single-precision FP Store
674   InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
675                                InstrStage<1, [SW_LS]>],
676                               [1, 1]>,
677   //
678   // Double-precision FP Store
679   InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
680                                InstrStage<1, [SW_LS]>],
681                               [1, 1]>,
682   //
683   // FP Store Multiple
684   // FIXME: Assumes a single Q register.
685   InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
686                                InstrStage<1, [SW_LS]>],
687                               [1, 1, 1], [], -1>, // dynamic uops
688   //
689   // FP Store Multiple + update
690   // FIXME: Assumes a single Q register.
691   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
692                                 InstrStage<1, [SW_DIS1], 0>,
693                                 InstrStage<1, [SW_LS], 4>,
694                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
695                                [2, 1, 1, 1], [], -1>, // dynamic uops
696   // NEON
697   //
698   // Double-register Integer Unary
699   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
700                                InstrStage<1, [SW_ALU0]>],
701                               [4, 1]>,
702   //
703   // Quad-register Integer Unary
704   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
705                                InstrStage<1, [SW_ALU0]>],
706                               [4, 1]>,
707   //
708   // Double-register Integer Q-Unary
709   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
710                                InstrStage<1, [SW_ALU0]>],
711                               [4, 1]>,
712   //
713   // Quad-register Integer Q-Unary
714   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
715                                InstrStage<1, [SW_ALU0]>],
716                               [4, 1]>,
717   //
718   // Double-register Integer Binary
719   InstrItinData<IIC_VBINiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
720                                InstrStage<1, [SW_ALU0]>],
721                               [2, 1, 1]>,
722   //
723   // Quad-register Integer Binary
724   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
725                                InstrStage<1, [SW_ALU0]>],
726                               [2, 1, 1]>,
727   //
728   // Double-register Integer Subtract
729   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
730                                InstrStage<1, [SW_ALU0]>],
731                               [2, 1, 1]>,
732   //
733   // Quad-register Integer Subtract
734   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
735                                InstrStage<1, [SW_ALU0]>],
736                               [2, 1, 1]>,
737   //
738   // Double-register Integer Shift
739   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
740                                InstrStage<1, [SW_ALU0]>],
741                               [2, 1, 1]>,
742   //
743   // Quad-register Integer Shift
744   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
745                                InstrStage<1, [SW_ALU0]>],
746                               [2, 1, 1]>,
747   //
748   // Double-register Integer Shift (4 cycle)
749   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
750                                InstrStage<1, [SW_ALU0]>],
751                               [4, 1, 1]>,
752   //
753   // Quad-register Integer Shift (4 cycle)
754   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
755                                InstrStage<1, [SW_ALU0]>],
756                               [4, 1, 1]>,
757   //
758   // Double-register Integer Binary (4 cycle)
759   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
760                                InstrStage<1, [SW_ALU0]>],
761                               [4, 1, 1]>,
762   //
763   // Quad-register Integer Binary (4 cycle)
764   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
765                                InstrStage<1, [SW_ALU0]>],
766                               [4, 1, 1]>,
767   //
768   // Double-register Integer Subtract (4 cycle)
769   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
770                                InstrStage<1, [SW_ALU0]>],
771                               [4, 1, 1]>,
772   //
773   // Quad-register Integer Subtract (4 cycle)
774   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
775                                InstrStage<1, [SW_ALU0]>],
776                               [4, 1, 1]>,
777
778   //
779   // Double-register Integer Count
780   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
781                                InstrStage<1, [SW_ALU0]>],
782                               [2, 1, 1]>,
783   //
784   // Quad-register Integer Count
785   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
786                                InstrStage<1, [SW_ALU0]>],
787                               [2, 1, 1]>,
788   //
789   // Double-register Absolute Difference and Accumulate
790   InstrItinData<IIC_VABAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
791                                InstrStage<1, [SW_ALU0]>],
792                               [4, 1, 1, 1]>,
793   //
794   // Quad-register Absolute Difference and Accumulate
795   InstrItinData<IIC_VABAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
796                                InstrStage<1, [SW_ALU0]>],
797                               [4, 1, 1, 1]>,
798   //
799   // Double-register Integer Pair Add Long
800   InstrItinData<IIC_VPALiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
801                                InstrStage<1, [SW_ALU0]>],
802                               [4, 1, 1]>,
803   //
804   // Quad-register Integer Pair Add Long
805   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
806                                InstrStage<1, [SW_ALU0]>],
807                               [4, 1, 1]>,
808
809   //
810   // Double-register Integer Multiply (.8, .16)
811   InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
812                                InstrStage<1, [SW_ALU1]>],
813                               [4, 1, 1]>,
814   //
815   // Quad-register Integer Multiply (.8, .16)
816   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
817                                InstrStage<1, [SW_ALU1]>],
818                               [4, 1, 1]>,
819
820   //
821   // Double-register Integer Multiply (.32)
822   InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
823                                InstrStage<1, [SW_ALU1]>],
824                               [4, 1, 1]>,
825   //
826   // Quad-register Integer Multiply (.32)
827   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
828                                InstrStage<1, [SW_ALU1]>],
829                               [4, 1, 1]>,
830   //
831   // Double-register Integer Multiply-Accumulate (.8, .16)
832   InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
833                                InstrStage<1, [SW_ALU1]>],
834                               [4, 1, 1, 1]>,
835   //
836   // Double-register Integer Multiply-Accumulate (.32)
837   InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
838                                InstrStage<1, [SW_ALU1]>],
839                               [4, 1, 1, 1]>,
840   //
841   // Quad-register Integer Multiply-Accumulate (.8, .16)
842   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
843                                InstrStage<1, [SW_ALU1]>],
844                               [4, 1, 1, 1]>,
845   //
846   // Quad-register Integer Multiply-Accumulate (.32)
847   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
848                                InstrStage<1, [SW_ALU1]>],
849                               [4, 1, 1, 1]>,
850
851   //
852   // Move
853   InstrItinData<IIC_VMOV,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
854                                InstrStage<1, [SW_ALU0]>],
855                               [2, 1]>,
856   //
857   // Move Immediate
858   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
859                                InstrStage<1, [SW_ALU0]>],
860                               [2]>,
861   //
862   // Double-register Permute Move
863   InstrItinData<IIC_VMOVD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
864                                InstrStage<1, [SW_ALU1]>],
865                               [2, 1]>,
866   //
867   // Quad-register Permute Move
868   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
869                                InstrStage<1, [SW_ALU1]>],
870                               [2, 1]>,
871   //
872   // Integer to Single-precision Move
873   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [SW_DIS0], 0>,
874                                InstrStage<1, [SW_DIS1], 0>,
875                                InstrStage<1, [SW_LS], 4>,
876                                InstrStage<1, [SW_ALU0]>],
877                               [6, 1]>,
878   //
879   // Integer to Double-precision Move
880   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
881                                InstrStage<1, [SW_LS]>],
882                               [4, 1, 1]>,
883   //
884   // Single-precision to Integer Move
885   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
886                                InstrStage<1, [SW_LS]>],
887                               [3, 1]>,
888   //
889   // Double-precision to Integer Move
890   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [SW_DIS0], 0>,
891                                InstrStage<1, [SW_DIS1], 0>,
892                                InstrStage<1, [SW_LS], 3>,
893                                InstrStage<1, [SW_LS]>],
894                               [3, 4, 1]>,
895   //
896   // Integer to Lane Move
897   // FIXME: I think this is correct, but it is not clear from the tuning guide.
898   InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
899                                InstrStage<1, [SW_DIS1], 0>,
900                                InstrStage<1, [SW_LS], 4>,
901                                InstrStage<1, [SW_ALU0]>],
902                               [6, 1]>,
903
904   //
905   // Vector narrow move
906   InstrItinData<IIC_VMOVN,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
907                                InstrStage<1, [SW_ALU1]>],
908                               [2, 1]>,
909   //
910   // Double-register FP Unary
911   // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
912   //        and they issue on a different pipeline.
913   InstrItinData<IIC_VUNAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
914                                InstrStage<1, [SW_ALU0]>],
915                               [2, 1]>,
916   //
917   // Quad-register FP Unary
918   // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
919   //        and they issue on a different pipeline.
920   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
921                                InstrStage<1, [SW_ALU0]>],
922                               [2, 1]>,
923   //
924   // Double-register FP Binary
925   // FIXME: We're using this itin for many instructions.
926   InstrItinData<IIC_VBIND,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
927                                InstrStage<1, [SW_ALU0]>],
928                               [4, 1, 1]>,
929
930   //
931   // VPADD, etc.
932   InstrItinData<IIC_VPBIND,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
933                                InstrStage<1, [SW_ALU0]>],
934                               [4, 1, 1]>,
935   //
936   // Double-register FP VMUL
937   InstrItinData<IIC_VFMULD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
938                                InstrStage<1, [SW_ALU1]>],
939                               [4, 1, 1]>,
940   //
941   // Quad-register FP Binary
942   InstrItinData<IIC_VBINQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
943                                InstrStage<1, [SW_ALU0]>],
944                               [4, 1, 1]>,
945   //
946   // Quad-register FP VMUL
947   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
948                                InstrStage<1, [SW_ALU1]>],
949                               [4, 1, 1]>,
950   //
951   // Double-register FP Multiply-Accumulate
952   InstrItinData<IIC_VMACD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
953                                InstrStage<1, [SW_ALU1]>],
954                               [8, 1, 1]>,
955   //
956   // Quad-register FP Multiply-Accumulate
957   InstrItinData<IIC_VMACQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
958                                InstrStage<1, [SW_ALU1]>],
959                               [8, 1, 1]>,
960   //
961   // Double-register Fused FP Multiply-Accumulate
962   InstrItinData<IIC_VFMACD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
963                                InstrStage<1, [SW_ALU1]>],
964                               [8, 1, 1]>,
965   //
966   // Quad-register Fused FP Multiply-Accumulate
967   InstrItinData<IIC_VFMACQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
968                                InstrStage<1, [SW_ALU1]>],
969                               [8, 1, 1]>,
970   //
971   // Double-register Reciprocal Step
972   InstrItinData<IIC_VRECSD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
973                                InstrStage<1, [SW_ALU1]>],
974                               [8, 1, 1]>,
975   //
976   // Quad-register Reciprocal Step
977   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
978                                InstrStage<1, [SW_ALU1]>],
979                               [8, 1, 1]>,
980   //
981   // Double-register Permute
982   // FIXME: The latencies are unclear from the documentation.
983   InstrItinData<IIC_VPERMD,   [InstrStage<1, [SW_DIS0], 0>,
984                                InstrStage<1, [SW_DIS1], 0>,
985                                InstrStage<1, [SW_DIS2], 0>,
986                                InstrStage<1, [SW_ALU1], 2>,
987                                InstrStage<1, [SW_ALU1], 2>,
988                                InstrStage<1, [SW_ALU1]>],
989                               [3, 4, 3, 4]>,
990   //
991   // Quad-register Permute
992   // FIXME: The latencies are unclear from the documentation.
993   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [SW_DIS0], 0>,
994                                InstrStage<1, [SW_DIS1], 0>,
995                                InstrStage<1, [SW_DIS2], 0>,
996                                InstrStage<1, [SW_ALU1], 2>,
997                                InstrStage<1, [SW_ALU1], 2>,
998                                InstrStage<1, [SW_ALU1]>],
999                               [3, 4, 3, 4]>,
1000   //
1001   // Quad-register Permute (3 cycle issue on A9)
1002   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [SW_DIS0], 0>,
1003                                InstrStage<1, [SW_DIS1], 0>,
1004                                InstrStage<1, [SW_DIS2], 0>,
1005                                InstrStage<1, [SW_ALU1], 2>,
1006                                InstrStage<1, [SW_ALU1], 2>,
1007                                InstrStage<1, [SW_ALU1]>],
1008                               [3, 4, 3, 4]>,
1009
1010   //
1011   // Double-register VEXT
1012   InstrItinData<IIC_VEXTD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1013                                InstrStage<1, [SW_ALU1]>],
1014                               [2, 1, 1]>,
1015   //
1016   // Quad-register VEXT
1017   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1018                                InstrStage<1, [SW_ALU1]>],
1019                               [2, 1, 1]>,
1020   //
1021   // VTB
1022   InstrItinData<IIC_VTB1,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1023                                InstrStage<1, [SW_ALU1]>],
1024                               [2, 1, 1]>,
1025   InstrItinData<IIC_VTB2,     [InstrStage<1, [SW_DIS0], 0>,
1026                                InstrStage<1, [SW_DIS1], 0>,
1027                                InstrStage<1, [SW_ALU1], 2>,
1028                                InstrStage<1, [SW_ALU1]>],
1029                               [4, 1, 3, 3]>,
1030   InstrItinData<IIC_VTB3,     [InstrStage<1, [SW_DIS0], 0>,
1031                                InstrStage<1, [SW_DIS1], 0>,
1032                                InstrStage<1, [SW_DIS2], 0>,
1033                                InstrStage<1, [SW_ALU1], 2>,
1034                                InstrStage<1, [SW_ALU1], 2>,
1035                                InstrStage<1, [SW_ALU1]>],
1036                               [6, 1, 3, 5, 5]>,
1037   InstrItinData<IIC_VTB4,     [InstrStage<1, [SW_DIS0], 0>,
1038                                InstrStage<1, [SW_DIS1], 0>,
1039                                InstrStage<1, [SW_DIS2], 0>,
1040                                InstrStage<1, [SW_ALU1], 2>,
1041                                InstrStage<1, [SW_ALU1], 2>,
1042                                InstrStage<1, [SW_ALU1], 2>,
1043                                InstrStage<1, [SW_ALU1]>],
1044                               [8, 1, 3, 5, 7, 7]>,
1045   //
1046   // VTBX
1047   InstrItinData<IIC_VTBX1,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1048                                InstrStage<1, [SW_ALU1]>],
1049                               [2, 1, 1]>,
1050   InstrItinData<IIC_VTBX2,    [InstrStage<1, [SW_DIS0], 0>,
1051                                InstrStage<1, [SW_DIS1], 0>,
1052                                InstrStage<1, [SW_ALU1], 2>,
1053                                InstrStage<1, [SW_ALU1]>],
1054                               [4, 1, 3, 3]>,
1055   InstrItinData<IIC_VTBX3,    [InstrStage<1, [SW_DIS0], 0>,
1056                                InstrStage<1, [SW_DIS1], 0>,
1057                                InstrStage<1, [SW_DIS2], 0>,
1058                                InstrStage<1, [SW_ALU1], 2>,
1059                                InstrStage<1, [SW_ALU1], 2>,
1060                                InstrStage<1, [SW_ALU1]>],
1061                               [6, 1, 3, 5, 5]>,
1062   InstrItinData<IIC_VTBX4,    [InstrStage<1, [SW_DIS0], 0>,
1063                                InstrStage<1, [SW_DIS1], 0>,
1064                                InstrStage<1, [SW_DIS2], 0>,
1065                                InstrStage<1, [SW_ALU1], 2>,
1066                                InstrStage<1, [SW_ALU1], 2>,
1067                                InstrStage<1, [SW_ALU1], 2>,
1068                                InstrStage<1, [SW_ALU1]>],
1069                               [8, 1, 3, 5, 7, 7]>
1070 ]>;
1071
1072 // ===---------------------------------------------------------------------===//
1073 // The following definitions describe the simple machine model which
1074 // will replace itineraries.
1075
1076 // Swift machine model for scheduling and other instruction cost heuristics.
     // Sets the dispatch width, minimum/load latencies and the branch mispredict
     // penalty, and attaches the SwiftItineraries table to the model.
1077 def SwiftModel : SchedMachineModel {
1078   let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
1079   let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
1080   let LoadLatency = 3; // Same value as the Latency of SwiftWriteP2ThreeCycle.
1081   let MispredictPenalty = 14; // A branch direction mispredict.
1082
1083   let Itineraries = SwiftItineraries; // Legacy itinerary data defined above.
1084 }
1085
1086 // Swift predicates.
     // IsFastImmShiftSwiftPred evaluates TII->isSwiftFastImmShift(MI). The write
     // variants below use it to special-case the immediate shifts they list
     // (lsl #2, lsl #1, or lsr #1).
1087 def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
1088
1089 // Swift resource mapping.
1090 let SchedModel = SwiftModel in {
1091   // Processor resources.
       // SwiftUnitP01 is declared with two units; SwiftUnitP0 and SwiftUnitP1
       // each name it as their Super, i.e. they are sub-resources of that pair.
1092   def SwiftUnitP01 : ProcResource<2>; // ALU unit.
1093   def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit.
1094   def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit.
1095   def SwiftUnitP2 : ProcResource<1>; // LS unit.
       // NOTE(review): SwiftUnitDiv is not used by any write visible in this part
       // of the file; presumably the divide unit -- confirm against its users.
1096   def SwiftUnitDiv : ProcResource<1>;
1097
1098   // Generic resource requirements.
       // Naming: SwiftWrite<units><latency>Cycle. Each def names the unit(s) it
       // occupies and, where given, overrides Latency; the OneCycle defs leave
       // Latency at the SchedWriteRes default.
1099   def SwiftWriteP0OneCycle : SchedWriteRes<[SwiftUnitP0]>;
1100   def SwiftWriteP0TwoCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 2; }
1101   def SwiftWriteP0FourCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 4; }
1102   def SwiftWriteP0SixCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 6; }
       // Writes that list both SwiftUnitP0 and SwiftUnitP1 as resources.
1103   def SwiftWriteP0P1FourCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
1104     let Latency = 4;
1105   }
1106   def SwiftWriteP0P1SixCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
1107     let Latency = 6;
1108   }
1109   def SwiftWriteP01OneCycle : SchedWriteRes<[SwiftUnitP01]>;
1110   def SwiftWriteP1TwoCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 2; }
1111   def SwiftWriteP1FourCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 4; }
1112   def SwiftWriteP1SixCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 6; }
1113   def SwiftWriteP1EightCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 8; }
1114   def SwiftWriteP1TwelveCyc : SchedWriteRes<[SwiftUnitP1]> { let Latency = 12; }
       // The one-cycle P01 write repeated two / three times in sequence.
1115   def SwiftWriteP01OneCycle2x : WriteSequence<[SwiftWriteP01OneCycle], 2>;
1116   def SwiftWriteP01OneCycle3x : WriteSequence<[SwiftWriteP01OneCycle], 3>;
1117   def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
       // Lists SwiftUnitP01 twice and is counted as two micro-ops.
1118   def SwiftWriteP01ThreeCycleTwoUops : SchedWriteRes<[SwiftUnitP01,
1119                                                       SwiftUnitP01]> {
1120     let Latency = 3;
1121     let NumMicroOps = 2;
1122   }
       // Three micro-ops; ResourceCycles = [3] holds SwiftUnitP0 for three cycles.
1123   def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
1124     let Latency = 3;
1125     let NumMicroOps = 3;
1126     let ResourceCycles = [3];
1127   }
1128   // Plain load without writeback.
       // All writes in this group occupy SwiftUnitP2 (the LS unit).
1129   def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> {
1130     let Latency = 3;
1131   }
1132   def SwiftWriteP2FourCycle : SchedWriteRes<[SwiftUnitP2]> {
1133     let Latency = 4;
1134   }
1135   // A store does not write to a register.
1136   def SwiftWriteP2 : SchedWriteRes<[SwiftUnitP2]> {
1137     let Latency = 0;
1138   }
       // Generates SwiftWrite1xP2 .. SwiftWrite4xP2: the zero-latency P2 write
       // repeated Num times.
1139   foreach Num = 1-4 in {
1140     def SwiftWrite#Num#xP2 : WriteSequence<[SwiftWriteP2], Num>;
1141   }
       // Two one-cycle P01 writes followed by the three-cycle P2 write; used
       // below for the MOV_ga_pcrel_ldr pseudo instructions.
1142   def SwiftWriteP01OneCycle2x_load : WriteSequence<[SwiftWriteP01OneCycle,
1143                                                     SwiftWriteP01OneCycle,
1144                                                     SwiftWriteP2ThreeCycle]>;
1145   // 4.2.4 Arithmetic and Logical.
1146   // ALU operation register shifted by immediate variant.
       // NOTE(review): the fast-shift case selects the two-cycle P01 write while
       // the fallback is plain WriteALU, which gets no Latency override in this
       // section -- confirm that this ordering is intended.
1147   def SwiftWriteALUsi : SchedWriteVariant<[
1148     // lsl #2, lsl #1, or lsr #1.
1149     SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
1150     SchedVar<NoSchedPred,             [WriteALU]>
1151   ]>;
       // ALUsr: a predicated instruction uses the three-cycle, two-micro-op P01
       // write; otherwise the two-cycle P01 write.
1152   def SwiftWriteALUsr : SchedWriteVariant<[
1153     SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
1154     SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
1155   ]>;
       // ALUSsr: a predicated instruction uses the three-cycle, three-micro-op
       // P0 write; otherwise the two-cycle P01 write.
1156   def SwiftWriteALUSsr : SchedWriteVariant<[
1157     SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
1158     SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
1159   ]>;
       // Read side of ALUsr: a read advance of 2 when predicated, none otherwise.
1160   def SwiftReadAdvanceALUsr : SchedReadVariant<[
1161     SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>,
1162     SchedVar<NoSchedPred,      [NoReadAdvance]>
1163   ]>;
1164   // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
1165   // AND,BIC,EOR,ORN,ORR
1166   // CLZ,RBIT,REV,REV16,REVSH,PKH
       // WriteALU maps to the P01 pair with no Latency override; the si/sr/Ssr
       // writes and ReadALUsr are aliased to the Swift variants defined above,
       // and ReadALU gets a read advance of 0.
1167   def : WriteRes<WriteALU, [SwiftUnitP01]>;
1168   def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
1169   def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
1170   def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
1171   def : ReadAdvance<ReadALU, 0>;
1172   def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
1173
1174
1175   def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
         // One-cycle P01 write when IsFastImmShiftSwiftPred holds, two-cycle P01
         // write otherwise.
1176     SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01OneCycle]>,
1177     SchedVar<NoSchedPred,             [SwiftWriteP01TwoCycle]>
1178   ]>;
1179
1180   // 4.2.5 Integer comparison
       // WriteCMP uses the P01 pair with no Latency override; WriteCMPsi picks
       // between one and two cycles via the variant above; WriteCMPsr is always
       // the two-cycle P01 write.
1181   def : WriteRes<WriteCMP, [SwiftUnitP01]>;
1182   def : SchedAlias<WriteCMPsi, SwiftChooseShiftKindP01OneOrTwoCycle>;
1183   def : SchedAlias<WriteCMPsr, SwiftWriteP01TwoCycle>;
1184
1185   // 4.2.6 Shift, Move
1186   // Shift
1187   //  ASR,LSL,ROR,RRX
1188   //  MOV(register-shiftedregister)  MVN(register-shiftedregister)
1189   // Move
1190   //  MOV,MVN
1191   //  MOVT
1192   // Sign/Zero extension
       // The extend instructions matched below get a single one-cycle P01 write.
1193   def : InstRW<[SwiftWriteP01OneCycle],
1194                (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
1195                           "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH",
1196                           "t2UXTB16")>;
1197   // Pseudo instructions.
       // Modeled as sequences of the one-cycle P01 write: two writes, three
       // writes, or two writes followed by the three-cycle P2 (load) write.
1198   def : InstRW<[SwiftWriteP01OneCycle2x],
1199         (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi32imm",
1200                    "t2MOVi32imm", "t2MOV_ga_dyn")>;
1201   def : InstRW<[SwiftWriteP01OneCycle3x],
1202         (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel", "t2MOVi16_ga_pcrel")>;
1203   def : InstRW<[SwiftWriteP01OneCycle2x_load],
1204         (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
1205
1206   def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
1207
       // A predicated instruction uses the two-write sequence defined just above
       // (SwiftWriteP0OneCycle repeated twice); an unpredicated one uses a
       // single SwiftWriteP0OneCycle.
1208   def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
1209     SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>,
1210     SchedVar<NoSchedPred,     [ SwiftWriteP0OneCycle ]>
1211   ]>;
1212
1213   // 4.2.7 Select
1214   // SEL
       // SEL and t2SEL take the predicate-dependent P0 write defined above.
1215   def : InstRW<[SwiftPredP0OneOrTwoCycle], (instregex "SEL", "t2SEL")>;
1216
1217   // 4.2.8 Bitfield
1218   // BFI,BFC, SBFX,UBFX
       // All matched bitfield instructions (ARM and "(t|t2)"-prefixed forms) use
       // the two-cycle P01 write.
1219   def : InstRW< [SwiftWriteP01TwoCycle],
1220         (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
1221         "(t|t2)UBFX", "(t|t2)SBFX")>;
1222
1223   // 4.2.9 Saturating arithmetic
       // All matched saturating instructions use the two-cycle P01 write.
       // NOTE(review): the ARM entries include "USAT16" but the Thumb2 entries
       // have no explicit "t2USAT16" -- confirm whether "t2USAT" is meant to
       // cover it.
1224   def : InstRW< [SwiftWriteP01TwoCycle],
1225         (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
1226         "USAT16", "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
1227         "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
1228         "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
1229         "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
1230         "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;
1231
1232   // 4.2.10 Parallel Arithmetic
       // The first group takes the predicate-dependent SwiftWriteALUsr variant;
       // the second group always takes the two-cycle P01 write.
       // NOTE(review): per the ARM ARM the SADD8/UADD8 family updates APSR.GE
       // while the halving (SH*/UH*) and extend-and-add (SXTA*/UXTA*) forms do
       // not, so the two labels below look swapped -- confirm against the Swift
       // tuning guide before changing them.
1233   // Not flag setting.
1234   def : InstRW< [SwiftWriteALUsr],
1235         (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
1236         "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
1237         "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
1238         "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
1239   // Flag setting.
1240   def : InstRW< [SwiftWriteP01TwoCycle],
1241        (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
1242        "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
1243        "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
1244        "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
1245        "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
1246        "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
1247
1248   // 4.2.11 Sum of Absolute Difference
       // USAD8: the four-cycle write only, no read entries.
       // NOTE(review): unlike most sections here, no "t2"-prefixed patterns are
       // listed for either record - confirm the Thumb2 forms are still covered.
1249   def : InstRW< [SwiftWriteP0P1FourCycle], (instregex "USAD8") >;
       // USADA8: same write plus three read entries; the third is a
       // SchedReadAdvance<2> (presumably for the accumulator operand - confirm
       // the operand order).
1250   def : InstRW<[SwiftWriteP0P1FourCycle, ReadALU, ReadALU, SchedReadAdvance<2>],
1251         (instregex "USADA8")>;
1252
1253   // 4.2.12 Integer Multiply (32-bit result)
1254   // Two sources.
       // Every listed multiply name gets the plain four-cycle P0 write with no
       // read entries. The dual-multiply-subtract forms are listed as "SMUSD"
       // and its exchange variant "SMUSDX" (previously misspelled "SMUSDXi"),
       // with matching "t2" entries, mirroring how SMUAD/SMUADX are listed in
       // 4.2.13.
1255   def : InstRW< [SwiftWriteP0FourCycle],
1256         (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
1257         "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDX", "t2MUL",
1258         "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
1259         "t2SMULWB", "t2SMULWT", "t2SMUSD", "t2SMUSDX")>;
1260
1261   def SwiftWriteP0P01FiveCycleTwoUops :
1262       SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]>  {
         // Write that uses SwiftUnitP0 and SwiftUnitP01 with a latency of 5.
         // Only Latency is set here; NumMicroOps is left unset.
         // NOTE(review): the name says "TwoUops" - confirm whether
         // `let NumMicroOps = 2;` was intended.
1263     let Latency = 5;
1264   }
1265
1266   def SwiftPredP0P01FourFiveCycle : SchedWriteVariant<[
         // Write variant: SwiftWriteP0P01FiveCycleTwoUops under
         // IsPredicatedPred, SwiftWriteP0FourCycle under NoSchedPred.
1267     SchedVar<IsPredicatedPred, [ SwiftWriteP0P01FiveCycleTwoUops ]>,
1268     SchedVar<NoSchedPred,      [ SwiftWriteP0FourCycle ]>
1269   ]>;
1270
1271   def SwiftReadAdvanceFourCyclesPred : SchedReadVariant<[
          // Read variant: SchedReadAdvance<4> under IsPredicatedPred, plain
          // ReadALU under NoSchedPred.
1272      SchedVar<IsPredicatedPred, [SchedReadAdvance<4>]>,
1273      SchedVar<NoSchedPred,      [ReadALU]>
1274   ]>;
1275
1276   // Multiply accumulate, three sources
       // One write (the predicate-dependent four/five-cycle variant) followed by
       // three read entries: two ReadALU and the predicate-dependent
       // SwiftReadAdvanceFourCyclesPred as the third.
1277   def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
1278                  SwiftReadAdvanceFourCyclesPred],
1279         (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
1280         "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS",
1281         "t2SMMLSR")>;
1282
1283   // 4.2.13 Integer Multiply (32-bit result, Q flag)
       // SMUAD/SMUADX: plain four-cycle P0 write, no read entries.
1284   def : InstRW< [SwiftWriteP0FourCycle],
1285         (instregex "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX")>;
       // SMLA<x><y>, SMLSD(X), SMLAW<y>: predicate-dependent write plus three
       // read entries, the same shape as the three-source multiply-accumulate
       // record in 4.2.12.
1286   def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
1287                  SwiftReadAdvanceFourCyclesPred],
1288         (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
1289         "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
1290         "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT")>;
       // SMLAD/SMLADX: predicate-dependent write only; unlike the record above,
       // no read entries are listed.
1291   def : InstRW< [SwiftPredP0P01FourFiveCycle],
1292         (instregex "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX")>;
1293
1294   def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
         // Three micro-ops with latency 5. ResourceCycles is positional against
         // the unit list above: 2 for SwiftUnitP0, 1 for SwiftUnitP01.
1295     let Latency = 5;
1296     let NumMicroOps = 3;
1297     let ResourceCycles = [2, 1];
1298   }
1299   def SwiftWrite1Cycle : SchedWriteRes<[]> {
         // Latency-only write: empty resource list and zero micro-ops.
         // Not referenced within this section of the file.
1300     let Latency = 1;
1301     let NumMicroOps = 0;
1302   }
1303   def SwiftWrite5Cycle : SchedWriteRes<[]> {
         // Latency-only write (5 cycles); used below as the extra write entry of
         // the long-multiply records.
1304     let Latency = 5;
1305     let NumMicroOps = 0;
1306   }
1307   def SwiftWrite6Cycle : SchedWriteRes<[]> {
         // Latency-only write (6 cycles). Not referenced within this section of
         // the file.
1308     let Latency = 6;
1309     let NumMicroOps = 0;
1310   }
1311
1312   // 4.2.14 Integer Multiply, Long
       // Two write entries: SwiftP0P0P01FiveCycle carries the resources and
       // micro-ops, SwiftWrite5Cycle is latency-only. The "$"-terminated
       // patterns end the match at the end of the name.
1313   def : InstRW< [SwiftP0P0P01FiveCycle, SwiftWrite5Cycle],
1314         (instregex "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$")>;
1315
1316   def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
         // Five micro-ops; ResourceCycles is positional against the unit list
         // above: 2 for SwiftUnitP0, 3 for SwiftUnitP01.
         // NOTE(review): the record name says "FiveCycle" but Latency is 7 -
         // confirm which is intended before renaming or changing the value.
1317     let Latency = 7;
1318     let NumMicroOps = 5;
1319     let ResourceCycles = [2, 3];
1320   }
1321
1322   // 4.2.15 Integer Multiply Accumulate, Long
1323   // 4.2.16 Integer Multiply Accumulate, Dual
1324   // 4.2.17 Integer Multiply Accumulate Accumulate, Long
1325   // We are being a bit inaccurate here.
       // One record covers all three sections. It has two write entries (the
       // latency-only SwiftWrite5Cycle first, then Swift2P03P01FiveCycle with
       // the resources) and four read entries, the last two being
       // SchedReadAdvance<4> and SchedReadAdvance<3>.
       // The halfword forms are spelled out in full as SMLAL{BB,BT,TB,TT}; the
       // BB entries were previously written "MLALBB"/"t2MLALBB", without the
       // leading "S" that every sibling entry has.
1326   def : InstRW< [SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU,
1327                  SchedReadAdvance<4>, SchedReadAdvance<3>],
1328         (instregex "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
1329         "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
1330         "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB", "t2SMLALBT",
1331         "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX",
1332         "t2UMAAL")>;
1333
1334   def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
         // Single micro-op divide write with latency 14. ResourceCycles is
         // positional against the unit list above: 1 for SwiftUnitP0, 14 for
         // SwiftUnitDiv.
1335     let NumMicroOps = 1;
1336     let Latency = 14;
1337     let ResourceCycles = [1, 14];
1338   }
1339   // 4.2.18 Integer Divide
1340   def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
       // Attach the SwiftDiv write to the divide opcodes. The write list was
       // previously empty, which left SwiftDiv unused and gave these
       // instructions no latency or resource information.
1341   def : InstRW < [SwiftDiv],
1342         (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
1343   // 4.2.26 Branch
       // All three branch writes use SwiftUnitP1; WriteBrTbl additionally lists
       // SwiftUnitP2. WriteBr and WriteBrTbl have latency 0, WriteBrL has
       // latency 2.
1344   def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; }
1345   def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; }
1346   def : WriteRes<WriteBrTbl, [SwiftUnitP1, SwiftUnitP2]> { let Latency = 0; }
1347
1348   // 4.2.36 Advanced SIMD and VFP, Convert
       // Names matching "VCVT", "VSIT"/"VUIT" or "VTOS"/"VTOU" get
       // SwiftWriteP1FourCycle.
1349   def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
1350   // Fixpoint conversions.
       // WriteCvtFP: SwiftUnitP1 with latency 4.
1351   def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
1352   // Preload.
       // WritePreLd names SwiftUnitP2 but with latency 0 and zero resource
       // cycles on that unit.
1353   def : WriteRes<WritePreLd, [SwiftUnitP2]> { let Latency = 0;
1354     let ResourceCycles = [0];
1355   }
1356
1357 }