//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
//
//                     Cell SPU 64-bit operations
//
//===----------------------------------------------------------------------===//

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// 64-bit comparisons:
//
// 1. The instruction sequences for the vector and scalar cases differ only
//    by a constant: in the scalar case, we're only interested in the top
//    two 32-bit slots, whereas the vector case requires an exact match in
//    all four slots.
//
// 2. There are no "immediate" forms, since loading a 64-bit constant may
//    itself require a constant pool load.
//
// 3. i64 setcc results are i32, which are subsequently converted to an FSM
//    mask when used in a select pattern.
//
// 4. v2i64 setcc results are v4i32, which can be converted to an FSM mask
//    (TODO) [Note: this may be moot, since gb produces v4i32 or r32.]
//
// 5. The code sequences for r64 and v2i64 are probably overly conservative,
//    compared to the code that gcc produces.
//
// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

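// Rough shape of the sequences below (a sketch of intent, not the exact
// emitted order): the scalar operands are promoted into the vector unit
// (ORv2i64_i64), compared word-wise, and the per-word result is either
// collapsed to an i32 with gb (gather bits) for setcc, or expanded into a
// full selb mask with fsm for select.
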
// selb instruction definition for i64. Note that the selection mask is
// a vector, produced by various forms of FSM:
def SELBr64_cond:
  SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
           [/* no pattern */]>;

// The generic i64 select pattern, which assumes that the comparison result
// is in a 32-bit register that contains a select mask pattern (i.e., gather
// bits result):

def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
          (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;

// select the negative condition:
class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
      (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;

// setcc the negative condition:
class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(cond R64C:$rA, R64C:$rB),
      (XORIr32 compare.Fragment, -1)>;
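
// For example, setne reuses the seteq code fragment: the setcc form simply
// inverts the gathered i32 result with xori ..., -1, while the select form
// swaps which operand lands in the "selected" slot of the selb, so the
// positive comparison fragment is reused unchanged.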

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// The i64 seteq fragment that does the scalar->vector conversion and
// comparison:
def CEQr64compare:
    CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
                                           (ORv2i64_i64 R64C:$rB))), 0xb)>;

// The i64 seteq fragment that does the vector comparison
def CEQv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
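
// Why 0xb and 0xf: gb packs the low bit of each 32-bit slot of the ceq
// result into a 4-bit value, with slot 0 in the most significant position.
// For the scalar case only the top two slots (the 64-bit operand) have to
// match, so the gathered value is at least 0xc -- hence "greater than 0xb";
// an exact all-four-slot match gathers to exactly 0xf.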

// i64 seteq (equality): the setcc result is i32, which is converted to a
// vector FSM mask when used in a select pattern.
//
// v2i64 seteq (equality): the setcc result is v4i32
multiclass CompareEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
  def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
}

defm I64EQ: CompareEqual64;

def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;

// i64 setne:
def : I64SETCCNegCond<setne, I64EQr64>;
def : I64SELECTNegCond<setne, I64EQr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setugt/setule:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def CLGTr64ugt:
    CodeFrag<(CLGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

def CLGTr64eq:
    CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

def CLGTr64compare:
    CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
                        (XSWDv2i64 CLGTr64ugt.Fragment),
                        CLGTr64eq.Fragment)>;
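
// How the 64-bit unsigned greater-than is assembled from 32-bit pieces:
// the word-wise clgt result is used directly when the high words differ;
// where the high words are equal (the ceq fragment supplies the selb mask),
// xswd sign-extends the low-word comparison result across the doubleword so
// that the low-word outcome decides instead. The signed setgt sequence
// further below has the same structure, with cgt in place of clgt.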

def CLGTv2i64ugt:
    CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;

def CLGTv2i64eq:
    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

def CLGTv2i64compare:
    CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
                        (XSWDv2i64 CLGTv2i64ugt.Fragment),
                        CLGTv2i64eq.Fragment)>;

multiclass CompareLogicalGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CLGTr64compare.Fragment)>;
  def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTv2i64compare.Fragment))>;
}

defm I64LGT: CompareLogicalGreaterThan64;

def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
//          I64LGTv2i64.Fragment>;

// i64 setule:
def : I64SETCCNegCond<setule, I64LGTr64>;
def : I64SELECTNegCond<setule, I64LGTr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setuge/setult:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def CLGEr64compare:
    CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
                                          CLGTr64eq.Fragment)), 0xb)>;

def CLGEv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
                                          CLGTv2i64eq.Fragment)), 0xf)>;
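
// setuge is simply (setugt | seteq) per word slot; the OR of the two
// fragments is then gathered and tested exactly like the seteq sequence
// above (greater than 0xb for the scalar case, equal to 0xf for the vector
// case).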

multiclass CompareLogicalGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CLGEr64compare.Fragment)>;
  def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEv2i64compare.Fragment))>;
}

defm I64LGE: CompareLogicalGreaterEqual64;

def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
          I64LGEv2i64.Fragment>;

// i64 setult:
def : I64SETCCNegCond<setult, I64LGEr64>;
def : I64SELECTNegCond<setult, I64LGEr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setgt/setle:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def CGTr64sgt:
    CodeFrag<(CGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

def CGTr64eq:
    CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

def CGTr64compare:
    CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
                        (XSWDv2i64 CGTr64sgt.Fragment),
                        CGTr64eq.Fragment)>;

def CGTv2i64sgt:
    CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;

def CGTv2i64eq:
    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

def CGTv2i64compare:
    CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
                        (XSWDv2i64 CGTv2i64sgt.Fragment),
                        CGTv2i64eq.Fragment)>;

multiclass CompareGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CGTr64compare.Fragment)>;
  def v2i64: CodeFrag<CGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTv2i64compare.Fragment))>;
}

defm I64GT: CompareGreaterThan64;

def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
//                  I64GTv2i64.Fragment>;

// i64 setle:
def : I64SETCCNegCond<setle, I64GTr64>;
def : I64SELECTNegCond<setle, I64GTr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setge/setlt:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def CGEr64compare:
    CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
                                          CGTr64eq.Fragment)), 0xb)>;

def CGEv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
                                          CGTv2i64eq.Fragment)), 0xf)>;

multiclass CompareGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CGEr64compare.Fragment)>;
  def v2i64: CodeFrag<CGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEv2i64compare.Fragment))>;
}

defm I64GE: CompareGreaterEqual64;

def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
          I64GEv2i64.Fragment>;

// i64 setlt:
def : I64SETCCNegCond<setlt, I64GEr64>;
def : I64SELECTNegCond<setlt, I64GEr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 add
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

class v2i64_add_cg<dag lhs, dag rhs>:
    CodeFrag<(CGv4i32 lhs, rhs)>;

class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
    CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;

class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
    v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;

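// 64-bit add from 32-bit pieces: cg produces the per-word carry bits of
// lhs + rhs, shufb (driven by the $rCGmask shuffle constant carried by the
// SPUadd64 node) rotates each low-word carry up into its high-word slot,
// and addx then adds lhs, rhs and the shifted carry. Roughly, per 64-bit
// element:  lo = a.lo + b.lo;  hi = a.hi + b.hi + carry(a.lo + b.lo).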
def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
           (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
                                  (ORv2i64_i64 R64C:$rB),
                                  (v4i32 VECREG:$rCGmask)>.Fragment)>;

def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
           v2i64_add<(v2i64 VECREG:$rA),
                     (v2i64 VECREG:$rB),
                     (v4i32 VECREG:$rCGmask)>.Fragment>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 subtraction
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;

class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
    CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;

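// 64-bit subtract mirrors the add sequence: bg produces the per-word borrow
// bits for the subtraction, shufb moves the low-word borrow into the
// high-word slot using the same kind of shuffle constant, and sfx folds it
// into the extended (borrow-aware) subtraction.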
def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
           (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
                                  (ORv2i64_i64 R64C:$rB),
                                  v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
                                               (ORv2i64_i64 R64C:$rB)>.Fragment,
                                  (v4i32 VECREG:$rCGmask)>.Fragment)>;

def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
           v2i64_sub<(v2i64 VECREG:$rA),
                     (v2i64 VECREG:$rB),
                     v2i64_sub_bg<(v2i64 VECREG:$rA),
                                  (v2i64 VECREG:$rB)>.Fragment,
                     (v4i32 VECREG:$rCGmask)>.Fragment>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 multiply
//
// Note: i64 multiply is simply the vector->scalar conversion of the
// full-on v2i64 multiply, since the entire vector has to be manipulated
// anyway.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

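// Sketch of the decomposition (per 64-bit element, with a = a_hi:a_lo and
// b = b_hi:b_lo as 32-bit halves):
//
//   a * b mod 2^64 = a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 32)
//
// The SPU multiplier is 16x16 -> 32 bits (mpyu/mpyh/mpyhhu), so each 32x32
// product is itself assembled from halfword partial products; the
// selb/fsmbi masks below isolate the high and low words of each doubleword,
// the shlqbyi shifts align the partial products, and the partial sums reuse
// the v2i64_add carry sequence defined above.
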
class v2i64_mul_ahi64<dag rA> :
    CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_bhi64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_alo64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

class v2i64_mul_blo64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

class v2i64_mul_ashlq2<dag rA>:
    CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;

class v2i64_mul_ashlq4<dag rA>:
    CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;

class v2i64_mul_bshlq2<dag rB> :
    CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;

class v2i64_mul_bshlq4<dag rB> :
    CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;

class v2i64_highprod<dag rA, dag rB>:
    CodeFrag<(Av4i32
                (Av4i32
                  (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment,     // a1 x b3
                             v2i64_mul_ahi64<rA>.Fragment),
                  (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment,      // a0 x b3
                             v2i64_mul_bshlq4<rB>.Fragment)),
                (Av4i32
                  (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
                             v2i64_mul_ashlq4<rA>.Fragment),
                  (Av4i32
                      (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                                 v2i64_mul_bhi64<rB>.Fragment),
                    (Av4i32
                      (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                                 v2i64_mul_bhi64<rB>.Fragment),
                      (Av4i32
                        (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                   v2i64_mul_bshlq2<rB>.Fragment),
                        (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                   v2i64_mul_bshlq2<rB>.Fragment))))))>;

class v2i64_mul_a3_b3<dag rA, dag rB>:
    CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
                        v2i64_mul_blo64<rB>.Fragment)>;

class v2i64_mul_a2_b3<dag rA, dag rB>:
    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                          (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
                                       v2i64_mul_bshlq2<rB>.Fragment), 0x2),
                        (ILv4i32 0),
                        (FSMBIv4i32 0xc3c3))>;

class v2i64_mul_a3_b2<dag rA, dag rB>:
    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                          (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
                                       v2i64_mul_ashlq2<rA>.Fragment), 0x2),
                        (ILv4i32 0),
                        (FSMBIv4i32 0xc3c3))>;

class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
    v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
                        v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
              v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;

class v2i64_mul<dag rA, dag rB, dag rCGmask>:
    v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
              (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
                         (ILv4i32 0),
                         (FSMBIv4i32 0x0f0f)),
              rCGmask>;

def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
                                 (ORv2i64_i64 R64C:$rB),
                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;

def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f64 comparisons
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// selb instruction definition for f64. Note that the selection mask is
// a vector, produced by various forms of FSM:
def SELBf64_cond:
   SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
            [(set R64FP:$rT,
                  (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;