//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
//
//                     Cell SPU 64-bit operations
//
//===----------------------------------------------------------------------===//

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// 64-bit comparisons:
//
// 1. The instruction sequences for the vector and scalar cases differ only
//    by a constant: in the scalar case we only care about the top two
//    32-bit slots, whereas the vector case requires an exact all-four-slot
//    match.
//
// 2. There are no "immediate" forms, since loading a 64-bit constant may
//    itself require a constant pool load.
//
// 3. i64 setcc results are i32, which are subsequently converted to an FSM
//    mask when used in a select pattern.
//
// 4. v2i64 setcc results are v4i32, which can be converted to an FSM mask
//    (TODO). [Note: this may be moot, since gb produces v4i32 or r32.]
//
// 5. The code sequences for r64 and v2i64 are probably overly conservative
//    compared to the code that gcc produces.
//
// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
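//
// In outline, the sequences below all follow the same recipe: scalar i64
// operands are first moved into the preferred slot of a vector register
// (ORv2i64_i64), the comparison itself is done word-wise on the vector,
// and the result is either pulled back out as an i32 (ORi32_v4i32) or
// expanded into a SELB mask via FSM.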

// selb instruction definition for i64. Note that the selection mask is
// a vector, produced by various forms of FSM:
def SELBr64_cond:
  SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
           [/* no pattern */]>;

// The generic i64 select pattern, which assumes that the comparison result
// is in a 32-bit register that contains a select mask pattern (i.e., gather
// bits result):

def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
          (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
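
// FSMr32 re-expands the gather-bits style i32 condition into a per-word
// vector mask; SELB returns bits from its second register operand wherever
// the mask is set, which is why the operand order is reversed relative to
// the select node above.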

// select the negative condition:
class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
      (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;

// setcc the negative condition:
class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(cond R64C:$rA, R64C:$rB),
      (XORIr32 compare.Fragment, -1)>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// The i64 seteq fragment that does the scalar->vector conversion and
// comparison:
def CEQr64compare:
    CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
                                           (ORv2i64_i64 R64C:$rB))), 0xb)>;

// The v2i64 seteq fragment that does the vector comparison:
def CEQv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
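
// GBv4i32 packs the per-word CEQ results into a 4-bit nibble in the
// preferred slot, with slot 0 in the most significant bit.  A scalar i64
// occupies slots 0 and 1, so equality only needs the top two bits of the
// nibble set (a value >= 0xc, hence the CGTI against 0xb); the v2i64 case
// needs all four slots equal, hence the CEQI against 0xf.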

// i64 seteq (equality): the setcc result is i32, which is converted to a
// vector FSM mask when used in a select pattern.
//
// v2i64 seteq (equality): the setcc result is v4i32
multiclass CompareEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
  def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
}

defm I64EQ: CompareEqual64;

def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;

// i64 setne:
def : I64SETCCNegCond<setne, I64EQr64>;
def : I64SELECTNegCond<setne, I64EQr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setugt/setule:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def CLGTr64ugt:
    CodeFrag<(CLGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

def CLGTr64eq:
    CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

def CLGTr64compare:
    CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
                        (XSWDv2i64 CLGTr64ugt.Fragment),
                        CLGTr64eq.Fragment)>;
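
// The SELB/XSWD combination implements the standard two-word unsigned
// compare: in each doubleword's upper slot the result is the upper words'
// CLGT result, unless the upper words are equal, in which case XSWD has
// replicated the lower word's CLGT result across the doubleword; i.e.,
// result = hi_gt | (hi_eq & lo_gt).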

def CLGTv2i64ugt:
    CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;

def CLGTv2i64eq:
    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

def CLGTv2i64compare:
    CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
                        (XSWDv2i64 CLGTv2i64ugt.Fragment),
                        CLGTv2i64eq.Fragment)>;

multiclass CompareLogicalGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CLGTr64compare.Fragment)>;
  def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTv2i64compare.Fragment))>;
}

defm I64LGT: CompareLogicalGreaterThan64;

def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
                  I64LGTv2i64.Fragment>;

// i64 setule:
def : I64SETCCNegCond<setule, I64LGTr64>;
def : I64SELECTNegCond<setule, I64LGTr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setuge/setult:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def CLGEr64compare:
    CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
                                          CLGTr64eq.Fragment)), 0xb)>;

def CLGEv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
                                          CLGTv2i64eq.Fragment)), 0xf)>;
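
// Per word, (ugt | eq) gives an unsigned >= mask; GB then packs those word
// results into a nibble and the immediate compare checks the relevant
// slots, exactly as in the seteq fragments above (0xb for the scalar's two
// upper slots, 0xf for the full vector).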

multiclass CompareLogicalGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CLGEr64compare.Fragment)>;
  def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEv2i64compare.Fragment))>;
}

defm I64LGE: CompareLogicalGreaterEqual64;

def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
def : Pat<(setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
                  I64LGEv2i64.Fragment>;

// i64 setult:
def : I64SETCCNegCond<setult, I64LGEr64>;
def : I64SELECTNegCond<setult, I64LGEr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setgt/setle:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def CGTr64sgt:
    CodeFrag<(CGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

def CGTr64eq:
    CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

def CGTr64compare:
    CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
                        (XSWDv2i64 CGTr64sgt.Fragment),
                        CGTr64eq.Fragment)>;

def CGTv2i64sgt:
    CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;

def CGTv2i64eq:
    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

def CGTv2i64compare:
    CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
                        (XSWDv2i64 CGTv2i64sgt.Fragment),
                        CGTv2i64eq.Fragment)>;
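
// Same SELB/XSWD combination as the unsigned case above, but with the
// signed CGT word compare supplying the greater-than results.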

multiclass CompareGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CGTr64compare.Fragment)>;
  def v2i64: CodeFrag<CGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTv2i64compare.Fragment))>;
}

defm I64GT: CompareGreaterThan64;

def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
                  I64GTv2i64.Fragment>;

// i64 setle:
def : I64SETCCNegCond<setle, I64GTr64>;
def : I64SELECTNegCond<setle, I64GTr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setge/setlt:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def CGEr64compare:
    CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
                                          CGTr64eq.Fragment)), 0xb)>;

def CGEv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
                                          CGTv2i64eq.Fragment)), 0xf)>;

multiclass CompareGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CGEr64compare.Fragment)>;
  def v2i64: CodeFrag<CGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEv2i64compare.Fragment))>;
}

defm I64GE: CompareGreaterEqual64;

def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
def : Pat<(setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
                  I64GEv2i64.Fragment>;

// i64 setlt:
def : I64SETCCNegCond<setlt, I64GEr64>;
def : I64SELECTNegCond<setlt, I64GEr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 add
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

class v2i64_add_cg<dag lhs, dag rhs>:
    CodeFrag<(CGv4i32 lhs, rhs)>;

class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
    CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;

class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
    v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
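
// A 64-bit add is thus three vector instructions: CG computes the per-word
// carry-outs, SHUFB (driven by the cg_mask constant) moves each doubleword's
// low-word carry up into its high-word slot, and ADDX performs the per-word
// add with those bits as carry-in.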

def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
           (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
                                  (ORv2i64_i64 R64C:$rB),
                                  (v4i32 VECREG:$rCGmask)>.Fragment)>;

def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
           v2i64_add<(v2i64 VECREG:$rA),
                     (v2i64 VECREG:$rB),
                     (v4i32 VECREG:$rCGmask)>.Fragment>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 subtraction
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;

class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
    CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
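
// Subtraction mirrors the add sequence: BG computes the per-word borrow
// bits, the same SHUFB trick moves each doubleword's low-word borrow into
// its high-word slot, and SFX performs the per-word subtract with that
// borrow folded in.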

def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
           (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
                                  (ORv2i64_i64 R64C:$rB),
                                  v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
                                               (ORv2i64_i64 R64C:$rB)>.Fragment,
                                  (v4i32 VECREG:$rCGmask)>.Fragment)>;

def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
           v2i64_sub<(v2i64 VECREG:$rA),
                     (v2i64 VECREG:$rB),
                     v2i64_sub_bg<(v2i64 VECREG:$rA),
                                  (v2i64 VECREG:$rB)>.Fragment,
                     (v4i32 VECREG:$rCGmask)>.Fragment>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 multiply
//
// Note: the i64 multiply is simply the full v2i64 multiply wrapped in the
// usual scalar<->vector conversions, since the entire vector has to be
// manipulated anyway.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
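//
// SPU only has 16 x 16 -> 32 bit multiplies (MPYU, MPYH, MPYHH and
// friends), so each 64-bit product is assembled from 16-bit partial
// products.  Writing a = (a0,a1,a2,a3) and b = (b0,b1,b2,b3) as 16-bit
// digits (a0 most significant), the low 64 bits of a*b are
//
//     sum over i+j >= 3 of  ai*bj << (16 * (6 - i - j))
//
// The fragments below compute the individual terms (the inline comments
// name some of them, e.g. "a1 x b3"), and the partial sums are combined
// with the same CG/SHUFB/ADDX 64-bit add defined above.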

class v2i64_mul_ahi64<dag rA> :
    CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_bhi64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_alo64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

class v2i64_mul_blo64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

class v2i64_mul_ashlq2<dag rA>:
    CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;

class v2i64_mul_ashlq4<dag rA>:
    CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;

class v2i64_mul_bshlq2<dag rB> :
    CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;

class v2i64_mul_bshlq4<dag rB> :
    CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;

class v2i64_highprod<dag rA, dag rB>:
    CodeFrag<(Av4i32
                (Av4i32
                  (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment,     // a1 x b3
                             v2i64_mul_ahi64<rA>.Fragment),
                  (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment,      // a0 x b3
                             v2i64_mul_bshlq4<rB>.Fragment)),
                (Av4i32
                  (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
                             v2i64_mul_ashlq4<rA>.Fragment),
                  (Av4i32
                    (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                               v2i64_mul_bhi64<rB>.Fragment),
                    (Av4i32
                      (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                                 v2i64_mul_bhi64<rB>.Fragment),
                      (Av4i32
                        (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                   v2i64_mul_bshlq2<rB>.Fragment),
                        (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                   v2i64_mul_bshlq2<rB>.Fragment))))))>;

class v2i64_mul_a3_b3<dag rA, dag rB>:
    CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
                        v2i64_mul_blo64<rB>.Fragment)>;

class v2i64_mul_a2_b3<dag rA, dag rB>:
    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                          (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
                                       v2i64_mul_bshlq2<rB>.Fragment), 0x2),
                        (ILv4i32 0),
                        (FSMBIv4i32 0xc3c3))>;

class v2i64_mul_a3_b2<dag rA, dag rB>:
    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                          (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
                                       v2i64_mul_ashlq2<rA>.Fragment), 0x2),
                        (ILv4i32 0),
                        (FSMBIv4i32 0xc3c3))>;

class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
    v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
                        v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
              v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;

class v2i64_mul<dag rA, dag rB, dag rCGmask>:
    v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
              (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
                         (ILv4i32 0),
                         (FSMBIv4i32 0x0f0f)),
              rCGmask>;
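
// The final SELB/FSMBI 0x0f0f keeps v2i64_highprod only in the upper 32-bit
// slot of each doubleword (the low slots are zeroed), and the last 64-bit
// add folds it into the low-order partial sum.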

def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
                                 (ORv2i64_i64 R64C:$rB),
                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;

def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f64 comparisons
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// selb instruction definition for f64. Unlike the i64 version above, the
// select condition here is an i32 register rather than an FSM-produced
// vector mask:
def SELBf64_cond:
   SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
            [(set R64FP:$rT,
                  (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;