1 //====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
3 // Cell SPU 64-bit operations
5 //===----------------------------------------------------------------------===//
7 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// 1. The instruction sequences for vector versus scalar differ by a
11 // constant. In the scalar case, we're only interested in the
12 // top two 32-bit slots, whereas we're interested in an exact
13 // all-four-slot match in the vector case.
15 // 2. There are no "immediate" forms, since loading 64-bit constants
16 // could be a constant pool load.
18 // 3. i64 setcc results are i32, which are subsequently converted to a FSM
19 // mask when used in a select pattern.
21 // 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
22 // [Note: this may be moot, since gb produces v4i32 or r32.]
24 // 5. The code sequences for r64 and v2i64 are probably overly conservative,
25 // compared to the code that gcc produces.
// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
28 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// selb instruction definition for i64. Note that the selection mask is
// a vector, produced by various forms of FSM:
SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),

// The generic i64 select pattern, which assumes that the comparison result
// is in a 32-bit register that contains a select mask pattern (i.e., gather
// bits result).  The $rFalse/$rTrue names are deliberately swapped relative
// to select's (cond, true, false) operand order: the value bound to $rFalse
// ends up in SELB's second value slot, which is the one chosen where the
// FSM mask bits are set (i.e., when $rCond is true).
def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
          (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
// select the negative condition: reuse the *positive* comparison fragment
// and swap the true/false operands of SELB (compare with the generic i64
// select pattern above, where they are passed the other way around) instead
// of inverting the mask:
class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
      (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;

// setcc the negative condition: bitwise complement (XORI with -1) of the
// positive comparison's i32 result:
class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(cond R64C:$rA, R64C:$rB),
      (XORIr32 compare.Fragment, -1)>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// The i64 seteq fragment that does the scalar->vector conversion and
// comparison: CEQ compares all four 32-bit slots, GB gathers the per-slot
// result bits into a 4-bit value, and CGTI ..., 0xb tests that the top two
// slots (the ones holding the scalar i64) both matched (gathered value
// must be > 0b1011, i.e. both high bits set):
CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
                                       (ORv2i64_i64 R64C:$rB))), 0xb)>;

// The i64 seteq fragment that does the vector comparison: all four slots
// must match, i.e. the gathered bits must equal 0xf:
CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;

// i64 seteq (equality): the setcc result is i32, which is converted to a
// vector FSM mask when used in a select pattern.
// v2i64 seteq (equality): the setcc result is v4i32
multiclass CompareEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
  // NOTE(review): unlike the later comparison multiclasses (e.g.
  // CompareLogicalGreaterThan64), the v2i64 variant here is wrapped in
  // ORi32_v4i32, collapsing the vector result to scalar -- confirm intended.
  def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;

defm I64EQ: CompareEqual64;

def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;

// setne is handled as NOT(seteq):
def : I64SETCCNegCond<setne, I64EQr64>;
def : I64SELECTNegCond<setne, I64EQr64>;
87 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 unsigned greater-than, built from per-word 32-bit compares.
// Per-word unsigned greater-than of the two halves:
CodeFrag<(CLGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

// Per-word equality, used to decide when the low-word result must take
// precedence over the high-word result:
CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

// Combine: where the high words are equal (eq mask), take the low-word
// compare result propagated across the doubleword by XSWD; otherwise keep
// the high-word compare result.
CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
          (XSWDv2i64 CLGTr64ugt.Fragment),
          CLGTr64eq.Fragment)>;
// v2i64 unsigned greater-than: same combining sequence as the scalar
// (CLGTr64*) case above, but built from the vector operands.
// (def headers restored: the names are pinned by the uses in
// CLGTv2i64compare and CompareLogicalGreaterThan64 below.)
def CLGTv2i64ugt:
    CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;

def CLGTv2i64eq:
    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

def CLGTv2i64compare:
    CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
              // Fixed copy/paste bug: this previously read CLGTr64ugt
              // (the *scalar* fragment, referencing R64C operands), which
              // mixed register classes into the vector comparison.
              (XSWDv2i64 CLGTv2i64ugt.Fragment),
              CLGTv2i64eq.Fragment)>;
multiclass CompareLogicalGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CLGTr64compare.Fragment)>;
  def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTv2i64compare.Fragment))>;

defm I64LGT: CompareLogicalGreaterThan64;

def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
// v2i64 setugt pattern currently disabled:
//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
//          I64LGTv2i64.Fragment>;

// setule is handled as NOT(setugt):
def : I64SETCCNegCond<setule, I64LGTr64>;
def : I64SELECTNegCond<setule, I64LGTr64>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setuge/setult:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// uge per word = (ugt | eq); gather the per-slot result bits and test the
// top two slots, which hold the scalar i64 (gathered value > 0xb):
CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
                                      CLGTr64eq.Fragment)), 0xb)>;

// Vector variant: all four slots must pass (gathered bits == 0xf):
def CLGEv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
                                          CLGTv2i64eq.Fragment)), 0xf)>;

multiclass CompareLogicalGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CLGEr64compare.Fragment)>;
  def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEv2i64compare.Fragment))>;
defm I64LGE: CompareLogicalGreaterEqual64;

def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
          I64LGEv2i64.Fragment>;

// setult is handled as NOT(setuge):
def : I64SETCCNegCond<setult, I64LGEr64>;
def : I64SELECTNegCond<setult, I64LGEr64>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// i64 signed greater-than fragments; the structure parallels the unsigned
// CLGTr64* sequence above, using signed CGT for the word compares:
CodeFrag<(CGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;

// NOTE(review): the low-word compare fed through XSWD uses the *signed*
// fragment; for a signed 64-bit compare the low words should arguably be
// compared unsigned (CLGTr64ugt) -- confirm against the SPU ISA.
CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
          (XSWDv2i64 CGTr64sgt.Fragment),
// v2i64 signed greater-than: same combining sequence as the scalar (CGTr64*)
// case, built from the vector operands.
// (def headers restored: the names are pinned by the uses in
// CGTv2i64compare and CompareGreaterThan64 below.)
def CGTv2i64sgt:
    CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;

def CGTv2i64eq:
    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

def CGTv2i64compare:
    CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
              // Fixed copy/paste bug: this previously read CGTr64sgt (the
              // *scalar* fragment, referencing R64C operands), which mixed
              // register classes into the vector comparison.
              // NOTE(review): as with the scalar sequence, the low-word
              // compare should arguably be unsigned (CLGTv2i64ugt) for a
              // correct signed 64-bit compare -- confirm.
              (XSWDv2i64 CGTv2i64sgt.Fragment),
              CGTv2i64eq.Fragment)>;
// Signed 64-bit greater-than; same shape as CompareLogicalGreaterThan64:
multiclass CompareGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CGTr64compare.Fragment)>;
  def v2i64: CodeFrag<CGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTv2i64compare.Fragment))>;
// Instantiate the *signed* comparison multiclass.  This previously read
// CompareLogicalGreaterThan64 (copy/paste from the setugt section), which
// left CompareGreaterThan64 unused and implemented signed setgt with the
// unsigned CLGT fragments.
defm I64GT: CompareGreaterThan64;
def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
// v2i64 setgt pattern currently disabled:
//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
//          I64GTv2i64.Fragment>;

// setle is handled as NOT(setgt):
def : I64SETCCNegCond<setle, I64GTr64>;
def : I64SELECTNegCond<setle, I64GTr64>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// i64 setge: sge per word = (sgt | eq); gather the per-slot bits and test
// the top two slots holding the scalar (value > 0xb):
CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
                                      CGTr64eq.Fragment)), 0xb)>;

// Vector variant: all four slots must pass (gathered bits == 0xf):
CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
                                      CGTv2i64eq.Fragment)), 0xf)>;

multiclass CompareGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(ORi32_v4i32 CGEr64compare.Fragment)>;
  def v2i64: CodeFrag<CGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEr64compare.Fragment))>;
  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEv2i64compare.Fragment))>;

defm I64GE: CompareGreaterEqual64;

def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
          I64GEv2i64.Fragment>;

// setlt is handled as NOT(setge):
def : I64SETCCNegCond<setlt, I64GEr64>;
def : I64SELECTNegCond<setlt, I64GEr64>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 addition (SPUadd64)
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Per-word carry-generate for the 64-bit add:
class v2i64_add_cg<dag lhs, dag rhs>:
    CodeFrag<(CGv4i32 lhs, rhs)>;

// Add-extended: lhs + rhs + carry, with the carry word moved into position
// by SHUFB using the caller-supplied carry-generate shuffle mask:
class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
    CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;

// Convenience wrapper tying the carry-generate fragment to the extended add:
class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
    v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;

// Scalar i64 add: move the operands into vector slots, add, move back:
def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
                                 (ORv2i64_i64 R64C:$rB),
                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;

// Vector v2i64 add.
// NOTE(review): the (v2i64 VECREG:$rB) argument appears to be missing from
// this instantiation -- v2i64_add takes three template arguments.  Verify.
def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_add<(v2i64 VECREG:$rA),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 subtraction
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Per-word borrow-generate for the 64-bit subtract:
class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;

// Subtract-from-extended: lhs - rhs with the borrow word moved into
// position by SHUFB using the caller-supplied shuffle mask:
class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
    CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;

// Scalar i64 subtract: move the operands into vector slots, subtract with
// borrow, move back:
def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
                                 (ORv2i64_i64 R64C:$rB),
                                 v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
                                              (ORv2i64_i64 R64C:$rB)>.Fragment,
                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;

// Vector v2i64 subtract.
// NOTE(review): the (v2i64 VECREG:$rB) argument appears to be missing from
// this instantiation -- v2i64_sub takes four template arguments.  Verify.
def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_sub<(v2i64 VECREG:$rA),
                    v2i64_sub_bg<(v2i64 VECREG:$rA),
                                 (v2i64 VECREG:$rB)>.Fragment,
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 multiply
//
// Note: i64 multiply is simply the vector->scalar conversion of the
// full-on v2i64 multiply, since the entire vector has to be manipulated
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Keep only the high word of each doubleword of rA, zeroing the low words:
// SELB merges in the zero vector (ILv4i32 0) at the byte positions selected
// by FSMBI 0x0f0f (per SPU FSMBI/SELB semantics -- TODO confirm byte order):
class v2i64_mul_ahi64<dag rA> :
    CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

// Same, for the rB operand:
class v2i64_mul_bhi64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

// Keep only the low word of each doubleword, zeroing the high words.
// (Parameter name rB is historical; callers pass rA -- see v2i64_mul_a3_b3.)
class v2i64_mul_alo64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

class v2i64_mul_blo64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

// Quadword byte shifts used to line up the 16-bit partial products:
class v2i64_mul_ashlq2<dag rA>:
    CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;

class v2i64_mul_ashlq4<dag rA>:
    CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;

class v2i64_mul_bshlq2<dag rB> :
    CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;

class v2i64_mul_bshlq4<dag rB> :
    CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
// Accumulates the high-order partial products of the 64x64 multiply from
// MPYU/MPYH 16-bit multiplies of the masked (ahi64/bhi64) and shifted
// (ashlq2/4, bshlq2/4) halves of rA and rB; the inline comments label which
// 16-bit limbs each product covers:
class v2i64_highprod<dag rA, dag rB>:
              (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment,     // a1 x b3
                         v2i64_mul_ahi64<rA>.Fragment),
              (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment,      // a0 x b3
                         v2i64_mul_bshlq4<rB>.Fragment)),
              (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
                         v2i64_mul_ashlq4<rA>.Fragment),
              (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                         v2i64_mul_bhi64<rB>.Fragment),
              (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                         v2i64_mul_bhi64<rB>.Fragment),
              (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                         v2i64_mul_bshlq2<rB>.Fragment),
              (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                         v2i64_mul_bshlq2<rB>.Fragment))))))>;
// a3 x b3: unsigned product of the low words of each doubleword:
class v2i64_mul_a3_b3<dag rA, dag rB>:
    CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
                        v2i64_mul_blo64<rB>.Fragment)>;

// a2 x b3: high-half unsigned product, shifted left 2 bytes and merged
// under the FSMBI 0xc3c3 byte mask.
// NOTE(review): SELB normally takes three operands; the middle operand
// appears to be missing here -- verify.
class v2i64_mul_a2_b3<dag rA, dag rB>:
    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                          (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
                                       v2i64_mul_bshlq2<rB>.Fragment), 0x2),
                        (FSMBIv4i32 0xc3c3))>;

// a3 x b2: symmetric to a2 x b3 with the operand roles swapped:
class v2i64_mul_a3_b2<dag rA, dag rB>:
    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                          (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
                                       v2i64_mul_ashlq2<rA>.Fragment), 0x2),
                        (FSMBIv4i32 0xc3c3))>;
// Sum of the low-order partial products (a3b3 + a2b3 + a3b2), using the
// 64-bit vector add sequence defined above:
class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
    v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
                        v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
              v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;

// Full 64x64 multiply: the low-order sum plus the high partial products
// merged into the high words under the FSMBI 0x0f0f mask:
class v2i64_mul<dag rA, dag rB, dag rCGmask>:
    v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
              (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
                         (FSMBIv4i32 0x0f0f)),

// Scalar i64 multiply: convert the operands to vector form, do the full
// v2i64 multiply, convert the result back (see the section note above):
def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
                                 (ORv2i64_i64 R64C:$rB),
                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;

// Vector v2i64 multiply:
def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// selb instruction definition for f64 (R64FP registers).  Note that unlike
// the i64 variant at the top of this file, the selection mask here is a
// plain R32C register (not a VECREG FSM mask), and the select pattern is
// folded directly into the instruction definition:
SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
         (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;