1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/Constants.h"
27 #include "llvm/Function.h"
28 #include "llvm/Intrinsics.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Target/TargetOptions.h"
37 // Used in getTargetNodeName() below
39 std::map<unsigned, const char *> node_names;
41 //! MVT mapping to useful data for Cell SPU
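//! The key datum is prefslot_byte: the byte offset of a type's "preferred
//! slot" within a 128-bit SPU register (for example, bytes 0-3 hold a 32-bit
//! scalar). The load/store lowering below rotates the addressed element into
//! this slot.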
42 struct valtype_map_s {
44 const int prefslot_byte;
47 const valtype_map_s valtype_map[] = {
58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
60 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
61 const valtype_map_s *retval = 0;
63 for (size_t i = 0; i < n_valtype_map; ++i) {
64 if (valtype_map[i].valtype == VT) {
65 retval = valtype_map + i;
72 cerr << "getValueTypeMapEntry returns NULL for "
84 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
88 // Fold away setcc operations if possible.
91 // Use _setjmp/_longjmp instead of setjmp/longjmp.
92 setUseUnderscoreSetJmp(true);
93 setUseUnderscoreLongJmp(true);
95 // Set RTLIB libcall names as used by SPU:
96 setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
98 // Set up the SPU's register classes:
99 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
100 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
101 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
102 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
103 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
104 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
105 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
107 // SPU has no sign or zero extended loads for i1, i8, i16:
108 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
109 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
110 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
112 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
113 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
115 // SPU constant load actions are custom lowered:
116 setOperationAction(ISD::Constant, MVT::i64, Custom);
117 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
118 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
120 // SPU's loads and stores have to be custom lowered:
121 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
123 MVT VT = (MVT::SimpleValueType)sctype;
125 setOperationAction(ISD::LOAD, VT, Custom);
126 setOperationAction(ISD::STORE, VT, Custom);
127 setLoadExtAction(ISD::EXTLOAD, VT, Custom);
128 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
129 setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
131 // SMUL_LOHI, UMUL_LOHI are not legal for Cell:
132 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
133 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
135 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
136 MVT StoreVT = (MVT::SimpleValueType) stype;
137 setTruncStoreAction(VT, StoreVT, Expand);
141 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
143 MVT VT = (MVT::SimpleValueType) sctype;
145 setOperationAction(ISD::LOAD, VT, Custom);
146 setOperationAction(ISD::STORE, VT, Custom);
148 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
149 MVT StoreVT = (MVT::SimpleValueType) stype;
150 setTruncStoreAction(VT, StoreVT, Expand);
154 // Expand the jumptable branches
155 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
156 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
158 // Custom lower SELECT_CC for most cases, but expand by default
159 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
160 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
161 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
162 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
163 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
165 // SPU has no intrinsics for these particular operations:
166 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
168 // SPU has no SREM/UREM instructions
169 setOperationAction(ISD::SREM, MVT::i32, Expand);
170 setOperationAction(ISD::UREM, MVT::i32, Expand);
171 setOperationAction(ISD::SREM, MVT::i64, Expand);
172 setOperationAction(ISD::UREM, MVT::i64, Expand);
174 // We don't support sin/cos/sqrt/fmod
175 setOperationAction(ISD::FSIN , MVT::f64, Expand);
176 setOperationAction(ISD::FCOS , MVT::f64, Expand);
177 setOperationAction(ISD::FREM , MVT::f64, Expand);
178 setOperationAction(ISD::FSIN , MVT::f32, Expand);
179 setOperationAction(ISD::FCOS , MVT::f32, Expand);
180 setOperationAction(ISD::FREM , MVT::f32, Expand);
182 // SPU has no hardware square root; expand FSQRT
183 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
184 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
186 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
187 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
189 // Make sure that DAGCombine doesn't insert illegal 64-bit constants
190 setOperationAction(ISD::FABS, MVT::f64, Custom);
192 // SPU can do rotate right and left, so legalize it... but customize for i8
193 // because instructions don't exist.
195 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
197 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
198 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
199 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
201 setOperationAction(ISD::ROTL, MVT::i32, Legal);
202 setOperationAction(ISD::ROTL, MVT::i16, Legal);
203 setOperationAction(ISD::ROTL, MVT::i8, Custom);
205 // SPU has no native version of shift left/right for i8
206 setOperationAction(ISD::SHL, MVT::i8, Custom);
207 setOperationAction(ISD::SRL, MVT::i8, Custom);
208 setOperationAction(ISD::SRA, MVT::i8, Custom);
210 // Make these operations legal and handle them during instruction selection:
211 setOperationAction(ISD::SHL, MVT::i64, Legal);
212 setOperationAction(ISD::SRL, MVT::i64, Legal);
213 setOperationAction(ISD::SRA, MVT::i64, Legal);
215 // Custom lower i8 multiplications; i32 and i64 multiplies are marked legal
216 setOperationAction(ISD::MUL, MVT::i8, Custom);
217 setOperationAction(ISD::MUL, MVT::i32, Legal);
218 setOperationAction(ISD::MUL, MVT::i64, Legal);
220 // Need to custom handle (some) common i8, i64 math ops
221 setOperationAction(ISD::ADD, MVT::i8, Custom);
222 setOperationAction(ISD::ADD, MVT::i64, Legal);
223 setOperationAction(ISD::SUB, MVT::i8, Custom);
224 setOperationAction(ISD::SUB, MVT::i64, Legal);
226 // SPU does not have BSWAP. It does have i32 CTLZ support.
227 // CTPOP has to be custom lowered.
228 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
229 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
231 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
232 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
233 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
234 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
236 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
237 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
239 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
241 // SPU has a version of select that implements (a&~c)|(b&c), just like
242 // select ought to work:
243 setOperationAction(ISD::SELECT, MVT::i8, Legal);
244 setOperationAction(ISD::SELECT, MVT::i16, Legal);
245 setOperationAction(ISD::SELECT, MVT::i32, Legal);
246 setOperationAction(ISD::SELECT, MVT::i64, Legal);
248 setOperationAction(ISD::SETCC, MVT::i8, Legal);
249 setOperationAction(ISD::SETCC, MVT::i16, Legal);
250 setOperationAction(ISD::SETCC, MVT::i32, Legal);
251 setOperationAction(ISD::SETCC, MVT::i64, Legal);
252 setOperationAction(ISD::SETCC, MVT::f64, Custom);
254 // Custom lower i128 -> i64 truncates
255 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
257 // SPU has a legal FP -> signed INT instruction
258 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
259 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
260 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
261 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
263 // FDIV on SPU requires custom lowering
264 setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
266 // SPU has [U|S]INT_TO_FP
267 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
268 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
269 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
270 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
271 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
272 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
273 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
274 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
276 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
277 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
278 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
279 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
281 // We cannot sextinreg(i1). Expand to shifts.
282 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
284 // Support label based line numbers.
285 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
286 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
288 // We want to legalize GlobalAddress and ConstantPool nodes into the
289 // appropriate instructions to materialize the address.
290 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
292 MVT VT = (MVT::SimpleValueType)sctype;
294 setOperationAction(ISD::GlobalAddress, VT, Custom);
295 setOperationAction(ISD::ConstantPool, VT, Custom);
296 setOperationAction(ISD::JumpTable, VT, Custom);
299 // RET must be custom lowered, to meet ABI requirements
300 setOperationAction(ISD::RET, MVT::Other, Custom);
302 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
303 setOperationAction(ISD::VASTART , MVT::Other, Custom);
305 // Use the default implementation.
306 setOperationAction(ISD::VAARG , MVT::Other, Expand);
307 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
308 setOperationAction(ISD::VAEND , MVT::Other, Expand);
309 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
310 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
311 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
312 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
314 // Cell SPU has instructions for converting between i64 and fp.
315 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
316 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
318 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
319 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
321 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
322 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
324 // First set operation action for all vector types to expand. Then we
325 // will selectively turn on ones that can be effectively codegen'd.
326 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
327 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
328 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
329 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
330 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
331 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
333 // "Odd size" vector classes that we're willing to support:
334 addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
336 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
337 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
338 MVT VT = (MVT::SimpleValueType)i;
340 // add/sub are legal for all supported vector VT's.
341 setOperationAction(ISD::ADD , VT, Legal);
342 setOperationAction(ISD::SUB , VT, Legal);
343 // mul is marked legal for the supported vector types.
344 // TODO: v2i64 vector multiply
345 setOperationAction(ISD::MUL , VT, Legal);
347 setOperationAction(ISD::AND , VT, Legal);
348 setOperationAction(ISD::OR , VT, Legal);
349 setOperationAction(ISD::XOR , VT, Legal);
350 setOperationAction(ISD::LOAD , VT, Legal);
351 setOperationAction(ISD::SELECT, VT, Legal);
352 setOperationAction(ISD::STORE, VT, Legal);
354 // These operations need to be expanded:
355 setOperationAction(ISD::SDIV, VT, Expand);
356 setOperationAction(ISD::SREM, VT, Expand);
357 setOperationAction(ISD::UDIV, VT, Expand);
358 setOperationAction(ISD::UREM, VT, Expand);
360 // Custom lower build_vector, constant pool spills, insert and
361 // extract vector elements:
362 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
363 setOperationAction(ISD::ConstantPool, VT, Custom);
364 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
365 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
366 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
367 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
370 setOperationAction(ISD::AND, MVT::v16i8, Custom);
371 setOperationAction(ISD::OR, MVT::v16i8, Custom);
372 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
373 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
375 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
377 setShiftAmountType(MVT::i32);
378 setBooleanContents(ZeroOrNegativeOneBooleanContent);
380 setStackPointerRegisterToSaveRestore(SPU::R1);
382 // We have target-specific dag combine patterns for the following nodes:
383 setTargetDAGCombine(ISD::ADD);
384 setTargetDAGCombine(ISD::ZERO_EXTEND);
385 setTargetDAGCombine(ISD::SIGN_EXTEND);
386 setTargetDAGCombine(ISD::ANY_EXTEND);
388 computeRegisterProperties();
390 // Set pre-RA register scheduler default to BURR, which produces slightly
391 // better code than the default (could also be TDRR, but TargetLowering.h
392 // needs a mod to support that model):
393 setSchedulingPreference(SchedulingForRegPressure);
397 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
399 if (node_names.empty()) {
400 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
401 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
402 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
403 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
404 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
405 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
406 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
407 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
408 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
409 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
410 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
411 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
412 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
413 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
414 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
415 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
416 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
417 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
418 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
419 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
420 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
421 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
422 "SPUISD::ROTBYTES_LEFT_BITS";
423 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
424 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
425 node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
426 node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
427 node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
430 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
432 return ((i != node_names.end()) ? i->second : 0);
435 //===----------------------------------------------------------------------===//
436 // Return the Cell SPU's SETCC result type
437 //===----------------------------------------------------------------------===//
439 MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
440 // i16 and i32 are valid SETCC result types
441 return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
444 //===----------------------------------------------------------------------===//
445 // Calling convention code:
446 //===----------------------------------------------------------------------===//
448 #include "SPUGenCallingConv.inc"
450 //===----------------------------------------------------------------------===//
451 // LowerOperation implementation
452 //===----------------------------------------------------------------------===//
454 /// Custom lower loads for CellSPU
456 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
457 within a 16-byte block, we have to rotate to extract the requested element.
459 For extending loads, we also want to ensure that the following sequence is
460 emitted, e.g. for MVT::f32 extending load to MVT::f64:
464 %2 v16i8,ch = rotate %1
465 %3 v4f32,ch = bitconvert %2
466 %4 f32 = vec2prefslot %3
467 %5 f64 = fp_extend %4
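(Illustrative note: the rotation amount is derived from the low four bits of
the address and the type's preferred-slot byte, as in rotamt = (offset & 0xf)
- prefslot_byte below; e.g. an i32 at offset 8 within its 16-byte line is
rotated left by 8 bytes so it lands in bytes 0-3, the i32 preferred slot.)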
471 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
472 LoadSDNode *LN = cast<LoadSDNode>(Op);
473 SDValue the_chain = LN->getChain();
474 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
475 MVT InVT = LN->getMemoryVT();
476 MVT OutVT = Op.getValueType();
477 ISD::LoadExtType ExtType = LN->getExtensionType();
478 unsigned alignment = LN->getAlignment();
479 const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
481 switch (LN->getAddressingMode()) {
482 case ISD::UNINDEXED: {
484 SDValue basePtr = LN->getBasePtr();
487 if (alignment == 16) {
490 // Special cases for a known aligned load to simplify the base pointer
491 // and the rotation amount:
492 if (basePtr.getOpcode() == ISD::ADD
493 && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
494 // Known offset into basePtr
495 int64_t offset = CN->getSExtValue();
496 int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
501 rotate = DAG.getConstant(rotamt, MVT::i16);
503 // Simplify the base pointer for this case:
504 basePtr = basePtr.getOperand(0);
505 if ((offset & ~0xf) > 0) {
506 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
508 DAG.getConstant((offset & ~0xf), PtrVT));
510 } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
511 || (basePtr.getOpcode() == SPUISD::IndirectAddr
512 && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
513 && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
514 // Plain aligned a-form address: rotate into preferred slot
515 // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
516 int64_t rotamt = -vtm->prefslot_byte;
519 rotate = DAG.getConstant(rotamt, MVT::i16);
521 // Offset the rotate amount by the basePtr and the preferred slot
523 int64_t rotamt = -vtm->prefslot_byte;
526 rotate = DAG.getNode(ISD::ADD, PtrVT,
528 DAG.getConstant(rotamt, PtrVT));
531 // Unaligned load: must be more pessimistic about addressing modes:
532 if (basePtr.getOpcode() == ISD::ADD) {
533 MachineFunction &MF = DAG.getMachineFunction();
534 MachineRegisterInfo &RegInfo = MF.getRegInfo();
535 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
538 SDValue Op0 = basePtr.getOperand(0);
539 SDValue Op1 = basePtr.getOperand(1);
541 if (isa<ConstantSDNode>(Op1)) {
542 // Convert the (add <ptr>, <const>) to an indirect address contained
543 // in a register. Note that this is done because we need to avoid
544 // creating a 0(reg) d-form address due to the SPU's block loads.
545 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
546 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
547 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
549 // Convert the (add <arg1>, <arg2>) to an indirect address, which
550 // will likely be lowered as a reg(reg) x-form address.
551 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
554 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
556 DAG.getConstant(0, PtrVT));
559 // Offset the rotate amount by the basePtr and the preferred slot
561 rotate = DAG.getNode(ISD::ADD, PtrVT,
563 DAG.getConstant(-vtm->prefslot_byte, PtrVT));
566 // Re-emit as a v16i8 vector load
567 result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
568 LN->getSrcValue(), LN->getSrcValueOffset(),
569 LN->isVolatile(), 16);
572 the_chain = result.getValue(1);
574 // Rotate into the preferred slot:
575 result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
576 result.getValue(0), rotate);
578 // Convert the loaded v16i8 vector to the appropriate vector type
579 // specified by the operand:
580 MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
581 result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
582 DAG.getNode(ISD::BIT_CONVERT, vecVT, result));
584 // Handle extending loads by extending the scalar result:
585 if (ExtType == ISD::SEXTLOAD) {
586 result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
587 } else if (ExtType == ISD::ZEXTLOAD) {
588 result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
589 } else if (ExtType == ISD::EXTLOAD) {
590 unsigned NewOpc = ISD::ANY_EXTEND;
592 if (OutVT.isFloatingPoint())
593 NewOpc = ISD::FP_EXTEND;
595 result = DAG.getNode(NewOpc, OutVT, result);
598 SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
599 SDValue retops[2] = {
604 result = DAG.getNode(SPUISD::LDRESULT, retvts,
605 retops, sizeof(retops) / sizeof(retops[0]));
612 case ISD::LAST_INDEXED_MODE:
613 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
615 cerr << (unsigned) LN->getAddressingMode() << "\n";
623 /// Custom lower stores for CellSPU
625 All CellSPU stores are aligned to 16-byte boundaries, so for elements
626 within a 16-byte block, we have to generate a shuffle to insert the
627 requested element into its place, then store the resulting block.
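In outline, matching the code below: the aligned 16-byte line containing the
target address is loaded, SHUFFLE_MASK builds an insertion control word from
the byte offset, SHUFB merges the scalar (splatted via SCALAR_TO_VECTOR) into
the loaded line, and the merged quadword is stored back.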
630 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
631 StoreSDNode *SN = cast<StoreSDNode>(Op);
632 SDValue Value = SN->getValue();
633 MVT VT = Value.getValueType();
634 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
635 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
636 unsigned alignment = SN->getAlignment();
638 switch (SN->getAddressingMode()) {
639 case ISD::UNINDEXED: {
640 // The vector type we really want to load from the 16-byte chunk.
641 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
642 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
644 SDValue alignLoadVec;
645 SDValue basePtr = SN->getBasePtr();
646 SDValue the_chain = SN->getChain();
647 SDValue insertEltOffs;
649 if (alignment == 16) {
652 // Special cases for a known aligned load to simplify the base pointer
653 // and insertion byte:
654 if (basePtr.getOpcode() == ISD::ADD
655 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
656 // Known offset into basePtr
657 int64_t offset = CN->getSExtValue();
659 // Simplify the base pointer for this case:
660 basePtr = basePtr.getOperand(0);
661 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
663 DAG.getConstant((offset & 0xf), PtrVT));
665 if ((offset & ~0xf) > 0) {
666 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
668 DAG.getConstant((offset & ~0xf), PtrVT));
671 // Otherwise, assume it's at byte 0 of basePtr
672 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
674 DAG.getConstant(0, PtrVT));
677 // Unaligned load: must be more pessimistic about addressing modes:
678 if (basePtr.getOpcode() == ISD::ADD) {
679 MachineFunction &MF = DAG.getMachineFunction();
680 MachineRegisterInfo &RegInfo = MF.getRegInfo();
681 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
684 SDValue Op0 = basePtr.getOperand(0);
685 SDValue Op1 = basePtr.getOperand(1);
687 if (isa<ConstantSDNode>(Op1)) {
688 // Convert the (add <ptr>, <const>) to an indirect address contained
689 // in a register. Note that this is done because we need to avoid
690 // creating a 0(reg) d-form address due to the SPU's block loads.
691 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
692 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
693 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
695 // Convert the (add <arg1>, <arg2>) to an indirect address, which
696 // will likely be lowered as a reg(reg) x-form address.
697 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
700 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
702 DAG.getConstant(0, PtrVT));
705 // Insertion point is solely determined by basePtr's contents
706 insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
708 DAG.getConstant(0, PtrVT));
711 // Re-emit as a v16i8 vector load
712 alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
713 SN->getSrcValue(), SN->getSrcValueOffset(),
714 SN->isVolatile(), 16);
717 the_chain = alignLoadVec.getValue(1);
719 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
720 SDValue theValue = SN->getValue();
724 && (theValue.getOpcode() == ISD::AssertZext
725 || theValue.getOpcode() == ISD::AssertSext)) {
726 // Drill down and get the value for zero- and sign-extended
728 theValue = theValue.getOperand(0);
731 // If the base pointer is already a D-form address, then just create
732 // a new D-form address with a slot offset and the original base pointer.
733 // Otherwise generate a D-form address with the slot offset relative
734 // to the stack pointer, which is always aligned.
736 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
737 cerr << "CellSPU LowerSTORE: basePtr = ";
738 basePtr.getNode()->dump(&DAG);
743 SDValue insertEltOp =
744 DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
745 SDValue vectorizeOp =
746 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
748 result = DAG.getNode(SPUISD::SHUFB, vecVT,
749 vectorizeOp, alignLoadVec,
750 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
752 result = DAG.getStore(the_chain, result, basePtr,
753 LN->getSrcValue(), LN->getSrcValueOffset(),
754 LN->isVolatile(), LN->getAlignment());
756 #if 0 && !defined(NDEBUG)
757 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
758 const SDValue &currentRoot = DAG.getRoot();
761 cerr << "------- CellSPU:LowerStore result:\n";
764 DAG.setRoot(currentRoot);
775 case ISD::LAST_INDEXED_MODE:
776 cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
778 cerr << (unsigned) SN->getAddressingMode() << "\n";
786 //! Generate the address of a constant pool entry.
788 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
789 MVT PtrVT = Op.getValueType();
790 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
791 Constant *C = CP->getConstVal();
792 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
793 SDValue Zero = DAG.getConstant(0, PtrVT);
794 const TargetMachine &TM = DAG.getTarget();
796 if (TM.getRelocationModel() == Reloc::Static) {
797 if (!ST->usingLargeMem()) {
798 // Just return the SDValue with the constant pool address in it.
799 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
801 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
802 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
803 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
808 "LowerConstantPool: Relocation model other than static"
813 //! Alternate entry point for generating the address of a constant pool entry
815 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
816 return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
820 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
821 MVT PtrVT = Op.getValueType();
822 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
823 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
824 SDValue Zero = DAG.getConstant(0, PtrVT);
825 const TargetMachine &TM = DAG.getTarget();
827 if (TM.getRelocationModel() == Reloc::Static) {
828 if (!ST->usingLargeMem()) {
829 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
831 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
832 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
833 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
838 "LowerJumpTable: Relocation model other than static not supported.");
843 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
844 MVT PtrVT = Op.getValueType();
845 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
846 GlobalValue *GV = GSDN->getGlobal();
847 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
848 const TargetMachine &TM = DAG.getTarget();
849 SDValue Zero = DAG.getConstant(0, PtrVT);
851 if (TM.getRelocationModel() == Reloc::Static) {
852 if (!ST->usingLargeMem()) {
853 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
855 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
856 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
857 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
860 cerr << "LowerGlobalAddress: Relocation model other than static not "
869 //! Custom lower i64 integer constants
871 This code inserts all of the necessary juggling that needs to occur to load
872 a 64-bit constant into a register.
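Concretely (see the code below), the constant is splatted into a v2i64
BUILD_VECTOR and the preferred slot is then read back out with VEC2PREFSLOT,
so the value materializes in a register without a constant-pool load.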
875 LowerConstant(SDValue Op, SelectionDAG &DAG) {
876 MVT VT = Op.getValueType();
878 if (VT == MVT::i64) {
879 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
880 SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
881 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
882 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
884 cerr << "LowerConstant: unhandled constant type "
894 //! Custom lower double precision floating point constants
896 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
897 MVT VT = Op.getValueType();
899 if (VT == MVT::f64) {
900 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
903 "LowerConstantFP: Node is not ConstantFPSDNode");
905 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
906 SDValue T = DAG.getConstant(dbits, MVT::i64);
907 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
908 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
909 DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
916 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
918 MachineFunction &MF = DAG.getMachineFunction();
919 MachineFrameInfo *MFI = MF.getFrameInfo();
920 MachineRegisterInfo &RegInfo = MF.getRegInfo();
921 SmallVector<SDValue, 48> ArgValues;
922 SDValue Root = Op.getOperand(0);
923 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
925 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
926 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
928 unsigned ArgOffset = SPUFrameInfo::minStackSize();
929 unsigned ArgRegIdx = 0;
930 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
932 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
934 // Add DAG nodes to load the arguments or copy them out of registers.
935 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
936 ArgNo != e; ++ArgNo) {
937 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
938 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
941 if (ArgRegIdx < NumArgRegs) {
942 const TargetRegisterClass *ArgRegClass;
944 switch (ObjectVT.getSimpleVT()) {
946 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
947 << ObjectVT.getMVTString()
952 ArgRegClass = &SPU::R8CRegClass;
955 ArgRegClass = &SPU::R16CRegClass;
958 ArgRegClass = &SPU::R32CRegClass;
961 ArgRegClass = &SPU::R64CRegClass;
964 ArgRegClass = &SPU::GPRCRegClass;
967 ArgRegClass = &SPU::R32FPRegClass;
970 ArgRegClass = &SPU::R64FPRegClass;
978 ArgRegClass = &SPU::VECREGRegClass;
982 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
983 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
984 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
987 // We need to load the argument to a virtual register if we determined
988 // above that we ran out of physical registers of the appropriate type
989 // or we're forced to do vararg
990 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
991 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
992 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
993 ArgOffset += StackSlotSize;
996 ArgValues.push_back(ArgVal);
998 Root = ArgVal.getOperand(0);
1003 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1004 // We will spill (79-3)+1 registers to the stack
1005 SmallVector<SDValue, 79-3+1> MemOps;
1007 // Create the frame slot
1009 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1010 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1011 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1012 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1013 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1014 Root = Store.getOperand(0);
1015 MemOps.push_back(Store);
1017 // Increment address by stack slot size for the next stored argument
1018 ArgOffset += StackSlotSize;
1020 if (!MemOps.empty())
1021 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1024 ArgValues.push_back(Root);
1026 // Return the new list of results.
1027 return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
1028 &ArgValues[0], ArgValues.size());
1031 /// isLSAAddress - Return the immediate to use if the specified
1032 /// value is representable as a LSA address.
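/// Illustrative example: a word-aligned address such as 0x1FFFC passes both
/// checks below (low two bits clear, value sign-extends from 18 bits) and
/// yields the immediate 0x7FFF, i.e. the address shifted right by two.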
1033 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1034 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1037 int Addr = C->getZExtValue();
1038 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1039 (Addr << 14 >> 14) != Addr)
1040 return 0; // Top 14 bits have to be sext of immediate.
1042 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1046 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1047 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1048 SDValue Chain = TheCall->getChain();
1049 SDValue Callee = TheCall->getCallee();
1050 unsigned NumOps = TheCall->getNumArgs();
1051 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1052 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1053 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1055 // Handy pointer type
1056 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1058 // Accumulate how many bytes are to be pushed on the stack, including the
1059 // linkage area, and parameter passing area. According to the SPU ABI,
1060 // we minimally need space for [LR] and [SP]
1061 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1063 // Set up a copy of the stack pointer for use loading and storing any
1064 // arguments that may not fit in the registers available for argument passing.
1066 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1068 // Figure out which arguments are going to go in registers, and which in memory.
1070 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1071 unsigned ArgRegIdx = 0;
1073 // Keep track of registers passing arguments
1074 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1075 // And the arguments passed on the stack
1076 SmallVector<SDValue, 8> MemOpChains;
1078 for (unsigned i = 0; i != NumOps; ++i) {
1079 SDValue Arg = TheCall->getArg(i);
1081 // PtrOff will be used to store the current argument to the stack if a
1082 // register cannot be found for it.
1083 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1084 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1086 switch (Arg.getValueType().getSimpleVT()) {
1087 default: assert(0 && "Unexpected ValueType for argument!");
1093 if (ArgRegIdx != NumArgRegs) {
1094 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1096 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1097 ArgOffset += StackSlotSize;
1102 if (ArgRegIdx != NumArgRegs) {
1103 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1105 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1106 ArgOffset += StackSlotSize;
1115 if (ArgRegIdx != NumArgRegs) {
1116 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1118 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1119 ArgOffset += StackSlotSize;
1125 // Update number of stack bytes actually used, insert a call sequence start
1126 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1127 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1130 if (!MemOpChains.empty()) {
1131 // Adjust the stack pointer for the stack arguments.
1132 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1133 &MemOpChains[0], MemOpChains.size());
1136 // Build a sequence of copy-to-reg nodes chained together with token chain
1137 // and flag operands which copy the outgoing args into the appropriate regs.
1139 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1140 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1142 InFlag = Chain.getValue(1);
1145 SmallVector<SDValue, 8> Ops;
1146 unsigned CallOpc = SPUISD::CALL;
1148 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1149 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1150 // node so that legalize doesn't hack it.
1151 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1152 GlobalValue *GV = G->getGlobal();
1153 MVT CalleeVT = Callee.getValueType();
1154 SDValue Zero = DAG.getConstant(0, PtrVT);
1155 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1157 if (!ST->usingLargeMem()) {
1158 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1159 // style calls, otherwise, external symbols are BRASL calls. This assumes
1160 // that declared/defined symbols are in the same compilation unit and can
1161 // be reached through PC-relative jumps.
1164 // This may be an unsafe assumption for JIT and really large compilation units.
1166 if (GV->isDeclaration()) {
1167 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1169 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1172 // "Large memory" mode: Turn all calls into indirect calls with an X-form address.
1174 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1176 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1177 MVT CalleeVT = Callee.getValueType();
1178 SDValue Zero = DAG.getConstant(0, PtrVT);
1179 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1180 Callee.getValueType());
1182 if (!ST->usingLargeMem()) {
1183 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
1185 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
1187 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1188 // If this is an absolute destination address that appears to be a legal
1189 // local store address, use the munged value.
1190 Callee = SDValue(Dest, 0);
1193 Ops.push_back(Chain);
1194 Ops.push_back(Callee);
1196 // Add argument registers to the end of the list so that they are known live into the call.
1198 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1199 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1200 RegsToPass[i].second.getValueType()));
1202 if (InFlag.getNode())
1203 Ops.push_back(InFlag);
1204 // Returns a chain and a flag for retval copy to use.
1205 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1206 &Ops[0], Ops.size());
1207 InFlag = Chain.getValue(1);
1209 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1210 DAG.getIntPtrConstant(0, true), InFlag);
1211 if (TheCall->getValueType(0) != MVT::Other)
1212 InFlag = Chain.getValue(1);
1214 SDValue ResultVals[3];
1215 unsigned NumResults = 0;
1217 // If the call has results, copy the values out of the ret val registers.
1218 switch (TheCall->getValueType(0).getSimpleVT()) {
1219 default: assert(0 && "Unexpected ret value!");
1220 case MVT::Other: break;
1222 if (TheCall->getValueType(1) == MVT::i32) {
1223 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1224 ResultVals[0] = Chain.getValue(0);
1225 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1226 Chain.getValue(2)).getValue(1);
1227 ResultVals[1] = Chain.getValue(0);
1230 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1231 ResultVals[0] = Chain.getValue(0);
1236 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1237 ResultVals[0] = Chain.getValue(0);
1241 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i128, InFlag).getValue(1);
1242 ResultVals[0] = Chain.getValue(0);
1247 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1248 InFlag).getValue(1);
1249 ResultVals[0] = Chain.getValue(0);
1258 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1259 InFlag).getValue(1);
1260 ResultVals[0] = Chain.getValue(0);
1265 // If the function returns void, just return the chain.
1266 if (NumResults == 0)
1269 // Otherwise, merge everything together with a MERGE_VALUES node.
1270 ResultVals[NumResults++] = Chain;
1271 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1272 return Res.getValue(Op.getResNo());
1276 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1277 SmallVector<CCValAssign, 16> RVLocs;
1278 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1279 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1280 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1281 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1283 // If this is the first return lowered for this function, add the regs to the
1284 // liveout set for the function.
1285 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1286 for (unsigned i = 0; i != RVLocs.size(); ++i)
1287 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1290 SDValue Chain = Op.getOperand(0);
1293 // Copy the result values into the output registers.
1294 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1295 CCValAssign &VA = RVLocs[i];
1296 assert(VA.isRegLoc() && "Can only return in registers!");
1297 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1298 Flag = Chain.getValue(1);
1302 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1304 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1308 //===----------------------------------------------------------------------===//
1309 // Vector related lowering:
1310 //===----------------------------------------------------------------------===//
1312 static ConstantSDNode *
1313 getVecImm(SDNode *N) {
1314 SDValue OpVal(0, 0);
1316 // Check to see if this buildvec has a single non-undef value in its elements.
1317 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1318 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1319 if (OpVal.getNode() == 0)
1320 OpVal = N->getOperand(i);
1321 else if (OpVal != N->getOperand(i))
1325 if (OpVal.getNode() != 0) {
1326 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1331 return 0; // All UNDEF: use an implicit def; not a Constant node
1334 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1335 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1337 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1339 if (ConstantSDNode *CN = getVecImm(N)) {
1340 uint64_t Value = CN->getZExtValue();
1341 if (ValueType == MVT::i64) {
1342 uint64_t UValue = CN->getZExtValue();
1343 uint32_t upper = uint32_t(UValue >> 32);
1344 uint32_t lower = uint32_t(UValue);
1347 Value = Value >> 32;
1349 if (Value <= 0x3ffff)
1350 return DAG.getTargetConstant(Value, ValueType);
1356 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1357 /// and the value fits into a signed 16-bit constant, and if so, return the
1359 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1361 if (ConstantSDNode *CN = getVecImm(N)) {
1362 int64_t Value = CN->getSExtValue();
1363 if (ValueType == MVT::i64) {
1364 uint64_t UValue = CN->getZExtValue();
1365 uint32_t upper = uint32_t(UValue >> 32);
1366 uint32_t lower = uint32_t(UValue);
1369 Value = Value >> 32;
1371 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1372 return DAG.getTargetConstant(Value, ValueType);
1379 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1380 /// and the value fits into a signed 10-bit constant, and if so, return the
1382 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1384 if (ConstantSDNode *CN = getVecImm(N)) {
1385 int64_t Value = CN->getSExtValue();
1386 if (ValueType == MVT::i64) {
1387 uint64_t UValue = CN->getZExtValue();
1388 uint32_t upper = uint32_t(UValue >> 32);
1389 uint32_t lower = uint32_t(UValue);
1392 Value = Value >> 32;
1394 if (isS10Constant(Value))
1395 return DAG.getTargetConstant(Value, ValueType);
1401 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1402 /// and the value fits into a signed 8-bit constant, and if so, return the
1405 /// @note: The incoming vector is v16i8 because that's the only way we can load
1406 /// constant vectors. Thus, we test to see if the upper and lower bytes are the same.
1408 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1410 if (ConstantSDNode *CN = getVecImm(N)) {
1411 int Value = (int) CN->getZExtValue();
1412 if (ValueType == MVT::i16
1413 && Value <= 0xffff /* truncated from uint64_t */
1414 && ((short) Value >> 8) == ((short) Value & 0xff))
1415 return DAG.getTargetConstant(Value & 0xff, ValueType);
1416 else if (ValueType == MVT::i8
1417 && (Value & 0xff) == Value)
1418 return DAG.getTargetConstant(Value, ValueType);
1424 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1425 /// and the value fits into a signed 16-bit constant, and if so, return the
1427 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1429 if (ConstantSDNode *CN = getVecImm(N)) {
1430 uint64_t Value = CN->getZExtValue();
1431 if ((ValueType == MVT::i32
1432 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1433 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1434 return DAG.getTargetConstant(Value >> 16, ValueType);
1440 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1441 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1442 if (ConstantSDNode *CN = getVecImm(N)) {
1443 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1449 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1450 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1451 if (ConstantSDNode *CN = getVecImm(N)) {
1452 return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
1458 // If this is a vector of constants or undefs, get the bits. A bit in
1459 // UndefBits is set if the corresponding element of the vector is an
1460 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1461 // zero. Return true if this is not an array of constants, false if it is.
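// Worked example (illustrative): for a v4i32 build_vector <1, undef, 1, 1>,
// elements pack into the two 64-bit words high half first, giving
// VectorBits = { 0x0000000100000000, 0x0000000100000001 } and
// UndefBits  = { 0x00000000ffffffff, 0x0000000000000000 }.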
1463 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1464 uint64_t UndefBits[2]) {
1465 // Start with zero'd results.
1466 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1468 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1469 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1470 SDValue OpVal = BV->getOperand(i);
1472 unsigned PartNo = i >= e/2; // In the upper 64 bits?
1473 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1475 uint64_t EltBits = 0;
1476 if (OpVal.getOpcode() == ISD::UNDEF) {
1477 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1478 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1480 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1481 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1482 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1483 const APFloat &apf = CN->getValueAPF();
1484 EltBits = (CN->getValueType(0) == MVT::f32
1485 ? FloatToBits(apf.convertToFloat())
1486 : DoubleToBits(apf.convertToDouble()));
1488 // Nonconstant element.
1492 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1495 //printf("%llx %llx %llx %llx\n",
1496 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1500 /// If this is a splat (repetition) of a value across the whole vector, return
1501 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1502 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1503 /// SplatSize = 1 byte.
1504 static bool isConstantSplat(const uint64_t Bits128[2],
1505 const uint64_t Undef128[2],
1507 uint64_t &SplatBits, uint64_t &SplatUndef,
1509 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1510 // the same as the lower 64-bits, ignoring undefs.
1511 uint64_t Bits64 = Bits128[0] | Bits128[1];
1512 uint64_t Undef64 = Undef128[0] & Undef128[1];
1513 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1514 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1515 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1516 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1518 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1519 if (MinSplatBits < 64) {
1521 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1523 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1524 if (MinSplatBits < 32) {
1526 // If the top 16-bits are different than the lower 16-bits, ignoring
1527 // undefs, we have an i32 splat.
1528 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1529 if (MinSplatBits < 16) {
1530 // If the top 8-bits are different than the lower 8-bits, ignoring
1531 // undefs, we have an i16 splat.
1532 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1533 == ((Bits16 >> 8) & ~Undef16)) {
1534 // Otherwise, we have an 8-bit splat.
1535 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1536 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1542 SplatUndef = Undef16;
1549 SplatUndef = Undef32;
1555 SplatBits = Bits128[0];
1556 SplatUndef = Undef128[0];
1562 return false; // Can't be a splat if two pieces don't match.
1565 //! Lower a BUILD_VECTOR instruction creatively:
1567 SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1568 MVT VT = Op.getValueType();
1569 // If this is a vector of constants or undefs, get the bits. A bit in
1570 // UndefBits is set if the corresponding element of the vector is an
1571 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1573 uint64_t VectorBits[2];
1574 uint64_t UndefBits[2];
1575 uint64_t SplatBits, SplatUndef;
1577 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1578 || !isConstantSplat(VectorBits, UndefBits,
1579 VT.getVectorElementType().getSizeInBits(),
1580 SplatBits, SplatUndef, SplatSize))
1581 return SDValue(); // Not a constant vector, not a splat.
1583 switch (VT.getSimpleVT()) {
1585 cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1586 << VT.getMVTString()
1591 uint32_t Value32 = SplatBits;
1592 assert(SplatSize == 4
1593 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1594 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1595 SDValue T = DAG.getConstant(Value32, MVT::i32);
1596 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1597 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1601 uint64_t f64val = SplatBits;
1602 assert(SplatSize == 8
1603 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1604 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1605 SDValue T = DAG.getConstant(f64val, MVT::i64);
1606 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1607 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1611 // 8-bit constants have to be expanded to 16-bits
1612 unsigned short Value16 = SplatBits | (SplatBits << 8);
1614 for (int i = 0; i < 8; ++i)
1615 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1616 return DAG.getNode(ISD::BIT_CONVERT, VT,
1617 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1620 unsigned short Value16;
1622 Value16 = (unsigned short) (SplatBits & 0xffff);
1624 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1625 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1627 for (int i = 0; i < 8; ++i) Ops[i] = T;
1628 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1631 unsigned int Value = SplatBits;
1632 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1633 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1636 unsigned int Value = SplatBits;
1637 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1638 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T);
1641 uint64_t val = SplatBits;
1642 uint32_t upper = uint32_t(val >> 32);
1643 uint32_t lower = uint32_t(val);
1645 if (upper == lower) {
1646 // Magic constant that can be matched by IL, ILA, et al.
1647 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1648 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1652 SmallVector<SDValue, 16> ShufBytes;
1654 bool upper_special, lower_special;
1656 // NOTE: This code creates common-case shuffle masks that can be easily
1657 // detected as common expressions. It is not attempting to create highly
1658 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1660 // Detect if the upper or lower half is a special shuffle mask pattern:
1661 upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1662 lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1664 // Create lower vector if not a special pattern
1665 if (!lower_special) {
1666 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1667 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1668 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1669 LO32C, LO32C, LO32C, LO32C));
1672 // Create upper vector if not a special pattern
1673 if (!upper_special) {
1674 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1675 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1676 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1677 HI32C, HI32C, HI32C, HI32C));
1680 // If either upper or lower are special, then the two input operands are
1681 // the same (basically, one of them is a "don't care")
1686 if (lower_special && upper_special) {
1687 // Unhappy situation... both upper and lower are special, so punt with
1688 // a target constant:
1689 SDValue Zero = DAG.getConstant(0, MVT::i32);
1690 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1694 for (int i = 0; i < 4; ++i) {
1696 for (int j = 0; j < 4; ++j) {
1698 bool process_upper, process_lower;
1700 process_upper = (upper_special && (i & 1) == 0);
1701 process_lower = (lower_special && (i & 1) == 1);
1703 if (process_upper || process_lower) {
1704 if ((process_upper && upper == 0)
1705 || (process_lower && lower == 0))
1707 else if ((process_upper && upper == 0xffffffff)
1708 || (process_lower && lower == 0xffffffff))
1710 else if ((process_upper && upper == 0x80000000)
1711 || (process_lower && lower == 0x80000000))
1712 val |= (j == 0 ? 0xe0 : 0x80);
1714 val |= i * 4 + j + ((i & 1) * 16);
1717 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1720 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1721 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1722 &ShufBytes[0], ShufBytes.size()));
1730 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1731 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1732 /// permutation vector, V3, is monotonically increasing with one "exception"
1733 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1734 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1735 /// In either case, the net result is going to eventually invoke SHUFB to
1736 /// permute/shuffle the bytes from V1 and V2.
1738 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1739 /// generate the control word for byte/halfword/word insertion. This takes care of a single
1740 /// element move from V2 into V1.
1742 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1743 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1744 SDValue V1 = Op.getOperand(0);
1745 SDValue V2 = Op.getOperand(1);
1746 SDValue PermMask = Op.getOperand(2);
1748 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1750 // If we have a single element being moved from V1 to V2, this can be handled
1751 // using the C*[DX] compute mask instructions, but the vector elements have
1752 // to be monotonically increasing with one exception element.
1753 MVT VecVT = V1.getValueType();
1754 MVT EltVT = VecVT.getVectorElementType();
1755 unsigned EltsFromV2 = 0;
1757 unsigned V2EltIdx0 = 0;
1758 unsigned CurrElt = 0;
1759 unsigned MaxElts = VecVT.getVectorNumElements();
1760 unsigned PrevElt = 0;
1762 bool monotonic = true;
1765 if (EltVT == MVT::i8) {
V2EltIdx0 = 16;
1767 } else if (EltVT == MVT::i16) {
V2EltIdx0 = 8;
1769 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
V2EltIdx0 = 4;
1771 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
V2EltIdx0 = 2;
} else
1774 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1776 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1777 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1778 unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
1781 if (SrcElt >= V2EltIdx0) {
1782 if (1 >= (++EltsFromV2)) {
1783 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1785 } else if (CurrElt != SrcElt) {
1793 if (PrevElt > 0 && SrcElt < MaxElts) {
1794 if ((PrevElt == SrcElt - 1)
1795 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1802 } else if (PrevElt == 0) {
1803 // First time through, need to keep track of previous element
1806 // This isn't a rotation; it takes elements from V2
1813 if (EltsFromV2 == 1 && monotonic) {
1814 // Compute mask and shuffle
1815 MachineFunction &MF = DAG.getMachineFunction();
1816 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1817 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1818 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1819 // Initialize temporary register to 0
1820 SDValue InitTempReg =
1821 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1822 // Copy register's contents as index in SHUFFLE_MASK:
1823 SDValue ShufMaskOp =
1824 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1825 DAG.getTargetConstant(V2Elt, MVT::i32),
1826 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1827 // Use shuffle mask in SHUFB synthetic instruction:
1828 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1829 } else if (rotate) {
1830 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1832 return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
1833 V1, DAG.getConstant(rotamt, MVT::i16));
1835 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1837 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1839 SmallVector<SDValue, 16> ResultMask;
1840 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1842 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1845 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1847 for (unsigned j = 0; j < BytesPerElement; ++j) {
1848 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1853 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1854 &ResultMask[0], ResultMask.size());
1855 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1859 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1860 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1862 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1863 // For a constant, build the appropriate constant vector, which will
1864 // eventually simplify to a vector register load.
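// For instance, (scalar_to_vector (i32 7)) for v4i32 becomes
// (build_vector 7, 7, 7, 7), which the normal BUILD_VECTOR lowering can then
// emit as a single splatted constant.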
1866 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1867 SmallVector<SDValue, 16> ConstVecValues;
1871 // Create a constant vector:
1872 switch (Op.getValueType().getSimpleVT()) {
1873 default: assert(0 && "Unexpected constant value type in "
1874 "LowerSCALAR_TO_VECTOR");
1875 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1876 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1877 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1878 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1879 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1880 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1883 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1884 for (size_t j = 0; j < n_copies; ++j)
1885 ConstVecValues.push_back(CValue);
1887 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1888 &ConstVecValues[0], ConstVecValues.size());
1890 // Otherwise, copy the value from one register to another:
1891 switch (Op0.getValueType().getSimpleVT()) {
1892 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1899 return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
1906 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1907 MVT VT = Op.getValueType();
1908 SDValue N = Op.getOperand(0);
1909 SDValue Elt = Op.getOperand(1);
1912 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1913 // Constant argument:
1914 int EltNo = (int) C->getZExtValue();
1917 if (VT == MVT::i8 && EltNo >= 16)
1918 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1919 else if (VT == MVT::i16 && EltNo >= 8)
1920 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1921 else if (VT == MVT::i32 && EltNo >= 4)
1922 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
1923 else if (VT == MVT::i64 && EltNo >= 2)
1924 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
1926 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1927 // i32 and i64: Element 0 is the preferred slot
1928 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
1931 // Need to generate shuffle mask and extract:
1932 int prefslot_begin = -1, prefslot_end = -1;
1933 int elt_byte = EltNo * VT.getSizeInBits() / 8;
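// The preferred slot is where the SPU keeps a scalar within a 128-bit
// register: byte 3 for i8, bytes 2-3 for i16, bytes 0-3 for i32 and bytes
// 0-7 for i64. The switch below records that slot so the shuffle can move
// the requested element into it.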
1935 switch (VT.getSimpleVT()) {
1937 assert(false && "Invalid value type!");
1939 prefslot_begin = prefslot_end = 3;
1943 prefslot_begin = 2; prefslot_end = 3;
1948 prefslot_begin = 0; prefslot_end = 3;
1953 prefslot_begin = 0; prefslot_end = 7;
1958 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1959 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1961 unsigned int ShufBytes[16];
1962 for (int i = 0; i < 16; ++i) {
1963 // zero-fill the upper part of the preferred slot; don't care about the rest:
1965 unsigned int mask_val;
1966 if (i <= prefslot_end) {
mask_val =
1968 ((i < prefslot_begin)
? 0x80
1970 : elt_byte + (i - prefslot_begin));
1972 ShufBytes[i] = mask_val;
} else
1974 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1977 SDValue ShufMask[4];
1978 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1979 unsigned bidx = i * 4;
1980 unsigned int bits = ((ShufBytes[bidx] << 24) |
1981 (ShufBytes[bidx+1] << 16) |
1982 (ShufBytes[bidx+2] << 8) |
1984 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1987 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufMask[0],
1989 sizeof(ShufMask) / sizeof(ShufMask[0]));
1991 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
1992 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
1993 N, N, ShufMaskVec));
1995 // Variable index: Rotate the requested element into slot 0, then replicate
1996 // slot 0 across the vector
1997 MVT VecVT = N.getValueType();
1998 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
1999 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2003 // Make life easier by making sure the index is zero-extended to i32
2004 if (Elt.getValueType() != MVT::i32)
2005 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2007 // Scale the index to a bit/byte shift quantity
APInt scaleFactor =
2009 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2010 unsigned scaleShift = scaleFactor.logBase2();
2013 if (scaleShift > 0) {
2014 // Scale the shift factor:
2015 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2016 DAG.getConstant(scaleShift, MVT::i32));
2019 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2021 // Replicate the bytes starting at byte 0 across the entire vector (for
2022 // consistency with the notion of a unified register set)
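// Each 32-bit constant below encodes four SHUFB byte selectors; e.g.
// 0x00010203 selects bytes 0..3 of the shifted vector, so the element now
// sitting at byte 0 is broadcast into every element position.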
2025 switch (VT.getSimpleVT()) {
2027 cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
2031 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2032 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2037 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2038 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2044 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2045 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2051 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2052 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2053 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2054 loFactor, hiFactor);
2059 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2060 DAG.getNode(SPUISD::SHUFB, VecVT,
2061 vecShift, vecShift, replicate));
2067 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2068 SDValue VecOp = Op.getOperand(0);
2069 SDValue ValOp = Op.getOperand(1);
2070 SDValue IdxOp = Op.getOperand(2);
2071 MVT VT = Op.getValueType();
2073 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2074 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2076 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2077 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2078 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2079 DAG.getRegister(SPU::R1, PtrVT),
2080 DAG.getConstant(CN->getSExtValue(), PtrVT));
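// SHUFFLE_MASK of an address is selected as one of the C*D instructions: the
// low bits of the address pick the insertion position, and the resulting
// control word makes SHUFB insert the scalar's preferred slot into VecOp at
// that byte offset.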
2081 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
2084 DAG.getNode(SPUISD::SHUFB, VT,
2085 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
VecOp,
2087 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
2092 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2093 const TargetLowering &TLI)
2095 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2096 MVT ShiftVT = TLI.getShiftAmountTy();
2098 assert(Op.getValueType() == MVT::i8);
2101 assert(0 && "Unhandled i8 math operator");
2105 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2107 SDValue N1 = Op.getOperand(1);
2108 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2109 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2110 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2111 DAG.getNode(Opc, MVT::i16, N0, N1));
2116 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2118 SDValue N1 = Op.getOperand(1);
2119 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2120 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2121 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2122 DAG.getNode(Opc, MVT::i16, N0, N1));
2126 SDValue N1 = Op.getOperand(1);
2128 N0 = (N0.getOpcode() != ISD::Constant
2129 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2130 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2132 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2135 N1 = (N1.getOpcode() != ISD::Constant
2136 ? DAG.getNode(N1Opc, ShiftVT, N1)
2137 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2138 TLI.getShiftAmountTy()));
SDValue ExpandArg =
2140 DAG.getNode(ISD::OR, MVT::i16, N0,
2141 DAG.getNode(ISD::SHL, MVT::i16,
2142 N0, DAG.getConstant(8, MVT::i32)));
2143 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2144 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2148 SDValue N1 = Op.getOperand(1);
2150 N0 = (N0.getOpcode() != ISD::Constant
2151 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2152 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2154 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2157 N1 = (N1.getOpcode() != ISD::Constant
2158 ? DAG.getNode(N1Opc, ShiftVT, N1)
2159 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
2160 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2161 DAG.getNode(Opc, MVT::i16, N0, N1));
2164 SDValue N1 = Op.getOperand(1);
2166 N0 = (N0.getOpcode() != ISD::Constant
2167 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2168 : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
2170 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2173 N1 = (N1.getOpcode() != ISD::Constant
2174 ? DAG.getNode(N1Opc, ShiftVT, N1)
2175 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2177 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2178 DAG.getNode(Opc, MVT::i16, N0, N1));
2181 SDValue N1 = Op.getOperand(1);
2183 N0 = (N0.getOpcode() != ISD::Constant
2184 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2185 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2187 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2188 N1 = (N1.getOpcode() != ISD::Constant
2189 ? DAG.getNode(N1Opc, MVT::i16, N1)
2190 : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
2192 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2193 DAG.getNode(Opc, MVT::i16, N0, N1));
2201 //! Generate the carry-generate shuffle mask.
2202 SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
2203 SmallVector<SDValue, 16 > ShufBytes;
2205 // Create the shuffle mask for "rotating" the carry up one register slot
2206 // once the carry is generated.
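// Under SHUFB, 0x04050607 selects bytes 4..7 (word 1), 0x0c0d0e0f selects
// bytes 12..15 (word 3), and 0x80 bytes produce zeros, so each generated
// carry moves from the low word of its doubleword into the high word's slot.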
2207 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2208 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2209 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2210 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2212 return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2213 &ShufBytes[0], ShufBytes.size());
2216 //! Generate the borrow-generate shuffle mask
2217 SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
2218 SmallVector<SDValue, 16 > ShufBytes;
2220 // Create the shuffle mask for "rotating" the borrow up one register slot
2221 // once the borrow is generated.
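// Same layout as the carry mask, except that the 0xc0 bytes make SHUFB emit
// 0xff (mask bytes of the form 110xxxxx) instead of zero in the unused slots.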
2222 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2223 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2224 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2225 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2227 return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2228 &ShufBytes[0], ShufBytes.size());
2231 //! Lower byte immediate operations for v16i8 vectors:
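// AND, OR and XOR of v16i8 with a splatted byte can be matched by the
// immediate forms (ANDBI, ORBI, XORBI); rebuilding the splat as sixteen
// identical target constants below makes that pattern visible to selection.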
2233 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2236 MVT VT = Op.getValueType();
2238 ConstVec = Op.getOperand(0);
2239 Arg = Op.getOperand(1);
2240 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2241 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2242 ConstVec = ConstVec.getOperand(0);
2244 ConstVec = Op.getOperand(1);
2245 Arg = Op.getOperand(0);
2246 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2247 ConstVec = ConstVec.getOperand(0);
2252 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2253 uint64_t VectorBits[2];
2254 uint64_t UndefBits[2];
2255 uint64_t SplatBits, SplatUndef;
2258 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2259 && isConstantSplat(VectorBits, UndefBits,
2260 VT.getVectorElementType().getSizeInBits(),
2261 SplatBits, SplatUndef, SplatSize)) {
SDValue tcVec[16];
2263 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2264 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2266 // Turn the BUILD_VECTOR into a set of target constants:
2267 for (size_t i = 0; i < tcVecSize; ++i)
tcVec[i] = tc;
2270 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2271 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2274 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2275 // lowered. Return the operation, rather than a null SDValue.
2279 //! Custom lowering for CTPOP (count population)
2281 Custom lowering code that counts the number of ones in the input
2282 operand. SPU has such an instruction, but it counts the number of
2283 ones per byte, which then have to be accumulated.
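For example, for an i32 operand the per-byte counts c produced by CNTB are
reduced with two shift-and-add steps:
popcount(x) = ((c + (c >> 16)) + ((c + (c >> 16)) >> 8)) & 0xff.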
2285 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2286 MVT VT = Op.getValueType();
2287 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2289 switch (VT.getSimpleVT()) {
2291 assert(false && "Invalid value type!");
2293 SDValue N = Op.getOperand(0);
2294 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2296 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2297 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2299 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2303 MachineFunction &MF = DAG.getMachineFunction();
2304 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2306 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2308 SDValue N = Op.getOperand(0);
2309 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2310 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2311 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2313 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2314 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2316 // CNTB_result becomes the chain to which all of the virtual registers
2317 // CNTB_reg, SUM1_reg become associated:
2318 SDValue CNTB_result =
2319 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2321 SDValue CNTB_rescopy =
2322 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2324 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2326 return DAG.getNode(ISD::AND, MVT::i16,
2327 DAG.getNode(ISD::ADD, MVT::i16,
2328 DAG.getNode(ISD::SRL, MVT::i16,
2335 MachineFunction &MF = DAG.getMachineFunction();
2336 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2338 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2339 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2341 SDValue N = Op.getOperand(0);
2342 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2343 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2344 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2345 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2347 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2348 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2350 // CNTB_result becomes the chain to which all of the virtual registers
2351 // CNTB_reg, SUM1_reg become associated:
2352 SDValue CNTB_result =
2353 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2355 SDValue CNTB_rescopy =
2356 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2359 DAG.getNode(ISD::SRL, MVT::i32,
2360 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2363 DAG.getNode(ISD::ADD, MVT::i32,
2364 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2366 SDValue Sum1_rescopy =
2367 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2370 DAG.getNode(ISD::SRL, MVT::i32,
2371 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2374 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2375 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2377 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2389 DAGCombine does the same basic reduction: convert the double to i64 and mask
2390 off the sign bit. Unfortunately, DAGCombine inserts the i64 constant, which
2391 CellSPU has to legalize. Hence, the custom lowering.
2394 static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
2395 MVT OpVT = Op.getValueType();
2396 MVT IntVT(MVT::i64);
2397 SDValue Op0 = Op.getOperand(0);
2399 assert(OpVT == MVT::f64 && "LowerFABS: expecting MVT::f64!\n");
2402 DAG.getNode(ISD::AND, IntVT,
2403 DAG.getNode(ISD::BIT_CONVERT, IntVT, Op0),
2404 DAG.getConstant(~IntVT.getIntegerVTSignBit(), IntVT));
2406 return DAG.getNode(ISD::BIT_CONVERT, MVT::f64, iABS);
2409 //! Lower ISD::SETCC
2411 This handles MVT::f64 (double floating point) condition lowering
2414 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2415 const TargetLowering &TLI) {
2416 SDValue lhs = Op.getOperand(0);
2417 SDValue rhs = Op.getOperand(1);
2418 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode > (Op.getOperand(2));
2419 MVT lhsVT = lhs.getValueType();
2420 SDValue posNaN = DAG.getConstant(0x7ff0000000000001ULL, MVT::i64);
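// 0x7ff0000000000001 is the smallest bit pattern encoding a NaN; FABS clears
// the sign bit, so a signed i64 compare against posNaN tests whether lhs is
// a NaN.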
2422 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2423 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2425 switch (CC->get()) {
2432 cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
2436 SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
2438 DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
2440 return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETLT);
2443 SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
2445 DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
2447 return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETGE);
2456 cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
2464 //! Lower ISD::SELECT_CC
2466 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the SELB instruction.
2469 \note Need to revisit this in the future: if the code path through the true
2470 and false value computations is longer than the latency of a branch (6
2471 cycles), then it would be more advantageous to branch and insert a new basic
2472 block and branch on the condition. However, this code does not make that
2473 assumption, given the simplistic uses so far.
2476 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2477 const TargetLowering &TLI) {
2478 MVT VT = Op.getValueType();
2479 SDValue lhs = Op.getOperand(0);
2480 SDValue rhs = Op.getOperand(1);
2481 SDValue trueval = Op.getOperand(2);
2482 SDValue falseval = Op.getOperand(3);
2483 SDValue condition = Op.getOperand(4);
2485 // NOTE: SELB's arguments: $rA, $rB, $mask
2487 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2488 // where bits in $mask are 1. The compare result has 1s where the
2489 // condition was true and 0s where it was false, so trueval must be passed
2490 // as $rB and falseval as $rA; hence the "reversed" argument order below.
2492 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2493 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2494 // with another "cannot select select_cc" assert:
2496 SDValue compare = DAG.getNode(ISD::SETCC,
2497 TLI.getSetCCResultType(Op.getValueType()),
2498 lhs, rhs, condition);
2499 return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
2502 //! Custom lower ISD::TRUNCATE
2503 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2505 MVT VT = Op.getValueType();
2506 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2507 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2509 SDValue Op0 = Op.getOperand(0);
2510 MVT Op0VT = Op0.getValueType();
2511 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2513 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2514 // Create shuffle mask, least significant doubleword of quadword
2515 unsigned maskHigh = 0x08090a0b;
2516 unsigned maskLow = 0x0c0d0e0f;
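// Bytes 8..15 are the least-significant doubleword of the i128 on the
// big-endian SPU; the mask copies them into the first doubleword, which is
// the preferred slot for the resulting i64.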
2517 // Use a shuffle to perform the truncation
2518 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2519 DAG.getConstant(maskHigh, MVT::i32),
2520 DAG.getConstant(maskLow, MVT::i32),
2521 DAG.getConstant(maskHigh, MVT::i32),
2522 DAG.getConstant(maskLow, MVT::i32));
2525 SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2527 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2528 PromoteScalar, PromoteScalar, shufMask);
2530 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2531 DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
2534 return SDValue(); // Leave the truncate unmolested
2537 //! Custom (target-specific) lowering entry point
2539 This is where LLVM's DAG selection process calls to do target-specific lowering of nodes.
2543 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2545 unsigned Opc = (unsigned) Op.getOpcode();
2546 MVT VT = Op.getValueType();
2550 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2551 cerr << "Op.getOpcode() = " << Opc << "\n";
2552 cerr << "*Op.getNode():\n";
2553 Op.getNode()->dump();
2560 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2562 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2563 case ISD::ConstantPool:
2564 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2565 case ISD::GlobalAddress:
2566 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2567 case ISD::JumpTable:
2568 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2570 return LowerConstant(Op, DAG);
2571 case ISD::ConstantFP:
2572 return LowerConstantFP(Op, DAG);
2573 case ISD::FORMAL_ARGUMENTS:
2574 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2576 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2578 return LowerRET(Op, DAG, getTargetMachine());
2580 // i8, i64 math ops:
2589 return LowerI8Math(Op, DAG, Opc, *this);
2594 return LowerFABS(Op, DAG);
2596 // Vector-related lowering.
2597 case ISD::BUILD_VECTOR:
2598 return SPU::LowerBUILD_VECTOR(Op, DAG);
2599 case ISD::SCALAR_TO_VECTOR:
2600 return LowerSCALAR_TO_VECTOR(Op, DAG);
2601 case ISD::VECTOR_SHUFFLE:
2602 return LowerVECTOR_SHUFFLE(Op, DAG);
2603 case ISD::EXTRACT_VECTOR_ELT:
2604 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2605 case ISD::INSERT_VECTOR_ELT:
2606 return LowerINSERT_VECTOR_ELT(Op, DAG);
2608 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2612 return LowerByteImmed(Op, DAG);
2614 // Vector and i8 multiply:
2617 return LowerI8Math(Op, DAG, Opc, *this);
2620 return LowerCTPOP(Op, DAG);
2622 case ISD::SELECT_CC:
2623 return LowerSELECT_CC(Op, DAG, *this);
2626 return LowerSETCC(Op, DAG, *this);
2629 return LowerTRUNCATE(Op, DAG);
2635 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2636 SmallVectorImpl<SDValue>&Results,
2640 unsigned Opc = (unsigned) N->getOpcode();
2641 MVT OpVT = N->getValueType(0);
2645 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2646 cerr << "Op.getOpcode() = " << Opc << "\n";
2647 cerr << "*Op.getNode():\n";
2655 /* Otherwise, return unchanged */
2658 //===----------------------------------------------------------------------===//
2659 // Target Optimization Hooks
2660 //===----------------------------------------------------------------------===//
2663 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2666 TargetMachine &TM = getTargetMachine();
2668 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2669 SelectionDAG &DAG = DCI.DAG;
2670 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2671 MVT NodeVT = N->getValueType(0); // The node's value type
2672 MVT Op0VT = Op0.getValueType(); // The first operand's result
2673 SDValue Result; // Initially, empty result
2675 switch (N->getOpcode()) {
2678 SDValue Op1 = N->getOperand(1);
2680 if (Op0.getOpcode() == SPUISD::IndirectAddr
2681 || Op1.getOpcode() == SPUISD::IndirectAddr) {
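// Folding the constant into the SPUindirect node lets a single register +
// offset (d-form) address be selected later instead of a separate add.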
2682 // Normalize the operands to reduce repeated code
2683 SDValue IndirectArg = Op0, AddArg = Op1;
2685 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2690 if (isa<ConstantSDNode>(AddArg)) {
2691 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2692 SDValue IndOp1 = IndirectArg.getOperand(1);
2694 if (CN0->isNullValue()) {
2695 // (add (SPUindirect <arg>, <arg>), 0) ->
2696 // (SPUindirect <arg>, <arg>)
2698 #if !defined(NDEBUG)
2699 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2701 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2702 << "With: (SPUindirect <arg>, <arg>)\n";
2707 } else if (isa<ConstantSDNode>(IndOp1)) {
2708 // (add (SPUindirect <arg>, <const>), <const>) ->
2709 // (SPUindirect <arg>, <const + const>)
2710 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2711 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2712 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2714 #if !defined(NDEBUG)
2715 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2717 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2718 << "), " << CN0->getSExtValue() << ")\n"
2719 << "With: (SPUindirect <arg>, "
2720 << combinedConst << ")\n";
2724 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2725 IndirectArg, combinedValue);
2731 case ISD::SIGN_EXTEND:
2732 case ISD::ZERO_EXTEND:
2733 case ISD::ANY_EXTEND: {
2734 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2735 // (any_extend (SPUextract_elt0 <arg>)) ->
2736 // (SPUextract_elt0 <arg>)
2737 // Types must match, however...
2738 #if !defined(NDEBUG)
2739 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2740 cerr << "\nReplace: ";
2743 Op0.getNode()->dump(&DAG);
2752 case SPUISD::IndirectAddr: {
2753 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2754 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2755 if (CN != 0 && CN->getZExtValue() == 0) {
2756 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2757 // (SPUaform <addr>, 0)
2759 DEBUG(cerr << "Replace: ");
2760 DEBUG(N->dump(&DAG));
2761 DEBUG(cerr << "\nWith: ");
2762 DEBUG(Op0.getNode()->dump(&DAG));
2763 DEBUG(cerr << "\n");
2767 } else if (Op0.getOpcode() == ISD::ADD) {
2768 SDValue Op1 = N->getOperand(1);
2769 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2770 // (SPUindirect (add <arg>, <arg>), 0) ->
2771 // (SPUindirect <arg>, <arg>)
2772 if (CN1->isNullValue()) {
2774 #if !defined(NDEBUG)
2775 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2777 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2778 << "With: (SPUindirect <arg>, <arg>)\n";
2782 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2783 Op0.getOperand(0), Op0.getOperand(1));
2789 case SPUISD::SHLQUAD_L_BITS:
2790 case SPUISD::SHLQUAD_L_BYTES:
2791 case SPUISD::VEC_SHL:
2792 case SPUISD::VEC_SRL:
2793 case SPUISD::VEC_SRA:
2794 case SPUISD::ROTBYTES_LEFT: {
2795 SDValue Op1 = N->getOperand(1);
2797 // Kill degenerate vector shifts:
2798 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2799 if (CN->isNullValue()) {
2805 case SPUISD::PREFSLOT2VEC: {
2806 switch (Op0.getOpcode()) {
2809 case ISD::ANY_EXTEND:
2810 case ISD::ZERO_EXTEND:
2811 case ISD::SIGN_EXTEND: {
2812 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2814 // but only if the SPUprefslot2vec and <arg> types match.
2815 SDValue Op00 = Op0.getOperand(0);
2816 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2817 SDValue Op000 = Op00.getOperand(0);
2818 if (Op000.getValueType() == NodeVT) {
2824 case SPUISD::VEC2PREFSLOT: {
2825 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2827 Result = Op0.getOperand(0);
2835 // Otherwise, return unchanged.
2837 if (Result.getNode()) {
2838 DEBUG(cerr << "\nReplace.SPU: ");
2839 DEBUG(N->dump(&DAG));
2840 DEBUG(cerr << "\nWith: ");
2841 DEBUG(Result.getNode()->dump(&DAG));
2842 DEBUG(cerr << "\n");
2849 //===----------------------------------------------------------------------===//
2850 // Inline Assembly Support
2851 //===----------------------------------------------------------------------===//
2853 /// getConstraintType - Given a constraint letter, return the type of
2854 /// constraint it is for this target.
2855 SPUTargetLowering::ConstraintType
2856 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2857 if (ConstraintLetter.size() == 1) {
2858 switch (ConstraintLetter[0]) {
2865 return C_RegisterClass;
2868 return TargetLowering::getConstraintType(ConstraintLetter);
2871 std::pair<unsigned, const TargetRegisterClass*>
2872 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2875 if (Constraint.size() == 1) {
2876 // GCC RS6000 Constraint Letters
2877 switch (Constraint[0]) {
2881 return std::make_pair(0U, SPU::R64CRegisterClass);
2882 return std::make_pair(0U, SPU::R32CRegisterClass);
2885 return std::make_pair(0U, SPU::R32FPRegisterClass);
2886 else if (VT == MVT::f64)
2887 return std::make_pair(0U, SPU::R64FPRegisterClass);
2890 return std::make_pair(0U, SPU::GPRCRegisterClass);
2894 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2897 //! Compute used/known bits for a SPU operand
2899 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2903 const SelectionDAG &DAG,
2904 unsigned Depth ) const {
2906 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2908 switch (Op.getOpcode()) {
2910 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2916 case SPUISD::PREFSLOT2VEC:
2917 case SPUISD::LDRESULT:
2918 case SPUISD::VEC2PREFSLOT:
2919 case SPUISD::SHLQUAD_L_BITS:
2920 case SPUISD::SHLQUAD_L_BYTES:
2921 case SPUISD::VEC_SHL:
2922 case SPUISD::VEC_SRL:
2923 case SPUISD::VEC_SRA:
2924 case SPUISD::VEC_ROTL:
2925 case SPUISD::VEC_ROTR:
2926 case SPUISD::ROTBYTES_LEFT:
2927 case SPUISD::SELECT_MASK:
2934 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2935 unsigned Depth) const {
2936 switch (Op.getOpcode()) {
2941 MVT VT = Op.getValueType();
2943 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
2946 return VT.getSizeInBits();
2951 // LowerAsmOperandForConstraint
2953 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
2954 char ConstraintLetter,
2956 std::vector<SDValue> &Ops,
2957 SelectionDAG &DAG) const {
2958 // Default, for the time being, to the base class handler
2959 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
2963 /// isLegalAddressImmediate - Return true if the integer value can be used
2964 /// as the offset of the target addressing mode.
2965 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
2966 const Type *Ty) const {
2967 // The SPU's local store is 256K, so address immediates must fit in 18 bits:
2968 return (V > -(1 << 18) && V < (1 << 18) - 1);
2971 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2976 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
2977 // The SPU target isn't yet aware of offsets.