1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/Constants.h"
27 #include "llvm/Function.h"
28 #include "llvm/Intrinsics.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Target/TargetOptions.h"
// File-scope helpers: an opcode -> printable-name map used by
// getTargetNodeName(), and a small table mapping scalar value types to
// their preferred-slot byte offset within a 16-byte SPU register.
// NOTE(review): this is a gapped listing -- the struct members, table
// initializers and closing braces of these definitions are not shown here.
37 // Used in getTargetNodeName() below
39 std::map<unsigned, const char *> node_names;
41 //! MVT mapping to useful data for Cell SPU
42 struct valtype_map_s {
44 const int prefslot_byte;
47 const valtype_map_s valtype_map[] = {
58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear scan of valtype_map for VT; retval stays 0 when VT has no entry,
// and the cerr message below reports that case.
60 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
61 const valtype_map_s *retval = 0;
63 for (size_t i = 0; i < n_valtype_map; ++i) {
64 if (valtype_map[i].valtype == VT) {
65 retval = valtype_map + i;
72 cerr << "getValueTypeMapEntry returns NULL for "
// Constructor: registers the Cell SPU register classes, declares how each
// ISD operation is handled per value type (Legal / Custom / Expand /
// Promote), selects the DAG-combine opcodes of interest, and sets the
// scheduling preference.  All the Custom entries correspond to the
// Lower* routines later in this file.
// NOTE(review): gapped listing -- loop-increment lines, closing braces and
// a few statements between the shown original line numbers are absent.
83 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
87 // Fold away setcc operations if possible.
90 // Use _setjmp/_longjmp instead of setjmp/longjmp.
91 setUseUnderscoreSetJmp(true);
92 setUseUnderscoreLongJmp(true);
94 // Set up the SPU's register classes:
95 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
96 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
97 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
98 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
99 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
100 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
101 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
103 // SPU has no sign or zero extended loads for i1, i8, i16:
104 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
105 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
106 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
108 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
109 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
111 // SPU constant load actions are custom lowered:
112 setOperationAction(ISD::Constant, MVT::i64, Custom);
113 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
114 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
116 // SPU's loads and stores have to be custom lowered:
// Every integer type from i8 up to (but not including) f128 gets
// custom LOAD/STORE handling (see LowerLOAD/LowerSTORE below).
117 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
119 MVT VT = (MVT::SimpleValueType)sctype;
121 setOperationAction(ISD::LOAD, VT, Custom);
122 setOperationAction(ISD::STORE, VT, Custom);
123 setLoadExtAction(ISD::EXTLOAD, VT, Custom);
124 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
125 setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
127 // SMUL_LOHI, UMUL_LOHI are not legal for Cell:
128 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
129 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
// Truncating stores from VT to every smaller integer type are expanded.
131 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
132 MVT StoreVT = (MVT::SimpleValueType) stype;
133 setTruncStoreAction(VT, StoreVT, Expand);
// Same treatment for the floating-point types f32..f64.
137 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
139 MVT VT = (MVT::SimpleValueType) sctype;
141 setOperationAction(ISD::LOAD, VT, Custom);
142 setOperationAction(ISD::STORE, VT, Custom);
144 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
145 MVT StoreVT = (MVT::SimpleValueType) stype;
146 setTruncStoreAction(VT, StoreVT, Expand);
150 // Expand the jumptable branches
151 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
152 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
154 // Custom lower SELECT_CC for most cases, but expand by default
155 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
156 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
157 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
158 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
159 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
161 // SPU has no intrinsics for these particular operations:
162 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
164 // SPU has no SREM/UREM instructions
165 setOperationAction(ISD::SREM, MVT::i32, Expand);
166 setOperationAction(ISD::UREM, MVT::i32, Expand);
167 setOperationAction(ISD::SREM, MVT::i64, Expand);
168 setOperationAction(ISD::UREM, MVT::i64, Expand);
170 // We don't support sin/cos/sqrt/fmod
171 setOperationAction(ISD::FSIN , MVT::f64, Expand);
172 setOperationAction(ISD::FCOS , MVT::f64, Expand);
173 setOperationAction(ISD::FREM , MVT::f64, Expand);
174 setOperationAction(ISD::FSIN , MVT::f32, Expand);
175 setOperationAction(ISD::FCOS , MVT::f32, Expand);
176 setOperationAction(ISD::FREM , MVT::f32, Expand);
178 // If we're enabling GP optimizations, use hardware square root
179 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
180 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
182 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
183 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
185 // SPU can do rotate right and left, so legalize it... but customize for i8
186 // because instructions don't exist.
188 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
190 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
191 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
192 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
194 setOperationAction(ISD::ROTL, MVT::i32, Legal);
195 setOperationAction(ISD::ROTL, MVT::i16, Legal);
196 setOperationAction(ISD::ROTL, MVT::i8, Custom);
198 // SPU has no native version of shift left/right for i8
199 setOperationAction(ISD::SHL, MVT::i8, Custom);
200 setOperationAction(ISD::SRL, MVT::i8, Custom);
201 setOperationAction(ISD::SRA, MVT::i8, Custom);
203 // Make these operations legal and handle them during instruction selection:
204 setOperationAction(ISD::SHL, MVT::i64, Legal);
205 setOperationAction(ISD::SRL, MVT::i64, Legal);
206 setOperationAction(ISD::SRA, MVT::i64, Legal);
208 // Custom lower i8, i32 and i64 multiplications
209 setOperationAction(ISD::MUL, MVT::i8, Custom);
210 setOperationAction(ISD::MUL, MVT::i32, Legal);
211 setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
213 // Need to custom handle (some) common i8, i64 math ops
214 setOperationAction(ISD::ADD, MVT::i8, Custom);
215 setOperationAction(ISD::ADD, MVT::i64, Custom);
216 setOperationAction(ISD::SUB, MVT::i8, Custom);
217 setOperationAction(ISD::SUB, MVT::i64, Custom);
219 // SPU does not have BSWAP. It does have i32 support CTLZ.
220 // CTPOP has to be custom lowered.
221 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
222 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
224 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
225 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
226 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
227 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
229 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
230 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
232 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
234 // SPU has a version of select that implements (a&~c)|(b&c), just like
235 // select ought to work:
236 setOperationAction(ISD::SELECT, MVT::i8, Legal);
237 setOperationAction(ISD::SELECT, MVT::i16, Legal);
238 setOperationAction(ISD::SELECT, MVT::i32, Legal);
239 setOperationAction(ISD::SELECT, MVT::i64, Legal);
241 setOperationAction(ISD::SETCC, MVT::i8, Legal);
242 setOperationAction(ISD::SETCC, MVT::i16, Legal);
243 setOperationAction(ISD::SETCC, MVT::i32, Legal);
244 setOperationAction(ISD::SETCC, MVT::i64, Legal);
246 // Zero extension and sign extension for i64 have to be
248 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
249 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
251 // Custom lower i128 -> i64 truncates
252 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
254 // SPU has a legal FP -> signed INT instruction
255 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
256 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
257 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
258 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
260 // FDIV on SPU requires custom lowering
261 setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
263 // SPU has [U|S]INT_TO_FP
264 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
265 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
266 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
267 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
268 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
269 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
270 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
271 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
273 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
274 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
275 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
276 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
278 // We cannot sextinreg(i1). Expand to shifts.
279 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
281 // Support label based line numbers.
282 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
283 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
285 // We want to legalize GlobalAddress and ConstantPool nodes into the
286 // appropriate instructions to materialize the address.
287 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
289 MVT VT = (MVT::SimpleValueType)sctype;
291 setOperationAction(ISD::GlobalAddress, VT, Custom);
292 setOperationAction(ISD::ConstantPool, VT, Custom);
293 setOperationAction(ISD::JumpTable, VT, Custom);
296 // RET must be custom lowered, to meet ABI requirements
297 setOperationAction(ISD::RET, MVT::Other, Custom);
299 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
300 setOperationAction(ISD::VASTART , MVT::Other, Custom);
302 // Use the default implementation.
303 setOperationAction(ISD::VAARG , MVT::Other, Expand);
304 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
305 setOperationAction(ISD::VAEND , MVT::Other, Expand);
306 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
307 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
308 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
309 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
311 // Cell SPU has instructions for converting between i64 and fp.
312 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
313 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
315 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
316 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
318 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
319 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
321 // First set operation action for all vector types to expand. Then we
322 // will selectively turn on ones that can be effectively codegen'd.
323 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
324 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
325 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
326 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
327 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
328 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
330 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
331 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
332 MVT VT = (MVT::SimpleValueType)i;
334 // add/sub are legal for all supported vector VT's.
335 setOperationAction(ISD::ADD , VT, Legal);
336 setOperationAction(ISD::SUB , VT, Legal);
337 // mul has to be custom lowered.
338 // TODO: v2i64 vector multiply
339 setOperationAction(ISD::MUL , VT, Legal);
341 setOperationAction(ISD::AND , VT, Legal);
342 setOperationAction(ISD::OR , VT, Legal);
343 setOperationAction(ISD::XOR , VT, Legal);
344 setOperationAction(ISD::LOAD , VT, Legal);
345 setOperationAction(ISD::SELECT, VT, Legal);
346 setOperationAction(ISD::STORE, VT, Legal);
348 // These operations need to be expanded:
349 setOperationAction(ISD::SDIV, VT, Expand);
350 setOperationAction(ISD::SREM, VT, Expand);
351 setOperationAction(ISD::UDIV, VT, Expand);
352 setOperationAction(ISD::UREM, VT, Expand);
354 // Custom lower build_vector, constant pool spills, insert and
355 // extract vector elements:
356 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
357 setOperationAction(ISD::ConstantPool, VT, Custom);
358 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
359 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
360 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
361 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
364 setOperationAction(ISD::AND, MVT::v16i8, Custom);
365 setOperationAction(ISD::OR, MVT::v16i8, Custom);
366 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
367 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
369 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
371 setShiftAmountType(MVT::i32);
372 setBooleanContents(ZeroOrNegativeOneBooleanContent);
374 setStackPointerRegisterToSaveRestore(SPU::R1);
376 // We have target-specific dag combine patterns for the following nodes:
377 setTargetDAGCombine(ISD::ADD);
378 setTargetDAGCombine(ISD::ZERO_EXTEND);
379 setTargetDAGCombine(ISD::SIGN_EXTEND);
380 setTargetDAGCombine(ISD::ANY_EXTEND);
382 computeRegisterProperties();
384 // Set pre-RA register scheduler default to BURR, which produces slightly
385 // better code than the default (could also be TDRR, but TargetLowering.h
386 // needs a mod to support that model):
387 setSchedulingPreference(SchedulingForRegPressure);
// getTargetNodeName - map a SPUISD opcode to its printable name for DAG
// dumps.  Lazily populates the file-scope node_names map on first call
// (the global map is mutated without any synchronization visible here),
// then returns the mapped string, or 0 for an unknown opcode.
391 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
393 if (node_names.empty()) {
394 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
395 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
396 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
397 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
398 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
399 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
400 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
401 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
402 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
403 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
404 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
405 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
406 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
407 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
408 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
409 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
410 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
411 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
412 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
413 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
414 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
415 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
416 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
417 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
418 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
419 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
420 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Lookup (does not insert): unknown opcodes yield 0.
423 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
425 return ((i != node_names.end()) ? i->second : 0);
428 //===----------------------------------------------------------------------===//
429 // Return the Cell SPU's SETCC result type
430 //===----------------------------------------------------------------------===//
// getSetCCResultType - SETCC on i8/i16/i32 produces a result of the same
// type; every other operand type gets an i32 result.
432 MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
433 // i16 and i32 are valid SETCC result types
434 return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
437 //===----------------------------------------------------------------------===//
438 // Calling convention code:
439 //===----------------------------------------------------------------------===//
441 #include "SPUGenCallingConv.inc"
443 //===----------------------------------------------------------------------===//
444 // LowerOperation implementation
445 //===----------------------------------------------------------------------===//
447 /// Custom lower loads for CellSPU
449 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
450 within a 16-byte block, we have to rotate to extract the requested element.
452 For extending loads, we also want to ensure that the following sequence is
453 emitted, e.g. for MVT::f32 extending load to MVT::f64:
457 %2 v16i8,ch = rotate %1
458 %3 v4f32, ch = bitconvert %2
459 %4 f32 = vec2prefslot %3
460 %5 f64 = fp_extend %4
// LowerLOAD - custom lowering of scalar loads.  Strategy visible below:
// re-emit the access as an aligned v16i8 load of the containing 16-byte
// block, rotate the wanted element into the preferred slot
// (SPUISD::ROTBYTES_LEFT), pull it out with VEC2PREFSLOT, then apply any
// sign/zero/any/fp extension, and wrap the (value, chain) pair in a
// SPUISD::LDRESULT node.  Only ISD::UNINDEXED addressing is handled; the
// trailing case reports any other mode to cerr.
// NOTE(review): gapped listing -- several declarations (e.g. of `rotate`,
// `result`, `CN`, `Flag`), else-lines and closing braces are not shown.
464 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
465 LoadSDNode *LN = cast<LoadSDNode>(Op);
466 SDValue the_chain = LN->getChain();
467 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
468 MVT InVT = LN->getMemoryVT();
469 MVT OutVT = Op.getValueType();
470 ISD::LoadExtType ExtType = LN->getExtensionType();
471 unsigned alignment = LN->getAlignment();
// NOTE(review): vtm is dereferenced below without a null check, even
// though getValueTypeMapEntry can return 0 for unmapped types.
472 const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
474 switch (LN->getAddressingMode()) {
475 case ISD::UNINDEXED: {
477 SDValue basePtr = LN->getBasePtr();
// Aligned (16-byte) case: the rotate amount can be computed statically
// for known base-pointer shapes.
480 if (alignment == 16) {
483 // Special cases for a known aligned load to simplify the base pointer
484 // and the rotation amount:
485 if (basePtr.getOpcode() == ISD::ADD
486 && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
487 // Known offset into basePtr
488 int64_t offset = CN->getSExtValue();
489 int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
494 rotate = DAG.getConstant(rotamt, MVT::i16);
496 // Simplify the base pointer for this case:
497 basePtr = basePtr.getOperand(0);
498 if ((offset & ~0xf) > 0) {
499 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
501 DAG.getConstant((offset & ~0xf), PtrVT));
503 } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
504 || (basePtr.getOpcode() == SPUISD::IndirectAddr
505 && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
506 && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
507 // Plain aligned a-form address: rotate into preferred slot
508 // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
509 int64_t rotamt = -vtm->prefslot_byte;
512 rotate = DAG.getConstant(rotamt, MVT::i16);
514 // Offset the rotate amount by the basePtr and the preferred slot
516 int64_t rotamt = -vtm->prefslot_byte;
519 rotate = DAG.getNode(ISD::ADD, PtrVT,
521 DAG.getConstant(rotamt, PtrVT));
524 // Unaligned load: must be more pessimistic about addressing modes:
525 if (basePtr.getOpcode() == ISD::ADD) {
526 MachineFunction &MF = DAG.getMachineFunction();
527 MachineRegisterInfo &RegInfo = MF.getRegInfo();
528 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
531 SDValue Op0 = basePtr.getOperand(0);
532 SDValue Op1 = basePtr.getOperand(1);
534 if (isa<ConstantSDNode>(Op1)) {
535 // Convert the (add <ptr>, <const>) to an indirect address contained
536 // in a register. Note that this is done because we need to avoid
537 // creating a 0(reg) d-form address due to the SPU's block loads.
538 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
539 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
540 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
542 // Convert the (add <arg1>, <arg2>) to an indirect address, which
543 // will likely be lowered as a reg(reg) x-form address.
544 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
547 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
549 DAG.getConstant(0, PtrVT));
552 // Offset the rotate amount by the basePtr and the preferred slot
554 rotate = DAG.getNode(ISD::ADD, PtrVT,
556 DAG.getConstant(-vtm->prefslot_byte, PtrVT));
559 // Re-emit as a v16i8 vector load
560 result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
561 LN->getSrcValue(), LN->getSrcValueOffset(),
562 LN->isVolatile(), 16);
565 the_chain = result.getValue(1);
567 // Rotate into the preferred slot:
568 result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
569 result.getValue(0), rotate);
571 // Convert the loaded v16i8 vector to the appropriate vector type
572 // specified by the operand:
573 MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
574 result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
575 DAG.getNode(ISD::BIT_CONVERT, vecVT, result));
577 // Handle extending loads by extending the scalar result:
578 if (ExtType == ISD::SEXTLOAD) {
579 result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
580 } else if (ExtType == ISD::ZEXTLOAD) {
581 result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
582 } else if (ExtType == ISD::EXTLOAD) {
583 unsigned NewOpc = ISD::ANY_EXTEND;
585 if (OutVT.isFloatingPoint())
586 NewOpc = ISD::FP_EXTEND;
588 result = DAG.getNode(NewOpc, OutVT, result);
// Package (value, chain) in an LDRESULT node so instruction selection
// sees both results.
591 SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
592 SDValue retops[2] = {
597 result = DAG.getNode(SPUISD::LDRESULT, retvts,
598 retops, sizeof(retops) / sizeof(retops[0]));
// Unsupported addressing modes: report and fall through to the (unseen)
// error handling below.
605 case ISD::LAST_INDEXED_MODE:
606 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
608 cerr << (unsigned) LN->getAddressingMode() << "\n";
616 /// Custom lower stores for CellSPU
618 All CellSPU stores are aligned to 16-byte boundaries, so for elements
619 within a 16-byte block, we have to generate a shuffle to insert the
620 requested element into its place, then store the resulting block.
// LowerSTORE - custom lowering of scalar stores.  Strategy visible below:
// load the containing 16-byte block as v16i8, build a SHUFFLE_MASK from
// the insertion offset, SCALAR_TO_VECTOR the value to store, merge it into
// the loaded block with SPUISD::SHUFB, and store the whole block back.
// Only ISD::UNINDEXED addressing is handled.
// NOTE(review): gapped listing -- declarations (e.g. `CN`, `Flag`,
// `result`), else-lines and closing braces are not shown here.
623 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
624 StoreSDNode *SN = cast<StoreSDNode>(Op);
625 SDValue Value = SN->getValue();
626 MVT VT = Value.getValueType();
627 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
628 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
629 unsigned alignment = SN->getAlignment();
631 switch (SN->getAddressingMode()) {
632 case ISD::UNINDEXED: {
633 // The vector type we really want to load from the 16-byte chunk.
634 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
635 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
637 SDValue alignLoadVec;
638 SDValue basePtr = SN->getBasePtr();
639 SDValue the_chain = SN->getChain();
640 SDValue insertEltOffs;
// Aligned (16-byte) case: base pointer and insertion byte can be
// simplified for known base-pointer shapes.
642 if (alignment == 16) {
645 // Special cases for a known aligned load to simplify the base pointer
646 // and insertion byte:
647 if (basePtr.getOpcode() == ISD::ADD
648 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
649 // Known offset into basePtr
650 int64_t offset = CN->getSExtValue();
652 // Simplify the base pointer for this case:
653 basePtr = basePtr.getOperand(0);
654 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
656 DAG.getConstant((offset & 0xf), PtrVT));
658 if ((offset & ~0xf) > 0) {
659 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
661 DAG.getConstant((offset & ~0xf), PtrVT));
664 // Otherwise, assume it's at byte 0 of basePtr
665 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
667 DAG.getConstant(0, PtrVT));
670 // Unaligned load: must be more pessimistic about addressing modes:
671 if (basePtr.getOpcode() == ISD::ADD) {
672 MachineFunction &MF = DAG.getMachineFunction();
673 MachineRegisterInfo &RegInfo = MF.getRegInfo();
674 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
677 SDValue Op0 = basePtr.getOperand(0);
678 SDValue Op1 = basePtr.getOperand(1);
680 if (isa<ConstantSDNode>(Op1)) {
681 // Convert the (add <ptr>, <const>) to an indirect address contained
682 // in a register. Note that this is done because we need to avoid
683 // creating a 0(reg) d-form address due to the SPU's block loads.
684 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
685 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
686 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
688 // Convert the (add <arg1>, <arg2>) to an indirect address, which
689 // will likely be lowered as a reg(reg) x-form address.
690 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
693 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
695 DAG.getConstant(0, PtrVT));
698 // Insertion point is solely determined by basePtr's contents
699 insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
701 DAG.getConstant(0, PtrVT));
704 // Re-emit as a v16i8 vector load
705 alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
706 SN->getSrcValue(), SN->getSrcValueOffset(),
707 SN->isVolatile(), 16);
710 the_chain = alignLoadVec.getValue(1);
712 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
713 SDValue theValue = SN->getValue();
717 && (theValue.getOpcode() == ISD::AssertZext
718 || theValue.getOpcode() == ISD::AssertSext)) {
719 // Drill down and get the value for zero- and sign-extended
721 theValue = theValue.getOperand(0);
724 // If the base pointer is already a D-form address, then just create
725 // a new D-form address with a slot offset and the orignal base pointer.
726 // Otherwise generate a D-form address with the slot offset relative
727 // to the stack pointer, which is always aligned.
729 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
730 cerr << "CellSPU LowerSTORE: basePtr = ";
731 basePtr.getNode()->dump(&DAG);
// Build the shuffle mask from the insertion offset, vectorize the value,
// and splice it into the aligned block with SHUFB.
736 SDValue insertEltOp =
737 DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
738 SDValue vectorizeOp =
739 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
741 result = DAG.getNode(SPUISD::SHUFB, vecVT,
742 vectorizeOp, alignLoadVec,
743 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
// Store the merged 16-byte block back through the aligned load's memory
// operand info.
745 result = DAG.getStore(the_chain, result, basePtr,
746 LN->getSrcValue(), LN->getSrcValueOffset(),
747 LN->isVolatile(), LN->getAlignment());
// Dead debug block (#if 0).
// NOTE(review): "&currentRoot" below was mangled to "¤tRoot" by an
// HTML-entity encoding error; harmless while this block is #if 0'd out,
// but fix the text before ever re-enabling it.
749 #if 0 && !defined(NDEBUG)
750 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
751 const SDValue ¤tRoot = DAG.getRoot();
754 cerr << "------- CellSPU:LowerStore result:\n";
757 DAG.setRoot(currentRoot);
// NOTE(review): the message below says "LowerLOAD"/"LoadSDNode" -- it
// appears copy-pasted from LowerLOAD; this is LowerSTORE's error path.
768 case ISD::LAST_INDEXED_MODE:
769 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
771 cerr << (unsigned) SN->getAddressingMode() << "\n";
779 /// Generate the address of a constant pool entry.
// LowerConstantPool - materialize the address of a constant-pool entry.
// Static relocation only: small-memory code returns an A-form absolute
// address; large-memory code builds an (Hi, Lo) pair combined through
// SPUISD::IndirectAddr.  Other relocation models hit the assert below.
781 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
782 MVT PtrVT = Op.getValueType();
783 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
784 Constant *C = CP->getConstVal();
785 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
786 SDValue Zero = DAG.getConstant(0, PtrVT);
787 const TargetMachine &TM = DAG.getTarget();
789 if (TM.getRelocationModel() == Reloc::Static) {
790 if (!ST->usingLargeMem()) {
791 // Just return the SDValue with the constant pool address in it.
792 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
794 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
795 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
796 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
801 "LowerConstantPool: Relocation model other than static"
// LowerJumpTable - materialize the address of a jump table.  Mirrors
// LowerConstantPool: A-form address for small memory, (Hi, Lo) indirect
// address for large memory; static relocation only.
807 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
808 MVT PtrVT = Op.getValueType();
809 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
810 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
811 SDValue Zero = DAG.getConstant(0, PtrVT);
812 const TargetMachine &TM = DAG.getTarget();
814 if (TM.getRelocationModel() == Reloc::Static) {
815 if (!ST->usingLargeMem()) {
816 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
818 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
819 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
820 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
825 "LowerJumpTable: Relocation model other than static not supported.");
// LowerGlobalAddress - materialize the address of a global.  Same scheme
// as LowerConstantPool/LowerJumpTable: A-form for small memory, (Hi, Lo)
// indirect address for large memory; non-static relocation is reported
// to cerr below.
830 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
831 MVT PtrVT = Op.getValueType();
832 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
833 GlobalValue *GV = GSDN->getGlobal();
834 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
835 const TargetMachine &TM = DAG.getTarget();
836 SDValue Zero = DAG.getConstant(0, PtrVT);
838 if (TM.getRelocationModel() == Reloc::Static) {
839 if (!ST->usingLargeMem()) {
840 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
842 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
843 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
844 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
847 cerr << "LowerGlobalAddress: Relocation model other than static not "
856 //! Custom lower i64 integer constants
858 This code inserts all of the necessary juggling that needs to occur to load
859 a 64-bit constant into a register.
// LowerConstant - custom lowering for i64 ISD::Constant: splat the value
// into both lanes of a v2i64 BUILD_VECTOR and read it back from the
// preferred slot.  Any other type falls into the cerr error path.
862 LowerConstant(SDValue Op, SelectionDAG &DAG) {
863 MVT VT = Op.getValueType();
865 if (VT == MVT::i64) {
866 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
867 SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
868 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
869 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
871 cerr << "LowerConstant: unhandled constant type "
881 //! Custom lower double precision floating point constants
// LowerConstantFP - custom lowering for f64 ConstantFP: take the double's
// raw bit pattern, splat it as a v2i64 constant vector, bitconvert to
// v2f64, and extract the preferred slot.
883 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
884 MVT VT = Op.getValueType();
886 if (VT == MVT::f64) {
887 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
890 "LowerConstantFP: Node is not ConstantFPSDNode");
892 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
893 SDValue T = DAG.getConstant(dbits, MVT::i64);
894 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
895 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
896 DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
// LowerFORMAL_ARGUMENTS - lower incoming arguments.  While argument
// registers remain, each formal is copied out of its physical register
// into a fresh virtual register of the class chosen by type; once
// registers run out, arguments are loaded from fixed stack slots spaced
// StackSlotSize apart starting at minStackSize().  For varargs, every
// remaining argument register is spilled to a fixed slot (as v16i8) so
// va_arg can walk them; VarArgsFrameIndex is left pointing at those slots.
// Returns a MERGE_VALUES of all argument values plus the updated chain.
// NOTE(review): gapped listing -- the switch's case labels, `ArgVal`'s
// declaration, the vararg `if` and several braces are not shown.
903 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
905 MachineFunction &MF = DAG.getMachineFunction();
906 MachineFrameInfo *MFI = MF.getFrameInfo();
907 MachineRegisterInfo &RegInfo = MF.getRegInfo();
908 SmallVector<SDValue, 48> ArgValues;
909 SDValue Root = Op.getOperand(0);
910 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
912 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
913 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
915 unsigned ArgOffset = SPUFrameInfo::minStackSize();
916 unsigned ArgRegIdx = 0;
917 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
919 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
921 // Add DAG nodes to load the arguments or copy them out of registers.
922 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
923 ArgNo != e; ++ArgNo) {
924 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
925 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
928 if (ArgRegIdx < NumArgRegs) {
929 const TargetRegisterClass *ArgRegClass;
// Select the register class matching the argument's value type; the
// default (unhandled) case reports to cerr.
931 switch (ObjectVT.getSimpleVT()) {
933 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
934 << ObjectVT.getMVTString()
939 ArgRegClass = &SPU::R8CRegClass;
942 ArgRegClass = &SPU::R16CRegClass;
945 ArgRegClass = &SPU::R32CRegClass;
948 ArgRegClass = &SPU::R64CRegClass;
951 ArgRegClass = &SPU::R32FPRegClass;
954 ArgRegClass = &SPU::R64FPRegClass;
962 ArgRegClass = &SPU::VECREGRegClass;
966 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
967 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
968 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
971 // We need to load the argument to a virtual register if we determined
972 // above that we ran out of physical registers of the appropriate type
973 // or we're forced to do vararg
974 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
975 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
976 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
977 ArgOffset += StackSlotSize;
980 ArgValues.push_back(ArgVal);
982 Root = ArgVal.getOperand(0);
987 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
988 // We will spill (79-3)+1 registers to the stack
989 SmallVector<SDValue, 79-3+1> MemOps;
991 // Create the frame slot
993 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
994 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
995 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
996 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
997 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
998 Root = Store.getOperand(0);
999 MemOps.push_back(Store);
1001 // Increment address by stack slot size for the next stored argument
1002 ArgOffset += StackSlotSize;
// Tie all vararg spill stores together into the chain.
1004 if (!MemOps.empty())
1005 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1008 ArgValues.push_back(Root);
1010 // Return the new list of results.
1011 return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
1012 &ArgValues[0], ArgValues.size());
1015 /// isLSAAddress - Return the immediate to use if the specified
1016 /// value is representable as a LSA address.
// Returns null if Op is not a suitable constant.  The immediate is the
// word address (byte address >> 2) and must fit in a sign-extended field.
// NOTE(review): the early "if (!C) return 0;" guard line is not visible in
// this chunk but is required before dereferencing C below — confirm.
1017 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1018 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1021 int Addr = C->getZExtValue();
1022 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1023 (Addr << 14 >> 14) != Addr)
1024 return 0; // Top 14 bits have to be sext of immediate.
// Encode as a word address: drop the two implicit zero bits.
1026 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// LowerCALL - Lower an outgoing call: pass the first NumArgRegs arguments
// in registers and the rest on the stack, select a call addressing form
// (PC-relative BRSL, absolute BRASL/A-form, or indirect for large-memory
// mode), then copy any return values out of R3/R4.
// NOTE(review): the "static SDValue" return-type line and several case
// labels/braces of this definition are not visible in this chunk.
1031 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1032 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1033 SDValue Chain = TheCall->getChain();
1034 SDValue Callee = TheCall->getCallee();
1035 unsigned NumOps = TheCall->getNumArgs();
1036 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1037 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1038 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1040 // Handy pointer type
1041 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1043 // Accumulate how many bytes are to be pushed on the stack, including the
1044 // linkage area, and parameter passing area. According to the SPU ABI,
1045 // we minimally need space for [LR] and [SP]
1046 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1048 // Set up a copy of the stack pointer for use loading and storing any
1049 // arguments that may not fit in the registers available for argument
1051 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1053 // Figure out which arguments are going to go in registers, and which in
1055 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1056 unsigned ArgRegIdx = 0;
1058 // Keep track of registers passing arguments
1059 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1060 // And the arguments passed on the stack
1061 SmallVector<SDValue, 8> MemOpChains;
1063 for (unsigned i = 0; i != NumOps; ++i) {
1064 SDValue Arg = TheCall->getArg(i);
1066 // PtrOff will be used to store the current argument to the stack if a
1067 // register cannot be found for it.
1068 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1069 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
// The per-type cases below are identical: register if one remains,
// otherwise store to the stack slot.  (Case labels elided in this chunk.)
1071 switch (Arg.getValueType().getSimpleVT()) {
1072 default: assert(0 && "Unexpected ValueType for argument!");
1076 if (ArgRegIdx != NumArgRegs) {
1077 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1079 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1080 ArgOffset += StackSlotSize;
1085 if (ArgRegIdx != NumArgRegs) {
1086 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1088 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1089 ArgOffset += StackSlotSize;
1098 if (ArgRegIdx != NumArgRegs) {
1099 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1101 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1102 ArgOffset += StackSlotSize;
1108 // Update number of stack bytes actually used, insert a call sequence start
1109 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1110 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1113 if (!MemOpChains.empty()) {
1114 // Adjust the stack pointer for the stack arguments.
1115 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1116 &MemOpChains[0], MemOpChains.size());
1119 // Build a sequence of copy-to-reg nodes chained together with token chain
1120 // and flag operands which copy the outgoing args into the appropriate regs.
1122 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1123 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1125 InFlag = Chain.getValue(1);
1128 SmallVector<SDValue, 8> Ops;
1129 unsigned CallOpc = SPUISD::CALL;
1131 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1132 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1133 // node so that legalize doesn't hack it.
1134 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1135 GlobalValue *GV = G->getGlobal();
1136 MVT CalleeVT = Callee.getValueType();
1137 SDValue Zero = DAG.getConstant(0, PtrVT);
1138 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1140 if (!ST->usingLargeMem()) {
1141 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1142 // style calls, otherwise, external symbols are BRASL calls. This assumes
1143 // that declared/defined symbols are in the same compilation unit and can
1144 // be reached through PC-relative jumps.
1147 // This may be an unsafe assumption for JIT and really large compilation
1149 if (GV->isDeclaration()) {
1150 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1152 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1155 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1157 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1159 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1160 MVT CalleeVT = Callee.getValueType();
1161 SDValue Zero = DAG.getConstant(0, PtrVT);
1162 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1163 Callee.getValueType());
1165 if (!ST->usingLargeMem()) {
1166 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
1168 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
1170 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1171 // If this is an absolute destination address that appears to be a legal
1172 // local store address, use the munged value.
1173 Callee = SDValue(Dest, 0);
1176 Ops.push_back(Chain);
1177 Ops.push_back(Callee);
1179 // Add argument registers to the end of the list so that they are known live
1181 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1182 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1183 RegsToPass[i].second.getValueType()));
1185 if (InFlag.getNode())
1186 Ops.push_back(InFlag);
1187 // Returns a chain and a flag for retval copy to use.
1188 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1189 &Ops[0], Ops.size());
1190 InFlag = Chain.getValue(1);
1192 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1193 DAG.getIntPtrConstant(0, true), InFlag);
1194 if (TheCall->getValueType(0) != MVT::Other)
1195 InFlag = Chain.getValue(1);
1197 SDValue ResultVals[3];
1198 unsigned NumResults = 0;
1200 // If the call has results, copy the values out of the ret val registers.
// (Case labels elided in this chunk.)  Scalar results come back in R3;
// a result split across two i32 values also uses R4 (first branch below).
1201 switch (TheCall->getValueType(0).getSimpleVT()) {
1202 default: assert(0 && "Unexpected ret value!");
1203 case MVT::Other: break;
1205 if (TheCall->getValueType(1) == MVT::i32) {
1206 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1207 ResultVals[0] = Chain.getValue(0);
1208 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1209 Chain.getValue(2)).getValue(1);
1210 ResultVals[1] = Chain.getValue(0);
1213 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1214 ResultVals[0] = Chain.getValue(0);
1219 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1220 ResultVals[0] = Chain.getValue(0);
1225 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1226 InFlag).getValue(1);
1227 ResultVals[0] = Chain.getValue(0);
1236 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1237 InFlag).getValue(1);
1238 ResultVals[0] = Chain.getValue(0);
1243 // If the function returns void, just return the chain.
1244 if (NumResults == 0)
1247 // Otherwise, merge everything together with a MERGE_VALUES node.
1248 ResultVals[NumResults++] = Chain;
1249 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1250 return Res.getValue(Op.getResNo());
// LowerRET - Lower a RET node: run the return-value calling convention,
// mark the return registers live-out, copy each returned value into its
// assigned register, and emit SPUISD::RET_FLAG (with the glue flag if any
// value was copied).
// NOTE(review): the "static SDValue" return-type line of this definition
// is not visible in this chunk.
1254 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1255 SmallVector<CCValAssign, 16> RVLocs;
1256 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1257 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1258 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1259 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1261 // If this is the first return lowered for this function, add the regs to the
1262 // liveout set for the function.
1263 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1264 for (unsigned i = 0; i != RVLocs.size(); ++i)
1265 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1268 SDValue Chain = Op.getOperand(0);
1271 // Copy the result values into the output registers.
1272 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1273 CCValAssign &VA = RVLocs[i];
1274 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands are (chain, val0, flag0, val1, flag1, ...): value i is
// operand i*2+1.  Each copy is glued to the next via Flag.
1275 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1276 Flag = Chain.getValue(1);
1280 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1282 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1286 //===----------------------------------------------------------------------===//
1287 // Vector related lowering:
1288 //===----------------------------------------------------------------------===//
// getVecImm - If the BUILD_VECTOR node N is a splat of a single constant
// (ignoring UNDEF elements), return that ConstantSDNode; otherwise null.
// NOTE(review): the "return 0;" / "return CN;" lines of this function are
// not visible in this chunk.
1290 static ConstantSDNode *
1291 getVecImm(SDNode *N) {
1292 SDValue OpVal(0, 0);
1294 // Check to see if this buildvec has a single non-undef value in its elements.
1295 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1296 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1297 if (OpVal.getNode() == 0)
1298 OpVal = N->getOperand(i);
1299 else if (OpVal != N->getOperand(i))
1303 if (OpVal.getNode() != 0) {
1304 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1309 return 0; // All UNDEF: use implicit def.; not Constant node
1312 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1313 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// ... constant as a TargetConstant; otherwise an empty SDValue.
// For i64, only a value whose upper and lower 32-bit halves are equal can
// qualify; the value is then reduced to its upper half.
// NOTE(review): the upper != lower rejection path and the trailing
// "return SDValue();" are not visible in this chunk — confirm.
1315 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1317 if (ConstantSDNode *CN = getVecImm(N)) {
1318 uint64_t Value = CN->getZExtValue();
1319 if (ValueType == MVT::i64) {
1320 uint64_t UValue = CN->getZExtValue();
1321 uint32_t upper = uint32_t(UValue >> 32);
1322 uint32_t lower = uint32_t(UValue);
1325 Value = Value >> 32;
// u18 immediates are unsigned, 18 bits: [0, 0x3ffff].
1327 if (Value <= 0x3ffff)
1328 return DAG.getTargetConstant(Value, ValueType);
1334 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1335 /// and the value fits into a signed 16-bit constant, and if so, return the
// ... constant as a TargetConstant; otherwise an empty SDValue.
// NOTE(review): the upper != lower rejection path and the trailing
// "return SDValue();" are not visible in this chunk — confirm.
1337 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1339 if (ConstantSDNode *CN = getVecImm(N)) {
1340 int64_t Value = CN->getSExtValue();
1341 if (ValueType == MVT::i64) {
1342 uint64_t UValue = CN->getZExtValue();
1343 uint32_t upper = uint32_t(UValue >> 32);
1344 uint32_t lower = uint32_t(UValue);
1347 Value = Value >> 32;
// Signed 16-bit range: [-32768, 32767].
1349 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1350 return DAG.getTargetConstant(Value, ValueType);
1357 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1358 /// and the value fits into a signed 10-bit constant, and if so, return the
// ... constant as a TargetConstant; otherwise an empty SDValue.
// NOTE(review): the upper != lower rejection path and the trailing
// "return SDValue();" are not visible in this chunk — confirm.
1360 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1362 if (ConstantSDNode *CN = getVecImm(N)) {
1363 int64_t Value = CN->getSExtValue();
1364 if (ValueType == MVT::i64) {
1365 uint64_t UValue = CN->getZExtValue();
1366 uint32_t upper = uint32_t(UValue >> 32);
1367 uint32_t lower = uint32_t(UValue);
1370 Value = Value >> 32;
// isS10Constant: helper checking the signed 10-bit immediate range.
1372 if (isS10Constant(Value))
1373 return DAG.getTargetConstant(Value, ValueType);
1379 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1380 /// and the value fits into a signed 8-bit constant, and if so, return the
// ... constant as a TargetConstant; otherwise an empty SDValue.
1383 /// @note: The incoming vector is v16i8 because that's the only way we can load
1384 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
// same (see the i16 byte-equality test below).
1386 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1388 if (ConstantSDNode *CN = getVecImm(N)) {
1389 int Value = (int) CN->getZExtValue();
// For i16 splats, accept only values whose high byte equals the low
// byte, and return just that byte.
1390 if (ValueType == MVT::i16
1391 && Value <= 0xffff /* truncated from uint64_t */
1392 && ((short) Value >> 8) == ((short) Value & 0xff))
1393 return DAG.getTargetConstant(Value & 0xff, ValueType);
1394 else if (ValueType == MVT::i8
1395 && (Value & 0xff) == Value)
1396 return DAG.getTargetConstant(Value, ValueType);
1402 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1403 /// and the value fits into a signed 16-bit constant, and if so, return the
// ... constant (shifted down by 16 for the ILHU "load halfword upper"
// encoding); otherwise an empty SDValue.
1405 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1407 if (ConstantSDNode *CN = getVecImm(N)) {
1408 uint64_t Value = CN->getZExtValue();
// Qualifies only when all significant bits live in bits [16,31].
1409 if ((ValueType == MVT::i32
1410 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1411 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1412 return DAG.getTargetConstant(Value >> 16, ValueType);
1418 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splat value as an i32 TargetConstant, or an empty SDValue
// if N is not a constant splat.
1419 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1420 if (ConstantSDNode *CN = getVecImm(N)) {
1421 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1427 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1428 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1429 if (ConstantSDNode *CN = getVecImm(N)) {
1430 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1436 // If this is a vector of constants or undefs, get the bits. A bit in
1437 // UndefBits is set if the corresponding element of the vector is an
1438 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1439 // zero. Return true if this is not an array of constants, false if it is.
1441 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1442 uint64_t UndefBits[2]) {
1443 // Start with zero'd results.
1444 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1446 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1447 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1448 SDValue OpVal = BV->getOperand(i);
// Map element i into (which uint64_t, which bit offset within it).
1450 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1451 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1453 uint64_t EltBits = 0;
1454 if (OpVal.getOpcode() == ISD::UNDEF) {
// Record an all-ones mask for this element in UndefBits; its value
// bits stay zero.
1455 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1456 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1458 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1459 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1460 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants are stored via their raw bit patterns.
1461 const APFloat &apf = CN->getValueAPF();
1462 EltBits = (CN->getValueType(0) == MVT::f32
1463 ? FloatToBits(apf.convertToFloat())
1464 : DoubleToBits(apf.convertToDouble()));
1466 // Nonconstant element.
1470 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1473 //printf("%llx %llx %llx %llx\n",
1474 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1478 /// If this is a splat (repetition) of a value across the whole vector, return
1479 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1480 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1481 /// SplatSize = 1 byte.
// UNDEF bits (set in Undef128) are treated as wildcards so they never
// prevent a splat from being recognized.  Returns true on a splat no
// smaller than MinSplatBits; SplatBits/SplatUndef/SplatSize describe it.
1482 static bool isConstantSplat(const uint64_t Bits128[2],
1483 const uint64_t Undef128[2],
1485 uint64_t &SplatBits, uint64_t &SplatUndef,
1487 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1488 // the same as the lower 64-bits, ignoring undefs.
// Pre-fold each width's halves together; the OR merges values (undefs
// contribute 0) and the AND keeps only bits undef in BOTH halves.
1489 uint64_t Bits64 = Bits128[0] | Bits128[1];
1490 uint64_t Undef64 = Undef128[0] & Undef128[1];
1491 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1492 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1493 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1494 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1496 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1497 if (MinSplatBits < 64) {
1499 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1501 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1502 if (MinSplatBits < 32) {
1504 // If the top 16-bits are different than the lower 16-bits, ignoring
1505 // undefs, we have an i32 splat.
1506 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1507 if (MinSplatBits < 16) {
1508 // If the top 8-bits are different than the lower 8-bits, ignoring
1509 // undefs, we have an i16 splat.
1510 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1511 == ((Bits16 >> 8) & ~Undef16)) {
1512 // Otherwise, we have an 8-bit splat.
1513 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1514 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
// (The intermediate-width assignments below are fall-back results for
// the enclosing if-levels; SplatBits/SplatSize lines are elided in
// this chunk.)
1520 SplatUndef = Undef16;
1527 SplatUndef = Undef32;
1533 SplatBits = Bits128[0];
1534 SplatUndef = Undef128[0];
1540 return false; // Can't be a splat if two pieces don't match.
1543 // If this is a case we can't handle, return null and let the default
1544 // expansion code take care of it. If we CAN select this case, and if it
1545 // selects to a single instruction, return Op. Otherwise, if we can codegen
1546 // this case more efficiently than a constant pool load, lower it to the
1547 // sequence of ops that should be used.
1548 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1549 MVT VT = Op.getValueType();
1550 // If this is a vector of constants or undefs, get the bits. A bit in
1551 // UndefBits is set if the corresponding element of the vector is an
1552 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1554 uint64_t VectorBits[2];
1555 uint64_t UndefBits[2];
1556 uint64_t SplatBits, SplatUndef;
1558 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1559 || !isConstantSplat(VectorBits, UndefBits,
1560 VT.getVectorElementType().getSizeInBits(),
1561 SplatBits, SplatUndef, SplatSize))
1562 return SDValue(); // Not a constant vector, not a splat.
// Per-type splat materialization.  (Case labels elided in this chunk.)
1564 switch (VT.getSimpleVT()) {
1567 uint32_t Value32 = SplatBits;
1568 assert(SplatSize == 4
1569 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1570 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1571 SDValue T = DAG.getConstant(Value32, MVT::i32);
1572 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1573 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1577 uint64_t f64val = SplatBits;
1578 assert(SplatSize == 8
1579 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1580 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1581 SDValue T = DAG.getConstant(f64val, MVT::i64);
1582 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1583 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1587 // 8-bit constants have to be expanded to 16-bits
1588 unsigned short Value16 = SplatBits | (SplatBits << 8);
1590 for (int i = 0; i < 8; ++i)
1591 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1592 return DAG.getNode(ISD::BIT_CONVERT, VT,
1593 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1596 unsigned short Value16;
1598 Value16 = (unsigned short) (SplatBits & 0xffff);
1600 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1601 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1603 for (int i = 0; i < 8; ++i) Ops[i] = T;
1604 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1607 unsigned int Value = SplatBits;
1608 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1609 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 splats: easy when the two 32-bit halves match; otherwise build
// the value by shuffling special patterns (0 / ~0 / sign bit) together.
1612 uint64_t val = SplatBits;
1613 uint32_t upper = uint32_t(val >> 32);
1614 uint32_t lower = uint32_t(val);
1616 if (upper == lower) {
1617 // Magic constant that can be matched by IL, ILA, et. al.
1618 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1619 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1623 SmallVector<SDValue, 16> ShufBytes;
1625 bool upper_special, lower_special;
1627 // NOTE: This code creates common-case shuffle masks that can be easily
1628 // detected as common expressions. It is not attempting to create highly
1629 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1631 // Detect if the upper or lower half is a special shuffle mask pattern:
1632 upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1633 lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1635 // Create lower vector if not a special pattern
1636 if (!lower_special) {
1637 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1638 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1639 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1640 LO32C, LO32C, LO32C, LO32C));
1643 // Create upper vector if not a special pattern
1644 if (!upper_special) {
1645 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1646 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1647 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1648 HI32C, HI32C, HI32C, HI32C));
1651 // If either upper or lower are special, then the two input operands are
1652 // the same (basically, one of them is a "don't care")
1657 if (lower_special && upper_special) {
1658 // Unhappy situation... both upper and lower are special, so punt with
1659 // a target constant:
1660 SDValue Zero = DAG.getConstant(0, MVT::i32);
1661 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte SHUFB control word: one byte per result byte,
// using the special encodings 0x80 (zero), 0xc0 (0xff), 0xe0 (0x80).
1665 for (int i = 0; i < 4; ++i) {
1667 for (int j = 0; j < 4; ++j) {
1669 bool process_upper, process_lower;
1671 process_upper = (upper_special && (i & 1) == 0);
1672 process_lower = (lower_special && (i & 1) == 1);
1674 if (process_upper || process_lower) {
1675 if ((process_upper && upper == 0)
1676 || (process_lower && lower == 0))
1678 else if ((process_upper && upper == 0xffffffff)
1679 || (process_lower && lower == 0xffffffff))
1681 else if ((process_upper && upper == 0x80000000)
1682 || (process_lower && lower == 0x80000000))
1683 val |= (j == 0 ? 0xe0 : 0x80);
1685 val |= i * 4 + j + ((i & 1) * 16);
1688 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1691 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1692 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1693 &ShufBytes[0], ShufBytes.size()));
1701 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1702 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1703 /// permutation vector, V3, is monotonically increasing with one "exception"
1704 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1705 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1706 /// In either case, the net result is going to eventually invoke SHUFB to
1707 /// permute/shuffle the bytes from V1 and V2.
1709 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1710 /// control word for byte/halfword/word insertion. This takes care of a single
1711 /// element move from V2 into V1.
1713 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1714 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1715 SDValue V1 = Op.getOperand(0);
1716 SDValue V2 = Op.getOperand(1);
1717 SDValue PermMask = Op.getOperand(2);
1719 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1721 // If we have a single element being moved from V1 to V2, this can be handled
1722 // using the C*[DX] compute mask instructions, but the vector elements have
1723 // to be monotonically increasing with one exception element.
1724 MVT VecVT = V1.getValueType();
1725 MVT EltVT = VecVT.getVectorElementType();
1726 unsigned EltsFromV2 = 0;
1728 unsigned V2EltIdx0 = 0;
1729 unsigned CurrElt = 0;
1730 unsigned MaxElts = VecVT.getVectorNumElements();
1731 unsigned PrevElt = 0;
1733 bool monotonic = true;
// V2EltIdx0 assignments for each element width are elided in this chunk;
// it is the mask index at which V2's elements begin.
1736 if (EltVT == MVT::i8) {
1738 } else if (EltVT == MVT::i16) {
1740 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1742 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1745 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the permutation mask, classifying it as a single-element insert
// (monotonic) and/or a rotation.
1747 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1748 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1749 unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
1752 if (SrcElt >= V2EltIdx0) {
1753 if (1 >= (++EltsFromV2)) {
1754 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1756 } else if (CurrElt != SrcElt) {
// NOTE(review): the rotation test below ignores positions where
// PrevElt == 0, so rotations passing through element 0 may not be
// recognized — confirm whether that case matters here.
1764 if (PrevElt > 0 && SrcElt < MaxElts) {
1765 if ((PrevElt == SrcElt - 1)
1766 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1773 } else if (PrevElt == 0) {
1774 // First time through, need to keep track of previous element
1777 // This isn't a rotation, takes elements from vector 2
1784 if (EltsFromV2 == 1 && monotonic) {
1785 // Compute mask and shuffle
1786 MachineFunction &MF = DAG.getMachineFunction();
1787 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1788 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1789 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1790 // Initialize temporary register to 0
1791 SDValue InitTempReg =
1792 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1793 // Copy register's contents as index in SHUFFLE_MASK:
1794 SDValue ShufMaskOp =
1795 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1796 DAG.getTargetConstant(V2Elt, MVT::i32),
1797 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1798 // Use shuffle mask in SHUFB synthetic instruction:
1799 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1800 } else if (rotate) {
// Rotation: convert the element rotate amount into bytes.
1801 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1803 return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
1804 V1, DAG.getConstant(rotamt, MVT::i16));
1806 // Convert the SHUFFLE_VECTOR mask's input element units to the
1808 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1810 SmallVector<SDValue, 16> ResultMask;
1811 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1813 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1816 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1818 for (unsigned j = 0; j < BytesPerElement; ++j) {
1819 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
// Fall back to a general byte-level SHUFB with an explicit v16i8 mask.
1824 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1825 &ResultMask[0], ResultMask.size());
1826 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// LowerSCALAR_TO_VECTOR - Lower SCALAR_TO_VECTOR: constants become an
// explicit constant BUILD_VECTOR (which simplifies to a vector register
// load); non-constants are promoted into the preferred slot via
// SPUISD::PREFSLOT2VEC.
1830 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1831 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1833 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1834 // For a constant, build the appropriate constant vector, which will
1835 // eventually simplify to a vector register load.
1837 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1838 SmallVector<SDValue, 16> ConstVecValues;
1842 // Create a constant vector:
1843 switch (Op.getValueType().getSimpleVT()) {
1844 default: assert(0 && "Unexpected constant value type in "
1845 "LowerSCALAR_TO_VECTOR");
1846 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1847 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1848 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1849 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1850 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1851 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant across all vector lanes.
1854 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1855 for (size_t j = 0; j < n_copies; ++j)
1856 ConstVecValues.push_back(CValue);
1858 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1859 &ConstVecValues[0], ConstVecValues.size());
1861 // Otherwise, copy the value from one register to another:
1862 switch (Op0.getValueType().getSimpleVT()) {
1863 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1870 return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
// LowerEXTRACT_VECTOR_ELT - Lower element extraction.  Constant indices
// build a SHUFB mask that moves the element into the preferred slot;
// variable indices rotate the element to byte 0, replicate it across the
// vector, then read the preferred slot.
1877 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1878 MVT VT = Op.getValueType();
1879 SDValue N = Op.getOperand(0);
1880 SDValue Elt = Op.getOperand(1);
1883 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1884 // Constant argument:
1885 int EltNo = (int) C->getZExtValue();
// Bounds checks per element type.
// NOTE(review): the i32/i64 assert message strings say "> 4" / "> 2"
// although the checks reject EltNo >= 4 / >= 2 — messages are off by one.
1888 if (VT == MVT::i8 && EltNo >= 16)
1889 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1890 else if (VT == MVT::i16 && EltNo >= 8)
1891 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1892 else if (VT == MVT::i32 && EltNo >= 4)
1893 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1894 else if (VT == MVT::i64 && EltNo >= 2)
1895 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1897 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1898 // i32 and i64: Element 0 is the preferred slot
1899 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
1902 // Need to generate shuffle mask and extract:
1903 int prefslot_begin = -1, prefslot_end = -1;
1904 int elt_byte = EltNo * VT.getSizeInBits() / 8;
// Preferred-slot byte range per type.  (Case labels elided in this chunk.)
1906 switch (VT.getSimpleVT()) {
1908 assert(false && "Invalid value type!");
1910 prefslot_begin = prefslot_end = 3;
1914 prefslot_begin = 2; prefslot_end = 3;
1919 prefslot_begin = 0; prefslot_end = 3;
1924 prefslot_begin = 0; prefslot_end = 7;
1929 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1930 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build a 16-byte shuffle that places the requested element's bytes
// into the preferred slot.
1932 unsigned int ShufBytes[16];
1933 for (int i = 0; i < 16; ++i) {
1934 // zero fill uppper part of preferred slot, don't care about the
1936 unsigned int mask_val;
1937 if (i <= prefslot_end) {
1939 ((i < prefslot_begin)
1941 : elt_byte + (i - prefslot_begin));
1943 ShufBytes[i] = mask_val;
1945 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
// Pack the 16 byte selectors into four i32 mask words.
1948 SDValue ShufMask[4];
1949 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1950 unsigned bidx = i * 4;
1951 unsigned int bits = ((ShufBytes[bidx] << 24) |
1952 (ShufBytes[bidx+1] << 16) |
1953 (ShufBytes[bidx+2] << 8) |
1955 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1958 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1960 sizeof(ShufMask) / sizeof(ShufMask[0]));
1962 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
1963 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
1964 N, N, ShufMaskVec));
1966 // Variable index: Rotate the requested element into slot 0, then replicate
1967 // slot 0 across the vector
1968 MVT VecVT = N.getValueType();
1969 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
1970 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
1974 // Make life easier by making sure the index is zero-extended to i32
1975 if (Elt.getValueType() != MVT::i32)
1976 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
1978 // Scale the index to a bit/byte shift quantity
// scaleFactor = 16 / numElements = element size in bytes.
1980 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
1981 unsigned scaleShift = scaleFactor.logBase2();
1984 if (scaleShift > 0) {
1985 // Scale the shift factor:
1986 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
1987 DAG.getConstant(scaleShift, MVT::i32));
1990 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
1992 // Replicate the bytes starting at byte 0 across the entire vector (for
1993 // consistency with the notion of a unified register set)
// Per-type replication shuffle patterns.  (Case labels elided.)
1996 switch (VT.getSimpleVT()) {
1998 cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2002 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2003 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2008 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2009 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2015 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2016 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2022 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2023 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2024 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2025 loFactor, hiFactor);
2030 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2031 DAG.getNode(SPUISD::SHUFB, VecVT,
2032 vecShift, vecShift, replicate));
//! Custom lower ISD::INSERT_VECTOR_ELT for the SPU target.
/*!
  Inserts ValOp into VecOp at a compile-time-constant index by building a
  SHUFFLE_MASK control word from a $sp-relative address and then SHUFBing
  the scalar into place.
  NOTE(review): statements appear to be missing from this excerpt (the
  SHUFB result is never bound to a value or returned, and VecOp is unused
  here) -- verify against the upstream file before editing.
 */
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  MVT VT = Op.getValueType();
  // The insertion index must be constant; cast<> asserts if it is not.
  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $sp ($1) because it's always 16-byte aligned and it's available:
  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(CN->getSExtValue(), PtrVT));
  // Derive the byte-insertion control word from the computed address.
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
//! Lower i8 arithmetic by promoting to i16.
/*!
  The SPU has no native 8-bit ALU operations, so each supported i8
  operator is widened to i16 (sign- or zero-extended as appropriate for
  the operator), computed at 16 bits, then truncated back to i8.

  \param Op   the i8 operation to lower
  \param DAG  the current SelectionDAG
  \param Opc  the ISD opcode being lowered (reused at i16 width)
  \param TLI  target lowering info, used for the shift-amount type

  NOTE(review): the switch(Opc) statement and its case labels appear to
  have been lost from this excerpt; each chunk below is one opcode's
  lowering, and several continuation lines are missing -- verify each
  chunk's opcode against the upstream file.
 */
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
                           const TargetLowering &TLI)
  SDValue N0 = Op.getOperand(0); // Everything has at least one operand
  MVT ShiftVT = TLI.getShiftAmountTy();

  assert(Op.getValueType() == MVT::i8);
    assert(0 && "Unhandled i8 math operator");

    // 8-bit addition: Promote the arguments up to 16-bits and truncate
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

    // Rotate-style expansion: the byte value is zero-extended and then
    // replicated into both halves of the i16 (N0 | (N0 << 8)) so that a
    // 16-bit rotate behaves like an 8-bit rotate -- presumably the
    // ROTL/ROTR case; TODO confirm against upstream.
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    // Widen or narrow the shift amount to the target's shift-amount type.
    N1Opc = N1.getValueType().bitsLT(ShiftVT)
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, ShiftVT, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            TLI.getShiftAmountTy()));
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));

    // Logical-shift style: zero-extend the value so vacated high bits
    // are zero -- presumably the SRL/SHL case; TODO confirm.
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1Opc = N1.getValueType().bitsLT(ShiftVT)
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, ShiftVT, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

    // Arithmetic-shift style: sign-extend the value so the sign bit is
    // replicated into the high half -- presumably the SRA case.
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
    N1Opc = N1.getValueType().bitsLT(ShiftVT)
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, ShiftVT, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

    // Both operands sign-extended and the operation performed at i16 --
    // presumably the MUL case; TODO confirm.
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
//! Lower i64 operations the SPU cannot perform natively.
/*!
  Handles zero/sign/any-extension of i32 to i64 (via a SHUFB with a
  constant mask) and 64-bit add/subtract (via CARRY_GENERATE /
  BORROW_GENERATE plus the extended add/sub nodes).
  NOTE(review): case labels and several declaration lines (e.g. the
  "SDValue Op0/Op1 =" bindings before the PREFSLOT2VEC calls) appear to
  be missing from this excerpt -- verify against the upstream file.
 */
static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
  MVT VT = Op.getValueType();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  SDValue Op0 = Op.getOperand(0);

  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    MVT Op0VT = Op0.getValueType();
    MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));

    assert(Op0VT == MVT::i32
           && "CellSPU: Zero/sign extending something other than i32");

    DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");

    // Move the scalar into the preferred slot of a vector register.
    SDValue PromoteScalar =
      DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);

    // Use a shuffle to zero extend the i32 to i64 directly:
    // (0x80 control bytes produce zero bytes in the shufb result.)
    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
        DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(0x00010203,
          MVT::i32), DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(
          0x08090a0b, MVT::i32));
    SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, PromoteScalar,
                                      PromoteScalar, shufMask);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, DAG.getNode(ISD::BIT_CONVERT,
                                                             VecVT, zextShuffle));

    // 64-bit add:
    // Turn operands into vectors to satisfy type checking (shufb works on
      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the borrow up one register slot
    // once the borrow is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));

      DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedCarry =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedCarry));

    // 64-bit subtract (same shape, but with borrow; 0xc0 control bytes
    // produce 0xFF bytes in the shufb result, per the SPU shufb special
    // encodings).
    // Turn operands into vectors to satisfy type checking (shufb works on
      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the borrow up one register slot
    // once the borrow is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));

      DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedBorrow =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  BorrowGen, BorrowGen,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedBorrow));
2267 //! Lower byte immediate operations for v16i8 vectors:
/*!
  Lower AND/OR/XOR on v16i8 to the byte-immediate forms (ANDBI, ORBI,
  XORBI): if one operand is a constant splat BUILD_VECTOR (possibly
  behind a BIT_CONVERT), rewrite it as a vector of i8 target constants so
  instruction selection can match the immediate form.
  NOTE(review): the ConstVec/Arg declarations and the SplatSize
  declaration appear to be missing from this excerpt.
 */
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  // Assume the constant is operand 0; if it is not a BUILD_VECTOR, look
  // through a BIT_CONVERT, then try operand 1 instead.
  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;

    // Only proceed when every element is the same constant byte splat.
    if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           VT.getVectorElementType().getSizeInBits(),
                           SplatBits, SplatUndef, SplatSize)) {
      // Truncate the splat to its low byte for the immediate form.
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)

      return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));

  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
2315 //! Custom lowering for CTPOP (count population)
  Custom lowering code that counts the number of ones in the input
2318 operand. SPU has such an instruction, but it counts the number of
2319 ones per byte, which then have to be accumulated.
/*!
  Custom lowering for ISD::CTPOP: the SPU CNTB instruction counts ones
  per byte, so the per-byte counts must then be accumulated for i16/i32.
  NOTE(review): case labels and some continuation lines appear to be
  missing from this excerpt; the three chunks below handle i8, i16 and
  i32 respectively -- verify against the upstream file.
 */
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  switch (VT.getSimpleVT()) {
    assert(false && "Invalid value type!");

    // i8: a single CNTB already yields the full popcount of one byte.
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);

    // i16: sum the two byte counts (shift right 8, add, mask to 0x0f --
    // a 16-bit value has at most 16 one-bits, so 5 bits would suffice;
    // the 0x0f mask matches the upstream code).
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    // Virtual register used to tie the CNTB result into both addends.
    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,

    // i32: two rounds of shift-and-add (16 then 8) to fold all four byte
    // counts into the low byte, then mask with 0xff.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),

      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2423 //! Lower ISD::SETCC
2425 Lower i64 condition code handling.
2428 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) {
2429 MVT VT = Op.getValueType();
2430 SDValue lhs = Op.getOperand(0);
2431 SDValue rhs = Op.getOperand(1);
2432 SDValue condition = Op.getOperand(2);
2434 if (VT == MVT::i32 && lhs.getValueType() == MVT::i64) {
2435 // Expand the i64 comparisons to what Cell can actually support,
2436 // which is eq, ugt and sgt:
2438 CondCodeSDNode *ccvalue = dyn_cast<CondCodeSDValue>(condition);
2440 switch (ccvalue->get()) {
2449 //! Lower ISD::SELECT_CC
2451 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2454 \note Need to revisit this in the future: if the code path through the true
2455 and false value computations is longer than the latency of a branch (6
2456 cycles), then it would be more advantageous to branch and insert a new basic
2457 block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
//! Lower ISD::SELECT_CC to a SETCC feeding a SPU SELB node.
/*!
  The compare is emitted as a plain ISD::SETCC and its result used as the
  SELB mask; see the inline notes for why the true/false operands are
  swapped relative to SELB's operand order.
 */
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
                              const TargetLowering &TLI) {
  MVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);

  // NOTE: SELB's arguments: $rA, $rB, $mask
  //
  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
  // where bits in $mask are 1. CCond will be inverted, having 1s where the
  // condition was true and 0s where the condition was false. Hence, the
  // arguments to SELB get reversed.

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:

  SDValue compare = DAG.getNode(ISD::SETCC,
                                TLI.getSetCCResultType(Op.getValueType()),
                                lhs, rhs, condition);
  // falseval first, trueval second -- see the SELB note above.
  return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
2487 //! Custom lower ISD::TRUNCATE
//! Custom lower ISD::TRUNCATE (currently only i128 -> i64).
/*!
  Performs the truncation with a SHUFB that selects the least-significant
  doubleword of the quadword; any other truncate is returned unchanged so
  the default expansion applies.
 */
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
  MVT VT = Op.getValueType();
  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();
  MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));

  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
    // Create shuffle mask, least significant doubleword of quadword
    unsigned maskHigh = 0x08090a0b;
    unsigned maskLow = 0x0c0d0e0f;
    // Use a shuffle to perform the truncation
    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32),
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32));

    // Move the scalar into a vector register so SHUFB's typing works out.
    SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);

    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
                                       PromoteScalar, PromoteScalar, shufMask);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));

  return SDValue();                  // Leave the truncate unmolested
2522 //! Custom (target-specific) lowering entry point
2524 This is where LLVM's DAG selection process calls to do target-specific
//! Custom (target-specific) lowering entry point.
/*!
  Dispatches each custom-lowered opcode to its Lower* helper; the default
  path dumps the unhandled node to cerr for diagnosis.
  NOTE(review): several case labels (LOAD, STORE, Constant, CALL, RET,
  the i8/i64 arithmetic opcodes, AND/OR/XOR, MUL, CTPOP, TRUNCATE,
  SETCC, and the default label) appear to be missing from this excerpt.
 */
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
  unsigned Opc = (unsigned) Op.getOpcode();
  MVT VT = Op.getValueType();

    // Unhandled opcode: print diagnostics before giving up.
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    Op.getNode()->dump();

    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
    return LowerRET(Op, DAG, getTargetMachine());

  // i64 extensions go through the i64 math lowering.
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return LowerI64Math(Op, DAG, Opc);

  // i8, i64 math ops:
    return LowerI8Math(Op, DAG, Opc, *this);
    else if (VT == MVT::i64)
      return LowerI64Math(Op, DAG, Opc);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
    return LowerI8Math(Op, DAG, Opc, *this);

    return LowerCTPOP(Op, DAG);

  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG, *this);

    return LowerTRUNCATE(Op, DAG);

    return LowerSETCC(Op, DAG);
//! Replace results of nodes whose types need custom legalization.
/*!
  Currently only dumps diagnostics for unhandled opcodes and otherwise
  leaves the node unchanged.
  NOTE(review): the parameter list and switch structure appear to be
  partially missing from this excerpt.
 */
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
  unsigned Opc = (unsigned) N->getOpcode();
  MVT OpVT = N->getValueType(0);

    // Unhandled opcode: print diagnostics for the maintainer.
    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";

  /* Otherwise, return unchanged */
2647 //===----------------------------------------------------------------------===//
2648 // Target Optimization Hooks
2649 //===----------------------------------------------------------------------===//
//! Target-specific DAG combines for the SPU.
/*!
  Folds performed here (each returns a replacement node when it fires):
    - (add (SPUindirect <arg>, <arg>), 0)        -> (SPUindirect <arg>, <arg>)
    - (add (SPUindirect <arg>, c1), c2)          -> (SPUindirect <arg>, c1+c2)
    - (any/zero/sign_extend (SPUvec2prefslot x)) -> (SPUvec2prefslot x),
      when the types already match
    - (SPUindirect (SPUaform <addr>, 0), 0)      -> (SPUaform <addr>, 0)
      (small-memory model only)
    - (SPUindirect (add a, b), 0)                -> (SPUindirect a, b)
    - degenerate vector shifts/rotates by 0      -> the unshifted operand
    - prefslot2vec / vec2prefslot round-trips    -> the original vector
  NOTE(review): the return type line, several case labels, and a few
  statement lines (e.g. the IndirectArg/AddArg swap, some cerr streams)
  appear to be missing from this excerpt -- verify upstream.
 */
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  TargetMachine &TM = getTargetMachine();
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);      // everything has at least one operand
  MVT NodeVT = N->getValueType(0);     // The node's value type
  MVT Op0VT = Op0.getValueType();      // The first operand's result
  SDValue Result;                      // Initially, empty result

  switch (N->getOpcode()) {
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
              << "Replace:    (add (SPUindirect <arg>, <arg>), 0)\n"
              << "With:    (SPUindirect <arg>, <arg>)\n";

        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
              << "Replace:    (add (SPUindirect <arg>, " << CN1->getSExtValue()
              << "), " << CN0->getSExtValue() << ")\n"
              << "With:    (SPUindirect <arg>, "
              << combinedConst << ")\n";

          return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
                             IndirectArg, combinedValue);

  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        cerr << "\nReplace: ";
        Op0.getNode()->dump(&DAG);

  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
      if (CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");

    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
              << "Replace:    (SPUindirect (add <arg>, <arg>), 0)\n"
              << "With:    (SPUindirect <arg>, <arg>)\n";

          return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));

  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {

  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {

    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      Result = Op0.getOperand(0);

  // Otherwise, return unchanged.
  if (Result.getNode()) {
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
2837 //===----------------------------------------------------------------------===//
2838 // Inline Assembly Support
2839 //===----------------------------------------------------------------------===//
2841 /// getConstraintType - Given a constraint letter, return the type of
2842 /// constraint it is for this target.
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target. Single-letter register constraints
/// map to C_RegisterClass; everything else defers to the base class.
/// NOTE(review): the case labels for the recognized letters appear to be
/// missing from this excerpt.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
      return C_RegisterClass;
  return TargetLowering::getConstraintType(ConstraintLetter);
/// Map a single-letter inline-asm constraint (plus the operand's value
/// type) to a concrete SPU register class; unrecognized constraints are
/// handled by the base class.
/// NOTE(review): the case labels and some if-conditions appear to be
/// missing from this excerpt.
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      return std::make_pair(0U, SPU::GPRCRegisterClass);

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2885 //! Compute used/known bits for a SPU operand
//! Compute known-zero/known-one bits for SPU-specific nodes.
/*!
  For PREFSLOT2VEC / VEC2PREFSLOT / LDRESULT, bits outside the operand's
  integer value-type mask are known zero (the value occupies only the
  preferred slot). The shift/rotate/select nodes listed at the end fall
  through with no information.
  NOTE(review): the return type and parts of the parameter list appear to
  be missing from this excerpt.
 */
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth ) const {
  const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;

  switch (Op.getOpcode()) {
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);

  case SPUISD::PREFSLOT2VEC: {
    SDValue Op0 = Op.getOperand(0);
    MVT Op0VT = Op0.getValueType();
    unsigned Op0VTBits = Op0VT.getSizeInBits();
    uint64_t InMask = Op0VT.getIntegerVTBitMask();
    // Bits above the operand's width are known zero.
    KnownZero |= APInt(Op0VTBits, ~InMask, false);
    KnownOne |= APInt(Op0VTBits, InMask, false);

  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT: {
    MVT OpVT = Op.getValueType();
    unsigned OpVTBits = OpVT.getSizeInBits();
    uint64_t InMask = OpVT.getIntegerVTBitMask();
    KnownZero |= APInt(OpVTBits, ~InMask, false);
    KnownOne |= APInt(OpVTBits, InMask, false);

  // No information is computed for the following nodes:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:

  case SPUISD::SEXT32TO64:
//! Compute the number of sign bits for SPU-specific nodes.
/*!
  NOTE(review): the return type, case labels and surrounding braces
  appear to be missing from this excerpt; the visible fragment returns
  the full bit width for i8/i16/i32 value types -- verify upstream.
 */
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
    MVT VT = Op.getValueType();

    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
    return VT.getSizeInBits();
2963 // LowerAsmOperandForConstraint
// LowerAsmOperandForConstraint -- SPU adds nothing here yet; the call is
// forwarded verbatim to the TargetLowering base implementation.
// NOTE(review): the trailing arguments of the forwarded call appear to
// be missing from this excerpt.
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
2975 /// isLegalAddressImmediate - Return true if the integer value can be used
2976 /// as the offset of the target addressing mode.
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode. The SPU's d-form
/// addressing covers a signed 18-bit range.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // SPU's addresses are 256K:
  // Accepts the open interval (-2^18, 2^18 - 1).
  return (V > -(1 << 18) && V < (1 << 18) - 1);
// GlobalValue overload of isLegalAddressImmediate.
// NOTE(review): the function body appears to be missing from this
// excerpt -- verify the return value upstream.
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2988 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
2989 // The SPU target isn't yet aware of offsets.