1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/Constants.h"
27 #include "llvm/Function.h"
28 #include "llvm/Intrinsics.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Target/TargetOptions.h"
37 // Used in getTargetNodeName() below
// Lazily-populated opcode -> name table; filled on first call to
// getTargetNodeName().  NOTE(review): mutable file-scope state — not
// thread-safe if two threads request node names concurrently.
39 std::map<unsigned, const char *> node_names;
41 //! MVT mapping to useful data for Cell SPU
42 struct valtype_map_s {
// Byte offset of this value type's "preferred slot" within the SPU's
// 16-byte register/quadword (used as a rotate amount in LowerLOAD —
// see the `- vtm->prefslot_byte` arithmetic there).
44 const int prefslot_byte;
// Table of per-MVT data; initializer entries elided in this view.
47 const valtype_map_s valtype_map[] = {
// Number of entries in valtype_map (classic countof idiom).
58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear-scan lookup of the valtype_map entry for VT.  Returns the
// matching entry, or (per the diagnostic below) NULL when VT has no
// entry — callers such as LowerLOAD dereference the result without a
// null check, so every type reaching them must be in the table.
60 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
61 const valtype_map_s *retval = 0;
63 for (size_t i = 0; i < n_valtype_map; ++i) {
64 if (valtype_map[i].valtype == VT) {
65 retval = valtype_map + i;
// Diagnostic path for a missing entry (continuation elided in this view).
72 cerr << "getValueTypeMapEntry returns NULL for "
// Constructor: registers the SPU register classes and declares, per
// (opcode, type) pair, how SelectionDAG legalization must treat each
// operation (Legal / Promote / Expand / Custom).  "Custom" entries are
// handled by the Lower* routines later in this file.
83 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
87 // Fold away setcc operations if possible.
90 // Use _setjmp/_longjmp instead of setjmp/longjmp.
91 setUseUnderscoreSetJmp(true);
92 setUseUnderscoreLongJmp(true);
94 // Set up the SPU's register classes:
95 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
96 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
97 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
98 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
99 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
100 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
101 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
103 // SPU has no sign or zero extended loads for i1, i8, i16:
104 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
105 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
106 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
108 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
109 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
111 // SPU constant load actions are custom lowered:
112 setOperationAction(ISD::Constant, MVT::i64, Custom);
113 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
114 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
116 // SPU's loads and stores have to be custom lowered:
// Integer types i8..(i128 exclusive); loop increment elided in this view.
117 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
119 MVT VT = (MVT::SimpleValueType)sctype;
121 setOperationAction(ISD::LOAD, VT, Custom);
122 setOperationAction(ISD::STORE, VT, Custom);
123 setLoadExtAction(ISD::EXTLOAD, VT, Custom);
124 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
125 setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
127 // SMUL_LOHI, UMUL_LOHI are not legal for Cell:
128 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
129 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
// Expand every truncating store from VT down to each narrower int type.
131 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
132 MVT StoreVT = (MVT::SimpleValueType) stype;
133 setTruncStoreAction(VT, StoreVT, Expand);
// Same treatment for the FP types (f32 only, since the bound excludes f64).
137 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
139 MVT VT = (MVT::SimpleValueType) sctype;
141 setOperationAction(ISD::LOAD, VT, Custom);
142 setOperationAction(ISD::STORE, VT, Custom);
144 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
145 MVT StoreVT = (MVT::SimpleValueType) stype;
146 setTruncStoreAction(VT, StoreVT, Expand);
150 // Expand the jumptable branches
151 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
152 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
154 // Custom lower SELECT_CC for most cases, but expand by default
155 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
156 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
157 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
158 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
159 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
161 // SPU has no intrinsics for these particular operations:
162 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
164 // SPU has no SREM/UREM instructions
165 setOperationAction(ISD::SREM, MVT::i32, Expand);
166 setOperationAction(ISD::UREM, MVT::i32, Expand);
167 setOperationAction(ISD::SREM, MVT::i64, Expand);
168 setOperationAction(ISD::UREM, MVT::i64, Expand);
170 // We don't support sin/cos/sqrt/fmod
171 setOperationAction(ISD::FSIN , MVT::f64, Expand);
172 setOperationAction(ISD::FCOS , MVT::f64, Expand);
173 setOperationAction(ISD::FREM , MVT::f64, Expand);
174 setOperationAction(ISD::FSIN , MVT::f32, Expand);
175 setOperationAction(ISD::FCOS , MVT::f32, Expand);
176 setOperationAction(ISD::FREM , MVT::f32, Expand);
178 // If we're enabling GP optimizations, use hardware square root
// NOTE(review): despite the comment above, FSQRT is unconditionally
// Expanded here — no GP-optimization check is visible.
179 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
180 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
182 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
183 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
185 // SPU can do rotate right and left, so legalize it... but customize for i8
186 // because instructions don't exist.
188 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
190 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
191 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
192 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
194 setOperationAction(ISD::ROTL, MVT::i32, Legal);
195 setOperationAction(ISD::ROTL, MVT::i16, Legal);
196 setOperationAction(ISD::ROTL, MVT::i8, Custom);
198 // SPU has no native version of shift left/right for i8
199 setOperationAction(ISD::SHL, MVT::i8, Custom);
200 setOperationAction(ISD::SRL, MVT::i8, Custom);
201 setOperationAction(ISD::SRA, MVT::i8, Custom);
203 // Make these operations legal and handle them during instruction selection:
204 setOperationAction(ISD::SHL, MVT::i64, Legal);
205 setOperationAction(ISD::SRL, MVT::i64, Legal);
206 setOperationAction(ISD::SRA, MVT::i64, Legal);
208 // Custom lower i8, i32 and i64 multiplications
209 setOperationAction(ISD::MUL, MVT::i8, Custom);
210 setOperationAction(ISD::MUL, MVT::i32, Legal);
211 setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
213 // Need to custom handle (some) common i8, i64 math ops
214 setOperationAction(ISD::ADD, MVT::i8, Custom);
215 setOperationAction(ISD::ADD, MVT::i64, Custom);
216 setOperationAction(ISD::SUB, MVT::i8, Custom);
217 setOperationAction(ISD::SUB, MVT::i64, Custom);
219 // SPU does not have BSWAP. It does have i32 support CTLZ.
220 // CTPOP has to be custom lowered.
221 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
222 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
224 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
225 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
226 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
227 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
229 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
230 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
232 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
234 // SPU has a version of select that implements (a&~c)|(b&c), just like
235 // select ought to work:
236 setOperationAction(ISD::SELECT, MVT::i8, Legal);
237 setOperationAction(ISD::SELECT, MVT::i16, Legal);
238 setOperationAction(ISD::SELECT, MVT::i32, Legal);
239 setOperationAction(ISD::SELECT, MVT::i64, Legal);
241 setOperationAction(ISD::SETCC, MVT::i8, Legal);
242 setOperationAction(ISD::SETCC, MVT::i16, Legal);
243 setOperationAction(ISD::SETCC, MVT::i32, Legal);
244 setOperationAction(ISD::SETCC, MVT::i64, Legal);
246 // Zero extension and sign extension for i64 have to be
248 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
249 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
251 // Custom lower i128 -> i64 truncates
252 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
254 // SPU has a legal FP -> signed INT instruction
255 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
256 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
// NOTE(review): FP_TO_UINT/i32 is set Legal here but overridden to
// Promote further down (original line 316); the later call wins.
257 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
258 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
260 // FDIV on SPU requires custom lowering
261 setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
263 // SPU has [U|S]INT_TO_FP
264 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
265 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
266 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
267 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
268 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
269 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
270 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
271 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
273 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
274 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
275 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
276 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
278 // We cannot sextinreg(i1). Expand to shifts.
279 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
281 // Support label based line numbers.
282 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
283 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
285 // We want to legalize GlobalAddress and ConstantPool nodes into the
286 // appropriate instructions to materialize the address.
287 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
289 MVT VT = (MVT::SimpleValueType)sctype;
291 setOperationAction(ISD::GlobalAddress, VT, Custom);
292 setOperationAction(ISD::ConstantPool, VT, Custom);
293 setOperationAction(ISD::JumpTable, VT, Custom);
296 // RET must be custom lowered, to meet ABI requirements
297 setOperationAction(ISD::RET, MVT::Other, Custom);
299 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
300 setOperationAction(ISD::VASTART , MVT::Other, Custom);
302 // Use the default implementation.
303 setOperationAction(ISD::VAARG , MVT::Other, Expand);
304 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
305 setOperationAction(ISD::VAEND , MVT::Other, Expand);
306 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
307 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
308 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
309 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
311 // Cell SPU has instructions for converting between i64 and fp.
// NOTE(review): these two calls duplicate identical settings made
// above (original lines 256 and 270); harmless but redundant.
312 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
313 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
315 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
316 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
318 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
319 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
321 // First set operation action for all vector types to expand. Then we
322 // will selectively turn on ones that can be effectively codegen'd.
// All vector types live in the single 128-bit VECREG class.
323 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
324 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
325 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
326 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
327 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
328 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
330 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
331 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
332 MVT VT = (MVT::SimpleValueType)i;
334 // add/sub are legal for all supported vector VT's.
335 setOperationAction(ISD::ADD , VT, Legal);
336 setOperationAction(ISD::SUB , VT, Legal);
337 // mul has to be custom lowered.
338 // TODO: v2i64 vector multiply
// NOTE(review): comment above says "custom lowered" but the action
// recorded is Legal — one of the two is stale.
339 setOperationAction(ISD::MUL , VT, Legal);
341 setOperationAction(ISD::AND , VT, Legal);
342 setOperationAction(ISD::OR , VT, Legal);
343 setOperationAction(ISD::XOR , VT, Legal);
344 setOperationAction(ISD::LOAD , VT, Legal);
345 setOperationAction(ISD::SELECT, VT, Legal);
346 setOperationAction(ISD::STORE, VT, Legal);
348 // These operations need to be expanded:
349 setOperationAction(ISD::SDIV, VT, Expand);
350 setOperationAction(ISD::SREM, VT, Expand);
351 setOperationAction(ISD::UDIV, VT, Expand);
352 setOperationAction(ISD::UREM, VT, Expand);
354 // Custom lower build_vector, constant pool spills, insert and
355 // extract vector elements:
356 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
357 setOperationAction(ISD::ConstantPool, VT, Custom);
358 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
359 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
360 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
361 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
// v16i8 logical ops override the generic Legal setting from the loop.
364 setOperationAction(ISD::AND, MVT::v16i8, Custom);
365 setOperationAction(ISD::OR, MVT::v16i8, Custom);
366 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
367 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
369 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
371 setShiftAmountType(MVT::i32);
// SETCC produces 0 / -1 (all-ones), matching the selb-style select above.
372 setBooleanContents(ZeroOrNegativeOneBooleanContent);
374 setStackPointerRegisterToSaveRestore(SPU::R1);
376 // We have target-specific dag combine patterns for the following nodes:
377 setTargetDAGCombine(ISD::ADD);
378 setTargetDAGCombine(ISD::ZERO_EXTEND);
379 setTargetDAGCombine(ISD::SIGN_EXTEND);
380 setTargetDAGCombine(ISD::ANY_EXTEND);
// Finalize derived register-class info after all classes are added.
382 computeRegisterProperties();
384 // Set pre-RA register scheduler default to BURR, which produces slightly
385 // better code than the default (could also be TDRR, but TargetLowering.h
386 // needs a mod to support that model):
387 setSchedulingPreference(SchedulingForRegPressure);
// Returns the printable name for a target-specific (SPUISD) opcode, or
// null if the opcode has no entry.  The file-scope node_names map is
// populated lazily on first call.
// NOTE(review): the lazy initialization is not thread-safe (unguarded
// check-then-fill of a mutable global map).
391 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
393 if (node_names.empty()) {
394 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
395 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
396 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
397 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
398 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
399 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
400 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
401 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
402 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
403 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
404 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
405 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
406 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
407 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
408 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
409 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
410 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
411 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
412 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
413 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
414 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
415 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
416 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
417 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
418 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
419 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
420 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Lookup; unknown opcodes yield a null pointer rather than a string.
423 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
425 return ((i != node_names.end()) ? i->second : 0);
428 //===----------------------------------------------------------------------===//
429 // Return the Cell SPU's SETCC result type
430 //===----------------------------------------------------------------------===//
// SETCC result type: comparisons on i8/i16/i32 produce a result of the
// same width; everything else (i64, FP, vectors) yields i32.
432 MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
433 // i16 and i32 are valid SETCC result types
434 return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
437 //===----------------------------------------------------------------------===//
438 // Calling convention code:
439 //===----------------------------------------------------------------------===//
441 #include "SPUGenCallingConv.inc"
443 //===----------------------------------------------------------------------===//
444 // LowerOperation implementation
445 //===----------------------------------------------------------------------===//
447 /// Custom lower loads for CellSPU
449 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
450 within a 16-byte block, we have to rotate to extract the requested element.
452 For extending loads, we also want to ensure that the following sequence is
453 emitted, e.g. for MVT::f32 extending load to MVT::f64:
457 %2 v16i8,ch = rotate %1
458 %3 v4f32, ch = bitconvert %2
459 %4 f32 = vec2prefslot %3
460 %5 f64 = fp_extend %4
464 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
465 LoadSDNode *LN = cast<LoadSDNode>(Op);
466 SDValue the_chain = LN->getChain();
467 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// InVT = type in memory, OutVT = type produced (differs for ext-loads).
468 MVT InVT = LN->getMemoryVT();
469 MVT OutVT = Op.getValueType();
470 ISD::LoadExtType ExtType = LN->getExtensionType();
471 unsigned alignment = LN->getAlignment();
// NOTE(review): vtm is dereferenced below without a null check; see
// getValueTypeMapEntry's NULL-return diagnostic above.
472 const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
474 switch (LN->getAddressingMode()) {
475 case ISD::UNINDEXED: {
477 SDValue basePtr = LN->getBasePtr();
// --- 16-byte-aligned case: fold any constant offset into the rotate
// amount and simplify the base pointer. ---
480 if (alignment == 16) {
483 // Special cases for a known aligned load to simplify the base pointer
484 // and the rotation amount:
485 if (basePtr.getOpcode() == ISD::ADD
486 && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
487 // Known offset into basePtr
488 int64_t offset = CN->getSExtValue();
// Rotate = (offset within quadword) - preferred-slot byte offset.
489 int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
494 rotate = DAG.getConstant(rotamt, MVT::i16);
496 // Simplify the base pointer for this case:
497 basePtr = basePtr.getOperand(0);
// Re-attach the 16-byte-aligned part of the offset, if any.
498 if ((offset & ~0xf) > 0) {
499 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
501 DAG.getConstant((offset & ~0xf), PtrVT));
503 } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
504 || (basePtr.getOpcode() == SPUISD::IndirectAddr
505 && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
506 && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
507 // Plain aligned a-form address: rotate into preferred slot
508 // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
509 int64_t rotamt = -vtm->prefslot_byte;
512 rotate = DAG.getConstant(rotamt, MVT::i16);
514 // Offset the rotate amount by the basePtr and the preferred slot
// Rotate amount must be computed at run time from the pointer value.
516 int64_t rotamt = -vtm->prefslot_byte;
519 rotate = DAG.getNode(ISD::ADD, PtrVT,
521 DAG.getConstant(rotamt, PtrVT));
// --- Unaligned case: normalize the address into an IndirectAddr and
// compute the rotate amount dynamically. ---
524 // Unaligned load: must be more pessimistic about addressing modes:
525 if (basePtr.getOpcode() == ISD::ADD) {
526 MachineFunction &MF = DAG.getMachineFunction();
527 MachineRegisterInfo &RegInfo = MF.getRegInfo();
528 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
531 SDValue Op0 = basePtr.getOperand(0);
532 SDValue Op1 = basePtr.getOperand(1);
534 if (isa<ConstantSDNode>(Op1)) {
535 // Convert the (add <ptr>, <const>) to an indirect address contained
536 // in a register. Note that this is done because we need to avoid
537 // creating a 0(reg) d-form address due to the SPU's block loads.
538 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
// Round-trip through a virtual register so the address is reg-resident.
539 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
540 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
542 // Convert the (add <arg1>, <arg2>) to an indirect address, which
543 // will likely be lowered as a reg(reg) x-form address.
544 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
547 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
549 DAG.getConstant(0, PtrVT));
552 // Offset the rotate amount by the basePtr and the preferred slot
554 rotate = DAG.getNode(ISD::ADD, PtrVT,
556 DAG.getConstant(-vtm->prefslot_byte, PtrVT));
559 // Re-emit as a v16i8 vector load
// Load the whole 16-byte quadword containing the value.
560 result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
561 LN->getSrcValue(), LN->getSrcValueOffset(),
562 LN->isVolatile(), 16);
// The load's second result is the updated chain.
565 the_chain = result.getValue(1);
567 // Rotate into the preferred slot:
568 result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
569 result.getValue(0), rotate);
571 // Convert the loaded v16i8 vector to the appropriate vector type
572 // specified by the operand:
573 MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
574 result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
575 DAG.getNode(ISD::BIT_CONVERT, vecVT, result));
577 // Handle extending loads by extending the scalar result:
578 if (ExtType == ISD::SEXTLOAD) {
579 result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
580 } else if (ExtType == ISD::ZEXTLOAD) {
581 result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
582 } else if (ExtType == ISD::EXTLOAD) {
583 unsigned NewOpc = ISD::ANY_EXTEND;
// FP ext-loads (f32 -> f64) use FP_EXTEND instead of an integer extend.
585 if (OutVT.isFloatingPoint())
586 NewOpc = ISD::FP_EXTEND;
588 result = DAG.getNode(NewOpc, OutVT, result);
// Package (value, chain) as an LDRESULT node so callers see both.
591 SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
592 SDValue retops[2] = {
597 result = DAG.getNode(SPUISD::LDRESULT, retvts,
598 retops, sizeof(retops) / sizeof(retops[0]));
// Indexed (pre/post-inc) addressing modes are not supported.
605 case ISD::LAST_INDEXED_MODE:
606 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
608 cerr << (unsigned) LN->getAddressingMode() << "\n";
616 /// Custom lower stores for CellSPU
618 All CellSPU stores are aligned to 16-byte boundaries, so for elements
619 within a 16-byte block, we have to generate a shuffle to insert the
620 requested element into its place, then store the resulting block.
623 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
624 StoreSDNode *SN = cast<StoreSDNode>(Op);
625 SDValue Value = SN->getValue();
626 MVT VT = Value.getValueType();
// StVT = type actually written to memory (narrower for trunc-stores).
627 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
628 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
629 unsigned alignment = SN->getAlignment();
631 switch (SN->getAddressingMode()) {
632 case ISD::UNINDEXED: {
633 // The vector type we really want to load from the 16-byte chunk.
634 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
635 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
637 SDValue alignLoadVec;
638 SDValue basePtr = SN->getBasePtr();
639 SDValue the_chain = SN->getChain();
// Address of the byte within the quadword where the value is inserted.
640 SDValue insertEltOffs;
// --- 16-byte-aligned case: split a constant offset into the aligned
// base (offset & ~0xf) and the insertion byte (offset & 0xf). ---
642 if (alignment == 16) {
645 // Special cases for a known aligned load to simplify the base pointer
646 // and insertion byte:
647 if (basePtr.getOpcode() == ISD::ADD
648 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
649 // Known offset into basePtr
650 int64_t offset = CN->getSExtValue();
652 // Simplify the base pointer for this case:
653 basePtr = basePtr.getOperand(0);
654 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
656 DAG.getConstant((offset & 0xf), PtrVT));
658 if ((offset & ~0xf) > 0) {
659 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
661 DAG.getConstant((offset & ~0xf), PtrVT));
664 // Otherwise, assume it's at byte 0 of basePtr
665 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
667 DAG.getConstant(0, PtrVT));
// --- Unaligned case: normalize the address (mirrors LowerLOAD). ---
670 // Unaligned load: must be more pessimistic about addressing modes:
671 if (basePtr.getOpcode() == ISD::ADD) {
672 MachineFunction &MF = DAG.getMachineFunction();
673 MachineRegisterInfo &RegInfo = MF.getRegInfo();
674 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
677 SDValue Op0 = basePtr.getOperand(0);
678 SDValue Op1 = basePtr.getOperand(1);
680 if (isa<ConstantSDNode>(Op1)) {
681 // Convert the (add <ptr>, <const>) to an indirect address contained
682 // in a register. Note that this is done because we need to avoid
683 // creating a 0(reg) d-form address due to the SPU's block loads.
684 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
685 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
686 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
688 // Convert the (add <arg1>, <arg2>) to an indirect address, which
689 // will likely be lowered as a reg(reg) x-form address.
690 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
693 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
695 DAG.getConstant(0, PtrVT));
698 // Insertion point is solely determined by basePtr's contents
699 insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
701 DAG.getConstant(0, PtrVT));
704 // Re-emit as a v16i8 vector load
// Read-modify-write: load the containing quadword first.
705 alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
706 SN->getSrcValue(), SN->getSrcValueOffset(),
707 SN->isVolatile(), 16);
710 the_chain = alignLoadVec.getValue(1);
712 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
713 SDValue theValue = SN->getValue();
// Peel off AssertZext/AssertSext wrappers to reach the raw value.
717 && (theValue.getOpcode() == ISD::AssertZext
718 || theValue.getOpcode() == ISD::AssertSext)) {
719 // Drill down and get the value for zero- and sign-extended
721 theValue = theValue.getOperand(0);
724 // If the base pointer is already a D-form address, then just create
725 // a new D-form address with a slot offset and the original base pointer.
726 // Otherwise generate a D-form address with the slot offset relative
727 // to the stack pointer, which is always aligned.
729 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
730 cerr << "CellSPU LowerSTORE: basePtr = ";
731 basePtr.getNode()->dump(&DAG);
// Build a shuffle mask from the insertion offset, scatter the scalar
// into a vector, then SHUFB the new element into the loaded quadword.
736 SDValue insertEltOp =
737 DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
738 SDValue vectorizeOp =
739 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
741 result = DAG.getNode(SPUISD::SHUFB, vecVT,
742 vectorizeOp, alignLoadVec,
743 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
// Write the merged quadword back to memory.
745 result = DAG.getStore(the_chain, result, basePtr,
746 LN->getSrcValue(), LN->getSrcValueOffset(),
747 LN->isVolatile(), LN->getAlignment());
// Disabled debug dump.  NOTE(review): "¤tRoot" below is an
// HTML-entity-mangled "&currentRoot" — harmless only because the
// whole region is compiled out by "#if 0"; fix before re-enabling.
749 #if 0 && !defined(NDEBUG)
750 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
751 const SDValue ¤tRoot = DAG.getRoot();
754 cerr << "------- CellSPU:LowerStore result:\n";
757 DAG.setRoot(currentRoot);
// NOTE(review): the message below says "LowerLOAD"/"LoadSDNode" but
// this is LowerSTORE — copy-paste error in the diagnostic string.
768 case ISD::LAST_INDEXED_MODE:
769 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
771 cerr << (unsigned) SN->getAddressingMode() << "\n";
779 /// Generate the address of a constant pool entry.
// Static/small-memory: A-form absolute address.  Static/large-memory:
// Hi/Lo pair combined through IndirectAddr.  Non-static relocation
// models fall through to the diagnostic below.
781 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
782 MVT PtrVT = Op.getValueType();
783 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
784 Constant *C = CP->getConstVal();
785 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
786 SDValue Zero = DAG.getConstant(0, PtrVT);
787 const TargetMachine &TM = DAG.getTarget();
789 if (TM.getRelocationModel() == Reloc::Static) {
790 if (!ST->usingLargeMem()) {
791 // Just return the SDValue with the constant pool address in it.
792 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
794 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
795 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
796 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
801 "LowerConstantPool: Relocation model other than static"
// Generate the address of a jump table entry.  Same structure as
// LowerConstantPool: A-form for small memory, Hi/Lo + IndirectAddr for
// large memory; only the static relocation model is supported.
807 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
808 MVT PtrVT = Op.getValueType();
809 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
810 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
811 SDValue Zero = DAG.getConstant(0, PtrVT);
812 const TargetMachine &TM = DAG.getTarget();
814 if (TM.getRelocationModel() == Reloc::Static) {
815 if (!ST->usingLargeMem()) {
816 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
818 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
819 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
820 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
825 "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global.  Same structure as the constant
// pool and jump table lowerings above; any per-global offset is folded
// into the target global-address node.
830 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
831 MVT PtrVT = Op.getValueType();
832 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
833 GlobalValue *GV = GSDN->getGlobal();
834 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
835 const TargetMachine &TM = DAG.getTarget();
836 SDValue Zero = DAG.getConstant(0, PtrVT);
838 if (TM.getRelocationModel() == Reloc::Static) {
839 if (!ST->usingLargeMem()) {
840 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
842 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
843 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
844 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
847 cerr << "LowerGlobalAddress: Relocation model other than static not "
856 //! Custom lower i64 integer constants
858 This code inserts all of the necessary juggling that needs to occur to load
859 a 64-bit constant into a register.
862 LowerConstant(SDValue Op, SelectionDAG &DAG) {
863 MVT VT = Op.getValueType();
865 if (VT == MVT::i64) {
866 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
867 SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
// Splat the constant into both lanes of a v2i64, then extract the
// preferred slot back out as a scalar i64.
868 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
869 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// Any other type reaching here is a lowering bug; diagnose it.
871 cerr << "LowerConstant: unhandled constant type "
881 //! Custom lower double precision floating point constants
883 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
884 MVT VT = Op.getValueType();
886 if (VT == MVT::f64) {
887 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
890 "LowerConstantFP: Node is not ConstantFPSDNode");
// Reinterpret the double's bit pattern as i64, splat into v2i64,
// bitcast to v2f64, and extract the preferred slot as the f64 result.
892 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
893 SDValue T = DAG.getConstant(dbits, MVT::i64);
894 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
895 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
896 DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
// Lower incoming formal arguments: the first NumArgRegs arguments
// arrive in SPU argument registers (copied into fresh virtual
// registers); the rest are loaded from fixed stack slots.  For vararg
// functions, the remaining argument registers are spilled to the stack
// and VarArgsFrameIndex records where VASTART should point.
903 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
905 MachineFunction &MF = DAG.getMachineFunction();
906 MachineFrameInfo *MFI = MF.getFrameInfo();
907 MachineRegisterInfo &RegInfo = MF.getRegInfo();
908 SmallVector<SDValue, 48> ArgValues;
909 SDValue Root = Op.getOperand(0);
910 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
912 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
913 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// First stack-argument offset starts past the minimum frame area.
915 unsigned ArgOffset = SPUFrameInfo::minStackSize();
916 unsigned ArgRegIdx = 0;
917 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
919 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
921 // Add DAG nodes to load the arguments or copy them out of registers.
// The node's last value is the output chain, hence "- 1".
922 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
923 ArgNo != e; ++ArgNo) {
924 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
925 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
928 if (ArgRegIdx < NumArgRegs) {
929 const TargetRegisterClass *ArgRegClass;
// Pick the register class matching the argument's value type.
931 switch (ObjectVT.getSimpleVT()) {
933 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
934 << ObjectVT.getMVTString()
939 ArgRegClass = &SPU::R8CRegClass;
942 ArgRegClass = &SPU::R16CRegClass;
945 ArgRegClass = &SPU::R32CRegClass;
948 ArgRegClass = &SPU::R64CRegClass;
951 ArgRegClass = &SPU::GPRCRegClass;
954 ArgRegClass = &SPU::R32FPRegClass;
957 ArgRegClass = &SPU::R64FPRegClass;
965 ArgRegClass = &SPU::VECREGRegClass;
// Mark the physical arg register live-in, mapped to a new vreg.
969 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
970 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
971 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
974 // We need to load the argument to a virtual register if we determined
975 // above that we ran out of physical registers of the appropriate type
976 // or we're forced to do vararg
977 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
978 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
979 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
980 ArgOffset += StackSlotSize;
983 ArgValues.push_back(ArgVal);
// Chain subsequent work after this copy/load.
985 Root = ArgVal.getOperand(0);
990 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
991 // We will spill (79-3)+1 registers to the stack
992 SmallVector<SDValue, 79-3+1> MemOps;
994 // Create the frame slot
// Vararg spill loop: each call overwrites VarArgsFrameIndex, so it
// ends up referring to the last slot created.
996 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
997 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
998 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
999 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1000 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1001 Root = Store.getOperand(0);
1002 MemOps.push_back(Store);
1004 // Increment address by stack slot size for the next stored argument
1005 ArgOffset += StackSlotSize;
// Tie all vararg spill stores together into a single chain.
1007 if (!MemOps.empty())
1008 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
// Last result is the chain, per the FORMAL_ARGUMENTS node contract.
1011 ArgValues.push_back(Root);
1013 // Return the new list of results.
1014 return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
1015 &ArgValues[0], ArgValues.size());
1018 /// isLSAAddress - Return the immediate to use if the specified
1019 /// value is representable as a LSA address.
1020 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1021 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
// NOTE(review): the null-check on C (original lines 1022-1023) is not
// visible in this excerpt but is presumably `if (!C) return 0;`.
1024 int Addr = C->getZExtValue();
1025 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1026 (Addr << 14 >> 14) != Addr)
1027 return 0; // Top 14 bits have to be sext of immediate.
// Return the word-address (byte address >> 2) as an i32 constant node.
1029 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// Lower an outgoing call: place arguments in registers (spilling the
// overflow to the stack), rewrite direct callees into the appropriate
// A-form / PC-relative / indirect address nodes depending on the memory
// model, emit the call, and copy result values out of the return registers.
// NOTE(review): excerpt with gaps — the switch's case labels, several else
// branches, and the NumResults updates fall in invisible lines.
1034 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1035 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1036 SDValue Chain = TheCall->getChain();
1037 SDValue Callee = TheCall->getCallee();
1038 unsigned NumOps = TheCall->getNumArgs();
1039 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1040 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1041 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1043 // Handy pointer type
1044 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1046 // Accumulate how many bytes are to be pushed on the stack, including the
1047 // linkage area, and parameter passing area. According to the SPU ABI,
1048 // we minimally need space for [LR] and [SP]
1049 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1051 // Set up a copy of the stack pointer for use loading and storing any
1052 // arguments that may not fit in the registers available for argument
1054 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1056 // Figure out which arguments are going to go in registers, and which in
1058 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1059 unsigned ArgRegIdx = 0;
1061 // Keep track of registers passing arguments
1062 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1063 // And the arguments passed on the stack
1064 SmallVector<SDValue, 8> MemOpChains;
1066 for (unsigned i = 0; i != NumOps; ++i) {
1067 SDValue Arg = TheCall->getArg(i);
1069 // PtrOff will be used to store the current argument to the stack if a
1070 // register cannot be found for it.
1071 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1072 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1074 switch (Arg.getValueType().getSimpleVT()) {
1075 default: assert(0 && "Unexpected ValueType for argument!");
// Each value-type group below: use the next argument register if one is
// left, otherwise store the argument to its stack slot.
1081 if (ArgRegIdx != NumArgRegs) {
1082 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1084 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1085 ArgOffset += StackSlotSize;
1090 if (ArgRegIdx != NumArgRegs) {
1091 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1093 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1094 ArgOffset += StackSlotSize;
1103 if (ArgRegIdx != NumArgRegs) {
1104 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1106 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1107 ArgOffset += StackSlotSize;
1113 // Update number of stack bytes actually used, insert a call sequence start
1114 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1115 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1118 if (!MemOpChains.empty()) {
1119 // Adjust the stack pointer for the stack arguments.
1120 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1121 &MemOpChains[0], MemOpChains.size());
1124 // Build a sequence of copy-to-reg nodes chained together with token chain
1125 // and flag operands which copy the outgoing args into the appropriate regs.
1127 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1128 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1130 InFlag = Chain.getValue(1);
1133 SmallVector<SDValue, 8> Ops;
1134 unsigned CallOpc = SPUISD::CALL;
1136 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1137 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1138 // node so that legalize doesn't hack it.
1139 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1140 GlobalValue *GV = G->getGlobal();
1141 MVT CalleeVT = Callee.getValueType();
1142 SDValue Zero = DAG.getConstant(0, PtrVT);
1143 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1145 if (!ST->usingLargeMem()) {
1146 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1147 // style calls, otherwise, external symbols are BRASL calls. This assumes
1148 // that declared/defined symbols are in the same compilation unit and can
1149 // be reached through PC-relative jumps.
1152 // This may be an unsafe assumption for JIT and really large compilation
1154 if (GV->isDeclaration()) {
1155 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1157 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1160 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1162 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1164 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1165 MVT CalleeVT = Callee.getValueType();
1166 SDValue Zero = DAG.getConstant(0, PtrVT);
1167 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1168 Callee.getValueType());
1170 if (!ST->usingLargeMem()) {
1171 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
1173 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
1175 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1176 // If this is an absolute destination address that appears to be a legal
1177 // local store address, use the munged value.
1178 Callee = SDValue(Dest, 0);
1181 Ops.push_back(Chain);
1182 Ops.push_back(Callee);
1184 // Add argument registers to the end of the list so that they are known live
1186 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1187 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1188 RegsToPass[i].second.getValueType()));
1190 if (InFlag.getNode())
1191 Ops.push_back(InFlag);
1192 // Returns a chain and a flag for retval copy to use.
1193 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1194 &Ops[0], Ops.size());
1195 InFlag = Chain.getValue(1);
1197 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1198 DAG.getIntPtrConstant(0, true), InFlag);
1199 if (TheCall->getValueType(0) != MVT::Other)
1200 InFlag = Chain.getValue(1);
1202 SDValue ResultVals[3];
1203 unsigned NumResults = 0;
1205 // If the call has results, copy the values out of the ret val registers.
1206 switch (TheCall->getValueType(0).getSimpleVT()) {
1207 default: assert(0 && "Unexpected ret value!");
1208 case MVT::Other: break;
// A two-register i32 result pair comes back in R4:R3; a single scalar
// result comes back in R3 with the type-appropriate register class.
1210 if (TheCall->getValueType(1) == MVT::i32) {
1211 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1212 ResultVals[0] = Chain.getValue(0);
1213 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1214 Chain.getValue(2)).getValue(1);
1215 ResultVals[1] = Chain.getValue(0);
1218 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1219 ResultVals[0] = Chain.getValue(0);
1224 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1225 ResultVals[0] = Chain.getValue(0);
1229 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i128, InFlag).getValue(1);
1230 ResultVals[0] = Chain.getValue(0);
1235 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1236 InFlag).getValue(1);
1237 ResultVals[0] = Chain.getValue(0);
1246 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1247 InFlag).getValue(1);
1248 ResultVals[0] = Chain.getValue(0);
1253 // If the function returns void, just return the chain.
1254 if (NumResults == 0)
1257 // Otherwise, merge everything together with a MERGE_VALUES node.
1258 ResultVals[NumResults++] = Chain;
1259 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1260 return Res.getValue(Op.getResNo());
// Lower a function return: run the SPU return calling convention over the
// return operands, copy each value into its assigned physical register
// (flag-chained so the copies stay adjacent to the return), and emit a
// SPUISD::RET_FLAG node.
// NOTE(review): the declaration of `Flag` and the `if (Flag.getNode())`
// branch selecting between the two RET_FLAG forms fall in lines not visible
// in this excerpt.
1264 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1265 SmallVector<CCValAssign, 16> RVLocs;
1266 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1267 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1268 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1269 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1271 // If this is the first return lowered for this function, add the regs to the
1272 // liveout set for the function.
1273 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1274 for (unsigned i = 0; i != RVLocs.size(); ++i)
1275 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1278 SDValue Chain = Op.getOperand(0);
1281 // Copy the result values into the output registers.
1282 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1283 CCValAssign &VA = RVLocs[i];
1284 assert(VA.isRegLoc() && "Can only return in registers!");
// Operand i*2+1 is the i-th return value (operands alternate value/flag).
1285 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1286 Flag = Chain.getValue(1);
1290 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1292 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1296 //===----------------------------------------------------------------------===//
1297 // Vector related lowering:
1298 //===----------------------------------------------------------------------===//
// Return the single constant value a BUILD_VECTOR splats, or 0 if the
// vector's non-undef elements are not all the same constant (or all undef).
1300 static ConstantSDNode *
1301 getVecImm(SDNode *N) {
1302 SDValue OpVal(0, 0);
1304 // Check to see if this buildvec has a single non-undef value in its elements.
1305 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1306 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1307 if (OpVal.getNode() == 0)
1308 OpVal = N->getOperand(i);
1309 else if (OpVal != N->getOperand(i))
// (gap: the mismatch case returns 0 on a line not visible here)
1313 if (OpVal.getNode() != 0) {
1314 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
// (gap: returning CN happens on a line not visible here)
1319 return 0; // All UNDEF: use implicit def.; not Constant node
1322 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1323 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// (continuation of the doc comment is on a line not visible here)
1325 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1327 if (ConstantSDNode *CN = getVecImm(N)) {
1328 uint64_t Value = CN->getZExtValue();
1329 if (ValueType == MVT::i64) {
// For i64, require both 32-bit halves equal (the invisible lines between
// 1332 and 1335 presumably reject upper != lower), then test one half.
1330 uint64_t UValue = CN->getZExtValue();
1331 uint32_t upper = uint32_t(UValue >> 32);
1332 uint32_t lower = uint32_t(UValue);
1335 Value = Value >> 32;
1337 if (Value <= 0x3ffff)
1338 return DAG.getTargetConstant(Value, ValueType);
1344 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1345 /// and the value fits into a signed 16-bit constant, and if so, return the
// (continuation of the doc comment is on a line not visible here)
1347 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1349 if (ConstantSDNode *CN = getVecImm(N)) {
1350 int64_t Value = CN->getSExtValue();
1351 if (ValueType == MVT::i64) {
// For i64, both 32-bit halves must match (check in invisible lines) before
// reducing to the upper half for the signed-16 range test below.
1352 uint64_t UValue = CN->getZExtValue();
1353 uint32_t upper = uint32_t(UValue >> 32);
1354 uint32_t lower = uint32_t(UValue);
1357 Value = Value >> 32;
1359 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1360 return DAG.getTargetConstant(Value, ValueType);
1367 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1368 /// and the value fits into a signed 10-bit constant, and if so, return the
// (continuation of the doc comment is on a line not visible here)
1370 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1372 if (ConstantSDNode *CN = getVecImm(N)) {
1373 int64_t Value = CN->getSExtValue();
1374 if (ValueType == MVT::i64) {
// Same i64 halves-must-match reduction as the u18/i16 variants above.
1375 uint64_t UValue = CN->getZExtValue();
1376 uint32_t upper = uint32_t(UValue >> 32);
1377 uint32_t lower = uint32_t(UValue);
1380 Value = Value >> 32;
1382 if (isS10Constant(Value))
1383 return DAG.getTargetConstant(Value, ValueType);
1389 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1390 /// and the value fits into a signed 8-bit constant, and if so, return the
1393 /// @note: The incoming vector is v16i8 because that's the only way we can load
1394 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
// (continuation of the doc comment is on a line not visible here)
1396 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1398 if (ConstantSDNode *CN = getVecImm(N)) {
1399 int Value = (int) CN->getZExtValue();
1400 if (ValueType == MVT::i16
1401 && Value <= 0xffff /* truncated from uint64_t */
// NOTE(review): `(short) Value >> 8` is an arithmetic (sign-extending)
// shift, so for values with bit 15 set the left side has high bits set
// while the right side is masked to 8 bits — verify this comparison
// really accepts exactly the "upper byte == lower byte" patterns intended.
1402 && ((short) Value >> 8) == ((short) Value & 0xff))
1403 return DAG.getTargetConstant(Value & 0xff, ValueType);
1404 else if (ValueType == MVT::i8
1405 && (Value & 0xff) == Value)
1406 return DAG.getTargetConstant(Value, ValueType);
1412 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1413 /// and the value fits into a signed 16-bit constant, and if so, return the
// (continuation of the doc comment is on a line not visible here)
1415 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1417 if (ConstantSDNode *CN = getVecImm(N)) {
1418 uint64_t Value = CN->getZExtValue();
// Accept values whose only set bits are in bits 16-31 (loadable by ILHU,
// which loads a halfword into the upper 16 bits of each word).
1419 if ((ValueType == MVT::i32
1420 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1421 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1422 return DAG.getTargetConstant(Value >> 16, ValueType);
1428 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1429 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1430 if (ConstantSDNode *CN = getVecImm(N)) {
// Return the splatted value truncated to 32 bits as a target constant.
1431 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1437 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1438 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1439 if (ConstantSDNode *CN = getVecImm(N)) {
// NOTE(review): the `(unsigned)` cast truncates the splat value to 32 bits
// before building an i64 constant — this looks copy-pasted from
// get_v4i32_imm and would drop the upper 32 bits of a genuine 64-bit
// splat. Confirm against callers before relying on it for values > 2^32.
1440 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1446 // If this is a vector of constants or undefs, get the bits. A bit in
1447 // UndefBits is set if the corresponding element of the vector is an
1448 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1449 // zero. Return true if this is not an array of constants, false if it is.
1451 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1452 uint64_t UndefBits[2]) {
1453 // Start with zero'd results.
1454 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1456 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1457 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1458 SDValue OpVal = BV->getOperand(i);
// Map element i to (which uint64_t, which bit-slot within it); elements
// are packed big-end-first within each 64-bit part.
1460 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1461 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1463 uint64_t EltBits = 0;
1464 if (OpVal.getOpcode() == ISD::UNDEF) {
// Record an all-ones mask for this element's slot in UndefBits.
1465 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1466 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1468 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1469 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1470 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants contribute their raw bit pattern.
1471 const APFloat &apf = CN->getValueAPF();
1472 EltBits = (CN->getValueType(0) == MVT::f32
1473 ? FloatToBits(apf.convertToFloat())
1474 : DoubleToBits(apf.convertToDouble()));
1476 // Nonconstant element.
// (gap: the "return true" for a non-constant element is not visible here)
1480 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1483 //printf("%llx %llx %llx %llx\n",
1484 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1488 /// If this is a splat (repetition) of a value across the whole vector, return
1489 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1490 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1491 /// SplatSize = 1 byte.
1492 static bool isConstantSplat(const uint64_t Bits128[2],
1493 const uint64_t Undef128[2],
1495 uint64_t &SplatBits, uint64_t &SplatUndef,
// Strategy: progressively fold the 128-bit value in half (64 -> 32 -> 16
// bits), OR-ing defined bits and AND-ing undef masks, stopping at the
// smallest width (>= MinSplatBits) at which the two halves still agree.
1497 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1498 // the same as the lower 64-bits, ignoring undefs.
1499 uint64_t Bits64 = Bits128[0] | Bits128[1];
1500 uint64_t Undef64 = Undef128[0] & Undef128[1];
1501 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1502 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1503 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1504 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1506 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1507 if (MinSplatBits < 64) {
1509 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1511 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1512 if (MinSplatBits < 32) {
1514 // If the top 16-bits are different than the lower 16-bits, ignoring
1515 // undefs, we have an i32 splat.
1516 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1517 if (MinSplatBits < 16) {
1518 // If the top 8-bits are different than the lower 8-bits, ignoring
1519 // undefs, we have an i16 splat.
1520 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1521 == ((Bits16 >> 8) & ~Undef16)) {
1522 // Otherwise, we have an 8-bit splat.
1523 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1524 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
// (gap: the SplatSize assignments and returns for each width fall on
// lines not visible in this excerpt)
1530 SplatUndef = Undef16;
1537 SplatUndef = Undef32;
1543 SplatBits = Bits128[0];
1544 SplatUndef = Undef128[0];
1550 return false; // Can't be a splat if two pieces don't match.
1553 // If this is a case we can't handle, return null and let the default
1554 // expansion code take care of it. If we CAN select this case, and if it
1555 // selects to a single instruction, return Op. Otherwise, if we can codegen
1556 // this case more efficiently than a constant pool load, lower it to the
1557 // sequence of ops that should be used.
1558 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1559 MVT VT = Op.getValueType();
1560 // If this is a vector of constants or undefs, get the bits. A bit in
1561 // UndefBits is set if the corresponding element of the vector is an
1562 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1564 uint64_t VectorBits[2];
1565 uint64_t UndefBits[2];
1566 uint64_t SplatBits, SplatUndef;
1568 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1569 || !isConstantSplat(VectorBits, UndefBits,
1570 VT.getVectorElementType().getSizeInBits(),
1571 SplatBits, SplatUndef, SplatSize))
1572 return SDValue(); // Not a constant vector, not a splat.
// Dispatch on the vector type; the case labels for each branch below fall
// on lines not visible in this excerpt.
1574 switch (VT.getSimpleVT()) {
// v4f32: splat the 32-bit pattern through an integer vector + bitcast.
1577 uint32_t Value32 = SplatBits;
1578 assert(SplatSize == 4
1579 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1580 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1581 SDValue T = DAG.getConstant(Value32, MVT::i32);
1582 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1583 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64: same trick with a 64-bit pattern.
1587 uint64_t f64val = SplatBits;
1588 assert(SplatSize == 8
1589 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1590 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1591 SDValue T = DAG.getConstant(f64val, MVT::i64);
1592 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1593 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1597 // 8-bit constants have to be expanded to 16-bits
1598 unsigned short Value16 = SplatBits | (SplatBits << 8);
1600 for (int i = 0; i < 8; ++i)
1601 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1602 return DAG.getNode(ISD::BIT_CONVERT, VT,
1603 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16: widen an 8-bit splat to 16 bits when necessary.
1606 unsigned short Value16;
1608 Value16 = (unsigned short) (SplatBits & 0xffff);
1610 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1611 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1613 for (int i = 0; i < 8; ++i) Ops[i] = T;
1614 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1617 unsigned int Value = SplatBits;
1618 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1619 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64: easy when both 32-bit halves match; otherwise synthesize via a
// shuffle of special (0 / ~0 / sign-bit) patterns and/or splat vectors.
1622 uint64_t val = SplatBits;
1623 uint32_t upper = uint32_t(val >> 32);
1624 uint32_t lower = uint32_t(val);
1626 if (upper == lower) {
1627 // Magic constant that can be matched by IL, ILA, et. al.
1628 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1629 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1633 SmallVector<SDValue, 16> ShufBytes;
1635 bool upper_special, lower_special;
1637 // NOTE: This code creates common-case shuffle masks that can be easily
1638 // detected as common expressions. It is not attempting to create highly
1639 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1641 // Detect if the upper or lower half is a special shuffle mask pattern:
1642 upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1643 lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1645 // Create lower vector if not a special pattern
1646 if (!lower_special) {
1647 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1648 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1649 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1650 LO32C, LO32C, LO32C, LO32C));
1653 // Create upper vector if not a special pattern
1654 if (!upper_special) {
1655 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1656 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1657 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1658 HI32C, HI32C, HI32C, HI32C));
1661 // If either upper or lower are special, then the two input operands are
1662 // the same (basically, one of them is a "don't care")
1667 if (lower_special && upper_special) {
1668 // Unhappy situation... both upper and lower are special, so punt with
1669 // a target constant:
1670 SDValue Zero = DAG.getConstant(0, MVT::i32);
1671 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte shufb control word: for "special" halves the mask
// byte encodes a generated pattern (0x80 = zero, 0xc0 = 0xff, 0xe0 =
// 0x80); otherwise it selects the source byte i*4+j from HI32/LO32.
1675 for (int i = 0; i < 4; ++i) {
1677 for (int j = 0; j < 4; ++j) {
1679 bool process_upper, process_lower;
1681 process_upper = (upper_special && (i & 1) == 0);
1682 process_lower = (lower_special && (i & 1) == 1);
1684 if (process_upper || process_lower) {
1685 if ((process_upper && upper == 0)
1686 || (process_lower && lower == 0))
1688 else if ((process_upper && upper == 0xffffffff)
1689 || (process_lower && lower == 0xffffffff))
1691 else if ((process_upper && upper == 0x80000000)
1692 || (process_lower && lower == 0x80000000))
1693 val |= (j == 0 ? 0xe0 : 0x80);
1695 val |= i * 4 + j + ((i & 1) * 16);
1698 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1701 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1702 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1703 &ShufBytes[0], ShufBytes.size()));
1711 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1712 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1713 /// permutation vector, V3, is monotonically increasing with one "exception"
1714 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1715 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1716 /// In either case, the net result is going to eventually invoke SHUFB to
1717 /// permute/shuffle the bytes from V1 and V2.
1719 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1720 /// control word for byte/halfword/word insertion. This takes care of a single
1721 /// element move from V2 into V1.
1723 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1724 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1725 SDValue V1 = Op.getOperand(0);
1726 SDValue V2 = Op.getOperand(1);
1727 SDValue PermMask = Op.getOperand(2);
1729 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1731 // If we have a single element being moved from V1 to V2, this can be handled
1732 // using the C*[DX] compute mask instructions, but the vector elements have
1733 // to be monotonically increasing with one exception element.
1734 MVT VecVT = V1.getValueType();
1735 MVT EltVT = VecVT.getVectorElementType();
1736 unsigned EltsFromV2 = 0;
1738 unsigned V2EltIdx0 = 0;
1739 unsigned CurrElt = 0;
1740 unsigned MaxElts = VecVT.getVectorNumElements();
1741 unsigned PrevElt = 0;
1743 bool monotonic = true;
// Set V2EltIdx0 (first mask index that refers to V2) per element width;
// the assignments fall on lines not visible in this excerpt.
1746 if (EltVT == MVT::i8) {
1748 } else if (EltVT == MVT::i16) {
1750 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1752 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1755 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the permutation mask, classifying it as single-element-insert
// (monotonic with one element from V2) and/or a pure rotation.
1757 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1758 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1759 unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
1762 if (SrcElt >= V2EltIdx0) {
1763 if (1 >= (++EltsFromV2)) {
1764 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1766 } else if (CurrElt != SrcElt) {
// (gap: monotonic = false handling not visible here)
1774 if (PrevElt > 0 && SrcElt < MaxElts) {
1775 if ((PrevElt == SrcElt - 1)
1776 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
// (gap: rotation bookkeeping not visible here)
1783 } else if (PrevElt == 0) {
1784 // First time through, need to keep track of previous element
1787 // This isn't a rotation, takes elements from vector 2
1794 if (EltsFromV2 == 1 && monotonic) {
1795 // Compute mask and shuffle
1796 MachineFunction &MF = DAG.getMachineFunction();
1797 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1798 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1799 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1800 // Initialize temporary register to 0
1801 SDValue InitTempReg =
1802 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1803 // Copy register's contents as index in SHUFFLE_MASK:
1804 SDValue ShufMaskOp =
1805 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1806 DAG.getTargetConstant(V2Elt, MVT::i32),
1807 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1808 // Use shuffle mask in SHUFB synthetic instruction:
1809 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1810 } else if (rotate) {
// Pure rotation: rotate V1 left by the byte distance of the rotation.
1811 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1813 return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
1814 V1, DAG.getConstant(rotamt, MVT::i16));
1816 // Convert the SHUFFLE_VECTOR mask's input element units to the
// General case: expand each element index into per-byte shufb selectors.
1818 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1820 SmallVector<SDValue, 16> ResultMask;
1821 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1823 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1826 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1828 for (unsigned j = 0; j < BytesPerElement; ++j) {
1829 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1834 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1835 &ResultMask[0], ResultMask.size());
1836 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower SCALAR_TO_VECTOR: a constant scalar becomes a constant BUILD_VECTOR
// splat (which later simplifies to a vector load); any other scalar is
// promoted into a vector register via SPUISD::PREFSLOT2VEC.
1840 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1841 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1843 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1844 // For a constant, build the appropriate constant vector, which will
1845 // eventually simplify to a vector register load.
1847 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1848 SmallVector<SDValue, 16> ConstVecValues;
1852 // Create a constant vector:
1853 switch (Op.getValueType().getSimpleVT()) {
1854 default: assert(0 && "Unexpected constant value type in "
1855 "LowerSCALAR_TO_VECTOR");
1856 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1857 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1858 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1859 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1860 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1861 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant n_copies times to form the splat.
1864 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1865 for (size_t j = 0; j < n_copies; ++j)
1866 ConstVecValues.push_back(CValue);
1868 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1869 &ConstVecValues[0], ConstVecValues.size());
1871 // Otherwise, copy the value from one register to another:
1872 switch (Op0.getValueType().getSimpleVT()) {
1873 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
// (gap: the scalar-type case labels fall on lines not visible here)
1880 return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
// Lower EXTRACT_VECTOR_ELT. Constant index: either the element already sits
// in the preferred slot (element 0 for i32/i64) or a shufb mask is built to
// move it there. Variable index: rotate the requested element's bytes to
// slot 0, replicate the preferred slot across the vector, then extract.
1887 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1888 MVT VT = Op.getValueType();
1889 SDValue N = Op.getOperand(0);
1890 SDValue Elt = Op.getOperand(1);
1893 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1894 // Constant argument:
1895 int EltNo = (int) C->getZExtValue();
// Sanity-check the index against the element count for this type.
1898 if (VT == MVT::i8 && EltNo >= 16)
1899 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1900 else if (VT == MVT::i16 && EltNo >= 8)
1901 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1902 else if (VT == MVT::i32 && EltNo >= 4)
1903 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1904 else if (VT == MVT::i64 && EltNo >= 2)
1905 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1907 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1908 // i32 and i64: Element 0 is the preferred slot
1909 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
1912 // Need to generate shuffle mask and extract:
1913 int prefslot_begin = -1, prefslot_end = -1;
1914 int elt_byte = EltNo * VT.getSizeInBits() / 8;
// Determine the preferred-slot byte range for this scalar width; the
// case labels fall on lines not visible in this excerpt.
1916 switch (VT.getSimpleVT()) {
1918 assert(false && "Invalid value type!");
1920 prefslot_begin = prefslot_end = 3;
1924 prefslot_begin = 2; prefslot_end = 3;
1929 prefslot_begin = 0; prefslot_end = 3;
1934 prefslot_begin = 0; prefslot_end = 7;
1939 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1940 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build a byte mask that routes the requested element's bytes into the
// preferred slot; remaining bytes repeat the slot pattern.
1942 unsigned int ShufBytes[16];
1943 for (int i = 0; i < 16; ++i) {
1944 // zero fill uppper part of preferred slot, don't care about the
1946 unsigned int mask_val;
1947 if (i <= prefslot_end) {
1949 ((i < prefslot_begin)
1951 : elt_byte + (i - prefslot_begin));
1953 ShufBytes[i] = mask_val;
1955 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
// Pack the 16 byte selectors into four i32 words for the v4i32 mask.
1958 SDValue ShufMask[4];
1959 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1960 unsigned bidx = i * 4;
1961 unsigned int bits = ((ShufBytes[bidx] << 24) |
1962 (ShufBytes[bidx+1] << 16) |
1963 (ShufBytes[bidx+2] << 8) |
1965 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1968 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1970 sizeof(ShufMask) / sizeof(ShufMask[0]));
1972 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
1973 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
1974 N, N, ShufMaskVec));
1976 // Variable index: Rotate the requested element into slot 0, then replicate
1977 // slot 0 across the vector
1978 MVT VecVT = N.getValueType();
1979 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
1980 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
1984 // Make life easier by making sure the index is zero-extended to i32
1985 if (Elt.getValueType() != MVT::i32)
1986 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
1988 // Scale the index to a bit/byte shift quantity
1990 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
1991 unsigned scaleShift = scaleFactor.logBase2();
1994 if (scaleShift > 0) {
1995 // Scale the shift factor:
1996 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
1997 DAG.getConstant(scaleShift, MVT::i32));
2000 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2002 // Replicate the bytes starting at byte 0 across the entire vector (for
2003 // consistency with the notion of a unified register set)
// Per-width byte-selector constants for the replication shuffle; the
// case labels fall on lines not visible in this excerpt.
2006 switch (VT.getSimpleVT()) {
2008 cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2012 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2013 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2018 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2019 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2025 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2026 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2032 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2033 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2034 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2035 loFactor, hiFactor);
2040 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2041 DAG.getNode(SPUISD::SHUFB, VecVT,
2042 vecShift, vecShift, replicate));
// Lower ISD::INSERT_VECTOR_ELT for a constant index.
// Strategy (visible here): build a shuffle-mask address from $sp plus the
// element's byte offset, materialize the mask via SPUISD::SHUFFLE_MASK, then
// use SHUFB to merge the scalar (promoted via SCALAR_TO_VECTOR) into VecOp.
// NOTE(review): this listing is elided (e.g. lines 2053, 2063-2064, 2067 are
// missing), so the final result wiring is not fully visible from here.
2048 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2049 SDValue VecOp = Op.getOperand(0);
2050 SDValue ValOp = Op.getOperand(1);
2051 SDValue IdxOp = Op.getOperand(2);
2052 MVT VT = Op.getValueType();
// Only constant indices are supported by this lowering; the cast asserts
// on non-constant nodes before the explicit assert below ever fires.
2054 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2055 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2057 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2058 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2059 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2060 DAG.getRegister(SPU::R1, PtrVT),
2061 DAG.getConstant(CN->getSExtValue(), PtrVT));
// SHUFFLE_MASK turns the (aligned base + offset) address into the byte
// permutation pattern consumed by SHUFB below.
2062 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
2065 DAG.getNode(SPUISD::SHUFB, VT,
2066 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2068 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
// Lower i8 arithmetic/shift/rotate ops (SPU has no native i8 ALU ops for
// these): promote the operands to i16, perform the operation at i16, then
// truncate the result back to i8. The switch's case labels are elided from
// this listing; the groups below are distinguished by their promotion style
// (sign-extend for add/sub/signed ops, zero-extend for shifts/rotates).
2073 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2074 const TargetLowering &TLI)
2076 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2077 MVT ShiftVT = TLI.getShiftAmountTy();
2079 assert(Op.getValueType() == MVT::i8)
2082 assert(0 && "Unhandled i8 math operator");
2086 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2088 SDValue N1 = Op.getOperand(1);
2089 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2090 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2091 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2092 DAG.getNode(Opc, MVT::i16, N0, N1));
2097 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2099 SDValue N1 = Op.getOperand(1);
2100 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2101 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2102 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2103 DAG.getNode(Opc, MVT::i16, N0, N1));
// Rotate-style group: the value is zero-extended; the i8 value is also
// replicated into the high byte (OR of N0 with N0 << 8) so that a 16-bit
// rotate of the doubled pattern behaves like an 8-bit rotate.
2107 SDValue N1 = Op.getOperand(1);
2109 N0 = (N0.getOpcode() != ISD::Constant
2110 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2111 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2113 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2116 N1 = (N1.getOpcode() != ISD::Constant
2117 ? DAG.getNode(N1Opc, ShiftVT, N1)
2118 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2119 TLI.getShiftAmountTy()));
2121 DAG.getNode(ISD::OR, MVT::i16, N0,
2122 DAG.getNode(ISD::SHL, MVT::i16,
2123 N0, DAG.getConstant(8, MVT::i32)));
2124 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2125 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Logical-shift group: zero-extend the value, normalize the shift amount
// to the target's shift-amount type, do the op at i16, truncate back.
2129 SDValue N1 = Op.getOperand(1);
2131 N0 = (N0.getOpcode() != ISD::Constant
2132 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2133 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2135 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2138 N1 = (N1.getOpcode() != ISD::Constant
2139 ? DAG.getNode(N1Opc, ShiftVT, N1)
2140 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
2141 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2142 DAG.getNode(Opc, MVT::i16, N0, N1));
// Arithmetic-shift group: the value is sign-extended.
// NOTE(review): constant N0 uses getSExtValue() here but constant N1 uses
// getZExtValue() (line 2156); verify that asymmetry is intended.
2145 SDValue N1 = Op.getOperand(1);
2147 N0 = (N0.getOpcode() != ISD::Constant
2148 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2149 : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
2151 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2154 N1 = (N1.getOpcode() != ISD::Constant
2155 ? DAG.getNode(N1Opc, ShiftVT, N1)
2156 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2158 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2159 DAG.getNode(Opc, MVT::i16, N0, N1));
// Signed two-operand group (presumably MUL — case label elided): both
// operands sign-extended to i16; note the SIGN_EXTEND/getZExtValue mix on
// line 2166 — TODO confirm against the unelided source.
2162 SDValue N1 = Op.getOperand(1);
2164 N0 = (N0.getOpcode() != ISD::Constant
2165 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2166 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2168 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2169 N1 = (N1.getOpcode() != ISD::Constant
2170 ? DAG.getNode(N1Opc, MVT::i16, N1)
2171 : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
2173 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2174 DAG.getNode(Opc, MVT::i16, N0, N1));
// Lower i64 operations the SPU cannot do natively. Visible cases:
//  - ZERO_EXTEND/ANY_EXTEND to i64: done entirely with a SHUFB byte shuffle
//    (0x80 mask bytes produce zero bytes in the result).
//  - i64 ADD: CARRY_GENERATE + shuffle the carry up one word slot, then
//    ADD_EXTENDED.
//  - i64 SUB: BORROW_GENERATE + shuffle the borrow up, then SUB_EXTENDED.
// All three operate in v2i64 vector space and extract the preferred slot.
2182 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2184 MVT VT = Op.getValueType();
2185 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2187 SDValue Op0 = Op.getOperand(0);
2190 case ISD::ZERO_EXTEND:
2191 case ISD::ANY_EXTEND: {
2192 MVT Op0VT = Op0.getValueType();
2193 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2195 SDValue PromoteScalar =
2196 DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2198 // Use a shuffle to zero extend the i32 to i64 directly:
// Mask byte 0x80 selects a constant zero byte in SHUFB; the other mask
// bytes pick the source bytes of the (promoted) scalar. Three masks for
// the three visible source widths (case labels elided from this listing).
2201 switch (Op0VT.getSimpleVT()) {
2203 cerr << "CellSPU LowerI64Math: Unhandled zero/any extend MVT\n";
2208 shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2209 DAG.getConstant(0x80808080, MVT::i32),
2210 DAG.getConstant(0x00010203, MVT::i32),
2211 DAG.getConstant(0x80808080, MVT::i32),
2212 DAG.getConstant(0x08090a0b, MVT::i32));
2216 shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2217 DAG.getConstant(0x80808080, MVT::i32),
2218 DAG.getConstant(0x80800203, MVT::i32),
2219 DAG.getConstant(0x80808080, MVT::i32),
2220 DAG.getConstant(0x80800a0b, MVT::i32));
2224 shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2225 DAG.getConstant(0x80808080, MVT::i32),
2226 DAG.getConstant(0x80808003, MVT::i32),
2227 DAG.getConstant(0x80808080, MVT::i32),
2228 DAG.getConstant(0x8080800b, MVT::i32));
2232 SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2233 PromoteScalar, PromoteScalar, shufMask);
2235 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2236 DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
// --- i64 ADD (case label elided) ---
2240 // Turn operands into vectors to satisfy type checking (shufb works on
2243 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
2245 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
2246 SmallVector<SDValue, 16> ShufBytes;
2248 // Create the shuffle mask for "rotating" the carry up one register slot
2249 // once the carry is generated (CARRY_GENERATE below); 0x80 bytes zero-fill.
2250 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2251 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2252 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2253 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2256 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2257 SDValue ShiftedCarry =
2258 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2260 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2261 &ShufBytes[0], ShufBytes.size()));
2263 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2264 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2265 Op0, Op1, ShiftedCarry));
// --- i64 SUB (case label elided) ---
2269 // Turn operands into vectors to satisfy type checking (shufb works on
2272 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
2274 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
2275 SmallVector<SDValue, 16> ShufBytes;
2277 // Create the shuffle mask for "rotating" the borrow up one register slot
2278 // once the borrow is generated.
// NOTE(review): the fill bytes here are 0xc0 (vs 0x80 in the ADD case);
// presumably 0xc0 yields an all-ones fill, matching the active-low borrow
// convention of SUB_EXTENDED — confirm against the SPU shufb special codes.
2279 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2280 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2281 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2282 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2285 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2286 SDValue ShiftedBorrow =
2287 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2288 BorrowGen, BorrowGen,
2289 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2290 &ShufBytes[0], ShufBytes.size()));
2292 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2293 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2294 Op0, Op1, ShiftedBorrow));
2301 //! Lower byte immediate operations for v16i8 vectors:
// Lower v16i8 logical ops (AND/OR/XOR) whose second operand is a splatted
// byte constant, so the byte-immediate instruction forms can be selected.
// The code first normalizes operand order (the BUILD_VECTOR constant may be
// operand 0 or 1, possibly behind a BIT_CONVERT), then checks for a splat.
2303 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2306 MVT VT = Op.getValueType();
2308 ConstVec = Op.getOperand(0);
2309 Arg = Op.getOperand(1);
2310 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2311 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2312 ConstVec = ConstVec.getOperand(0);
// Operand 0 wasn't the constant vector; try the swapped assignment.
2314 ConstVec = Op.getOperand(1);
2315 Arg = Op.getOperand(0);
2316 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2317 ConstVec = ConstVec.getOperand(0);
2322 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2323 uint64_t VectorBits[2];
2324 uint64_t UndefBits[2];
2325 uint64_t SplatBits, SplatUndef;
// Only rewrite when the constant is a uniform splat of a single byte.
2328 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2329 && isConstantSplat(VectorBits, UndefBits,
2330 VT.getVectorElementType().getSizeInBits(),
2331 SplatBits, SplatUndef, SplatSize)) {
2333 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2334 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2336 // Turn the BUILD_VECTOR into a set of target constants:
2337 for (size_t i = 0; i < tcVecSize; ++i)
// Re-emit the same logical op with a target-constant BUILD_VECTOR so the
// instruction selector can match the byte-immediate form.
2340 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2341 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2344 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2345 // lowered. Return the operation, rather than a null SDValue.
2349 //! Custom lowering for CTPOP (count population)
2351 Custom lowering code that counts the number of ones in the input
2352 operand. SPU has such an instruction, but it counts the number of
2353 ones per byte, which then have to be accumulated.
// Lower ISD::CTPOP using SPU's CNTB (count ones per byte) and then summing
// the per-byte counts. Three cases by result width (case labels elided from
// this listing): i8 needs no accumulation; i16 folds two bytes with one
// shift+add; i32 folds four bytes with two shift+add rounds.
2355 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2356 MVT VT = Op.getValueType();
2357 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2359 switch (VT.getSimpleVT()) {
2361 assert(false && "Invalid value type!");
// i8: CNTB on the promoted scalar; the single byte count is the answer.
2363 SDValue N = Op.getOperand(0);
2364 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2366 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2367 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2369 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16: add high byte count (>>8) to low byte count, mask to 0x0f
// (max popcount of 16 bits fits in 5 bits; mask keeps the low nibble+bit).
2373 MachineFunction &MF = DAG.getMachineFunction();
2374 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2376 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2378 SDValue N = Op.getOperand(0);
2379 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2380 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2381 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2383 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2384 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2386 // CNTB_result becomes the chain to which all of the virtual registers
2387 // CNTB_reg, SUM1_reg become associated:
2388 SDValue CNTB_result =
2389 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2391 SDValue CNTB_rescopy =
2392 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2394 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2396 return DAG.getNode(ISD::AND, MVT::i16,
2397 DAG.getNode(ISD::ADD, MVT::i16,
2398 DAG.getNode(ISD::SRL, MVT::i16,
// i32: two rounds of shift+add fold four byte counts into the low byte,
// then mask with 0xff.
2405 MachineFunction &MF = DAG.getMachineFunction();
2406 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2408 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2409 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2411 SDValue N = Op.getOperand(0);
2412 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2413 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2414 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2415 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2417 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2418 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2420 // CNTB_result becomes the chain to which all of the virtual registers
2421 // CNTB_reg, SUM1_reg become associated:
2422 SDValue CNTB_result =
2423 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2425 SDValue CNTB_rescopy =
2426 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// Round 1: fold upper halfword counts onto the lower halfword.
2429 DAG.getNode(ISD::SRL, MVT::i32,
2430 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2433 DAG.getNode(ISD::ADD, MVT::i32,
2434 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2436 SDValue Sum1_rescopy =
2437 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Round 2: fold the remaining high byte onto the low byte.
2440 DAG.getNode(ISD::SRL, MVT::i32,
2441 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2444 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2445 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2447 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2457 //! Lower ISD::SETCC
2459 Lower i64 condition code handling.
// Lower ISD::SETCC: the i64-compared case is expanded into the condition
// codes the SPU actually supports (eq, ugt, sgt); the switch body that does
// the expansion is elided from this listing.
2462 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) {
2463 MVT VT = Op.getValueType();
2464 SDValue lhs = Op.getOperand(0);
2465 SDValue rhs = Op.getOperand(1);
2466 SDValue condition = Op.getOperand(2);
2468 if (VT == MVT::i32 && lhs.getValueType() == MVT::i64) {
2469 // Expand the i64 comparisons to what Cell can actually support,
2470 // which is eq, ugt and sgt:
// NOTE(review): dyn_cast target 'CondCodeSDValue' does not match the
// declared type 'CondCodeSDNode' — looks like a typo for
// dyn_cast<CondCodeSDNode>; also 'ccvalue' is dereferenced on the next
// line without a null check. Confirm and fix against the unelided source.
2472 CondCodeSDNode *ccvalue = dyn_cast<CondCodeSDValue>(condition);
2474 switch (ccvalue->get()) {
2483 //! Lower ISD::SELECT_CC
2485 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2488 \note Need to revisit this in the future: if the code path through the true
2489 and false value computations is longer than the latency of a branch (6
2490 cycles), then it would be more advantageous to branch and insert a new basic
2491 block and branch on the condition. However, this code does not make that
2492 assumption, given the simplistic uses so far.
// Lower ISD::SELECT_CC as SETCC feeding SPUISD::SELB (the SPU bit-select).
// Returns selb(falseval, trueval, setcc(lhs, rhs, cond)).
2495 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2496 const TargetLowering &TLI) {
2497 MVT VT = Op.getValueType();
2498 SDValue lhs = Op.getOperand(0);
2499 SDValue rhs = Op.getOperand(1);
2500 SDValue trueval = Op.getOperand(2);
2501 SDValue falseval = Op.getOperand(3);
2502 SDValue condition = Op.getOperand(4);
2504 // NOTE: SELB's arguments: $rA, $rB, $mask
2506 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2507 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2508 // condition was true and 0s where the condition was false. Hence, the
2509 // arguments to SELB get reversed.
2511 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2512 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2513 // with another "cannot select select_cc" assert:
// The SETCC result type comes from TLI so the compare's mask width matches
// what SELB expects for this value type.
2515 SDValue compare = DAG.getNode(ISD::SETCC,
2516 TLI.getSetCCResultType(Op.getValueType()),
2517 lhs, rhs, condition);
2518 return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
2521 //! Custom lower ISD::TRUNCATE
// Lower ISD::TRUNCATE. Only the i128 -> i64 case is custom-handled: the low
// doubleword of the quadword is selected with a SHUFB byte shuffle and moved
// into the preferred slot. All other truncates are returned untouched.
2522 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2524 MVT VT = Op.getValueType();
2525 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2526 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2528 SDValue Op0 = Op.getOperand(0);
2529 MVT Op0VT = Op0.getValueType();
2530 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2532 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2533 // Create shuffle mask, least significant doubleword of quadword
2534 unsigned maskHigh = 0x08090a0b;
2535 unsigned maskLow = 0x0c0d0e0f;
2536 // Use a shuffle to perform the truncation
// The mask repeats bytes 8..15 (the low doubleword) into both halves of
// the result vector.
2537 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2538 DAG.getConstant(maskHigh, MVT::i32),
2539 DAG.getConstant(maskLow, MVT::i32),
2540 DAG.getConstant(maskHigh, MVT::i32),
2541 DAG.getConstant(maskLow, MVT::i32));
2544 SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2546 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2547 PromoteScalar, PromoteScalar, shufMask);
2549 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2550 DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
2553 return SDValue(); // Leave the truncate unmolested
2556 //! Custom (target-specific) lowering entry point
2558 This is where LLVM's DAG selection process calls to do target-specific
// Main custom-lowering dispatcher: routes each opcode marked 'Custom' in the
// constructor to its Lower* helper. Many case labels are elided from this
// listing; the visible fall-through groups are annotated below.
2562 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2564 unsigned Opc = (unsigned) Op.getOpcode();
2565 MVT VT = Op.getValueType();
// Default/unhandled path: report and (presumably) abort — diagnostics only.
2569 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2570 cerr << "Op.getOpcode() = " << Opc << "\n";
2571 cerr << "*Op.getNode():\n";
2572 Op.getNode()->dump();
2579 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2581 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2582 case ISD::ConstantPool:
2583 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2584 case ISD::GlobalAddress:
2585 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2586 case ISD::JumpTable:
2587 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2589 return LowerConstant(Op, DAG);
2590 case ISD::ConstantFP:
2591 return LowerConstantFP(Op, DAG);
2592 case ISD::FORMAL_ARGUMENTS:
2593 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2595 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2597 return LowerRET(Op, DAG, getTargetMachine());
// i64 extension group:
2600 case ISD::ZERO_EXTEND:
2601 case ISD::ANY_EXTEND:
2602 return LowerI64Math(Op, DAG, Opc);
2604 // i8, i64 math ops:
2613 return LowerI8Math(Op, DAG, Opc, *this);
2614 else if (VT == MVT::i64)
2615 return LowerI64Math(Op, DAG, Opc);
2619 // Vector-related lowering.
2620 case ISD::BUILD_VECTOR:
2621 return LowerBUILD_VECTOR(Op, DAG);
2622 case ISD::SCALAR_TO_VECTOR:
2623 return LowerSCALAR_TO_VECTOR(Op, DAG);
2624 case ISD::VECTOR_SHUFFLE:
2625 return LowerVECTOR_SHUFFLE(Op, DAG);
2626 case ISD::EXTRACT_VECTOR_ELT:
2627 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2628 case ISD::INSERT_VECTOR_ELT:
2629 return LowerINSERT_VECTOR_ELT(Op, DAG);
2631 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2635 return LowerByteImmed(Op, DAG);
2637 // Vector and i8 multiply:
2640 return LowerI8Math(Op, DAG, Opc, *this);
2643 return LowerCTPOP(Op, DAG);
2645 case ISD::SELECT_CC:
2646 return LowerSELECT_CC(Op, DAG, *this);
2649 return LowerTRUNCATE(Op, DAG);
2652 return LowerSETCC(Op, DAG);
// Custom result-type legalization hook. The visible body only contains the
// default diagnostic path ("need to fix this!"); the actual case handling
// (if any) is elided from this listing, and unhandled nodes are returned
// unchanged.
2658 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2659 SmallVectorImpl<SDValue>&Results,
2663 unsigned Opc = (unsigned) N->getOpcode();
2664 MVT OpVT = N->getValueType(0);
2668 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2669 cerr << "Op.getOpcode() = " << Opc << "\n";
2670 cerr << "*Op.getNode():\n";
2678 /* Otherwise, return unchanged */
2681 //===----------------------------------------------------------------------===//
2682 // Target Optimization Hooks
2683 //===----------------------------------------------------------------------===//
// Target-specific DAG combines. Visible patterns:
//  - ADD with an SPUindirect operand: fold zero offsets and merge constant
//    offsets into the SPUindirect node.
//  - sign/zero/any-extend of VEC2PREFSLOT with matching types: drop the
//    extend.
//  - SPUindirect over AFormAddr (small-memory model) or over an ADD with a
//    zero constant: simplify the addressing node.
//  - Degenerate (zero-amount) vector shifts/rotates: eliminated.
//  - PREFSLOT2VEC of (extend (VEC2PREFSLOT x)) or of VEC2PREFSLOT: collapse.
2686 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2689 TargetMachine &TM = getTargetMachine();
2691 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2692 SelectionDAG &DAG = DCI.DAG;
2693 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2694 MVT NodeVT = N->getValueType(0); // The node's value type
2695 MVT Op0VT = Op0.getValueType(); // The first operand's result
2696 SDValue Result; // Initially, empty result
2698 switch (N->getOpcode()) {
2701 SDValue Op1 = N->getOperand(1);
2703 if (Op0.getOpcode() == SPUISD::IndirectAddr
2704 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2705 // Normalize the operands to reduce repeated code
2706 SDValue IndirectArg = Op0, AddArg = Op1;
2708 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2713 if (isa<ConstantSDNode>(AddArg)) {
2714 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2715 SDValue IndOp1 = IndirectArg.getOperand(1);
2717 if (CN0->isNullValue()) {
2718 // (add (SPUindirect <arg>, <arg>), 0) ->
2719 // (SPUindirect <arg>, <arg>)
2721 #if !defined(NDEBUG)
2722 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2724 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2725 << "With: (SPUindirect <arg>, <arg>)\n";
2730 } else if (isa<ConstantSDNode>(IndOp1)) {
2731 // (add (SPUindirect <arg>, <const>), <const>) ->
2732 // (SPUindirect <arg>, <const + const>)
2733 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2734 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2735 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2737 #if !defined(NDEBUG)
2738 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2740 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2741 << "), " << CN0->getSExtValue() << ")\n"
2742 << "With: (SPUindirect <arg>, "
2743 << combinedConst << ")\n";
2747 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2748 IndirectArg, combinedValue);
2754 case ISD::SIGN_EXTEND:
2755 case ISD::ZERO_EXTEND:
2756 case ISD::ANY_EXTEND: {
2757 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2758 // (any_extend (SPUextract_elt0 <arg>)) ->
2759 // (SPUextract_elt0 <arg>)
2760 // Types must match, however...
2761 #if !defined(NDEBUG)
2762 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2763 cerr << "\nReplace: ";
2766 Op0.getNode()->dump(&DAG);
2775 case SPUISD::IndirectAddr: {
2776 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2777 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2778 if (CN->getZExtValue() == 0) {
2779 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2780 // (SPUaform <addr>, 0)
2782 DEBUG(cerr << "Replace: ");
2783 DEBUG(N->dump(&DAG));
2784 DEBUG(cerr << "\nWith: ");
2785 DEBUG(Op0.getNode()->dump(&DAG));
2786 DEBUG(cerr << "\n");
2790 } else if (Op0.getOpcode() == ISD::ADD) {
2791 SDValue Op1 = N->getOperand(1);
2792 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2793 // (SPUindirect (add <arg>, <arg>), 0) ->
2794 // (SPUindirect <arg>, <arg>)
2795 if (CN1->isNullValue()) {
2797 #if !defined(NDEBUG)
2798 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2800 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2801 << "With: (SPUindirect <arg>, <arg>)\n";
2805 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2806 Op0.getOperand(0), Op0.getOperand(1));
2812 case SPUISD::SHLQUAD_L_BITS:
2813 case SPUISD::SHLQUAD_L_BYTES:
2814 case SPUISD::VEC_SHL:
2815 case SPUISD::VEC_SRL:
2816 case SPUISD::VEC_SRA:
2817 case SPUISD::ROTBYTES_LEFT: {
2818 SDValue Op1 = N->getOperand(1);
2820 // Kill degenerate vector shifts:
2821 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2822 if (CN->isNullValue()) {
2828 case SPUISD::PREFSLOT2VEC: {
2829 switch (Op0.getOpcode()) {
2832 case ISD::ANY_EXTEND:
2833 case ISD::ZERO_EXTEND:
2834 case ISD::SIGN_EXTEND: {
2835 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2837 // but only if the SPUprefslot2vec and <arg> types match.
2838 SDValue Op00 = Op0.getOperand(0);
2839 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2840 SDValue Op000 = Op00.getOperand(0);
2841 if (Op000.getValueType() == NodeVT) {
2847 case SPUISD::VEC2PREFSLOT: {
2848 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2850 Result = Op0.getOperand(0);
2857 // Otherwise, return unchanged.
// Common exit: if a combine fired, report it under -debug before handing
// the replacement back to the combiner.
2859 if (Result.getNode()) {
2860 DEBUG(cerr << "\nReplace.SPU: ");
2861 DEBUG(N->dump(&DAG));
2862 DEBUG(cerr << "\nWith: ");
2863 DEBUG(Result.getNode()->dump(&DAG));
2864 DEBUG(cerr << "\n");
2871 //===----------------------------------------------------------------------===//
2872 // Inline Assembly Support
2873 //===----------------------------------------------------------------------===//
2875 /// getConstraintType - Given a constraint letter, return the type of
2876 /// constraint it is for this target.
// Classify single-letter inline-asm constraints: the letters handled in the
// (elided) switch cases map to C_RegisterClass; everything else defers to
// the base TargetLowering implementation.
2877 SPUTargetLowering::ConstraintType
2878 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2879 if (ConstraintLetter.size() == 1) {
2880 switch (ConstraintLetter[0]) {
2887 return C_RegisterClass;
2890 return TargetLowering::getConstraintType(ConstraintLetter);
// Map a single-letter inline-asm constraint (GCC RS6000-style letters) plus
// a value type to the SPU register class used to satisfy it; unhandled
// constraints defer to the base class. Case labels are elided from this
// listing; the visible returns distinguish 64-bit vs 32-bit integer classes,
// f32 vs f64 FP classes, and the general GPRC fallback.
2893 std::pair<unsigned, const TargetRegisterClass*>
2894 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2897 if (Constraint.size() == 1) {
2898 // GCC RS6000 Constraint Letters
2899 switch (Constraint[0]) {
2903 return std::make_pair(0U, SPU::R64CRegisterClass);
2904 return std::make_pair(0U, SPU::R32CRegisterClass);
2907 return std::make_pair(0U, SPU::R32FPRegisterClass);
2908 else if (VT == MVT::f64)
2909 return std::make_pair(0U, SPU::R64FPRegisterClass);
2912 return std::make_pair(0U, SPU::GPRCRegisterClass);
2916 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2919 //! Compute used/known bits for a SPU operand
// Report known-zero/known-one bits for SPU-specific nodes so the generic
// combiner can simplify around them. For PREFSLOT2VEC / LDRESULT /
// VEC2PREFSLOT the code marks the bits outside the operand's integer width
// as known; the remaining listed opcodes fall through with no extra
// information.
// NOTE(review): KnownZero |= ~InMask and KnownOne |= InMask together claim
// every bit is known, which looks suspicious for a conservative analysis —
// verify against the unelided source before relying on this.
2921 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2925 const SelectionDAG &DAG,
2926 unsigned Depth ) const {
2928 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2931 switch (Op.getOpcode()) {
2933 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2943 case SPUISD::PREFSLOT2VEC: {
2944 SDValue Op0 = Op.getOperand(0);
2945 MVT Op0VT = Op0.getValueType();
2946 unsigned Op0VTBits = Op0VT.getSizeInBits();
2947 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2948 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2949 KnownOne |= APInt(Op0VTBits, InMask, false);
2953 case SPUISD::LDRESULT:
2954 case SPUISD::VEC2PREFSLOT: {
2955 MVT OpVT = Op.getValueType();
2956 unsigned OpVTBits = OpVT.getSizeInBits();
2957 uint64_t InMask = OpVT.getIntegerVTBitMask();
2958 KnownZero |= APInt(OpVTBits, ~InMask, false);
2959 KnownOne |= APInt(OpVTBits, InMask, false);
// No known-bit information is computed for the shift/rotate/select nodes
// below (they fall through to the default exit).
2964 case SPUISD::SHLQUAD_L_BITS:
2965 case SPUISD::SHLQUAD_L_BYTES:
2966 case SPUISD::VEC_SHL:
2967 case SPUISD::VEC_SRL:
2968 case SPUISD::VEC_SRA:
2969 case SPUISD::VEC_ROTL:
2970 case SPUISD::VEC_ROTR:
2971 case SPUISD::ROTBYTES_LEFT:
2972 case SPUISD::SELECT_MASK:
2974 case SPUISD::SEXT32TO64:
// Report sign-bit counts for SPU nodes. In the visible case (opcode label
// elided — presumably a comparison/mask-producing node), a result of i8/i16/
// i32 is all sign bits, so the full bit width is returned.
2980 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2981 unsigned Depth) const {
2982 switch (Op.getOpcode()) {
2987 MVT VT = Op.getValueType();
2989 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
2992 return VT.getSizeInBits();
2997 // LowerAsmOperandForConstraint
// Inline-asm operand lowering hook: no SPU-specific constraint letters are
// handled yet, so everything is forwarded to the base-class implementation.
2999 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3000 char ConstraintLetter,
3002 std::vector<SDValue> &Ops,
3003 SelectionDAG &DAG) const {
3004 // Default, for the time being, to the base class handler
3005 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3009 /// isLegalAddressImmediate - Return true if the integer value can be used
3010 /// as the offset of the target addressing mode.
3011 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3012 const Type *Ty) const {
3013 // SPU's addresses are 256K:
// Accepts the open interval (-(1<<18), (1<<18)-1), i.e. roughly +/-256K.
// NOTE(review): the strict '<' excludes (1<<18)-1 itself — confirm whether
// the upper bound is intentionally asymmetric.
3014 return (V > -(1 << 18) && V < (1 << 18) - 1);
// GlobalValue overload of isLegalAddressImmediate; the body is elided from
// this listing.
3017 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3022 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3023 // The SPU target isn't yet aware of offsets.