1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/Constants.h"
27 #include "llvm/Function.h"
28 #include "llvm/Intrinsics.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Target/TargetOptions.h"
37 // Used in getTargetNodeName() below
// Lazily populated opcode -> printable-name map, filled on the first call to
// getTargetNodeName().  NOTE(review): file-scope mutable state; safe only
// because DAG lowering runs single-threaded.
39 std::map<unsigned, const char *> node_names;
41 //! MVT mapping to useful data for Cell SPU
// Per-value-type table entry.  Only the preferred-slot byte offset is visible
// at this point; other members (e.g. the 'valtype' key read below) are not
// shown in this excerpt.
42 struct valtype_map_s {
44 const int prefslot_byte;
// Table of per-MVT entries (initializers elided in this excerpt).
47 const valtype_map_s valtype_map[] = {
// Element count of valtype_map, derived from the array's static size.
58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear search of valtype_map for the entry whose valtype matches VT.
// Per the diagnostic below, returns 0 (null) when VT has no entry, so
// callers must be prepared for a null result.
60 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
61 const valtype_map_s *retval = 0;
63 for (size_t i = 0; i < n_valtype_map; ++i) {
64 if (valtype_map[i].valtype == VT) {
65 retval = valtype_map + i;
// Debug aid when no entry was found (remainder of diagnostic elided).
72 cerr << "getValueTypeMapEntry returns NULL for "
// Constructor: configures the Cell SPU lowering policy -- register classes,
// per-type operation legality (Legal/Promote/Custom/Expand), load-extension
// handling, and DAG-combine hooks.  Several loop headers and closing braces
// are not visible in this excerpt of the file.
84 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
88 // Fold away setcc operations if possible.
91 // Use _setjmp/_longjmp instead of setjmp/longjmp.
92 setUseUnderscoreSetJmp(true);
93 setUseUnderscoreLongJmp(true);
95 // Set up the SPU's register classes:
96 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
97 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
98 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
99 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
100 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
101 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
102 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
104 // SPU has no sign or zero extended loads for i1, i8, i16:
105 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
106 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
107 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
109 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
110 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
112 // SPU constant load actions are custom lowered:
113 setOperationAction(ISD::Constant, MVT::i64, Custom);
114 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
115 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
117 // SPU's loads and stores have to be custom lowered:
118 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
120 MVT VT = (MVT::SimpleValueType)sctype;
122 setOperationAction(ISD::LOAD, VT, Custom);
123 setOperationAction(ISD::STORE, VT, Custom);
124 setLoadExtAction(ISD::EXTLOAD, VT, Custom);
125 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
126 setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
128 // SMUL_LOHI, UMUL_LOHI are not legal for Cell:
129 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
130 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
// Expand truncating stores from VT down to every narrower integer type.
132 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
133 MVT StoreVT = (MVT::SimpleValueType) stype;
134 setTruncStoreAction(VT, StoreVT, Expand);
// Same custom load/store treatment for the scalar FP types.
138 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
140 MVT VT = (MVT::SimpleValueType) sctype;
142 setOperationAction(ISD::LOAD, VT, Custom);
143 setOperationAction(ISD::STORE, VT, Custom);
145 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
146 MVT StoreVT = (MVT::SimpleValueType) stype;
147 setTruncStoreAction(VT, StoreVT, Expand);
151 // Expand the jumptable branches
152 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
153 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
155 // Custom lower SELECT_CC for most cases, but expand by default
156 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
157 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
158 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
159 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
160 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
162 // SPU has no intrinsics for these particular operations:
163 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
165 // SPU has no SREM/UREM instructions
166 setOperationAction(ISD::SREM, MVT::i32, Expand);
167 setOperationAction(ISD::UREM, MVT::i32, Expand);
168 setOperationAction(ISD::SREM, MVT::i64, Expand);
169 setOperationAction(ISD::UREM, MVT::i64, Expand);
171 // We don't support sin/cos/sqrt/fmod
172 setOperationAction(ISD::FSIN , MVT::f64, Expand);
173 setOperationAction(ISD::FCOS , MVT::f64, Expand);
174 setOperationAction(ISD::FREM , MVT::f64, Expand);
175 setOperationAction(ISD::FSIN , MVT::f32, Expand);
176 setOperationAction(ISD::FCOS , MVT::f32, Expand);
177 setOperationAction(ISD::FREM , MVT::f32, Expand);
179 // If we're enabling GP optimizations, use hardware square root
180 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
181 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
183 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
184 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
186 // SPU can do rotate right and left, so legalize it... but customize for i8
187 // because instructions don't exist.
189 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
191 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
192 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
193 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
195 setOperationAction(ISD::ROTL, MVT::i32, Legal);
196 setOperationAction(ISD::ROTL, MVT::i16, Legal);
197 setOperationAction(ISD::ROTL, MVT::i8, Custom);
199 // SPU has no native version of shift left/right for i8
200 setOperationAction(ISD::SHL, MVT::i8, Custom);
201 setOperationAction(ISD::SRL, MVT::i8, Custom);
202 setOperationAction(ISD::SRA, MVT::i8, Custom);
204 // Make these operations legal and handle them during instruction selection:
205 setOperationAction(ISD::SHL, MVT::i64, Legal);
206 setOperationAction(ISD::SRL, MVT::i64, Legal);
207 setOperationAction(ISD::SRA, MVT::i64, Legal);
209 // Custom lower i8, i32 and i64 multiplications
// NOTE(review): comment says i32/i64 MUL are custom lowered, but the
// actions below mark them Legal -- comment and code disagree; confirm intent.
210 setOperationAction(ISD::MUL, MVT::i8, Custom);
211 setOperationAction(ISD::MUL, MVT::i32, Legal);
212 setOperationAction(ISD::MUL, MVT::i64, Legal);
214 // Need to custom handle (some) common i8, i64 math ops
215 setOperationAction(ISD::ADD, MVT::i8, Custom);
216 setOperationAction(ISD::ADD, MVT::i64, Legal);
217 setOperationAction(ISD::SUB, MVT::i8, Custom);
218 setOperationAction(ISD::SUB, MVT::i64, Legal);
220 // SPU does not have BSWAP. It does have i32 support CTLZ.
221 // CTPOP has to be custom lowered.
222 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
223 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
225 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
226 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
227 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
228 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
230 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
231 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
233 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
235 // SPU has a version of select that implements (a&~c)|(b&c), just like
236 // select ought to work:
237 setOperationAction(ISD::SELECT, MVT::i8, Legal);
238 setOperationAction(ISD::SELECT, MVT::i16, Legal);
239 setOperationAction(ISD::SELECT, MVT::i32, Legal);
240 setOperationAction(ISD::SELECT, MVT::i64, Legal);
242 setOperationAction(ISD::SETCC, MVT::i8, Legal);
243 setOperationAction(ISD::SETCC, MVT::i16, Legal);
244 setOperationAction(ISD::SETCC, MVT::i32, Legal);
245 setOperationAction(ISD::SETCC, MVT::i64, Legal);
247 // Custom lower i128 -> i64 truncates
248 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
250 // SPU has a legal FP -> signed INT instruction
251 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
252 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
253 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
254 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
256 // FDIV on SPU requires custom lowering
257 setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
259 // SPU has [U|S]INT_TO_FP
260 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
261 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
262 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
263 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
264 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
265 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
266 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
267 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
269 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
270 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
271 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
272 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
274 // We cannot sextinreg(i1). Expand to shifts.
275 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
277 // Support label based line numbers.
278 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
279 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
281 // We want to legalize GlobalAddress and ConstantPool nodes into the
282 // appropriate instructions to materialize the address.
283 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
285 MVT VT = (MVT::SimpleValueType)sctype;
287 setOperationAction(ISD::GlobalAddress, VT, Custom);
288 setOperationAction(ISD::ConstantPool, VT, Custom);
289 setOperationAction(ISD::JumpTable, VT, Custom);
292 // RET must be custom lowered, to meet ABI requirements
293 setOperationAction(ISD::RET, MVT::Other, Custom);
295 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
296 setOperationAction(ISD::VASTART , MVT::Other, Custom);
298 // Use the default implementation.
299 setOperationAction(ISD::VAARG , MVT::Other, Expand);
300 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
301 setOperationAction(ISD::VAEND , MVT::Other, Expand);
302 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
303 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
304 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
305 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
307 // Cell SPU has instructions for converting between i64 and fp.
// NOTE(review): FP_TO_SINT/SINT_TO_FP for i64 were already set to Custom
// above -- these two calls are redundant duplicates (harmless).
308 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
309 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
311 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
// NOTE(review): this overrides the earlier 'Legal' setting for
// FP_TO_UINT/i32 above; last call wins.
312 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
314 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
315 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
317 // First set operation action for all vector types to expand. Then we
318 // will selectively turn on ones that can be effectively codegen'd.
319 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
320 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
321 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
322 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
323 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
324 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
326 // "Odd size" vector classes that we're willing to support:
327 addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
// Per-vector-type action setup over every vector MVT.
329 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
330 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
331 MVT VT = (MVT::SimpleValueType)i;
333 // add/sub are legal for all supported vector VT's.
334 setOperationAction(ISD::ADD , VT, Legal);
335 setOperationAction(ISD::SUB , VT, Legal);
336 // mul has to be custom lowered.
337 // TODO: v2i64 vector multiply
// NOTE(review): comment above says custom, but MUL is marked Legal here.
338 setOperationAction(ISD::MUL , VT, Legal);
340 setOperationAction(ISD::AND , VT, Legal);
341 setOperationAction(ISD::OR , VT, Legal);
342 setOperationAction(ISD::XOR , VT, Legal);
343 setOperationAction(ISD::LOAD , VT, Legal);
344 setOperationAction(ISD::SELECT, VT, Legal);
345 setOperationAction(ISD::STORE, VT, Legal);
347 // These operations need to be expanded:
348 setOperationAction(ISD::SDIV, VT, Expand);
349 setOperationAction(ISD::SREM, VT, Expand);
350 setOperationAction(ISD::UDIV, VT, Expand);
351 setOperationAction(ISD::UREM, VT, Expand);
353 // Custom lower build_vector, constant pool spills, insert and
354 // extract vector elements:
355 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
356 setOperationAction(ISD::ConstantPool, VT, Custom);
357 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
358 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
359 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
360 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
// v16i8 logical ops and v4f32 scalar_to_vector get special handling,
// overriding the generic per-VT settings from the loop above.
363 setOperationAction(ISD::AND, MVT::v16i8, Custom);
364 setOperationAction(ISD::OR, MVT::v16i8, Custom);
365 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
366 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
368 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
370 setShiftAmountType(MVT::i32);
// SETCC on SPU yields 0 / -1, matching the SELB select-mask convention.
371 setBooleanContents(ZeroOrNegativeOneBooleanContent);
373 setStackPointerRegisterToSaveRestore(SPU::R1);
375 // We have target-specific dag combine patterns for the following nodes:
376 setTargetDAGCombine(ISD::ADD);
377 setTargetDAGCombine(ISD::ZERO_EXTEND);
378 setTargetDAGCombine(ISD::SIGN_EXTEND);
379 setTargetDAGCombine(ISD::ANY_EXTEND);
381 computeRegisterProperties();
383 // Set pre-RA register scheduler default to BURR, which produces slightly
384 // better code than the default (could also be TDRR, but TargetLowering.h
385 // needs a mod to support that model):
386 setSchedulingPreference(SchedulingForRegPressure);
// Returns the printable name for a target-specific (SPUISD) opcode, or 0
// for unknown opcodes.  The node_names map is populated lazily on first
// call.  NOTE(review): unsynchronized lazy init -- fine single-threaded.
// (Return-type line not visible in this excerpt.)
390 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
392 if (node_names.empty()) {
393 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
394 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
395 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
396 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
397 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
398 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
399 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
400 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
401 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
402 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
403 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
404 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
405 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
406 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
407 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
408 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
409 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
410 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
411 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
412 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
413 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
414 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
415 node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
416 node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
417 node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
// Lookup; null pointer signals "no name" for this opcode.
420 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
422 return ((i != node_names.end()) ? i->second : 0);
425 //===----------------------------------------------------------------------===//
426 // Return the Cell SPU's SETCC result type
427 //===----------------------------------------------------------------------===//
// SETCC on i8/i16/i32 produces a result of the operand's own type;
// all other operand types compare into an i32 result.
429 MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
430 // i16 and i32 are valid SETCC result types
431 return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
434 //===----------------------------------------------------------------------===//
435 // Calling convention code:
436 //===----------------------------------------------------------------------===//
438 #include "SPUGenCallingConv.inc"
440 //===----------------------------------------------------------------------===//
441 // LowerOperation implementation
442 //===----------------------------------------------------------------------===//
444 /// Custom lower loads for CellSPU
446 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
447 within a 16-byte block, we have to rotate to extract the requested element.
449 For extending loads, we also want to ensure that the following sequence is
450 emitted, e.g. for MVT::f32 extending load to MVT::f64:
454 %2 v16i8,ch = rotate %1
455 %3 v4f32, ch = bitconvert %2
456 %4 f32 = vec2prefslot %3
457 %5 f64 = fp_extend %4
// Custom lowering for CellSPU loads (UNINDEXED addressing only): re-emit the
// access as an aligned v16i8 quadword load, rotate the requested element
// into the type's preferred slot, extract it, and apply any sext/zext/ext
// extension.  NOTE(review): several local declarations (e.g. 'result',
// 'rotate', 'CN', 'Flag') and some closing braces are not visible in this
// excerpt of the file.
461 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
462 LoadSDNode *LN = cast<LoadSDNode>(Op);
463 SDValue the_chain = LN->getChain();
464 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
465 MVT InVT = LN->getMemoryVT();
466 MVT OutVT = Op.getValueType();
467 ISD::LoadExtType ExtType = LN->getExtensionType();
468 unsigned alignment = LN->getAlignment();
// Preferred-slot data for the in-memory type.  getValueTypeMapEntry can
// return 0 for unmapped types; vtm is dereferenced below without a null
// check -- assumes InVT is always mapped (TODO confirm).
469 const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
471 switch (LN->getAddressingMode()) {
472 case ISD::UNINDEXED: {
474 SDValue basePtr = LN->getBasePtr();
// 16-byte-aligned loads: the rotate amount can often be folded to a
// compile-time constant.
477 if (alignment == 16) {
480 // Special cases for a known aligned load to simplify the base pointer
481 // and the rotation amount:
482 if (basePtr.getOpcode() == ISD::ADD
483 && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
484 // Known offset into basePtr
485 int64_t offset = CN->getSExtValue();
// Constant rotate: byte position within the quadword minus the
// preferred-slot byte of the value type.
486 int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
491 rotate = DAG.getConstant(rotamt, MVT::i16);
493 // Simplify the base pointer for this case:
494 basePtr = basePtr.getOperand(0);
// Fold the quadword-aligned part of the offset back into the address.
495 if ((offset & ~0xf) > 0) {
496 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
498 DAG.getConstant((offset & ~0xf), PtrVT));
500 } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
501 || (basePtr.getOpcode() == SPUISD::IndirectAddr
502 && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
503 && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
504 // Plain aligned a-form address: rotate into preferred slot
505 // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
506 int64_t rotamt = -vtm->prefslot_byte;
509 rotate = DAG.getConstant(rotamt, MVT::i16);
511 // Offset the rotate amount by the basePtr and the preferred slot
// Runtime rotate: basePtr's low bits plus the preferred-slot bias.
513 int64_t rotamt = -vtm->prefslot_byte;
516 rotate = DAG.getNode(ISD::ADD, PtrVT,
518 DAG.getConstant(rotamt, PtrVT));
521 // Unaligned load: must be more pessimistic about addressing modes:
522 if (basePtr.getOpcode() == ISD::ADD) {
523 MachineFunction &MF = DAG.getMachineFunction();
524 MachineRegisterInfo &RegInfo = MF.getRegInfo();
525 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
528 SDValue Op0 = basePtr.getOperand(0);
529 SDValue Op1 = basePtr.getOperand(1);
531 if (isa<ConstantSDNode>(Op1)) {
532 // Convert the (add <ptr>, <const>) to an indirect address contained
533 // in a register. Note that this is done because we need to avoid
534 // creating a 0(reg) d-form address due to the SPU's block loads.
535 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
536 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
537 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
539 // Convert the (add <arg1>, <arg2>) to an indirect address, which
540 // will likely be lowered as a reg(reg) x-form address.
541 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
// Non-ADD base pointer: wrap it as indirect with a zero offset.
544 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
546 DAG.getConstant(0, PtrVT));
549 // Offset the rotate amount by the basePtr and the preferred slot
551 rotate = DAG.getNode(ISD::ADD, PtrVT,
553 DAG.getConstant(-vtm->prefslot_byte, PtrVT));
556 // Re-emit as a v16i8 vector load
// Always loads the full 16-byte quadword at 16-byte alignment.
557 result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
558 LN->getSrcValue(), LN->getSrcValueOffset(),
559 LN->isVolatile(), 16);
// Value 1 of the load is the output chain.
562 the_chain = result.getValue(1);
564 // Rotate into the preferred slot:
565 result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
566 result.getValue(0), rotate);
568 // Convert the loaded v16i8 vector to the appropriate vector type
569 // specified by the operand:
570 MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
571 result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
572 DAG.getNode(ISD::BIT_CONVERT, vecVT, result));
574 // Handle extending loads by extending the scalar result:
575 if (ExtType == ISD::SEXTLOAD) {
576 result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
577 } else if (ExtType == ISD::ZEXTLOAD) {
578 result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
579 } else if (ExtType == ISD::EXTLOAD) {
580 unsigned NewOpc = ISD::ANY_EXTEND;
// FP extending loads use FP_EXTEND rather than integer any-extend.
582 if (OutVT.isFloatingPoint())
583 NewOpc = ISD::FP_EXTEND;
585 result = DAG.getNode(NewOpc, OutVT, result);
// Package value + chain as an LDRESULT node so callers see both results.
588 SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
589 SDValue retops[2] = {
594 result = DAG.getNode(SPUISD::LDRESULT, retvts,
595 retops, sizeof(retops) / sizeof(retops[0]));
// Indexed addressing modes are not supported -- diagnostic + error path
// (partially elided in this excerpt).
602 case ISD::LAST_INDEXED_MODE:
603 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
605 cerr << (unsigned) LN->getAddressingMode() << "\n";
613 /// Custom lower stores for CellSPU
615 All CellSPU stores are aligned to 16-byte boundaries, so for elements
616 within a 16-byte block, we have to generate a shuffle to insert the
617 requested element into its place, then store the resulting block.
// Custom lowering for CellSPU stores (UNINDEXED addressing only): load the
// containing 16-byte quadword, SHUFB the scalar into its slot using a
// shuffle mask built from the insertion offset, then store the whole
// quadword back.  NOTE(review): several local declarations (e.g. 'result',
// 'CN', 'Flag') and some closing braces are not visible in this excerpt.
620 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
621 StoreSDNode *SN = cast<StoreSDNode>(Op);
622 SDValue Value = SN->getValue();
623 MVT VT = Value.getValueType();
// For truncating stores, operate on the narrower in-memory type.
624 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
625 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
626 unsigned alignment = SN->getAlignment();
628 switch (SN->getAddressingMode()) {
629 case ISD::UNINDEXED: {
630 // The vector type we really want to load from the 16-byte chunk.
631 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
632 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
634 SDValue alignLoadVec;
635 SDValue basePtr = SN->getBasePtr();
636 SDValue the_chain = SN->getChain();
// Address of the byte slot where the value is inserted within the quadword.
637 SDValue insertEltOffs;
639 if (alignment == 16) {
642 // Special cases for a known aligned load to simplify the base pointer
643 // and insertion byte:
644 if (basePtr.getOpcode() == ISD::ADD
645 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
646 // Known offset into basePtr
647 int64_t offset = CN->getSExtValue();
649 // Simplify the base pointer for this case:
650 basePtr = basePtr.getOperand(0);
// Insertion byte = offset within the quadword (low 4 bits).
651 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
653 DAG.getConstant((offset & 0xf), PtrVT));
// Fold the quadword-aligned part of the offset into the base address.
655 if ((offset & ~0xf) > 0) {
656 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
658 DAG.getConstant((offset & ~0xf), PtrVT));
661 // Otherwise, assume it's at byte 0 of basePtr
662 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
664 DAG.getConstant(0, PtrVT));
667 // Unaligned load: must be more pessimistic about addressing modes:
668 if (basePtr.getOpcode() == ISD::ADD) {
669 MachineFunction &MF = DAG.getMachineFunction();
670 MachineRegisterInfo &RegInfo = MF.getRegInfo();
671 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
674 SDValue Op0 = basePtr.getOperand(0);
675 SDValue Op1 = basePtr.getOperand(1);
677 if (isa<ConstantSDNode>(Op1)) {
678 // Convert the (add <ptr>, <const>) to an indirect address contained
679 // in a register. Note that this is done because we need to avoid
680 // creating a 0(reg) d-form address due to the SPU's block loads.
681 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
682 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
683 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
685 // Convert the (add <arg1>, <arg2>) to an indirect address, which
686 // will likely be lowered as a reg(reg) x-form address.
687 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
// Non-ADD base pointer: wrap it as indirect with a zero offset.
690 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
692 DAG.getConstant(0, PtrVT));
695 // Insertion point is solely determined by basePtr's contents
696 insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
698 DAG.getConstant(0, PtrVT));
701 // Re-emit as a v16i8 vector load
// Fetch the full quadword that contains the store destination.
702 alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
703 SN->getSrcValue(), SN->getSrcValueOffset(),
704 SN->isVolatile(), 16);
707 the_chain = alignLoadVec.getValue(1);
709 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
710 SDValue theValue = SN->getValue();
// Peel off AssertZext/AssertSext wrappers to reach the raw value
// (condition line partially elided in this excerpt).
714 && (theValue.getOpcode() == ISD::AssertZext
715 || theValue.getOpcode() == ISD::AssertSext)) {
716 // Drill down and get the value for zero- and sign-extended
718 theValue = theValue.getOperand(0);
721 // If the base pointer is already a D-form address, then just create
722 // a new D-form address with a slot offset and the orignal base pointer.
723 // Otherwise generate a D-form address with the slot offset relative
724 // to the stack pointer, which is always aligned.
726 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
727 cerr << "CellSPU LowerSTORE: basePtr = ";
728 basePtr.getNode()->dump(&DAG);
// Build the SHUFB operands: a shuffle mask selecting the insertion slot,
// and the scalar value broadcast into a vector.
733 SDValue insertEltOp =
734 DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
735 SDValue vectorizeOp =
736 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
// Merge the new element into the previously loaded quadword.
738 result = DAG.getNode(SPUISD::SHUFB, vecVT,
739 vectorizeOp, alignLoadVec,
740 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
// Store the merged quadword back using the alignment-load's memory info.
742 result = DAG.getStore(the_chain, result, basePtr,
743 LN->getSrcValue(), LN->getSrcValueOffset(),
744 LN->isVolatile(), LN->getAlignment());
746 #if 0 && !defined(NDEBUG)
747 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
// NOTE(review): "¤tRoot" below looks like encoding damage of
// "&currentRoot"; harmless while this block is #if 0'd out, but it must
// be repaired before re-enabling this debug code.
748 const SDValue ¤tRoot = DAG.getRoot();
751 cerr << "------- CellSPU:LowerStore result:\n";
754 DAG.setRoot(currentRoot);
// NOTE(review): the diagnostic below says "LowerLOAD"/"LoadSDNode" -- a
// copy/paste from LowerLOAD; it should read LowerSTORE/StoreSDNode.
765 case ISD::LAST_INDEXED_MODE:
766 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
768 cerr << (unsigned) SN->getAddressingMode() << "\n";
776 //! Generate the address of a constant pool entry.
// Static relocation model only: small memory model emits an absolute
// A-form address; large memory model combines Hi/Lo halves through an
// IndirectAddr node.  Non-static models hit the (elided) assertion below.
778 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
779 MVT PtrVT = Op.getValueType();
780 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
781 Constant *C = CP->getConstVal();
782 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
783 SDValue Zero = DAG.getConstant(0, PtrVT);
784 const TargetMachine &TM = DAG.getTarget();
786 if (TM.getRelocationModel() == Reloc::Static) {
787 if (!ST->usingLargeMem()) {
788 // Just return the SDValue with the constant pool address in it.
789 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
791 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
792 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
793 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
798 "LowerConstantPool: Relocation model other than static"
803 //! Alternate entry point for generating the address of a constant pool entry
// Thin public wrapper: forwards to the file-local LowerConstantPool with the
// subtarget extracted from the target machine.  (Return-type line not
// visible in this excerpt.)
805 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
806 return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
// Generate the address of a jump table entry.  Mirrors LowerConstantPool:
// static relocation + small memory model -> A-form absolute address;
// large memory model -> Hi/Lo pair via IndirectAddr; other relocation
// models are rejected by the (elided) assertion below.
810 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
811 MVT PtrVT = Op.getValueType();
812 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
813 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
814 SDValue Zero = DAG.getConstant(0, PtrVT);
815 const TargetMachine &TM = DAG.getTarget();
817 if (TM.getRelocationModel() == Reloc::Static) {
818 if (!ST->usingLargeMem()) {
819 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
821 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
822 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
823 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
828 "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global.  Same pattern as LowerConstantPool /
// LowerJumpTable: static + small memory model -> A-form address; large
// memory model -> Hi/Lo pair via IndirectAddr; other relocation models
// print the error below (abort path elided in this excerpt).
833 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
834 MVT PtrVT = Op.getValueType();
835 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
836 GlobalValue *GV = GSDN->getGlobal();
837 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
838 const TargetMachine &TM = DAG.getTarget();
839 SDValue Zero = DAG.getConstant(0, PtrVT);
841 if (TM.getRelocationModel() == Reloc::Static) {
842 if (!ST->usingLargeMem()) {
843 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
845 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
846 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
847 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
850 cerr << "LowerGlobalAddress: Relocation model other than static not "
859 //! Custom lower i64 integer constants
861 This code inserts all of the necessary juggling that needs to occur to load
862 a 64-bit constant into a register.
// Splats the constant into both lanes of a v2i64 BUILD_VECTOR, then
// extracts the preferred slot back out as an i64.  Non-i64 types fall
// through to the error path below (abort elided in this excerpt).
865 LowerConstant(SDValue Op, SelectionDAG &DAG) {
866 MVT VT = Op.getValueType();
868 if (VT == MVT::i64) {
869 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
870 SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
871 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
872 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
874 cerr << "LowerConstant: unhandled constant type "
884 //! Custom lower double precision floating point constants
// Materializes an f64 constant from its raw bit pattern: splat the 64-bit
// pattern into a v2i64 BUILD_VECTOR, bit-convert to v2f64, and extract the
// preferred slot.  Only f64 is handled here (per the legality settings in
// the constructor, f32 ConstantFP stays Legal).
886 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
887 MVT VT = Op.getValueType();
889 if (VT == MVT::f64) {
890 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
893 "LowerConstantFP: Node is not ConstantFPSDNode");
// Raw IEEE-754 bit pattern of the double, as a 64-bit integer.
895 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
896 SDValue T = DAG.getConstant(dbits, MVT::i64);
897 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
898 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
899 DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
// Lower incoming formal arguments: the first NumArgRegs arguments arrive in
// registers (copied into fresh virtual registers); the rest are loaded from
// fixed stack slots.  For varargs, every remaining argument register is
// spilled to the stack so va_arg can walk them, and VarArgsFrameIndex is
// set to the first spill slot.  NOTE(review): some loop/brace lines and the
// ArgVal declaration are not visible in this excerpt.
906 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
908 MachineFunction &MF = DAG.getMachineFunction();
909 MachineFrameInfo *MFI = MF.getFrameInfo();
910 MachineRegisterInfo &RegInfo = MF.getRegInfo();
911 SmallVector<SDValue, 48> ArgValues;
912 SDValue Root = Op.getOperand(0);
// Operand 2 carries the isVarArg flag as a constant.
913 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
915 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
916 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Stack arguments start past the fixed minimum frame area.
918 unsigned ArgOffset = SPUFrameInfo::minStackSize();
919 unsigned ArgRegIdx = 0;
920 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
922 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
924 // Add DAG nodes to load the arguments or copy them out of registers.
// The node's last value is the chain, hence NumValues - 1 arguments.
925 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
926 ArgNo != e; ++ArgNo) {
927 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
928 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
// Argument still fits in a register: pick the register class for its type.
931 if (ArgRegIdx < NumArgRegs) {
932 const TargetRegisterClass *ArgRegClass;
934 switch (ObjectVT.getSimpleVT()) {
936 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
937 << ObjectVT.getMVTString()
942 ArgRegClass = &SPU::R8CRegClass;
945 ArgRegClass = &SPU::R16CRegClass;
948 ArgRegClass = &SPU::R32CRegClass;
951 ArgRegClass = &SPU::R64CRegClass;
954 ArgRegClass = &SPU::GPRCRegClass;
957 ArgRegClass = &SPU::R32FPRegClass;
960 ArgRegClass = &SPU::R64FPRegClass;
968 ArgRegClass = &SPU::VECREGRegClass;
// Mark the physical register live-in and copy it to a virtual register.
972 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
973 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
974 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
977 // We need to load the argument to a virtual register if we determined
978 // above that we ran out of physical registers of the appropriate type
979 // or we're forced to do vararg
980 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
981 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
982 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
983 ArgOffset += StackSlotSize;
986 ArgValues.push_back(ArgVal);
// Thread the chain through the copy/load just created.
988 Root = ArgVal.getOperand(0);
// Vararg handling: spill all still-unused argument registers.
993 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
994 // We will spill (79-3)+1 registers to the stack
995 SmallVector<SDValue, 79-3+1> MemOps;
997 // Create the frame slot
999 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
// NOTE(review): VarArgsFrameIndex is overwritten each iteration, so it
// ends up referring to the LAST spill slot created -- confirm that this
// matches what LowerVASTART expects.
1000 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1001 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1002 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1003 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1004 Root = Store.getOperand(0);
1005 MemOps.push_back(Store);
1007 // Increment address by stack slot size for the next stored argument
1008 ArgOffset += StackSlotSize;
// Tie all the spill stores into the chain with a TokenFactor.
1010 if (!MemOps.empty())
1011 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
// The chain is returned as the final value after all argument values.
1014 ArgValues.push_back(Root);
1016 // Return the new list of results.
1017 return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
1018 &ArgValues[0], ArgValues.size());
1021 /// isLSAAddress - Return the immediate to use if the specified
1022 /// value is representable as a LSA address.
///
/// Returns a constant node holding the word offset (address >> 2) when Op is
/// a constant that is 4-byte aligned and fits in an 18-bit sign-extended
/// immediate; returns 0 otherwise (non-constant guard elided in this view).
1023 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1024 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1027 int Addr = C->getZExtValue();
// Reject unaligned addresses and those whose top 14 bits are not a sign
// extension of bit 17 (shift up/down by 14 round-trips only in-range values).
1028 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1029 (Addr << 14 >> 14) != Addr)
1030 return 0; // Top 14 bits have to be sext of immediate.
// Encode as a word offset.
1032 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// Lower ISD::CALL for the SPU: marshal arguments into registers / stack
// slots, select the call addressing form (PC-relative, absolute, or indirect
// depending on memory model and whether the callee is defined locally), emit
// the CALLSEQ_START/CALL/CALLSEQ_END sequence, and copy return values out of
// the return registers.
// NOTE(review): interior lines (case labels, else-branches, closing braces)
// are elided in this view; comments describe the visible statements only.
1036 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1037 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1038 SDValue Chain = TheCall->getChain();
1039 SDValue Callee = TheCall->getCallee();
1040 unsigned NumOps = TheCall->getNumArgs();
1041 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1042 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1043 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1045 // Handy pointer type
1046 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1048 // Accumulate how many bytes are to be pushed on the stack, including the
1049 // linkage area, and parameter passing area. According to the SPU ABI,
1050 // we minimally need space for [LR] and [SP]
1051 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1053 // Set up a copy of the stack pointer for use loading and storing any
1054 // arguments that may not fit in the registers available for argument
// R1 is the SPU stack pointer.
1056 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1058 // Figure out which arguments are going to go in registers, and which in
1060 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1061 unsigned ArgRegIdx = 0;
1063 // Keep track of registers passing arguments
1064 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1065 // And the arguments passed on the stack
1066 SmallVector<SDValue, 8> MemOpChains;
1068 for (unsigned i = 0; i != NumOps; ++i) {
1069 SDValue Arg = TheCall->getArg(i);
1071 // PtrOff will be used to store the current argument to the stack if a
1072 // register cannot be found for it.
1073 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1074 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
// Dispatch on the argument's type (case labels elided); each visible arm
// either assigns the next argument register or spills to the stack slot.
1076 switch (Arg.getValueType().getSimpleVT()) {
1077 default: assert(0 && "Unexpected ValueType for argument!");
1083 if (ArgRegIdx != NumArgRegs) {
1084 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1086 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1087 ArgOffset += StackSlotSize;
1092 if (ArgRegIdx != NumArgRegs) {
1093 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1095 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1096 ArgOffset += StackSlotSize;
1105 if (ArgRegIdx != NumArgRegs) {
1106 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1108 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1109 ArgOffset += StackSlotSize;
1115 // Update number of stack bytes actually used, insert a call sequence start
// Only the bytes beyond the minimal linkage area count toward the call's
// stack adjustment.
1116 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1117 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1120 if (!MemOpChains.empty()) {
1121 // Adjust the stack pointer for the stack arguments.
1122 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1123 &MemOpChains[0], MemOpChains.size());
1126 // Build a sequence of copy-to-reg nodes chained together with token chain
1127 // and flag operands which copy the outgoing args into the appropriate regs.
1129 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1130 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1132 InFlag = Chain.getValue(1);
1135 SmallVector<SDValue, 8> Ops;
1136 unsigned CallOpc = SPUISD::CALL;
1138 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1139 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1140 // node so that legalize doesn't hack it.
1141 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1142 GlobalValue *GV = G->getGlobal();
1143 MVT CalleeVT = Callee.getValueType();
1144 SDValue Zero = DAG.getConstant(0, PtrVT);
1145 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1147 if (!ST->usingLargeMem()) {
1148 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1149 // style calls, otherwise, external symbols are BRASL calls. This assumes
1150 // that declared/defined symbols are in the same compilation unit and can
1151 // be reached through PC-relative jumps.
1154 // This may be an unsafe assumption for JIT and really large compilation
// Declarations get an absolute (A-form) address; definitions get a
// PC-relative address.
1156 if (GV->isDeclaration()) {
1157 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1159 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1162 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1164 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1166 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1167 MVT CalleeVT = Callee.getValueType();
1168 SDValue Zero = DAG.getConstant(0, PtrVT);
1169 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1170 Callee.getValueType());
// External symbols: absolute in small-memory mode, indirect otherwise.
1172 if (!ST->usingLargeMem()) {
1173 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
1175 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
1177 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1178 // If this is an absolute destination address that appears to be a legal
1179 // local store address, use the munged value.
1180 Callee = SDValue(Dest, 0);
1183 Ops.push_back(Chain);
1184 Ops.push_back(Callee);
1186 // Add argument registers to the end of the list so that they are known live
1188 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1189 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1190 RegsToPass[i].second.getValueType()));
1192 if (InFlag.getNode())
1193 Ops.push_back(InFlag);
1194 // Returns a chain and a flag for retval copy to use.
1195 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1196 &Ops[0], Ops.size());
1197 InFlag = Chain.getValue(1);
1199 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1200 DAG.getIntPtrConstant(0, true), InFlag);
1201 if (TheCall->getValueType(0) != MVT::Other)
1202 InFlag = Chain.getValue(1);
1204 SDValue ResultVals[3];
1205 unsigned NumResults = 0;
1207 // If the call has results, copy the values out of the ret val registers.
// Case labels are elided; R3 holds the primary return value, and the first
// visible arm handles a two-register (R3/R4) i32 pair.
1208 switch (TheCall->getValueType(0).getSimpleVT()) {
1209 default: assert(0 && "Unexpected ret value!");
1210 case MVT::Other: break;
1212 if (TheCall->getValueType(1) == MVT::i32) {
1213 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1214 ResultVals[0] = Chain.getValue(0);
1215 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1216 Chain.getValue(2)).getValue(1);
1217 ResultVals[1] = Chain.getValue(0);
1220 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1221 ResultVals[0] = Chain.getValue(0);
1226 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1227 ResultVals[0] = Chain.getValue(0);
1231 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i128, InFlag).getValue(1);
1232 ResultVals[0] = Chain.getValue(0);
1237 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1238 InFlag).getValue(1);
1239 ResultVals[0] = Chain.getValue(0);
1248 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1249 InFlag).getValue(1);
1250 ResultVals[0] = Chain.getValue(0);
1255 // If the function returns void, just return the chain.
1256 if (NumResults == 0)
1259 // Otherwise, merge everything together with a MERGE_VALUES node.
1260 ResultVals[NumResults++] = Chain;
1261 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1262 return Res.getValue(Op.getResNo());
// Lower ISD::RET: run the SPU return-value calling convention, mark the
// return registers live-out, copy each returned value into its assigned
// register (flag-glued), and emit the SPUISD::RET_FLAG terminator.
1266 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1267 SmallVector<CCValAssign, 16> RVLocs;
1268 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1269 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1270 CCState CCInfo(CC, isVarArg, TM, RVLocs);
// RetCC_SPU is the tablegen'd return-value convention; it fills RVLocs.
1271 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1273 // If this is the first return lowered for this function, add the regs to the
1274 // liveout set for the function.
1275 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1276 for (unsigned i = 0; i != RVLocs.size(); ++i)
1277 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1280 SDValue Chain = Op.getOperand(0);
1283 // Copy the result values into the output registers.
// RET operands come in (value, signess-flag) pairs, hence i*2+1.
1284 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1285 CCValAssign &VA = RVLocs[i];
1286 assert(VA.isRegLoc() && "Can only return in registers!");
1287 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1288 Flag = Chain.getValue(1);
// With a glue flag (values were copied) vs. without (void return); the
// guard between these two returns is elided in this view.
1292 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1294 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1298 //===----------------------------------------------------------------------===//
1299 // Vector related lowering:
1300 //===----------------------------------------------------------------------===//
// If the BUILD_VECTOR node N is a splat of a single constant (ignoring UNDEF
// elements), return that element's ConstantSDNode; otherwise return 0.
1302 static ConstantSDNode *
1303 getVecImm(SDNode *N) {
1304 SDValue OpVal(0, 0);
1306 // Check to see if this buildvec has a single non-undef value in its elements.
1307 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1308 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1309 if (OpVal.getNode() == 0)
1310 OpVal = N->getOperand(i);
// Two distinct non-undef elements: not a splat (early exit elided in view).
1311 else if (OpVal != N->getOperand(i))
1315 if (OpVal.getNode() != 0) {
1316 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1321 return 0; // All UNDEF: use implicit def.; not Constant node
1324 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1325 /// and the value fits into an unsigned 18-bit constant, and if so, return the
///
/// Returns the immediate as a target constant of ValueType, or an empty
/// SDValue (fall-through return elided in this view) otherwise.
1327 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1329 if (ConstantSDNode *CN = getVecImm(N)) {
1330 uint64_t Value = CN->getZExtValue();
// For i64 splats, both 32-bit halves must match (comparison elided in this
// view — presumably checks upper == lower; TODO confirm) before collapsing
// to the upper half.
1331 if (ValueType == MVT::i64) {
1332 uint64_t UValue = CN->getZExtValue();
1333 uint32_t upper = uint32_t(UValue >> 32);
1334 uint32_t lower = uint32_t(UValue);
1337 Value = Value >> 32;
// 0x3ffff = maximum unsigned 18-bit immediate.
1339 if (Value <= 0x3ffff)
1340 return DAG.getTargetConstant(Value, ValueType);
1346 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1347 /// and the value fits into a signed 16-bit constant, and if so, return the
///
/// Returns the immediate as a target constant of ValueType, or an empty
/// SDValue (fall-through return elided in this view) otherwise.
1349 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1351 if (ConstantSDNode *CN = getVecImm(N)) {
1352 int64_t Value = CN->getSExtValue();
// For i64 splats, both 32-bit halves must match (comparison elided in this
// view — TODO confirm) before collapsing to the upper half.
1353 if (ValueType == MVT::i64) {
1354 uint64_t UValue = CN->getZExtValue();
1355 uint32_t upper = uint32_t(UValue >> 32);
1356 uint32_t lower = uint32_t(UValue);
1359 Value = Value >> 32;
// Signed 16-bit range: [-32768, 32767].
1361 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1362 return DAG.getTargetConstant(Value, ValueType);
1369 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1370 /// and the value fits into a signed 10-bit constant, and if so, return the
///
/// Returns the immediate as a target constant of ValueType, or an empty
/// SDValue (fall-through return elided in this view) otherwise.
1372 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1374 if (ConstantSDNode *CN = getVecImm(N)) {
1375 int64_t Value = CN->getSExtValue();
// For i64 splats, both 32-bit halves must match (comparison elided in this
// view — TODO confirm) before collapsing to the upper half.
1376 if (ValueType == MVT::i64) {
1377 uint64_t UValue = CN->getZExtValue();
1378 uint32_t upper = uint32_t(UValue >> 32);
1379 uint32_t lower = uint32_t(UValue);
1382 Value = Value >> 32;
// isS10Constant: fits in a signed 10-bit immediate (SPU d-form/i10 field).
1384 if (isS10Constant(Value))
1385 return DAG.getTargetConstant(Value, ValueType);
1391 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1392 /// and the value fits into a signed 8-bit constant, and if so, return the
1395 /// @note: The incoming vector is v16i8 because that's the only way we can load
1396 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
///
/// Returns the byte immediate as a target constant, or an empty SDValue
/// (fall-through return elided in this view) otherwise.
1398 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1400 if (ConstantSDNode *CN = getVecImm(N)) {
1401 int Value = (int) CN->getZExtValue();
// For an i16 splat element, accept it only when the high byte equals the
// low byte, then return just the low byte.
1402 if (ValueType == MVT::i16
1403 && Value <= 0xffff /* truncated from uint64_t */
1404 && ((short) Value >> 8) == ((short) Value & 0xff))
1405 return DAG.getTargetConstant(Value & 0xff, ValueType);
// For i8, the value must already fit in one byte.
1406 else if (ValueType == MVT::i8
1407 && (Value & 0xff) == Value)
1408 return DAG.getTargetConstant(Value, ValueType);
1414 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1415 /// and the value fits into a signed 16-bit constant, and if so, return the
///
/// Matches the ILHU ("immediate load halfword upper") pattern: the splat
/// constant's low 16 bits must be zero; the returned immediate is the value
/// shifted down by 16. Fall-through empty return elided in this view.
1417 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1419 if (ConstantSDNode *CN = getVecImm(N)) {
1420 uint64_t Value = CN->getZExtValue();
// i32: only bits 31..16 may be set. i64: same mask check on the full value
// (which also forces the upper 32 bits to zero with this mask).
1421 if ((ValueType == MVT::i32
1422 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1423 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1424 return DAG.getTargetConstant(Value >> 16, ValueType);
1430 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
///
/// Returns the splat constant as an i32 target constant, or an empty SDValue
/// (fall-through return elided in this view) when N is not a constant splat.
1431 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1432 if (ConstantSDNode *CN = getVecImm(N)) {
1433 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1439 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
///
/// Returns the splat constant as an i64 target constant, or an empty SDValue
/// (fall-through return elided in this view) when N is not a constant splat.
/// NOTE(review): the `(unsigned)` cast below truncates the 64-bit splat value
/// to 32 bits before widening it back to an MVT::i64 constant — this looks
/// like it silently drops the upper 32 bits of a genuine 64-bit immediate.
/// Confirm against callers whether only 32-bit-representable splats reach
/// this path; otherwise the cast should likely be removed.
1440 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1441 if (ConstantSDNode *CN = getVecImm(N)) {
1442 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1448 // If this is a vector of constants or undefs, get the bits. A bit in
1449 // UndefBits is set if the corresponding element of the vector is an
1450 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1451 // zero. Return true if this is not an array of constants, false if it is.
// Packs the 128-bit BUILD_VECTOR BV into two uint64_t words (VectorBits)
// plus a matching undef mask (UndefBits).
1453 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1454 uint64_t UndefBits[2]) {
1455 // Start with zero'd results.
1456 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1458 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1459 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1460 SDValue OpVal = BV->getOperand(i);
// Map element i to (word index, bit-slot within that word). Elements in
// the first half of the vector land in word 0, the rest in word 1; SlotNo
// orders elements big-endian within each 64-bit word.
1462 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1463 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1465 uint64_t EltBits = 0;
1466 if (OpVal.getOpcode() == ISD::UNDEF) {
// Record a full-element mask of undef bits; element value stays zero.
1467 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1468 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1470 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1471 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1472 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants contribute their raw IEEE bit patterns.
1473 const APFloat &apf = CN->getValueAPF();
1474 EltBits = (CN->getValueType(0) == MVT::f32
1475 ? FloatToBits(apf.convertToFloat())
1476 : DoubleToBits(apf.convertToDouble()));
1478 // Nonconstant element.
1482 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1485 //printf("%llx %llx %llx %llx\n",
1486 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1490 /// If this is a splat (repetition) of a value across the whole vector, return
1491 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1492 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1493 /// SplatSize = 1 byte.
1494 static bool isConstantSplat(const uint64_t Bits128[2],
1495 const uint64_t Undef128[2],
1497 uint64_t &SplatBits, uint64_t &SplatUndef,
1499 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1500 // the same as the lower 64-bits, ignoring undefs.
// Pre-fold each width level by OR-ing value halves and AND-ing undef halves,
// so an undef half never blocks a match at a smaller splat size.
1501 uint64_t Bits64 = Bits128[0] | Bits128[1];
1502 uint64_t Undef64 = Undef128[0] & Undef128[1];
1503 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1504 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1505 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1506 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
// Each nested level checks "halves equal modulo undefs" and, if the caller's
// minimum splat width allows, descends to the next smaller width. The
// SplatBits/SplatSize assignments for the larger widths are elided in this
// view; only the SplatUndef assignments and the 8-bit leaf are visible.
1508 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1509 if (MinSplatBits < 64) {
1511 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1513 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1514 if (MinSplatBits < 32) {
1516 // If the top 16-bits are different than the lower 16-bits, ignoring
1517 // undefs, we have an i32 splat.
1518 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1519 if (MinSplatBits < 16) {
1520 // If the top 8-bits are different than the lower 8-bits, ignoring
1521 // undefs, we have an i16 splat.
1522 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1523 == ((Bits16 >> 8) & ~Undef16)) {
1524 // Otherwise, we have an 8-bit splat.
1525 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1526 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1532 SplatUndef = Undef16;
1539 SplatUndef = Undef32;
// 64-bit splat case: report the raw first word.
1545 SplatBits = Bits128[0];
1546 SplatUndef = Undef128[0];
1552 return false; // Can't be a splat if two pieces don't match.
1555 // If this is a case we can't handle, return null and let the default
1556 // expansion code take care of it. If we CAN select this case, and if it
1557 // selects to a single instruction, return Op. Otherwise, if we can codegen
1558 // this case more efficiently than a constant pool load, lower it to the
1559 // sequence of ops that should be used.
// NOTE(review): case labels and several closing braces are elided in this
// view; each visible arm is labeled by the type it handles based on the
// vector VT it constructs.
1560 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1561 MVT VT = Op.getValueType();
1562 // If this is a vector of constants or undefs, get the bits. A bit in
1563 // UndefBits is set if the corresponding element of the vector is an
1564 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1566 uint64_t VectorBits[2];
1567 uint64_t UndefBits[2];
1568 uint64_t SplatBits, SplatUndef;
// Bail out unless the node is an all-constant vector that is a splat at
// (at least) the element width.
1570 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1571 || !isConstantSplat(VectorBits, UndefBits,
1572 VT.getVectorElementType().getSizeInBits(),
1573 SplatBits, SplatUndef, SplatSize))
1574 return SDValue(); // Not a constant vector, not a splat.
1576 switch (VT.getSimpleVT()) {
// v4f32 arm: re-express the FP splat as an integer splat and bitcast.
1579 uint32_t Value32 = SplatBits;
1580 assert(SplatSize == 4
1581 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1582 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1583 SDValue T = DAG.getConstant(Value32, MVT::i32);
1584 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1585 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64 arm: same trick with 64-bit integer lanes.
1589 uint64_t f64val = SplatBits;
1590 assert(SplatSize == 8
1591 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1592 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1593 SDValue T = DAG.getConstant(f64val, MVT::i64);
1594 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1595 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// v16i8 arm: widen the byte splat to halfwords and bitcast back.
1599 // 8-bit constants have to be expanded to 16-bits
1600 unsigned short Value16 = SplatBits | (SplatBits << 8);
1602 for (int i = 0; i < 8; ++i)
1603 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1604 return DAG.getNode(ISD::BIT_CONVERT, VT,
1605 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16 arm: the splat may have been detected at byte size, in which case
// the byte is duplicated into both halves of the halfword.
1608 unsigned short Value16;
1610 Value16 = (unsigned short) (SplatBits & 0xffff);
1612 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1613 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1615 for (int i = 0; i < 8; ++i) Ops[i] = T;
1616 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// v4i32 arm.
1619 unsigned int Value = SplatBits;
1620 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1621 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// Two-element arm (built with two lanes).
1624 unsigned int Value = SplatBits;
1625 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1626 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T);
// v2i64 arm: split the 64-bit splat into 32-bit halves.
1629 uint64_t val = SplatBits;
1630 uint32_t upper = uint32_t(val >> 32);
1631 uint32_t lower = uint32_t(val);
1633 if (upper == lower) {
1634 // Magic constant that can be matched by IL, ILA, et. al.
1635 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1636 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1640 SmallVector<SDValue, 16> ShufBytes;
1642 bool upper_special, lower_special;
1644 // NOTE: This code creates common-case shuffle masks that can be easily
1645 // detected as common expressions. It is not attempting to create highly
1646 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1648 // Detect if the upper or lower half is a special shuffle mask pattern:
// "Special" halves (0, all-ones, sign bit) can be produced directly by
// shufb control-byte codes instead of materializing a vector.
1649 upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1650 lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1652 // Create lower vector if not a special pattern
1653 if (!lower_special) {
1654 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1655 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1656 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1657 LO32C, LO32C, LO32C, LO32C));
1660 // Create upper vector if not a special pattern
1661 if (!upper_special) {
1662 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1663 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1664 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1665 HI32C, HI32C, HI32C, HI32C));
1668 // If either upper or lower are special, then the two input operands are
1669 // the same (basically, one of them is a "don't care")
1674 if (lower_special && upper_special) {
1675 // Unhappy situation... both upper and lower are special, so punt with
1676 // a target constant:
1677 SDValue Zero = DAG.getConstant(0, MVT::i32);
1678 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build a 16-byte shufb control mask, one 32-bit word per iteration of i
// (4 control bytes each).
1682 for (int i = 0; i < 4; ++i) {
1684 for (int j = 0; j < 4; ++j) {
1686 bool process_upper, process_lower;
// Even words select the upper 32 bits of each i64 lane, odd words the
// lower 32 bits.
1688 process_upper = (upper_special && (i & 1) == 0);
1689 process_lower = (lower_special && (i & 1) == 1);
1691 if (process_upper || process_lower) {
// shufb control codes: 0x80 -> 0x00, 0xc0 -> 0xff, 0xe0 -> 0x80 (first
// byte only); values assigned in the elided branches — TODO confirm.
1692 if ((process_upper && upper == 0)
1693 || (process_lower && lower == 0))
1695 else if ((process_upper && upper == 0xffffffff)
1696 || (process_lower && lower == 0xffffffff))
1698 else if ((process_upper && upper == 0x80000000)
1699 || (process_lower && lower == 0x80000000))
1700 val |= (j == 0 ? 0xe0 : 0x80);
// Non-special byte: index into the concatenated HI32/LO32 operands.
1702 val |= i * 4 + j + ((i & 1) * 16);
1705 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1708 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1709 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1710 &ShufBytes[0], ShufBytes.size()));
1718 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1719 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1720 /// permutation vector, V3, is monotonically increasing with one "exception"
1721 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1722 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1723 /// In either case, the net result is going to eventually invoke SHUFB to
1724 /// permute/shuffle the bytes from V1 and V2.
1726 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1727 /// control word for byte/halfword/word insertion. This takes care of a single
1728 /// element move from V2 into V1.
1730 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
// NOTE(review): several interior lines (V2EltIdx0 assignments per element
// type, parts of the mask-classification loop) are elided in this view.
1731 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1732 SDValue V1 = Op.getOperand(0);
1733 SDValue V2 = Op.getOperand(1);
1734 SDValue PermMask = Op.getOperand(2);
1736 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1738 // If we have a single element being moved from V1 to V2, this can be handled
1739 // using the C*[DX] compute mask instructions, but the vector elements have
1740 // to be monotonically increasing with one exception element.
1741 MVT VecVT = V1.getValueType();
1742 MVT EltVT = VecVT.getVectorElementType();
1743 unsigned EltsFromV2 = 0;
// V2EltIdx0 = index of V2's first element in the combined mask numbering;
// the per-type assignments are elided in this view.
1745 unsigned V2EltIdx0 = 0;
1746 unsigned CurrElt = 0;
1747 unsigned MaxElts = VecVT.getVectorNumElements();
1748 unsigned PrevElt = 0;
1750 bool monotonic = true;
1753 if (EltVT == MVT::i8) {
1755 } else if (EltVT == MVT::i16) {
1757 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1759 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1762 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Classify the permutation mask: count elements drawn from V2, track
// monotonicity of the V1 indices, and detect a pure rotation pattern.
1764 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1765 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1766 unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
1769 if (SrcElt >= V2EltIdx0) {
// First (and only permitted) element taken from V2; remember its byte
// offset for the SHUFFLE_MASK immediate.
1770 if (1 >= (++EltsFromV2)) {
1771 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1773 } else if (CurrElt != SrcElt) {
// Rotation detection: consecutive indices (with wrap-around) keep the
// candidate alive; the bookkeeping branches are partially elided.
1781 if (PrevElt > 0 && SrcElt < MaxElts) {
1782 if ((PrevElt == SrcElt - 1)
1783 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1790 } else if (PrevElt == 0) {
1791 // First time through, need to keep track of previous element
1794 // This isn't a rotation, takes elements from vector 2
// Exactly one element comes from V2 and the rest are in order: use the
// C*D-style insertion mask.
1801 if (EltsFromV2 == 1 && monotonic) {
1802 // Compute mask and shuffle
1803 MachineFunction &MF = DAG.getMachineFunction();
1804 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1805 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1806 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1807 // Initialize temporary register to 0
1808 SDValue InitTempReg =
1809 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1810 // Copy register's contents as index in SHUFFLE_MASK:
1811 SDValue ShufMaskOp =
1812 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1813 DAG.getTargetConstant(V2Elt, MVT::i32),
1814 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1815 // Use shuffle mask in SHUFB synthetic instruction:
1816 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1817 } else if (rotate) {
// Pure rotation: lower to a byte rotate of V1.
1818 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1820 return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
1821 V1, DAG.getConstant(rotamt, MVT::i16));
1823 // Convert the SHUFFLE_VECTOR mask's input element units to the
// General case: expand the element-indexed mask into a per-byte v16i8
// shufb control vector.
1825 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1827 SmallVector<SDValue, 16> ResultMask;
1828 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1830 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1833 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1835 for (unsigned j = 0; j < BytesPerElement; ++j) {
1836 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1841 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1842 &ResultMask[0], ResultMask.size());
1843 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower ISD::SCALAR_TO_VECTOR: a constant scalar becomes a BUILD_VECTOR of
// n copies (which later simplifies to a vector load); a non-constant scalar
// is placed in the vector's preferred slot via SPUISD::PREFSLOT2VEC.
1847 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1848 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1850 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1851 // For a constant, build the appropriate constant vector, which will
1852 // eventually simplify to a vector register load.
1854 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1855 SmallVector<SDValue, 16> ConstVecValues;
1859 // Create a constant vector:
// Pick the element count / element type from the result vector type.
1860 switch (Op.getValueType().getSimpleVT()) {
1861 default: assert(0 && "Unexpected constant value type in "
1862 "LowerSCALAR_TO_VECTOR");
1863 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1864 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1865 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1866 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1867 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1868 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1871 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1872 for (size_t j = 0; j < n_copies; ++j)
1873 ConstVecValues.push_back(CValue);
1875 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1876 &ConstVecValues[0], ConstVecValues.size());
1878 // Otherwise, copy the value from one register to another:
// (case labels elided in this view)
1879 switch (Op0.getValueType().getSimpleVT()) {
1880 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1887 return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
// Lower ISD::EXTRACT_VECTOR_ELT. Constant index: either read the preferred
// slot directly or build a shufb mask that moves the element into the
// preferred slot. Variable index: rotate the element to byte 0, replicate it
// across the vector, then read the preferred slot.
// NOTE(review): case labels and some closing braces are elided in this view.
1894 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1895 MVT VT = Op.getValueType();
1896 SDValue N = Op.getOperand(0);
1897 SDValue Elt = Op.getOperand(1);
1900 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1901 // Constant argument:
1902 int EltNo = (int) C->getZExtValue();
// Bounds checks per element type.
1905 if (VT == MVT::i8 && EltNo >= 16)
1906 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1907 else if (VT == MVT::i16 && EltNo >= 8)
1908 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1909 else if (VT == MVT::i32 && EltNo >= 4)
1910 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1911 else if (VT == MVT::i64 && EltNo >= 2)
1912 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1914 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1915 // i32 and i64: Element 0 is the preferred slot
1916 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
1919 // Need to generate shuffle mask and extract:
// prefslot_begin/end: the byte range of the register's preferred slot for
// this value type (assigned in elided case arms).
1920 int prefslot_begin = -1, prefslot_end = -1;
1921 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1923 switch (VT.getSimpleVT()) {
1925 assert(false && "Invalid value type!");
1927 prefslot_begin = prefslot_end = 3;
1931 prefslot_begin = 2; prefslot_end = 3;
1936 prefslot_begin = 0; prefslot_end = 3;
1941 prefslot_begin = 0; prefslot_end = 7;
1946 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1947 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build a 16-byte shufb mask that routes the requested element's bytes
// into the preferred slot; the remaining bytes repeat that pattern.
1949 unsigned int ShufBytes[16];
1950 for (int i = 0; i < 16; ++i) {
1951 // zero fill uppper part of preferred slot, don't care about the
1953 unsigned int mask_val;
1954 if (i <= prefslot_end) {
1956 ((i < prefslot_begin)
1958 : elt_byte + (i - prefslot_begin));
1960 ShufBytes[i] = mask_val;
1962 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
// Pack the 16 control bytes into four big-endian i32 words.
1965 SDValue ShufMask[4];
1966 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1967 unsigned bidx = i * 4;
1968 unsigned int bits = ((ShufBytes[bidx] << 24) |
1969 (ShufBytes[bidx+1] << 16) |
1970 (ShufBytes[bidx+2] << 8) |
1972 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1975 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1977 sizeof(ShufMask) / sizeof(ShufMask[0]));
1979 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
1980 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
1981 N, N, ShufMaskVec));
1983 // Variable index: Rotate the requested element into slot 0, then replicate
1984 // slot 0 across the vector
1985 MVT VecVT = N.getValueType();
1986 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
1987 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
1991 // Make life easier by making sure the index is zero-extended to i32
1992 if (Elt.getValueType() != MVT::i32)
1993 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
1995 // Scale the index to a bit/byte shift quantity
// 16 bytes / num elements = bytes per element; logBase2 gives the shift.
1997 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
1998 unsigned scaleShift = scaleFactor.logBase2();
2001 if (scaleShift > 0) {
2002 // Scale the shift factor:
2003 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2004 DAG.getConstant(scaleShift, MVT::i32));
// Shift the requested element down to byte 0 of the quadword.
2007 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2009 // Replicate the bytes starting at byte 0 across the entire vector (for
2010 // consistency with the notion of a unified register set)
// Each arm builds a splat shufb mask of the right element width (case
// labels elided): 0x00.. for i8, 0x0001.. for i16, etc.
2013 switch (VT.getSimpleVT()) {
2015 cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2019 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2020 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2025 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2026 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2032 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2033 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2039 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2040 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2041 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2042 loFactor, hiFactor);
2047 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2048 DAG.getNode(SPUISD::SHUFB, VecVT,
2049 vecShift, vecShift, replicate));
// Lower ISD::INSERT_VECTOR_ELT: merge a scalar element into a vector lane.
// Only a constant lane index is supported; the insertion is performed with a
// SHUFB whose control mask comes from SPUISD::SHUFFLE_MASK.
2055 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2056 SDValue VecOp = Op.getOperand(0);
2057 SDValue ValOp = Op.getOperand(1);
2058 SDValue IdxOp = Op.getOperand(2);
2059 MVT VT = Op.getValueType();
// A variable index cannot be handled by this path.
2061 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2062 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2064 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2065 // Use $sp ($1) because it's always 16-byte aligned and it's available:
// NOTE(review): the ($sp + index) address only parameterizes the generated
// shuffle mask — no load from that address appears here.
2066 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2067 DAG.getRegister(SPU::R1, PtrVT),
2068 DAG.getConstant(CN->getSExtValue(), PtrVT));
2069 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
// Splat the scalar into a vector, then shuffle it together with the
// original vector under the generated mask.
2072 DAG.getNode(SPUISD::SHUFB, VT,
2073 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2075 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
// Lower i8 arithmetic (SPU has no native 8-bit ALU ops): each operation is
// widened to i16, performed there, and the result truncated back to i8.
// NOTE(review): the case labels of the enclosing switch are elided in this
// listing; the per-case intent below is inferred from the surviving comments
// and extension choices — confirm against the full file.
2080 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2081 const TargetLowering &TLI)
2083 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2084 MVT ShiftVT = TLI.getShiftAmountTy();
2086 assert(Op.getValueType() == MVT::i8);
// Unknown opcodes are a hard error — every caller must be i8 math.
2089 assert(0 && "Unhandled i8 math operator");
2093 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2095 SDValue N1 = Op.getOperand(1);
2096 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2097 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2098 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2099 DAG.getNode(Opc, MVT::i16, N0, N1));
2104 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2106 SDValue N1 = Op.getOperand(1);
2107 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2108 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2109 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2110 DAG.getNode(Opc, MVT::i16, N0, N1));
// Rotate-style case: the value is zero-extended and replicated into both
// halves of an i16 (N0 | (N0 << 8)) so a 16-bit rotate/shift of ExpandArg
// behaves like an 8-bit rotate of the original value.
2114 SDValue N1 = Op.getOperand(1);
2116 N0 = (N0.getOpcode() != ISD::Constant
2117 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2118 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
// Shift amount is normalized to the target's shift-amount type.
2120 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2123 N1 = (N1.getOpcode() != ISD::Constant
2124 ? DAG.getNode(N1Opc, ShiftVT, N1)
2125 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2126 TLI.getShiftAmountTy()));
2128 DAG.getNode(ISD::OR, MVT::i16, N0,
2129 DAG.getNode(ISD::SHL, MVT::i16,
2130 N0, DAG.getConstant(8, MVT::i32)));
2131 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2132 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Logical-shift case: zero-extend the value so vacated bits are zero.
2136 SDValue N1 = Op.getOperand(1);
2138 N0 = (N0.getOpcode() != ISD::Constant
2139 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2140 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2142 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2145 N1 = (N1.getOpcode() != ISD::Constant
2146 ? DAG.getNode(N1Opc, ShiftVT, N1)
2147 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
2148 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2149 DAG.getNode(Opc, MVT::i16, N0, N1));
// Arithmetic-shift case: sign-extend so the sign bit is replicated.
2152 SDValue N1 = Op.getOperand(1);
2154 N0 = (N0.getOpcode() != ISD::Constant
2155 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2156 : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
2158 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2161 N1 = (N1.getOpcode() != ISD::Constant
2162 ? DAG.getNode(N1Opc, ShiftVT, N1)
2163 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2165 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2166 DAG.getNode(Opc, MVT::i16, N0, N1));
// Two-operand i16-promoted case (presumably multiply); both operands are
// sign-extended to i16 before the operation.
2169 SDValue N1 = Op.getOperand(1);
2171 N0 = (N0.getOpcode() != ISD::Constant
2172 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2173 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2175 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2176 N1 = (N1.getOpcode() != ISD::Constant
2177 ? DAG.getNode(N1Opc, MVT::i16, N1)
2178 : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
2180 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2181 DAG.getNode(Opc, MVT::i16, N0, N1));
2189 //! Generate the carry-generate shuffle mask.
2190 SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
2191 SmallVector<SDValue, 16> ShufBytes;
2193 // Create the shuffle mask for "rotating" the carry up one register slot
2194 // once the carry is generated.
// Selector bytes 0x04050607 / 0x0c0d0e0f pick words 1 and 3 of the source;
// the 0x80 bytes fill the remaining slots with zero (SHUFB's constant-0x00
// selector range — see the SPU ISA SHUFB description).
2195 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2196 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2197 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2198 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2200 return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2201 &ShufBytes[0], ShufBytes.size());
2204 //! Generate the borrow-generate shuffle mask
2205 SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
2206 SmallVector<SDValue, 16> ShufBytes;
2208 // Create the shuffle mask for "rotating" the borrow up one register slot
2209 // once the borrow is generated.
// Same word selection as the carry mask, but the filler bytes are 0xC0:
// SHUFB's constant-0xFF selector range, so unused slots become all-ones
// (the borrow convention) instead of zero.
2210 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2211 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2212 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2213 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2215 return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2216 &ShufBytes[0], ShufBytes.size());
2219 //! Lower byte immediate operations for v16i8 vectors:
// If one operand of a v16i8 logical op is a constant splat, rebuild the
// splat out of i8 target constants so the instruction selector can match
// the byte-immediate forms (ANDBI/ORBI/XORBI).
2221 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2224 MVT VT = Op.getValueType();
// Assume the constant is operand 0; if not, look through a BIT_CONVERT
// and/or swap so ConstVec names the constant and Arg the other operand.
2226 ConstVec = Op.getOperand(0);
2227 Arg = Op.getOperand(1);
2228 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2229 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2230 ConstVec = ConstVec.getOperand(0);
2232 ConstVec = Op.getOperand(1);
2233 Arg = Op.getOperand(0);
2234 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2235 ConstVec = ConstVec.getOperand(0);
2240 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2241 uint64_t VectorBits[2];
2242 uint64_t UndefBits[2];
2243 uint64_t SplatBits, SplatUndef;
// Only rewrite when the build_vector is a genuine constant splat.
2246 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2247 && isConstantSplat(VectorBits, UndefBits,
2248 VT.getVectorElementType().getSizeInBits(),
2249 SplatBits, SplatUndef, SplatSize)) {
// Truncate the splat value to a byte; it is replicated across all lanes.
2251 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2252 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2254 // Turn the BUILD_VECTOR into a set of target constants:
2255 for (size_t i = 0; i < tcVecSize; ++i)
2258 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2259 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2262 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2263 // lowered. Return the operation, rather than a null SDValue.
2267 //! Custom lowering for CTPOP (count population)
2269 Custom lowering code that counts the number ones in the input
2270 operand. SPU has such an instruction, but it counts the number of
2271 ones per byte, which then have to be accumulated.
// Strategy: splat the scalar into a vector, apply SPUISD::CNTB (per-byte
// popcount), extract the preferred slot, then fold the per-byte counts
// together with shift/add steps sized to the scalar width.
2273 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2274 MVT VT = Op.getValueType();
2275 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2277 switch (VT.getSimpleVT()) {
2279 assert(false && "Invalid value type!");
// i8 case: a single byte — CNTB's per-byte result already is the answer.
2281 SDValue N = Op.getOperand(0);
2282 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2284 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2285 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2287 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16 case: add the two byte counts (value + value>>8) and mask to 0x0f.
2291 MachineFunction &MF = DAG.getMachineFunction();
2292 MachineRegisterInfo &RegInfo = MF.getRegInfo();
// A virtual register pins the intermediate so it can be reused twice.
2294 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2296 SDValue N = Op.getOperand(0);
2297 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2298 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2299 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2301 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2302 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2304 // CNTB_result becomes the chain to which all of the virtual registers
2305 // CNTB_reg, SUM1_reg become associated:
2306 SDValue CNTB_result =
2307 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2309 SDValue CNTB_rescopy =
2310 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2312 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2314 return DAG.getNode(ISD::AND, MVT::i16,
2315 DAG.getNode(ISD::ADD, MVT::i16,
2316 DAG.getNode(ISD::SRL, MVT::i16,
// i32 case: two shift/add reduction steps (>>16 then >>8), masked to 0xff.
2323 MachineFunction &MF = DAG.getMachineFunction();
2324 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2326 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2327 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2329 SDValue N = Op.getOperand(0);
2330 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2331 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2332 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2333 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2335 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2336 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2338 // CNTB_result becomes the chain to which all of the virtual registers
2339 // CNTB_reg, SUM1_reg become associated:
2340 SDValue CNTB_result =
2341 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2343 SDValue CNTB_rescopy =
2344 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// First reduction: fold the upper halfword's counts into the lower.
2347 DAG.getNode(ISD::SRL, MVT::i32,
2348 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2351 DAG.getNode(ISD::ADD, MVT::i32,
2352 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2354 SDValue Sum1_rescopy =
2355 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Second reduction: fold the remaining byte pair, then mask to one byte.
2358 DAG.getNode(ISD::SRL, MVT::i32,
2359 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2362 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2363 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2365 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2375 //! Lower ISD::SELECT_CC
2377 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2380 \note Need to revisit this in the future: if the code path through the true
2381 and false value computations is longer than the latency of a branch (6
2382 cycles), then it would be more advantageous to branch and insert a new basic
2383 block and branch on the condition. However, this code does not make that
2384 assumption, given the simplistic uses so far.
2387 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2388 const TargetLowering &TLI) {
2389 MVT VT = Op.getValueType();
2390 SDValue lhs = Op.getOperand(0);
2391 SDValue rhs = Op.getOperand(1);
2392 SDValue trueval = Op.getOperand(2);
2393 SDValue falseval = Op.getOperand(3);
2394 SDValue condition = Op.getOperand(4);
2396 // NOTE: SELB's arguments: $rA, $rB, $mask
2398 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2399 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2400 // condition was true and 0s where the condition was false. Hence, the
2401 // arguments to SELB get reversed.
2403 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2404 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2405 // with another "cannot select select_cc" assert:
// Materialize the comparison as a mask, then bit-select between the two
// values; falseval/trueval are deliberately swapped (see note above).
2407 SDValue compare = DAG.getNode(ISD::SETCC,
2408 TLI.getSetCCResultType(Op.getValueType()),
2409 lhs, rhs, condition);
2410 return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
2413 //! Custom lower ISD::TRUNCATE
// Only the i128 -> i64 truncation is custom-lowered: a SHUFB picks the
// least-significant doubleword of the quadword. All other truncates are
// returned unchanged for the default expansion.
2414 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2416 MVT VT = Op.getValueType();
2417 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2418 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2420 SDValue Op0 = Op.getOperand(0);
2421 MVT Op0VT = Op0.getValueType();
2422 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2424 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2425 // Create shuffle mask, least significant doubleword of quadword
2426 unsigned maskHigh = 0x08090a0b;
2427 unsigned maskLow = 0x0c0d0e0f;
2428 // Use a shuffle to perform the truncation
// The byte pattern (bytes 8..15) is repeated so both doublewords of the
// result hold the low half of the source.
2429 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2430 DAG.getConstant(maskHigh, MVT::i32),
2431 DAG.getConstant(maskLow, MVT::i32),
2432 DAG.getConstant(maskHigh, MVT::i32),
2433 DAG.getConstant(maskLow, MVT::i32));
2436 SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2438 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2439 PromoteScalar, PromoteScalar, shufMask);
// Bitcast back to the narrow vector type and extract the preferred slot.
2441 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2442 DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
2445 return SDValue(); // Leave the truncate unmolested
2448 //! Custom (target-specific) lowering entry point
2450 This is where LLVM's DAG selection process calls to do target-specific
// Dispatches each custom-marked opcode to its dedicated Lower* helper.
// Unknown opcodes fall into the diagnostic dump below.
2454 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2456 unsigned Opc = (unsigned) Op.getOpcode();
2457 MVT VT = Op.getValueType();
// Diagnostic path for opcodes marked Custom but not handled here.
2461 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2462 cerr << "Op.getOpcode() = " << Opc << "\n";
2463 cerr << "*Op.getNode():\n";
2464 Op.getNode()->dump();
// Memory operations:
2471 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2473 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2474 case ISD::ConstantPool:
2475 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2476 case ISD::GlobalAddress:
2477 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2478 case ISD::JumpTable:
2479 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2481 return LowerConstant(Op, DAG);
2482 case ISD::ConstantFP:
2483 return LowerConstantFP(Op, DAG);
2484 case ISD::FORMAL_ARGUMENTS:
2485 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2487 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2489 return LowerRET(Op, DAG, getTargetMachine());
2491 // i8, i64 math ops:
2500 return LowerI8Math(Op, DAG, Opc, *this);
2504 // Vector-related lowering.
2505 case ISD::BUILD_VECTOR:
2506 return LowerBUILD_VECTOR(Op, DAG);
2507 case ISD::SCALAR_TO_VECTOR:
2508 return LowerSCALAR_TO_VECTOR(Op, DAG);
2509 case ISD::VECTOR_SHUFFLE:
2510 return LowerVECTOR_SHUFFLE(Op, DAG);
2511 case ISD::EXTRACT_VECTOR_ELT:
2512 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2513 case ISD::INSERT_VECTOR_ELT:
2514 return LowerINSERT_VECTOR_ELT(Op, DAG);
2516 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2520 return LowerByteImmed(Op, DAG);
2522 // Vector and i8 multiply:
2525 return LowerI8Math(Op, DAG, Opc, *this);
2528 return LowerCTPOP(Op, DAG);
2530 case ISD::SELECT_CC:
2531 return LowerSELECT_CC(Op, DAG, *this);
2534 return LowerTRUNCATE(Op, DAG);
// ReplaceNodeResults - replace results of an illegally-typed node with
// legal replacements. Unhandled opcodes print a diagnostic; otherwise the
// node is returned unchanged.
2540 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2541 SmallVectorImpl<SDValue>&Results,
2545 unsigned Opc = (unsigned) N->getOpcode();
2546 MVT OpVT = N->getValueType(0);
// Diagnostic path for node kinds this hook does not yet cover.
2550 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2551 cerr << "Op.getOpcode() = " << Opc << "\n";
2552 cerr << "*Op.getNode():\n";
2560 /* Otherwise, return unchanged */
2563 //===----------------------------------------------------------------------===//
2564 // Target Optimization Hooks
2565 //===----------------------------------------------------------------------===//
// PerformDAGCombine - target-specific DAG combines. Folds redundant
// SPUISD::IndirectAddr arithmetic, strips no-op extends of VEC2PREFSLOT,
// kills degenerate (zero-amount) vector shifts, and collapses
// PREFSLOT2VEC/VEC2PREFSLOT round trips. Returns an empty SDValue when
// nothing was combined.
2568 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2571 TargetMachine &TM = getTargetMachine();
2573 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2574 SelectionDAG &DAG = DCI.DAG;
2575 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2576 MVT NodeVT = N->getValueType(0); // The node's value type
2577 MVT Op0VT = Op0.getValueType(); // The first operand's result
2578 SDValue Result; // Initially, empty result
2580 switch (N->getOpcode()) {
// ISD::ADD combines involving SPUISD::IndirectAddr:
2583 SDValue Op1 = N->getOperand(1);
2585 if (Op0.getOpcode() == SPUISD::IndirectAddr
2586 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2587 // Normalize the operands to reduce repeated code
2588 SDValue IndirectArg = Op0, AddArg = Op1;
2590 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2595 if (isa<ConstantSDNode>(AddArg)) {
2596 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2597 SDValue IndOp1 = IndirectArg.getOperand(1);
2599 if (CN0->isNullValue()) {
2600 // (add (SPUindirect <arg>, <arg>), 0) ->
2601 // (SPUindirect <arg>, <arg>)
2603 #if !defined(NDEBUG)
2604 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2606 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2607 << "With: (SPUindirect <arg>, <arg>)\n";
2612 } else if (isa<ConstantSDNode>(IndOp1)) {
2613 // (add (SPUindirect <arg>, <const>), <const>) ->
2614 // (SPUindirect <arg>, <const + const>)
2615 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2616 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2617 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2619 #if !defined(NDEBUG)
2620 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2622 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2623 << "), " << CN0->getSExtValue() << ")\n"
2624 << "With: (SPUindirect <arg>, "
2625 << combinedConst << ")\n";
2629 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2630 IndirectArg, combinedValue);
2636 case ISD::SIGN_EXTEND:
2637 case ISD::ZERO_EXTEND:
2638 case ISD::ANY_EXTEND: {
2639 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2640 // (any_extend (SPUextract_elt0 <arg>)) ->
2641 // (SPUextract_elt0 <arg>)
2642 // Types must match, however...
2643 #if !defined(NDEBUG)
2644 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2645 cerr << "\nReplace: ";
2648 Op0.getNode()->dump(&DAG);
2657 case SPUISD::IndirectAddr: {
// (SPUindirect (SPUaform <addr>, 0), 0) is redundant unless large-memory
// addressing is in effect.
2658 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2659 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2660 if (CN->getZExtValue() == 0) {
2661 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2662 // (SPUaform <addr>, 0)
2664 DEBUG(cerr << "Replace: ");
2665 DEBUG(N->dump(&DAG));
2666 DEBUG(cerr << "\nWith: ");
2667 DEBUG(Op0.getNode()->dump(&DAG));
2668 DEBUG(cerr << "\n");
2672 } else if (Op0.getOpcode() == ISD::ADD) {
2673 SDValue Op1 = N->getOperand(1);
2674 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2675 // (SPUindirect (add <arg>, <arg>), 0) ->
2676 // (SPUindirect <arg>, <arg>)
2677 if (CN1->isNullValue()) {
2679 #if !defined(NDEBUG)
2680 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2682 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2683 << "With: (SPUindirect <arg>, <arg>)\n";
2687 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2688 Op0.getOperand(0), Op0.getOperand(1));
2694 case SPUISD::SHLQUAD_L_BITS:
2695 case SPUISD::SHLQUAD_L_BYTES:
2696 case SPUISD::VEC_SHL:
2697 case SPUISD::VEC_SRL:
2698 case SPUISD::VEC_SRA:
2699 case SPUISD::ROTBYTES_LEFT: {
2700 SDValue Op1 = N->getOperand(1);
2702 // Kill degenerate vector shifts:
2703 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2704 if (CN->isNullValue()) {
2710 case SPUISD::PREFSLOT2VEC: {
2711 switch (Op0.getOpcode()) {
2714 case ISD::ANY_EXTEND:
2715 case ISD::ZERO_EXTEND:
2716 case ISD::SIGN_EXTEND: {
2717 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2719 // but only if the SPUprefslot2vec and <arg> types match.
2720 SDValue Op00 = Op0.getOperand(0);
2721 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2722 SDValue Op000 = Op00.getOperand(0);
2723 if (Op000.getValueType() == NodeVT) {
2729 case SPUISD::VEC2PREFSLOT: {
2730 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2732 Result = Op0.getOperand(0);
2740 // Otherwise, return unchanged.
// Trace any successful combine under -debug.
2742 if (Result.getNode()) {
2743 DEBUG(cerr << "\nReplace.SPU: ");
2744 DEBUG(N->dump(&DAG));
2745 DEBUG(cerr << "\nWith: ");
2746 DEBUG(Result.getNode()->dump(&DAG));
2747 DEBUG(cerr << "\n");
2754 //===----------------------------------------------------------------------===//
2755 // Inline Assembly Support
2756 //===----------------------------------------------------------------------===//
2758 /// getConstraintType - Given a constraint letter, return the type of
2759 /// constraint it is for this target.
// Single-letter constraints recognized here map to register classes; all
// others defer to the base TargetLowering implementation.
2760 SPUTargetLowering::ConstraintType
2761 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2762 if (ConstraintLetter.size() == 1) {
2763 switch (ConstraintLetter[0]) {
2770 return C_RegisterClass;
2773 return TargetLowering::getConstraintType(ConstraintLetter);
// Map a single-letter inline-asm constraint (GCC RS6000-style letters) to
// the SPU register class that satisfies it for the requested value type;
// unrecognized constraints fall through to the base class.
2776 std::pair<unsigned, const TargetRegisterClass*>
2777 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2780 if (Constraint.size() == 1) {
2781 // GCC RS6000 Constraint Letters
2782 switch (Constraint[0]) {
// Integer constraints: pick the 64- or 32-bit class by VT.
2786 return std::make_pair(0U, SPU::R64CRegisterClass);
2787 return std::make_pair(0U, SPU::R32CRegisterClass);
// Floating-point constraints: single vs. double precision classes.
2790 return std::make_pair(0U, SPU::R32FPRegisterClass);
2791 else if (VT == MVT::f64)
2792 return std::make_pair(0U, SPU::R64FPRegisterClass);
// General-purpose fallback class.
2795 return std::make_pair(0U, SPU::GPRCRegisterClass);
2799 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2802 //! Compute used/known bits for a SPU operand
2804 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2808 const SelectionDAG &DAG,
2809 unsigned Depth ) const {
2811 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2814 switch (Op.getOpcode()) {
// Default: nothing is known about SPU-specific nodes.
2816 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
// PREFSLOT2VEC: bits outside the source operand's integer width are known.
// NOTE(review): OR-ing InMask into KnownOne claims all in-range bits are
// one, which looks overly strong — verify against the original file.
2826 case SPUISD::PREFSLOT2VEC: {
2827 SDValue Op0 = Op.getOperand(0);
2828 MVT Op0VT = Op0.getValueType();
2829 unsigned Op0VTBits = Op0VT.getSizeInBits();
2830 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2831 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2832 KnownOne |= APInt(Op0VTBits, InMask, false);
// LDRESULT / VEC2PREFSLOT: same masking, keyed on the result type.
2836 case SPUISD::LDRESULT:
2837 case SPUISD::VEC2PREFSLOT: {
2838 MVT OpVT = Op.getValueType();
2839 unsigned OpVTBits = OpVT.getSizeInBits();
2840 uint64_t InMask = OpVT.getIntegerVTBitMask();
2841 KnownZero |= APInt(OpVTBits, ~InMask, false);
2842 KnownOne |= APInt(OpVTBits, InMask, false);
// Shift/rotate/select nodes: no bit information is derived here.
2847 case SPUISD::SHLQUAD_L_BITS:
2848 case SPUISD::SHLQUAD_L_BYTES:
2849 case SPUISD::VEC_SHL:
2850 case SPUISD::VEC_SRL:
2851 case SPUISD::VEC_SRA:
2852 case SPUISD::VEC_ROTL:
2853 case SPUISD::VEC_ROTR:
2854 case SPUISD::ROTBYTES_LEFT:
2855 case SPUISD::SELECT_MASK:
// ComputeNumSignBitsForTargetNode - for the handled opcode(s) (labels
// elided in this listing), an i8/i16/i32 result is reported as having all
// bits equal to the sign bit (i.e. the full bit width).
2862 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2863 unsigned Depth) const {
2864 switch (Op.getOpcode()) {
2869 MVT VT = Op.getValueType();
// Types other than i8/i16/i32 take the (elided) alternate path.
2871 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
2874 return VT.getSizeInBits();
2879 // LowerAsmOperandForConstraint
// No SPU-specific constraint-letter handling yet: delegate everything to
// the generic TargetLowering implementation.
2881 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
2882 char ConstraintLetter,
2884 std::vector<SDValue> &Ops,
2885 SelectionDAG &DAG) const {
2886 // Default, for the time being, to the base class handler
2887 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
2891 /// isLegalAddressImmediate - Return true if the integer value can be used
2892 /// as the offset of the target addressing mode.
2893 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
2894 const Type *Ty) const {
2895 // SPU's addresses are 256K:
// Accepts the open-ish range (-2^18, 2^18 - 1); the type is not consulted.
2896 return (V > -(1 << 18) && V < (1 << 18) - 1);
// GlobalValue overload — body elided in this listing; presumably rejects
// global-value immediates (TODO confirm against the full file).
2899 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2904 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
2905 // The SPU target isn't yet aware of offsets.