2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CallingConv.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/Constants.h"
28 #include "llvm/Function.h"
29 #include "llvm/Intrinsics.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetOptions.h"
38 // Used in getTargetNodeName() below
// Lazily-populated opcode -> printable-name table; filled on the first call
// to SPUTargetLowering::getTargetNodeName() (see below).
40 std::map<unsigned, const char *> node_names;
42 //! MVT mapping to useful data for Cell SPU
43 struct valtype_map_s {
  // Byte offset of this value type's "preferred slot" within a 16-byte
  // quadword register; used when rotating loaded data into place.
45 const int prefslot_byte;
48 const valtype_map_s valtype_map[] = {
// Entry count derived from the array itself so the two stay in sync.
59 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear scan of valtype_map for the entry describing VT.
// Returns a pointer into valtype_map, or null (0) when VT has no entry;
// the null path emits a diagnostic on cerr before returning.
61 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
62 const valtype_map_s *retval = 0;
64 for (size_t i = 0; i < n_valtype_map; ++i) {
65 if (valtype_map[i].valtype == VT) {
66 retval = valtype_map + i;
73 cerr << "getValueTypeMapEntry returns NULL for "
83 //! Expand a library call into an actual call DAG node
86 This code is taken from SelectionDAGLegalize, since it is not exposed as
87 part of the LLVM SelectionDAG API.
// Builds a TargetLowering::LowerCallTo() call to the runtime routine named
// by LC. Each operand of Op becomes one call argument, sign- or
// zero-extended per isSigned; the result value of the call is returned.
// NOTE(review): the Hi out-parameter is not assigned in the visible code —
// confirm against the elided portion before relying on it.
91 ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
92 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
93 // The input chain to this libcall is the entry node of the function.
94 // Legalizing the call will automatically add the previous call to the
96 SDValue InChain = DAG.getEntryNode();
98 TargetLowering::ArgListTy Args;
99 TargetLowering::ArgListEntry Entry;
100 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
101 MVT ArgVT = Op.getOperand(i).getValueType();
102 const Type *ArgTy = ArgVT.getTypeForMVT();
103 Entry.Node = Op.getOperand(i);
105 Entry.isSExt = isSigned;
106 Entry.isZExt = !isSigned;
107 Args.push_back(Entry);
// The callee is the libcall's external symbol, looked up by RTLIB id.
109 SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
112 // Splice the libcall in wherever FindInputOutputChains tells us to.
113 const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
114 std::pair<SDValue, SDValue> CallInfo =
115 TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
116 CallingConv::C, false, Callee, Args, DAG,
// CallInfo = (return value, chain); only the value is returned here.
119 return CallInfo.first;
//! Constructor: configures the Cell SPU lowering policy.
//
// Registers the SPU scalar and vector register classes, declares which
// ISD operations are Legal/Custom/Expand/Promote per value type, installs
// SPU-specific libcall names and DAG-combine hooks, and finally computes
// derived register properties and the scheduling preference.
123 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
124 : TargetLowering(TM),
127 // Fold away setcc operations if possible.
130 // Use _setjmp/_longjmp instead of setjmp/longjmp.
131 setUseUnderscoreSetJmp(true);
132 setUseUnderscoreLongJmp(true);
134 // Set RTLIB libcall names as used by SPU:
135 setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
137 // Set up the SPU's register classes:
138 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
139 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
140 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
141 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
142 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
143 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
144 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
146 // SPU has no sign or zero extended loads for i1, i8, i16:
147 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
148 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
149 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
151 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
152 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
154 // SPU constant load actions are custom lowered:
155 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
156 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
158 // SPU's loads and stores have to be custom lowered:
// Walk all integer types i8..i128; truncating stores to any narrower
// integer type are expanded.
159 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
161 MVT VT = (MVT::SimpleValueType)sctype;
163 setOperationAction(ISD::LOAD, VT, Custom);
164 setOperationAction(ISD::STORE, VT, Custom);
165 setLoadExtAction(ISD::EXTLOAD, VT, Custom);
166 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
167 setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
169 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
170 MVT StoreVT = (MVT::SimpleValueType) stype;
171 setTruncStoreAction(VT, StoreVT, Expand);
// Same treatment for the floating-point types f32..f64.
175 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
177 MVT VT = (MVT::SimpleValueType) sctype;
179 setOperationAction(ISD::LOAD, VT, Custom);
180 setOperationAction(ISD::STORE, VT, Custom);
182 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
183 MVT StoreVT = (MVT::SimpleValueType) stype;
184 setTruncStoreAction(VT, StoreVT, Expand);
188 // Expand the jumptable branches
189 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
190 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
192 // Custom lower SELECT_CC for most cases, but expand by default
193 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
194 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
195 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
196 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
197 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
199 // SPU has no intrinsics for these particular operations:
200 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
202 // SPU has no SREM/UREM instructions
203 setOperationAction(ISD::SREM, MVT::i32, Expand);
204 setOperationAction(ISD::UREM, MVT::i32, Expand);
205 setOperationAction(ISD::SREM, MVT::i64, Expand);
206 setOperationAction(ISD::UREM, MVT::i64, Expand);
208 // We don't support sin/cos/sqrt/fmod
209 setOperationAction(ISD::FSIN , MVT::f64, Expand);
210 setOperationAction(ISD::FCOS , MVT::f64, Expand);
211 setOperationAction(ISD::FREM , MVT::f64, Expand);
212 setOperationAction(ISD::FSIN , MVT::f32, Expand);
213 setOperationAction(ISD::FCOS , MVT::f32, Expand);
214 setOperationAction(ISD::FREM , MVT::f32, Expand);
216 // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
218 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
219 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
221 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
222 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
224 // SPU can do rotate right and left, so legalize it... but customize for i8
225 // because instructions don't exist.
227 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
229 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
230 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
231 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
233 setOperationAction(ISD::ROTL, MVT::i32, Legal);
234 setOperationAction(ISD::ROTL, MVT::i16, Legal);
235 setOperationAction(ISD::ROTL, MVT::i8, Custom);
237 // SPU has no native version of shift left/right for i8
238 setOperationAction(ISD::SHL, MVT::i8, Custom);
239 setOperationAction(ISD::SRL, MVT::i8, Custom);
240 setOperationAction(ISD::SRA, MVT::i8, Custom);
242 // Make these operations legal and handle them during instruction selection:
243 setOperationAction(ISD::SHL, MVT::i64, Legal);
244 setOperationAction(ISD::SRL, MVT::i64, Legal);
245 setOperationAction(ISD::SRA, MVT::i64, Legal);
247 // Custom lower i8, i32 and i64 multiplications
// NOTE(review): comment says i32/i64 MUL are custom lowered, but the
// actions below are Legal — confirm which is intended.
248 setOperationAction(ISD::MUL, MVT::i8, Custom);
249 setOperationAction(ISD::MUL, MVT::i32, Legal);
250 setOperationAction(ISD::MUL, MVT::i64, Legal);
252 // Need to custom handle (some) common i8, i64 math ops
253 setOperationAction(ISD::ADD, MVT::i8, Custom);
254 setOperationAction(ISD::ADD, MVT::i64, Legal);
255 setOperationAction(ISD::SUB, MVT::i8, Custom);
256 setOperationAction(ISD::SUB, MVT::i64, Legal);
258 // SPU does not have BSWAP. It does have i32 support CTLZ.
259 // CTPOP has to be custom lowered.
260 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
261 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
263 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
264 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
265 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
266 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
268 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
269 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
271 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
273 // SPU has a version of select that implements (a&~c)|(b&c), just like
274 // select ought to work:
275 setOperationAction(ISD::SELECT, MVT::i8, Legal);
276 setOperationAction(ISD::SELECT, MVT::i16, Legal);
277 setOperationAction(ISD::SELECT, MVT::i32, Legal);
278 setOperationAction(ISD::SELECT, MVT::i64, Legal);
280 setOperationAction(ISD::SETCC, MVT::i8, Legal);
281 setOperationAction(ISD::SETCC, MVT::i16, Legal);
282 setOperationAction(ISD::SETCC, MVT::i32, Legal);
283 setOperationAction(ISD::SETCC, MVT::i64, Legal);
284 setOperationAction(ISD::SETCC, MVT::f64, Custom);
286 // Custom lower i128 -> i64 truncates
287 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
289 // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
290 // to expand to a libcall, hence the custom lowering:
291 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
292 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
294 // FDIV on SPU requires custom lowering
295 setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
297 // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
298 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
299 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
300 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
301 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
302 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
303 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
304 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
305 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
307 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
308 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
309 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
310 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
312 // We cannot sextinreg(i1). Expand to shifts.
313 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
315 // Support label based line numbers.
316 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
317 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
319 // We want to legalize GlobalAddress and ConstantPool nodes into the
320 // appropriate instructions to materialize the address.
321 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
323 MVT VT = (MVT::SimpleValueType)sctype;
325 setOperationAction(ISD::GlobalAddress, VT, Custom);
326 setOperationAction(ISD::ConstantPool, VT, Custom);
327 setOperationAction(ISD::JumpTable, VT, Custom);
330 // RET must be custom lowered, to meet ABI requirements
331 setOperationAction(ISD::RET, MVT::Other, Custom);
333 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
334 setOperationAction(ISD::VASTART , MVT::Other, Custom);
336 // Use the default implementation.
337 setOperationAction(ISD::VAARG , MVT::Other, Expand);
338 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
339 setOperationAction(ISD::VAEND , MVT::Other, Expand);
340 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
341 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
342 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
343 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
345 // Cell SPU has instructions for converting between i64 and fp.
346 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
347 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
349 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
// NOTE(review): this overrides the earlier Custom action for
// (FP_TO_UINT, i32) — the later setting wins; confirm this is intended.
350 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
352 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
353 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
355 // First set operation action for all vector types to expand. Then we
356 // will selectively turn on ones that can be effectively codegen'd.
357 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
358 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
359 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
360 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
361 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
362 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
364 // "Odd size" vector classes that we're willing to support:
365 addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
367 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
368 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
369 MVT VT = (MVT::SimpleValueType)i;
371 // add/sub are legal for all supported vector VT's.
372 setOperationAction(ISD::ADD, VT, Legal);
373 setOperationAction(ISD::SUB, VT, Legal);
374 // mul has to be custom lowered.
// NOTE(review): action below is Legal despite the comment — confirm.
375 setOperationAction(ISD::MUL, VT, Legal);
377 setOperationAction(ISD::AND, VT, Legal);
378 setOperationAction(ISD::OR, VT, Legal);
379 setOperationAction(ISD::XOR, VT, Legal);
380 setOperationAction(ISD::LOAD, VT, Legal);
381 setOperationAction(ISD::SELECT, VT, Legal);
382 setOperationAction(ISD::STORE, VT, Legal);
384 // These operations need to be expanded:
385 setOperationAction(ISD::SDIV, VT, Expand);
386 setOperationAction(ISD::SREM, VT, Expand);
387 setOperationAction(ISD::UDIV, VT, Expand);
388 setOperationAction(ISD::UREM, VT, Expand);
390 // Custom lower build_vector, constant pool spills, insert and
391 // extract vector elements:
392 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
393 setOperationAction(ISD::ConstantPool, VT, Custom);
394 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
395 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
396 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
397 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
400 setOperationAction(ISD::AND, MVT::v16i8, Custom);
401 setOperationAction(ISD::OR, MVT::v16i8, Custom);
402 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
403 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
405 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
407 setShiftAmountType(MVT::i32);
408 setBooleanContents(ZeroOrNegativeOneBooleanContent);
410 setStackPointerRegisterToSaveRestore(SPU::R1);
412 // We have target-specific dag combine patterns for the following nodes:
413 setTargetDAGCombine(ISD::ADD);
414 setTargetDAGCombine(ISD::ZERO_EXTEND);
415 setTargetDAGCombine(ISD::SIGN_EXTEND);
416 setTargetDAGCombine(ISD::ANY_EXTEND);
// Derive register properties from everything registered above.
418 computeRegisterProperties();
420 // Set pre-RA register scheduler default to BURR, which produces slightly
421 // better code than the default (could also be TDRR, but TargetLowering.h
422 // needs a mod to support that model):
423 setSchedulingPreference(SchedulingForRegPressure);
//! Return a printable name for an SPUISD target node opcode.
//
// Lazily fills the file-scope node_names map on the first call, then looks
// the opcode up; returns 0 when the opcode is unknown.
// NOTE(review): the lazy initialization mutates a global std::map with no
// synchronization — not thread-safe if called concurrently.
427 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
429 if (node_names.empty()) {
430 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
431 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
432 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
433 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
434 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
435 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
436 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
437 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
438 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
439 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
440 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
441 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
442 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
443 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
444 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
445 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
446 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
447 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
448 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
449 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
450 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
451 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
452 "SPUISD::ROTBYTES_LEFT_BITS";
453 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
454 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
455 node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
456 node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
457 node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
460 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
462 return ((i != node_names.end()) ? i->second : 0);
465 //===----------------------------------------------------------------------===//
466 // Return the Cell SPU's SETCC result type
467 //===----------------------------------------------------------------------===//
469 MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
470 // i16 and i32 are valid SETCC result types
// i8/i16/i32 compares produce a mask of the same width; every other
// operand type yields an i32 condition mask.
471 return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
474 //===----------------------------------------------------------------------===//
475 // Calling convention code:
476 //===----------------------------------------------------------------------===//
478 #include "SPUGenCallingConv.inc"
480 //===----------------------------------------------------------------------===//
481 // LowerOperation implementation
482 //===----------------------------------------------------------------------===//
484 /// Custom lower loads for CellSPU
486 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
487 within a 16-byte block, we have to rotate to extract the requested element.
489 For extending loads, we also want to ensure that the following sequence is
490 emitted, e.g. for MVT::f32 extending load to MVT::f64:
494 %2 v16i8,ch = rotate %1
495 %3 v4f8, ch = bitconvert %2
496 %4 f32 = vec2perfslot %3
497 %5 f64 = fp_extend %4
// Strategy: load the enclosing 16-byte quadword as v16i8, rotate the wanted
// element into the type's preferred slot, extract it with VEC2PREFSLOT, and
// extend (sext/zext/fpext) as required. The result is wrapped in a
// SPUISD::LDRESULT node carrying (value, chain).
501 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
502 LoadSDNode *LN = cast<LoadSDNode>(Op);
503 SDValue the_chain = LN->getChain();
504 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
505 MVT InVT = LN->getMemoryVT();
506 MVT OutVT = Op.getValueType();
507 ISD::LoadExtType ExtType = LN->getExtensionType();
508 unsigned alignment = LN->getAlignment();
// NOTE(review): vtm is dereferenced below without a null check; relies on
// getValueTypeMapEntry() covering every memory VT that reaches here.
509 const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
510 DebugLoc dl = Op.getDebugLoc();
512 switch (LN->getAddressingMode()) {
513 case ISD::UNINDEXED: {
515 SDValue basePtr = LN->getBasePtr();
// Aligned (16-byte) case: the rotation amount can often be computed
// statically from the base pointer's shape.
518 if (alignment == 16) {
521 // Special cases for a known aligned load to simplify the base pointer
522 // and the rotation amount:
523 if (basePtr.getOpcode() == ISD::ADD
524 && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
525 // Known offset into basePtr
526 int64_t offset = CN->getSExtValue();
527 int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
532 rotate = DAG.getConstant(rotamt, MVT::i16);
534 // Simplify the base pointer for this case:
535 basePtr = basePtr.getOperand(0);
536 if ((offset & ~0xf) > 0) {
537 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
539 DAG.getConstant((offset & ~0xf), PtrVT));
541 } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
542 || (basePtr.getOpcode() == SPUISD::IndirectAddr
543 && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
544 && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
545 // Plain aligned a-form address: rotate into preferred slot
546 // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
547 int64_t rotamt = -vtm->prefslot_byte;
550 rotate = DAG.getConstant(rotamt, MVT::i16);
552 // Offset the rotate amount by the basePtr and the preferred slot
554 int64_t rotamt = -vtm->prefslot_byte;
557 rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
559 DAG.getConstant(rotamt, PtrVT));
562 // Unaligned load: must be more pessimistic about addressing modes:
563 if (basePtr.getOpcode() == ISD::ADD) {
564 MachineFunction &MF = DAG.getMachineFunction();
565 MachineRegisterInfo &RegInfo = MF.getRegInfo();
566 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
569 SDValue Op0 = basePtr.getOperand(0);
570 SDValue Op1 = basePtr.getOperand(1);
572 if (isa<ConstantSDNode>(Op1)) {
573 // Convert the (add <ptr>, <const>) to an indirect address contained
574 // in a register. Note that this is done because we need to avoid
575 // creating a 0(reg) d-form address due to the SPU's block loads.
576 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
577 the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
578 basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
580 // Convert the (add <arg1>, <arg2>) to an indirect address, which
581 // will likely be lowered as a reg(reg) x-form address.
582 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
585 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
587 DAG.getConstant(0, PtrVT));
590 // Offset the rotate amount by the basePtr and the preferred slot
592 rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
594 DAG.getConstant(-vtm->prefslot_byte, PtrVT));
597 // Re-emit as a v16i8 vector load
598 result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
599 LN->getSrcValue(), LN->getSrcValueOffset(),
600 LN->isVolatile(), 16);
603 the_chain = result.getValue(1);
605 // Rotate into the preferred slot:
606 result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
607 result.getValue(0), rotate);
609 // Convert the loaded v16i8 vector to the appropriate vector type
610 // specified by the operand:
611 MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
612 result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
613 DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
615 // Handle extending loads by extending the scalar result:
616 if (ExtType == ISD::SEXTLOAD) {
617 result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
618 } else if (ExtType == ISD::ZEXTLOAD) {
619 result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
620 } else if (ExtType == ISD::EXTLOAD) {
621 unsigned NewOpc = ISD::ANY_EXTEND;
623 if (OutVT.isFloatingPoint())
624 NewOpc = ISD::FP_EXTEND;
626 result = DAG.getNode(NewOpc, dl, OutVT, result);
// Package the (value, chain) pair in an SPUISD::LDRESULT node so callers
// see both results.
629 SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
630 SDValue retops[2] = {
635 result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
636 retops, sizeof(retops) / sizeof(retops[0]));
// Indexed addressing modes are not supported: report and bail out.
643 case ISD::LAST_INDEXED_MODE:
644 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
646 cerr << (unsigned) LN->getAddressingMode() << "\n";
654 /// Custom lower stores for CellSPU
656 All CellSPU stores are aligned to 16-byte boundaries, so for elements
657 within a 16-byte block, we have to generate a shuffle to insert the
658 requested element into its place, then store the resulting block.
// Strategy: load the enclosing 16-byte quadword (alignLoadVec), build a
// SHUFFLE_MASK from the insertion offset, SHUFB the scalar into place,
// and store the merged quadword back. Only ISD::UNINDEXED stores are
// handled; indexed modes report an error below.
661 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
662 StoreSDNode *SN = cast<StoreSDNode>(Op);
663 SDValue Value = SN->getValue();
664 MVT VT = Value.getValueType();
665 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
666 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
667 DebugLoc dl = Op.getDebugLoc();
668 unsigned alignment = SN->getAlignment();
670 switch (SN->getAddressingMode()) {
671 case ISD::UNINDEXED: {
672 // The vector type we really want to load from the 16-byte chunk.
673 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
674 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
676 SDValue alignLoadVec;
677 SDValue basePtr = SN->getBasePtr();
678 SDValue the_chain = SN->getChain();
679 SDValue insertEltOffs;
// Aligned (16-byte) case: insertion byte can be derived statically.
681 if (alignment == 16) {
684 // Special cases for a known aligned load to simplify the base pointer
685 // and insertion byte:
686 if (basePtr.getOpcode() == ISD::ADD
687 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
688 // Known offset into basePtr
689 int64_t offset = CN->getSExtValue();
691 // Simplify the base pointer for this case:
692 basePtr = basePtr.getOperand(0);
693 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
695 DAG.getConstant((offset & 0xf), PtrVT));
697 if ((offset & ~0xf) > 0) {
698 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
700 DAG.getConstant((offset & ~0xf), PtrVT));
703 // Otherwise, assume it's at byte 0 of basePtr
704 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
706 DAG.getConstant(0, PtrVT));
709 // Unaligned load: must be more pessimistic about addressing modes:
710 if (basePtr.getOpcode() == ISD::ADD) {
711 MachineFunction &MF = DAG.getMachineFunction();
712 MachineRegisterInfo &RegInfo = MF.getRegInfo();
713 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
716 SDValue Op0 = basePtr.getOperand(0);
717 SDValue Op1 = basePtr.getOperand(1);
719 if (isa<ConstantSDNode>(Op1)) {
720 // Convert the (add <ptr>, <const>) to an indirect address contained
721 // in a register. Note that this is done because we need to avoid
722 // creating a 0(reg) d-form address due to the SPU's block loads.
723 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
724 the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
725 basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
727 // Convert the (add <arg1>, <arg2>) to an indirect address, which
728 // will likely be lowered as a reg(reg) x-form address.
729 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
732 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
734 DAG.getConstant(0, PtrVT));
737 // Insertion point is solely determined by basePtr's contents
738 insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
740 DAG.getConstant(0, PtrVT));
743 // Re-emit as a v16i8 vector load
744 alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
745 SN->getSrcValue(), SN->getSrcValueOffset(),
746 SN->isVolatile(), 16);
749 the_chain = alignLoadVec.getValue(1);
751 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
752 SDValue theValue = SN->getValue();
756 && (theValue.getOpcode() == ISD::AssertZext
757 || theValue.getOpcode() == ISD::AssertSext)) {
758 // Drill down and get the value for zero- and sign-extended
760 theValue = theValue.getOperand(0);
763 // If the base pointer is already a D-form address, then just create
764 // a new D-form address with a slot offset and the original base pointer.
765 // Otherwise generate a D-form address with the slot offset relative
766 // to the stack pointer, which is always aligned.
768 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
769 cerr << "CellSPU LowerSTORE: basePtr = ";
770 basePtr.getNode()->dump(&DAG);
775 SDValue insertEltOp =
776 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
777 SDValue vectorizeOp =
778 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
// Merge the scalar into the loaded quadword, then store the result back.
780 result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
781 vectorizeOp, alignLoadVec,
782 DAG.getNode(ISD::BIT_CONVERT, dl,
783 MVT::v4i32, insertEltOp));
785 result = DAG.getStore(the_chain, dl, result, basePtr,
786 LN->getSrcValue(), LN->getSrcValueOffset(),
787 LN->isVolatile(), LN->getAlignment());
789 #if 0 && !defined(NDEBUG)
790 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
// Fixed mis-encoded "&curren" HTML entity that had garbled this
// declaration; currentRoot is referenced below when restoring the root.
791 const SDValue &currentRoot = DAG.getRoot();
794 cerr << "------- CellSPU:LowerStore result:\n";
797 DAG.setRoot(currentRoot);
// Indexed addressing modes are not supported: report and bail out.
// (Error message corrected: this is LowerSTORE handling a StoreSDNode,
// not LowerLOAD/LoadSDNode.)
808 case ISD::LAST_INDEXED_MODE:
809 cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
811 cerr << (unsigned) SN->getAddressingMode() << "\n";
819 //! Generate the address of a constant pool entry.
// Static relocation only: small-memory model emits an A-form address;
// large-memory model splits the address into Hi/Lo halves combined with
// SPUISD::IndirectAddr. Non-static relocation models hit the assert below.
821 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
822 MVT PtrVT = Op.getValueType();
823 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
824 Constant *C = CP->getConstVal();
825 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
826 SDValue Zero = DAG.getConstant(0, PtrVT);
827 const TargetMachine &TM = DAG.getTarget();
828 // FIXME there is no actual debug info here
829 DebugLoc dl = Op.getDebugLoc();
831 if (TM.getRelocationModel() == Reloc::Static) {
832 if (!ST->usingLargeMem()) {
833 // Just return the SDValue with the constant pool address in it.
834 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
836 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
837 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
838 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
843 "LowerConstantPool: Relocation model other than static"
848 //! Alternate entry point for generating the address of a constant pool entry
// Thin public wrapper: forwards to the file-local LowerConstantPool with the
// subtarget taken from the target machine.
850 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
851 return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
//! Generate the address of a jump table entry.
// Mirrors LowerConstantPool: static relocation only — A-form address for
// the small-memory model, Hi/Lo + IndirectAddr for large-memory. Other
// relocation models trip the assert below.
855 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
856 MVT PtrVT = Op.getValueType();
857 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
858 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
859 SDValue Zero = DAG.getConstant(0, PtrVT);
860 const TargetMachine &TM = DAG.getTarget();
861 // FIXME there is no actual debug info here
862 DebugLoc dl = Op.getDebugLoc();
864 if (TM.getRelocationModel() == Reloc::Static) {
865 if (!ST->usingLargeMem()) {
866 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
868 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
869 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
870 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
875 "LowerJumpTable: Relocation model other than static not supported.");
//! Generate the address of a global value.
// Same scheme as LowerConstantPool/LowerJumpTable: static relocation only,
// A-form address for small-memory, Hi/Lo + IndirectAddr for large-memory;
// other relocation models print the cerr diagnostic below.
880 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
881 MVT PtrVT = Op.getValueType();
882 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
883 GlobalValue *GV = GSDN->getGlobal();
884 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
885 const TargetMachine &TM = DAG.getTarget();
886 SDValue Zero = DAG.getConstant(0, PtrVT);
887 // FIXME there is no actual debug info here
888 DebugLoc dl = Op.getDebugLoc();
890 if (TM.getRelocationModel() == Reloc::Static) {
891 if (!ST->usingLargeMem()) {
892 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
894 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
895 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
896 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
899 cerr << "LowerGlobalAddress: Relocation model other than static not "
908 //! Custom lower double precision floating point constants
// An f64 constant is materialized by converting its bit pattern to an i64,
// splatting that into a v2i64 BUILD_VECTOR, bit-converting to v2f64, and
// extracting the preferred slot. Only MVT::f64 is handled here.
910 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
911 MVT VT = Op.getValueType();
912 // FIXME there is no actual debug info here
913 DebugLoc dl = Op.getDebugLoc();
915 if (VT == MVT::f64) {
916 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
919 "LowerConstantFP: Node is not ConstantFPSDNode");
// Reinterpret the double's bits as a 64-bit integer constant.
921 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
922 SDValue T = DAG.getConstant(dbits, MVT::i64);
923 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
924 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
925 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
// Lower incoming formal arguments. Arguments arrive either in the SPU's
// argument registers (see SPURegisterInfo::getArgRegs) or, once those are
// exhausted, on the stack below the minimal linkage area. For varargs
// functions, remaining argument registers are spilled to fixed stack
// slots so va_arg can walk them. Returns a MERGE_VALUES node pairing the
// argument values with the updated chain.
// NOTE(review): several case labels of the inner switch are not visible
// in this excerpt; register-class selection per type is shown partially.
932 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
934 MachineFunction &MF = DAG.getMachineFunction();
935 MachineFrameInfo *MFI = MF.getFrameInfo();
936 MachineRegisterInfo &RegInfo = MF.getRegInfo();
937 SmallVector<SDValue, 48> ArgValues;
938 SDValue Root = Op.getOperand(0);
939 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
940 DebugLoc dl = Op.getDebugLoc();
942 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
943 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Stack args start just past the minimal frame (linkage area).
945 unsigned ArgOffset = SPUFrameInfo::minStackSize();
946 unsigned ArgRegIdx = 0;
947 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
949 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
951 // Add DAG nodes to load the arguments or copy them out of registers.
952 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
953 ArgNo != e; ++ArgNo) {
954 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
955 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
958 if (ArgRegIdx < NumArgRegs) {
959 const TargetRegisterClass *ArgRegClass;
// Map the argument's value type to the SPU register class it lives in.
961 switch (ObjectVT.getSimpleVT()) {
963 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
964 << ObjectVT.getMVTString()
969 ArgRegClass = &SPU::R8CRegClass;
972 ArgRegClass = &SPU::R16CRegClass;
975 ArgRegClass = &SPU::R32CRegClass;
978 ArgRegClass = &SPU::R64CRegClass;
981 ArgRegClass = &SPU::GPRCRegClass;
984 ArgRegClass = &SPU::R32FPRegClass;
987 ArgRegClass = &SPU::R64FPRegClass;
995 ArgRegClass = &SPU::VECREGRegClass;
// Bind the incoming physical register to a fresh virtual register.
999 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1000 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001 ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
1004 // We need to load the argument to a virtual register if we determined
1005 // above that we ran out of physical registers of the appropriate type
1006 // or we're forced to do vararg
1007 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1008 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1009 ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
1010 ArgOffset += StackSlotSize;
1013 ArgValues.push_back(ArgVal);
// Thread the chain through each copy/load so ordering is preserved.
1015 Root = ArgVal.getOperand(0);
1020 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1021 // We will spill (79-3)+1 registers to the stack
1022 SmallVector<SDValue, 79-3+1> MemOps;
1024 // Create the frame slot
// Varargs: spill every remaining arg register to its own fixed slot.
1026 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1027 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1028 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1029 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1030 SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
1031 Root = Store.getOperand(0);
1032 MemOps.push_back(Store);
1034 // Increment address by stack slot size for the next stored argument
1035 ArgOffset += StackSlotSize;
1037 if (!MemOps.empty())
1038 Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1039 &MemOps[0], MemOps.size());
1042 ArgValues.push_back(Root)
1044 // Return the new list of results.
1045 return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
1046 &ArgValues[0], ArgValues.size());
1049 /// isLSAAddress - Return the immediate to use if the specified
1050 /// value is representable as a LSA address.
// If Op is a constant representable as a Local Store Address immediate
// (word-aligned, value fits a sign-extended 14-bit word offset), return
// the word-scaled (>> 2) immediate as an i32 constant node; otherwise 0.
// NOTE(review): the null-check on the dyn_cast result is not visible in
// this excerpt but is assumed to precede the dereference — confirm.
1051 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1052 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1055 int Addr = C->getZExtValue();
1056 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1057 (Addr << 14 >> 14) != Addr)
1058 return 0; // Top 14 bits have to be sext of immediate.
1060 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// Lower an outgoing call for the Cell SPU:
//  1. Partition arguments into argument registers vs. stack slots.
//  2. Emit CALLSEQ_START, stack stores, and copy-to-reg glue chain.
//  3. Rewrite the callee into an SPU address form (PC-relative BRSL for
//     defined symbols, A-form BRASL for declarations, X-form indirect in
//     large-memory mode, or a raw LSA immediate).
//  4. Emit the SPUISD::CALL node and CALLSEQ_END, then copy results out
//     of the return registers (R3, and R4 for the high half of i64
//     returned as two i32s).
// NOTE(review): many switch case labels are not visible in this excerpt,
// so the per-type grouping of the three register/stack clauses below is
// only partially shown.
1064 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1065 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1066 SDValue Chain = TheCall->getChain();
1067 SDValue Callee = TheCall->getCallee();
1068 unsigned NumOps = TheCall->getNumArgs();
1069 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1070 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1071 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1072 DebugLoc dl = TheCall->getDebugLoc();
1074 // Handy pointer type
1075 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1077 // Accumulate how many bytes are to be pushed on the stack, including the
1078 // linkage area, and parameter passing area. According to the SPU ABI,
1079 // we minimally need space for [LR] and [SP]
1080 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1082 // Set up a copy of the stack pointer for use loading and storing any
1083 // arguments that may not fit in the registers available for argument
1085 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1087 // Figure out which arguments are going to go in registers, and which in
1089 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1090 unsigned ArgRegIdx = 0;
1092 // Keep track of registers passing arguments
1093 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1094 // And the arguments passed on the stack
1095 SmallVector<SDValue, 8> MemOpChains;
1097 for (unsigned i = 0; i != NumOps; ++i) {
1098 SDValue Arg = TheCall->getArg(i);
1100 // PtrOff will be used to store the current argument to the stack if a
1101 // register cannot be found for it.
1102 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1103 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1105 switch (Arg.getValueType().getSimpleVT()) {
1106 default: assert(0 && "Unexpected ValueType for argument!");
// Register if one is left, otherwise spill to the next stack slot.
1112 if (ArgRegIdx != NumArgRegs) {
1113 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1115 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1116 ArgOffset += StackSlotSize;
1121 if (ArgRegIdx != NumArgRegs) {
1122 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1124 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1125 ArgOffset += StackSlotSize;
1134 if (ArgRegIdx != NumArgRegs) {
1135 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1137 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1138 ArgOffset += StackSlotSize;
1144 // Update number of stack bytes actually used, insert a call sequence start
1145 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1146 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1149 if (!MemOpChains.empty()) {
1150 // Adjust the stack pointer for the stack arguments.
1151 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1152 &MemOpChains[0], MemOpChains.size());
1155 // Build a sequence of copy-to-reg nodes chained together with token chain
1156 // and flag operands which copy the outgoing args into the appropriate regs.
1158 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1159 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1160 RegsToPass[i].second, InFlag);
1161 InFlag = Chain.getValue(1);
1164 SmallVector<SDValue, 8> Ops;
1165 unsigned CallOpc = SPUISD::CALL;
1167 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1168 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1169 // node so that legalize doesn't hack it.
1170 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1171 GlobalValue *GV = G->getGlobal();
1172 MVT CalleeVT = Callee.getValueType();
1173 SDValue Zero = DAG.getConstant(0, PtrVT);
1174 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1176 if (!ST->usingLargeMem()) {
1177 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1178 // style calls, otherwise, external symbols are BRASL calls. This assumes
1179 // that declared/defined symbols are in the same compilation unit and can
1180 // be reached through PC-relative jumps.
1183 // This may be an unsafe assumption for JIT and really large compilation
1185 if (GV->isDeclaration()) {
1186 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1188 Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1191 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1193 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1195 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1196 MVT CalleeVT = Callee.getValueType();
1197 SDValue Zero = DAG.getConstant(0, PtrVT);
1198 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1199 Callee.getValueType());
1201 if (!ST->usingLargeMem()) {
1202 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1204 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1206 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1207 // If this is an absolute destination address that appears to be a legal
1208 // local store address, use the munged value.
1209 Callee = SDValue(Dest, 0);
1212 Ops.push_back(Chain);
1213 Ops.push_back(Callee);
1215 // Add argument registers to the end of the list so that they are known live
1217 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1218 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1219 RegsToPass[i].second.getValueType()));
1221 if (InFlag.getNode())
1222 Ops.push_back(InFlag);
1223 // Returns a chain and a flag for retval copy to use.
1224 Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1225 &Ops[0], Ops.size());
1226 InFlag = Chain.getValue(1);
1228 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1229 DAG.getIntPtrConstant(0, true), InFlag);
1230 if (TheCall->getValueType(0) != MVT::Other)
1231 InFlag = Chain.getValue(1);
1233 SDValue ResultVals[3];
1234 unsigned NumResults = 0;
1236 // If the call has results, copy the values out of the ret val registers.
1237 switch (TheCall->getValueType(0).getSimpleVT()) {
1238 default: assert(0 && "Unexpected ret value!");
1239 case MVT::Other: break;
// An i64 legalized into two i32 results comes back in R4 (high) + R3.
1241 if (TheCall->getValueType(1) == MVT::i32) {
1242 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1243 MVT::i32, InFlag).getValue(1);
1244 ResultVals[0] = Chain.getValue(0);
1245 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1246 Chain.getValue(2)).getValue(1);
1247 ResultVals[1] = Chain.getValue(0);
1250 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1251 InFlag).getValue(1);
1252 ResultVals[0] = Chain.getValue(0);
1257 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
1258 InFlag).getValue(1);
1259 ResultVals[0] = Chain.getValue(0);
1263 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
1264 InFlag).getValue(1);
1265 ResultVals[0] = Chain.getValue(0);
1270 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
1271 InFlag).getValue(1);
1272 ResultVals[0] = Chain.getValue(0);
1281 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
1282 InFlag).getValue(1);
1283 ResultVals[0] = Chain.getValue(0);
1288 // If the function returns void, just return the chain.
1289 if (NumResults == 0)
1292 // Otherwise, merge everything together with a MERGE_VALUES node.
1293 ResultVals[NumResults++] = Chain;
1294 SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
1295 return Res.getValue(Op.getResNo());
// Lower ISD::RET: run the SPU return calling convention (RetCC_SPU) over
// the return operands, mark the return registers live-out once per
// function, copy each value into its assigned register (glued together),
// and emit SPUISD::RET_FLAG — with the glue operand when anything was
// copied, without it for a void return.
1299 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1300 SmallVector<CCValAssign, 16> RVLocs;
1301 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1302 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1303 DebugLoc dl = Op.getDebugLoc();
1304 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1305 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1307 // If this is the first return lowered for this function, add the regs to the
1308 // liveout set for the function.
1309 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1310 for (unsigned i = 0; i != RVLocs.size(); ++i)
1311 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1314 SDValue Chain = Op.getOperand(0);
1317 // Copy the result values into the output registers.
1318 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1319 CCValAssign &VA = RVLocs[i];
1320 assert(VA.isRegLoc() && "Can only return in registers!");
// Operand layout of ISD::RET: chain, then (value, signness) pairs.
1321 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1322 Op.getOperand(i*2+1), Flag);
1323 Flag = Chain.getValue(1);
1327 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1329 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1333 //===----------------------------------------------------------------------===//
1334 // Vector related lowering:
1335 //===----------------------------------------------------------------------===//
// If the BUILD_VECTOR node N is a splat — every non-undef operand is the
// same value — and that value is a ConstantSDNode, return it; the return
// paths for the non-splat/non-constant cases fall outside this excerpt.
1337 static ConstantSDNode *
1338 getVecImm(SDNode *N) {
1339 SDValue OpVal(0, 0);
1341 // Check to see if this buildvec has a single non-undef value in its elements.
1342 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1343 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1344 if (OpVal.getNode() == 0)
1345 OpVal = N->getOperand(i);
1346 else if (OpVal != N->getOperand(i))
1350 if (OpVal.getNode() != 0) {
1351 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1359 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1360 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// If N is a constant splat whose value fits in an unsigned 18-bit
// immediate, return it as a target constant of ValueType; for i64 the
// upper and lower 32-bit halves are compared (equality check is in lines
// not shown here) and the value reduced to one 32-bit half first.
1362 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1364 if (ConstantSDNode *CN = getVecImm(N)) {
1365 uint64_t Value = CN->getZExtValue();
1366 if (ValueType == MVT::i64) {
1367 uint64_t UValue = CN->getZExtValue();
1368 uint32_t upper = uint32_t(UValue >> 32);
1369 uint32_t lower = uint32_t(UValue);
1372 Value = Value >> 32;
// 0x3ffff == (1 << 18) - 1: the u18 immediate range.
1374 if (Value <= 0x3ffff)
1375 return DAG.getTargetConstant(Value, ValueType);
1381 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1382 /// and the value fits into a signed 16-bit constant, and if so, return the
// If N is a constant splat whose value fits in a signed 16-bit
// immediate, return it as a target constant of ValueType; i64 splats are
// first reduced to one 32-bit half (half-equality check not shown here).
1384 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1386 if (ConstantSDNode *CN = getVecImm(N)) {
1387 int64_t Value = CN->getSExtValue();
1388 if (ValueType == MVT::i64) {
1389 uint64_t UValue = CN->getZExtValue();
1390 uint32_t upper = uint32_t(UValue >> 32);
1391 uint32_t lower = uint32_t(UValue);
1394 Value = Value >> 32;
// Signed 16-bit range: [-32768, 32767].
1396 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1397 return DAG.getTargetConstant(Value, ValueType);
1404 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1405 /// and the value fits into a signed 10-bit constant, and if so, return the
// If N is a constant splat whose value fits in a signed 10-bit
// immediate (isS10Constant), return it as a target constant of
// ValueType; i64 splats are first reduced to one 32-bit half
// (half-equality check not shown in this excerpt).
1407 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1409 if (ConstantSDNode *CN = getVecImm(N)) {
1410 int64_t Value = CN->getSExtValue();
1411 if (ValueType == MVT::i64) {
1412 uint64_t UValue = CN->getZExtValue();
1413 uint32_t upper = uint32_t(UValue >> 32);
1414 uint32_t lower = uint32_t(UValue);
1417 Value = Value >> 32;
1419 if (isS10Constant(Value))
1420 return DAG.getTargetConstant(Value, ValueType);
1426 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1427 /// and the value fits into a signed 8-bit constant, and if so, return the
1430 /// @note: The incoming vector is v16i8 because that's the only way we can load
1431 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
// If N is a constant splat representable as an 8-bit immediate, return
// it as a target constant. For i16 the value must have identical high
// and low bytes (a byte splat viewed as halfwords); for i8 it must
// simply fit in a byte.
1433 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1435 if (ConstantSDNode *CN = getVecImm(N)) {
1436 int Value = (int) CN->getZExtValue();
1437 if (ValueType == MVT::i16
1438 && Value <= 0xffff /* truncated from uint64_t */
1439 && ((short) Value >> 8) == ((short) Value & 0xff))
1440 return DAG.getTargetConstant(Value & 0xff, ValueType);
1441 else if (ValueType == MVT::i8
1442 && (Value & 0xff) == Value)
1443 return DAG.getTargetConstant(Value, ValueType);
1449 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1450 /// and the value fits into a signed 16-bit constant, and if so, return the
// If N is a constant splat whose value lives entirely in the upper
// halfword (low 16 bits zero), return the value shifted down by 16 —
// the immediate form consumed by the SPU ILHU (immediate-load-halfword-
// upper) instruction.
1452 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1454 if (ConstantSDNode *CN = getVecImm(N)) {
1455 uint64_t Value = CN->getZExtValue();
1456 if ((ValueType == MVT::i32
1457 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1458 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1459 return DAG.getTargetConstant(Value >> 16, ValueType);
1465 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Catch-all: if N is any constant splat, return its (32-bit truncated)
// value as an i32 target constant.
1466 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1467 if (ConstantSDNode *CN = getVecImm(N)) {
1468 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1474 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
// Catch-all: if N is any constant splat, return its full 64-bit value as
// an i64 target constant.
// Fix: the previous code cast the splat value through (unsigned),
// truncating any 64-bit constant with nonzero upper-32 bits before
// rebuilding it as an MVT::i64 immediate. Pass the uint64_t through.
1475 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1476 if (ConstantSDNode *CN = getVecImm(N)) {
1477 return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
1483 //! Lower a BUILD_VECTOR instruction creatively:
// Custom-lower BUILD_VECTOR: if the node is a constant splat (per
// BuildVectorSDNode::isConstantSplat) materialize it per element type —
// f32/f64 splats via integer bit patterns bitcast back to FP vectors,
// v16i8 widened to a v8i16 splat and bitcast, and integer vectors as
// direct BUILD_VECTORs of the splatted constant. v2i64 splats defer to
// SPU::LowerV2I64Splat. Non-splats return SDValue() (expand).
1485 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1486 MVT VT = Op.getValueType();
1487 MVT EltVT = VT.getVectorElementType();
1488 DebugLoc dl = Op.getDebugLoc();
1489 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1490 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1491 unsigned minSplatBits = EltVT.getSizeInBits();
// Sub-halfword elements are still checked at 16-bit splat granularity.
1493 if (minSplatBits < 16)
1496 APInt APSplatBits, APSplatUndef;
1497 unsigned SplatBitSize;
1500 if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1501 HasAnyUndefs, minSplatBits)
1502 || minSplatBits < SplatBitSize)
1503 return SDValue(); // Wasn't a constant vector or splat exceeded min
1505 uint64_t SplatBits = APSplatBits.getZExtValue();
1506 unsigned SplatSize = SplatBitSize / 8;
1508 switch (VT.getSimpleVT()) {
1510 cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1511 << VT.getMVTString()
1516 uint32_t Value32 = uint32_t(SplatBits);
1517 assert(SplatSize == 4
1518 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1519 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1520 SDValue T = DAG.getConstant(Value32, MVT::i32);
1521 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1522 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T, T, T, T));
1526 uint64_t f64val = uint64_t(SplatBits);
1527 assert(SplatSize == 8
1528 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1529 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1530 SDValue T = DAG.getConstant(f64val, MVT::i64);
1531 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1532 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1536 // 8-bit constants have to be expanded to 16-bits
1537 unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1538 SmallVector<SDValue, 8> Ops;
1540 Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1541 return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1542 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1545 unsigned short Value16 = SplatBits;
1546 SDValue T = DAG.getConstant(Value16, EltVT);
1547 SmallVector<SDValue, 8> Ops;
1550 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1553 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1554 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1557 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1558 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
// v2i64 splats need the upper/lower-half special-casing below.
1561 return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
// Lower a v2i64 constant splat. Strategy, cheapest first:
//  1. upper half == lower half: splat one 32-bit value via IL/ILA-style
//     v4i32 BUILD_VECTOR and bitcast.
//  2. both halves are "special" shuffle byte patterns (0, ~0, sign bit):
//     emit a plain BUILD_VECTOR, which becomes a constant-pool load.
//  3. otherwise build each non-special half as a v4i32 splat and stitch
//     the halves together with a SHUFB whose mask encodes the special
//     bytes (0x80 = zero byte, 0xc0 = 0xff byte, 0xe0 = sign byte).
1571 SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1573 uint32_t upper = uint32_t(SplatVal >> 32);
1574 uint32_t lower = uint32_t(SplatVal);
1576 if (upper == lower) {
1577 // Magic constant that can be matched by IL, ILA, et. al.
1578 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1579 return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1580 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1581 Val, Val, Val, Val));
1583 bool upper_special, lower_special;
1585 // NOTE: This code creates common-case shuffle masks that can be easily
1586 // detected as common expressions. It is not attempting to create highly
1587 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1589 // Detect if the upper or lower half is a special shuffle mask pattern:
1590 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1591 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1593 // Both upper and lower are special, lower to a constant pool load:
1594 if (lower_special && upper_special) {
1595 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1596 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1597 SplatValCN, SplatValCN);
1602 SmallVector<SDValue, 16> ShufBytes;
1605 // Create lower vector if not a special pattern
1606 if (!lower_special) {
1607 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1608 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1609 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1610 LO32C, LO32C, LO32C, LO32C));
1613 // Create upper vector if not a special pattern
1614 if (!upper_special) {
1615 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1616 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1617 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1618 HI32C, HI32C, HI32C, HI32C));
1621 // If either upper or lower are special, then the two input operands are
1622 // the same (basically, one of them is a "don't care")
// Build the 16-byte SHUFB control word, one 32-bit lane at a time.
1628 for (int i = 0; i < 4; ++i) {
1630 for (int j = 0; j < 4; ++j) {
1632 bool process_upper, process_lower;
// Even lanes take the upper half, odd lanes the lower half.
1634 process_upper = (upper_special && (i & 1) == 0);
1635 process_lower = (lower_special && (i & 1) == 1);
1637 if (process_upper || process_lower) {
1638 if ((process_upper && upper == 0)
1639 || (process_lower && lower == 0))
1641 else if ((process_upper && upper == 0xffffffff)
1642 || (process_lower && lower == 0xffffffff))
1644 else if ((process_upper && upper == 0x80000000)
1645 || (process_lower && lower == 0x80000000))
// First byte of the word carries the sign bit (0xe0); rest are zero.
1646 val |= (j == 0 ? 0xe0 : 0x80);
1648 val |= i * 4 + j + ((i & 1) * 16);
1651 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1654 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1655 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1656 &ShufBytes[0], ShufBytes.size()));
1660 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1661 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1662 /// permutation vector, V3, is monotonically increasing with one "exception"
1663 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1664 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1665 /// In either case, the net result is going to eventually invoke SHUFB to
1666 /// permute/shuffle the bytes from V1 and V2.
1668 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1669 /// control word for byte/halfword/word insertion. This takes care of a single
1670 /// element move from V2 into V1.
1672 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
// Lower VECTOR_SHUFFLE (see block comment above). Three strategies:
//  - exactly one element inserted from V2 into an otherwise monotonic
//    mask: generate a SHUFFLE_MASK (C*D-family) control word and SHUFB;
//  - the mask is a pure rotation of V1: emit ROTBYTES_LEFT;
//  - otherwise: expand the element-unit mask into a byte-unit v16i8
//    BUILD_VECTOR and feed it to SHUFB directly.
// NOTE(review): several loop-body lines of the monotonic/rotate scan are
// missing from this excerpt; the classification logic is shown only in
// part — confirm against the full source before modifying.
1673 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1674 SDValue V1 = Op.getOperand(0);
1675 SDValue V2 = Op.getOperand(1);
1676 SDValue PermMask = Op.getOperand(2);
1677 DebugLoc dl = Op.getDebugLoc();
1679 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1681 // If we have a single element being moved from V1 to V2, this can be handled
1682 // using the C*[DX] compute mask instructions, but the vector elements have
1683 // to be monotonically increasing with one exception element.
1684 MVT VecVT = V1.getValueType();
1685 MVT EltVT = VecVT.getVectorElementType();
1686 unsigned EltsFromV2 = 0;
1688 unsigned V2EltIdx0 = 0;
1689 unsigned CurrElt = 0;
1690 unsigned MaxElts = VecVT.getVectorNumElements();
1691 unsigned PrevElt = 0;
1693 bool monotonic = true;
// V2EltIdx0 = index at which V2's elements start in the combined mask
// numbering (set per element width in the lines below).
1696 if (EltVT == MVT::i8) {
1698 } else if (EltVT == MVT::i16) {
1700 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1702 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1705 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1707 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1708 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1709 unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
1712 if (SrcElt >= V2EltIdx0) {
// Track the (single permitted) element sourced from V2.
1713 if (1 >= (++EltsFromV2)) {
1714 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1716 } else if (CurrElt != SrcElt) {
1724 if (PrevElt > 0 && SrcElt < MaxElts) {
1725 if ((PrevElt == SrcElt - 1)
1726 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1733 } else if (PrevElt == 0) {
1734 // First time through, need to keep track of previous element
1737 // This isn't a rotation, takes elements from vector 2
1744 if (EltsFromV2 == 1 && monotonic) {
1745 // Compute mask and shuffle
1746 MachineFunction &MF = DAG.getMachineFunction();
1747 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1748 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1749 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1750 // Initialize temporary register to 0
1751 SDValue InitTempReg =
1752 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1753 // Copy register's contents as index in SHUFFLE_MASK:
1754 SDValue ShufMaskOp =
1755 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1756 DAG.getTargetConstant(V2Elt, MVT::i32),
1757 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1758 // Use shuffle mask in SHUFB synthetic instruction:
1759 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1761 } else if (rotate) {
// Convert the rotate amount from elements to bytes.
1762 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1764 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1765 V1, DAG.getConstant(rotamt, MVT::i16));
1767 // Convert the SHUFFLE_VECTOR mask's input element units to the
1769 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1771 SmallVector<SDValue, 16> ResultMask;
1772 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1774 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1777 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1779 for (unsigned j = 0; j < BytesPerElement; ++j) {
1780 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1785 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1786 &ResultMask[0], ResultMask.size());
1787 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
// Lower SCALAR_TO_VECTOR. A constant scalar is splatted across every
// lane of a BUILD_VECTOR (which later folds to a vector load); anything
// else is moved into the vector's preferred slot via PREFSLOT2VEC.
1791 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1792 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1793 DebugLoc dl = Op.getDebugLoc();
1795 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1796 // For a constant, build the appropriate constant vector, which will
1797 // eventually simplify to a vector register load.
1799 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1800 SmallVector<SDValue, 16> ConstVecValues;
1804 // Create a constant vector:
// n_copies = lane count; VT = element type for the splatted constant.
1805 switch (Op.getValueType().getSimpleVT()) {
1806 default: assert(0 && "Unexpected constant value type in "
1807 "LowerSCALAR_TO_VECTOR");
1808 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1809 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1810 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1811 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1812 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1813 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1816 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1817 for (size_t j = 0; j < n_copies; ++j)
1818 ConstVecValues.push_back(CValue);
1820 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1821 &ConstVecValues[0], ConstVecValues.size());
1823 // Otherwise, copy the value from one register to another:
1824 switch (Op0.getValueType().getSimpleVT()) {
1825 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1832 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
// Lower EXTRACT_VECTOR_ELT.
// Constant index: element 0 of i32/i64 is already the preferred slot
// (direct VEC2PREFSLOT); otherwise build a SHUFB byte mask that moves
// the requested element into the preferred slot, then VEC2PREFSLOT.
// Variable index: scale the index to a byte shift, rotate the element to
// byte 0 with SHLQUAD_L_BYTES, replicate it across the register with a
// type-specific SHUFB pattern, and extract the preferred slot.
1839 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1840 MVT VT = Op.getValueType();
1841 SDValue N = Op.getOperand(0);
1842 SDValue Elt = Op.getOperand(1);
1843 DebugLoc dl = Op.getDebugLoc();
1846 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1847 // Constant argument:
1848 int EltNo = (int) C->getZExtValue();
// Bounds sanity checks per element width.
1851 if (VT == MVT::i8 && EltNo >= 16)
1852 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1853 else if (VT == MVT::i16 && EltNo >= 8)
1854 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1855 else if (VT == MVT::i32 && EltNo >= 4)
1856 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1857 else if (VT == MVT::i64 && EltNo >= 2)
1858 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1860 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1861 // i32 and i64: Element 0 is the preferred slot
1862 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1865 // Need to generate shuffle mask and extract:
1866 int prefslot_begin = -1, prefslot_end = -1;
1867 int elt_byte = EltNo * VT.getSizeInBits() / 8;
// Preferred-slot byte range within the 16-byte register, by type.
1869 switch (VT.getSimpleVT()) {
1871 assert(false && "Invalid value type!");
1873 prefslot_begin = prefslot_end = 3;
1877 prefslot_begin = 2; prefslot_end = 3;
1882 prefslot_begin = 0; prefslot_end = 3;
1887 prefslot_begin = 0; prefslot_end = 7;
1892 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1893 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1895 unsigned int ShufBytes[16];
1896 for (int i = 0; i < 16; ++i) {
1897 // zero fill uppper part of preferred slot, don't care about the
1899 unsigned int mask_val;
1900 if (i <= prefslot_end) {
1902 ((i < prefslot_begin)
1904 : elt_byte + (i - prefslot_begin));
1906 ShufBytes[i] = mask_val;
// Bytes past the slot just repeat the slot's pattern (don't care).
1908 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
// Pack the 16 mask bytes into four big-endian i32 lanes.
1911 SDValue ShufMask[4];
1912 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1913 unsigned bidx = i * 4;
1914 unsigned int bits = ((ShufBytes[bidx] << 24) |
1915 (ShufBytes[bidx+1] << 16) |
1916 (ShufBytes[bidx+2] << 8) |
1918 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1921 SDValue ShufMaskVec =
1922 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1923 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1925 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1926 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
1927 N, N, ShufMaskVec));
1929 // Variable index: Rotate the requested element into slot 0, then replicate
1930 // slot 0 across the vector
1931 MVT VecVT = N.getValueType();
1932 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
1933 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
1937 // Make life easier by making sure the index is zero-extended to i32
1938 if (Elt.getValueType() != MVT::i32)
1939 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
1941 // Scale the index to a bit/byte shift quantity
1943 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
1944 unsigned scaleShift = scaleFactor.logBase2();
1947 if (scaleShift > 0) {
1948 // Scale the shift factor:
1949 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
1950 DAG.getConstant(scaleShift, MVT::i32));
1953 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
1955 // Replicate the bytes starting at byte 0 across the entire vector (for
1956 // consistency with the notion of a unified register set)
1959 switch (VT.getSimpleVT()) {
1961 cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
1965 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
1966 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1967 factor, factor, factor, factor);
1971 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
1972 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1973 factor, factor, factor, factor);
1978 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
1979 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1980 factor, factor, factor, factor);
1985 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
1986 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
1987 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1988 loFactor, hiFactor, loFactor, hiFactor);
1993 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1994 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
1995 vecShift, vecShift, replicate));
// Lower ISD::INSERT_VECTOR_ELT for Cell SPU. The insertion is performed as a
// byte shuffle (SPUISD::SHUFB): the scalar is splatted via SCALAR_TO_VECTOR
// and combined with the original vector using a shuffle mask generated from a
// stack-pointer-relative address (SPUISD::SHUFFLE_MASK).
// NOTE(review): lines appear elided between the SHUFFLE_MASK computation and
// the final SHUFB/return in this view of the file — verify against the
// complete source.
2001 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2002 SDValue VecOp = Op.getOperand(0);
2003 SDValue ValOp = Op.getOperand(1);
2004 SDValue IdxOp = Op.getOperand(2);
2005 DebugLoc dl = Op.getDebugLoc();
2006 MVT VT = Op.getValueType();
// Only constant insertion indices are handled; the cast/assert enforce this.
2008 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2009 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2011 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2012 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2013 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2014 DAG.getRegister(SPU::R1, PtrVT),
2015 DAG.getConstant(CN->getSExtValue(), PtrVT));
2016 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
// Shuffle the splatted scalar against the original vector, selecting bytes
// per the generated mask (bitcast to v4i32 for the SHUFB operand).
2019 DAG.getNode(SPUISD::SHUFB, dl, VT,
2020 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2022 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
// Lower i8 arithmetic (add/sub/shift/rotate/mul) by widening the operands to
// i16, performing the operation at 16 bits, then truncating back to i8 —
// SPU has no native 8-bit arithmetic for these ops.
// NOTE(review): the enclosing switch's case labels and braces are elided in
// this view; the retained lines are the per-opcode bodies.
2027 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2028 const TargetLowering &TLI)
2030 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2031 DebugLoc dl = Op.getDebugLoc();
2032 MVT ShiftVT = TLI.getShiftAmountTy();
// This helper is only valid for i8-typed nodes.
2034 assert(Op.getValueType() == MVT::i8);
2037 assert(0 && "Unhandled i8 math operator");
2041 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2043 SDValue N1 = Op.getOperand(1);
2044 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2045 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2046 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2047 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2052 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2054 SDValue N1 = Op.getOperand(1);
2055 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2056 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2057 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2058 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
// Rotate case (presumably ISD::ROTL/ROTR — label elided): zero-extend the
// value, normalize the shift amount to ShiftVT.
2062 SDValue N1 = Op.getOperand(1);
2063 MVT N1VT = N1.getValueType();
2065 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2066 if (!N1VT.bitsEq(ShiftVT)) {
2067 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2070 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2073 // Replicate lower 8-bits into upper 8:
2075 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2076 DAG.getNode(ISD::SHL, dl, MVT::i16,
2077 N0, DAG.getConstant(8, MVT::i32)));
2079 // Truncate back down to i8
2080 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2081 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
// Logical-shift case: zero-extend and adjust shift-amount type as needed.
2085 SDValue N1 = Op.getOperand(1);
2086 MVT N1VT = N1.getValueType();
2088 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2089 if (!N1VT.bitsEq(ShiftVT)) {
2090 unsigned N1Opc = ISD::ZERO_EXTEND;
2092 if (N1.getValueType().bitsGT(ShiftVT))
2093 N1Opc = ISD::TRUNCATE;
2095 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2098 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2099 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
// Arithmetic-shift case: sign-extend so the high bits carry the sign.
2102 SDValue N1 = Op.getOperand(1);
2103 MVT N1VT = N1.getValueType();
2105 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2106 if (!N1VT.bitsEq(ShiftVT)) {
2107 unsigned N1Opc = ISD::SIGN_EXTEND;
2109 if (N1VT.bitsGT(ShiftVT))
2110 N1Opc = ISD::TRUNCATE;
2111 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2114 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2115 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
// Multiply case: widen both operands, multiply at i16, truncate.
2118 SDValue N1 = Op.getOperand(1);
2120 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2121 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2122 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2123 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2131 //! Lower byte immediate operations for v16i8 vectors:
// Lower AND/OR/XOR of a v16i8 vector with a constant splat: if the operand
// vector is a constant splat that fits in a byte, rebuild it as 16 copies of
// an i8 target constant so the instruction selector can match the SPU
// byte-immediate forms (ANDBI/ORBI/XORBI).
// NOTE(review): declarations of ConstVec/Arg and several closing braces are
// elided in this view of the file.
2133 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2136 MVT VT = Op.getValueType();
2137 DebugLoc dl = Op.getDebugLoc();
// The constant may be either operand; normalize so ConstVec holds the
// (possibly bitcast-wrapped) BUILD_VECTOR and Arg holds the other operand.
2139 ConstVec = Op.getOperand(0);
2140 Arg = Op.getOperand(1);
2141 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2142 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2143 ConstVec = ConstVec.getOperand(0);
2145 ConstVec = Op.getOperand(1);
2146 Arg = Op.getOperand(0);
2147 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2148 ConstVec = ConstVec.getOperand(0);
2153 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2154 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2155 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2157 APInt APSplatBits, APSplatUndef;
2158 unsigned SplatBitSize;
2160 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
// Only rewrite when the vector is a genuine constant splat at (at least)
// element granularity.
2162 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2163 HasAnyUndefs, minSplatBits)
2164 && minSplatBits <= SplatBitSize) {
2165 uint64_t SplatBits = APSplatBits.getZExtValue();
2166 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2168 SmallVector<SDValue, 16> tcVec;
2169 tcVec.assign(16, tc);
2170 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2171 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2175 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2176 // lowered. Return the operation, rather than a null SDValue.
2180 //! Custom lowering for CTPOP (count population)
2182 Custom lowering code that counts the number of ones in the input
2183 operand. SPU has such an instruction, but it counts the number of
2184 ones per byte, which then have to be accumulated.
// Lower ISD::CTPOP using the SPU CNTB instruction, which counts ones per
// byte; wider types accumulate the per-byte counts with shift/add sequences.
// NOTE(review): the switch case labels (presumably MVT::i8/i16/i32) and
// several intermediate declarations are elided in this view.
2186 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2187 MVT VT = Op.getValueType();
// Promote the scalar to a full 128-bit vector so CNTB can operate on it.
2188 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2189 DebugLoc dl = Op.getDebugLoc();
2191 switch (VT.getSimpleVT()) {
2193 assert(false && "Invalid value type!");
// i8 case: a single CNTB byte-count is the whole answer.
2195 SDValue N = Op.getOperand(0);
2196 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2198 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2199 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2201 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
// i16 case: CNTB then fold the two byte counts together (shift by 8, add,
// mask to 0x0f since the count of a 16-bit value is at most 16).
2205 MachineFunction &MF = DAG.getMachineFunction();
2206 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2208 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2210 SDValue N = Op.getOperand(0);
2211 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2212 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2213 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2215 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2216 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2218 // CNTB_result becomes the chain to which all of the virtual registers
2219 // CNTB_reg, SUM1_reg become associated:
2220 SDValue CNTB_result =
2221 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2223 SDValue CNTB_rescopy =
2224 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2226 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2228 return DAG.getNode(ISD::AND, dl, MVT::i16,
2229 DAG.getNode(ISD::ADD, dl, MVT::i16,
2230 DAG.getNode(ISD::SRL, dl, MVT::i16,
// i32 case: two shift/add reduction rounds (by 16 then by 8), mask to 0xff.
2237 MachineFunction &MF = DAG.getMachineFunction();
2238 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2240 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2241 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2243 SDValue N = Op.getOperand(0);
2244 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2245 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2246 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2247 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2249 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2250 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2252 // CNTB_result becomes the chain to which all of the virtual registers
2253 // CNTB_reg, SUM1_reg become associated:
2254 SDValue CNTB_result =
2255 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2257 SDValue CNTB_rescopy =
2258 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2261 DAG.getNode(ISD::SRL, dl, MVT::i32,
2262 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2266 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2267 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2269 SDValue Sum1_rescopy =
2270 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2273 DAG.getNode(ISD::SRL, dl, MVT::i32,
2274 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2277 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2278 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2280 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2290 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2292 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2293 All conversions to i64 are expanded to a libcall.
2295 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2296 SPUTargetLowering &TLI) {
2297 MVT OpVT = Op.getValueType();
2298 SDValue Op0 = Op.getOperand(0);
2299 MVT Op0VT = Op0.getValueType();
2301 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2302 || OpVT == MVT::i64) {
2303 // Convert f32 / f64 to i32 / i64 via libcall.
2305 (Op.getOpcode() == ISD::FP_TO_SINT)
2306 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2307 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2308 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2310 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2316 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2318 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2319 All conversions from i64 are expanded to a libcall.
2321 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2322 SPUTargetLowering &TLI) {
2323 MVT OpVT = Op.getValueType();
2324 SDValue Op0 = Op.getOperand(0);
2325 MVT Op0VT = Op0.getValueType();
2327 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2328 || Op0VT == MVT::i64) {
2329 // Convert i32, i64 to f64 via libcall:
2331 (Op.getOpcode() == ISD::SINT_TO_FP)
2332 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2333 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2334 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2336 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2342 //! Lower ISD::SETCC
2344 This handles MVT::f64 (double floating point) condition lowering
// Lower ISD::SETCC for MVT::f64 operands. SPU has no native f64 compare, so
// the doubles are bitcast to i64, converted from sign-magnitude to two's
// complement, and compared as integers; SETO/SETUO are answered from the lhs
// alone, and ordered predicates AND in NaN checks on both operands.
// NOTE(review): several declarations (lhsLo32, rhsLo32, result, the switch
// case labels, and the final return) are elided in this view of the file.
2346 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2347 const TargetLowering &TLI) {
2348 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2349 DebugLoc dl = Op.getDebugLoc();
2350 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2352 SDValue lhs = Op.getOperand(0);
2353 SDValue rhs = Op.getOperand(1);
2354 MVT lhsVT = lhs.getValueType();
2355 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2357 MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2358 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2359 MVT IntVT(MVT::i64);
2361 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2362 // selected to a NOP:
2363 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2365 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2366 DAG.getNode(ISD::SRL, dl, IntVT,
2367 i64lhs, DAG.getConstant(32, MVT::i32)));
// Strip the sign bit to get |high word| — used for the NaN tests below.
2368 SDValue lhsHi32abs =
2369 DAG.getNode(ISD::AND, dl, MVT::i32,
2370 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2372 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2374 // SETO and SETUO only use the lhs operand:
2375 if (CC->get() == ISD::SETO) {
2376 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2378 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2379 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2380 DAG.getSetCC(dl, ccResultVT,
2381 lhs, DAG.getConstantFP(0.0, lhsVT),
2383 DAG.getConstant(ccResultAllOnes, ccResultVT));
2384 } else if (CC->get() == ISD::SETUO) {
2385 // Evaluates to true if Op0 is [SQ]NaN
// NaN <=> exponent all ones (abs(hi) >= 0x7ff00000) and, when the exponent
// is exactly the NaN pattern, a nonzero mantissa — second compare elided.
2386 return DAG.getNode(ISD::AND, dl, ccResultVT,
2387 DAG.getSetCC(dl, ccResultVT,
2389 DAG.getConstant(0x7ff00000, MVT::i32),
2391 DAG.getSetCC(dl, ccResultVT,
2393 DAG.getConstant(0, MVT::i32),
2397 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2399 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2400 DAG.getNode(ISD::SRL, dl, IntVT,
2401 i64rhs, DAG.getConstant(32, MVT::i32)));
2403 // If a value is negative, subtract from the sign magnitude constant:
2404 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2406 // Convert the sign-magnitude representation into 2's complement:
// The select mask is the sign bit smeared across the word (SRA by 31).
2407 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2408 lhsHi32, DAG.getConstant(31, MVT::i32));
2409 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2411 DAG.getNode(ISD::SELECT, dl, IntVT,
2412 lhsSelectMask, lhsSignMag2TC, i64lhs);
2414 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2415 rhsHi32, DAG.getConstant(31, MVT::i32));
2416 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2418 DAG.getNode(ISD::SELECT, dl, IntVT,
2419 rhsSelectMask, rhsSignMag2TC, i64rhs);
// Map the FP condition code onto an equivalent integer comparison (the
// case labels are elided here; presumably SETOEQ/UEQ, SETOGT/UGT, etc.).
2423 switch (CC->get()) {
2426 compareOp = ISD::SETEQ; break;
2429 compareOp = ISD::SETGT; break;
2432 compareOp = ISD::SETGE; break;
2435 compareOp = ISD::SETLT; break;
2438 compareOp = ISD::SETLE; break;
2441 compareOp = ISD::SETNE; break;
2443 cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
2449 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2450 (ISD::CondCode) compareOp);
2452 if ((CC->get() & 0x8) == 0) {
2453 // Ordered comparison:
2454 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2455 lhs, DAG.getConstantFP(0.0, MVT::f64),
2457 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2458 rhs, DAG.getConstantFP(0.0, MVT::f64),
2460 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2462 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2468 //! Lower ISD::SELECT_CC
2470 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2473 \note Need to revisit this in the future: if the code path through the true
2474 and false value computations is longer than the latency of a branch (6
2475 cycles), then it would be more advantageous to branch and insert a new basic
2476 block and branch on the condition. However, this code does not make that
2477 assumption, given the simplistic uses so far.
2480 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2481 const TargetLowering &TLI) {
2482 MVT VT = Op.getValueType();
2483 SDValue lhs = Op.getOperand(0);
2484 SDValue rhs = Op.getOperand(1);
2485 SDValue trueval = Op.getOperand(2);
2486 SDValue falseval = Op.getOperand(3);
2487 SDValue condition = Op.getOperand(4);
2488 DebugLoc dl = Op.getDebugLoc();
2490 // NOTE: SELB's arguments: $rA, $rB, $mask
2492 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2493 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2494 // condition was true and 0s where the condition was false. Hence, the
2495 // arguments to SELB get reversed.
2497 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2498 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2499 // with another "cannot select select_cc" assert:
2501 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2502 TLI.getSetCCResultType(Op.getValueType()),
2503 lhs, rhs, condition);
2504 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2507 //! Custom lower ISD::TRUNCATE
2508 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2510 // Type to truncate to
2511 MVT VT = Op.getValueType();
2512 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2513 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2514 DebugLoc dl = Op.getDebugLoc();
2516 // Type to truncate from
2517 SDValue Op0 = Op.getOperand(0);
2518 MVT Op0VT = Op0.getValueType();
2520 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2521 // Create shuffle mask, least significant doubleword of quadword
2522 unsigned maskHigh = 0x08090a0b;
2523 unsigned maskLow = 0x0c0d0e0f;
2524 // Use a shuffle to perform the truncation
2525 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2526 DAG.getConstant(maskHigh, MVT::i32),
2527 DAG.getConstant(maskLow, MVT::i32),
2528 DAG.getConstant(maskHigh, MVT::i32),
2529 DAG.getConstant(maskLow, MVT::i32));
2531 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2532 Op0, Op0, shufMask);
2534 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2537 return SDValue(); // Leave the truncate unmolested
2540 //! Custom (target-specific) lowering entry point
2542 This is where LLVM's DAG selection process calls to do target-specific
// Target-specific custom-lowering entry point: dispatch each custom-marked
// opcode to its Lower* helper. The default branch dumps the node and
// (presumably) aborts — hitting it means a node was marked Custom without a
// handler. NOTE(review): the switch statement, many case labels, and the
// function's return-type line are elided in this view of the file.
2546 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2548 unsigned Opc = (unsigned) Op.getOpcode();
2549 MVT VT = Op.getValueType();
// Default/unhandled case: report the offending node for debugging.
2553 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2554 cerr << "Op.getOpcode() = " << Opc << "\n";
2555 cerr << "*Op.getNode():\n";
2556 Op.getNode()->dump();
// Memory operations:
2563 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2565 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2566 case ISD::ConstantPool:
2567 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2568 case ISD::GlobalAddress:
2569 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2570 case ISD::JumpTable:
2571 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2572 case ISD::ConstantFP:
2573 return LowerConstantFP(Op, DAG);
2574 case ISD::FORMAL_ARGUMENTS:
2575 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2577 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2579 return LowerRET(Op, DAG, getTargetMachine());
2581 // i8, i64 math ops:
2590 return LowerI8Math(Op, DAG, Opc, *this);
2594 case ISD::FP_TO_SINT:
2595 case ISD::FP_TO_UINT:
2596 return LowerFP_TO_INT(Op, DAG, *this);
2598 case ISD::SINT_TO_FP:
2599 case ISD::UINT_TO_FP:
2600 return LowerINT_TO_FP(Op, DAG, *this);
2602 // Vector-related lowering.
2603 case ISD::BUILD_VECTOR:
2604 return LowerBUILD_VECTOR(Op, DAG);
2605 case ISD::SCALAR_TO_VECTOR:
2606 return LowerSCALAR_TO_VECTOR(Op, DAG);
2607 case ISD::VECTOR_SHUFFLE:
2608 return LowerVECTOR_SHUFFLE(Op, DAG);
2609 case ISD::EXTRACT_VECTOR_ELT:
2610 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2611 case ISD::INSERT_VECTOR_ELT:
2612 return LowerINSERT_VECTOR_ELT(Op, DAG);
2614 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2618 return LowerByteImmed(Op, DAG);
2620 // Vector and i8 multiply:
2623 return LowerI8Math(Op, DAG, Opc, *this);
2626 return LowerCTPOP(Op, DAG);
2628 case ISD::SELECT_CC:
2629 return LowerSELECT_CC(Op, DAG, *this);
2632 return LowerSETCC(Op, DAG, *this);
2635 return LowerTRUNCATE(Op, DAG);
// Replace the results of an illegal-typed node during type legalization.
// The visible body only reports unhandled opcodes; results are otherwise
// left unchanged. NOTE(review): the switch and any handled cases are elided
// in this view of the file.
2641 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2642 SmallVectorImpl<SDValue>&Results,
2646 unsigned Opc = (unsigned) N->getOpcode();
2647 MVT OpVT = N->getValueType(0);
// Unhandled opcode: dump diagnostics to cerr.
2651 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2652 cerr << "Op.getOpcode() = " << Opc << "\n";
2653 cerr << "*Op.getNode():\n";
2661 /* Otherwise, return unchanged */
2664 //===----------------------------------------------------------------------===//
2665 // Target Optimization Hooks
2666 //===----------------------------------------------------------------------===//
// Target-specific DAG combines for SPU pseudo-ops: fold constant offsets into
// SPUindirect addressing, strip redundant extends around VEC2PREFSLOT, kill
// zero-amount vector shifts, and collapse PREFSLOT2VEC/VEC2PREFSLOT pairs.
// NOTE(review): the return-type line, several case labels, and various
// intermediate statements are elided in this view of the file.
2669 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2672 TargetMachine &TM = getTargetMachine();
2674 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2675 SelectionDAG &DAG = DCI.DAG;
2676 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2677 MVT NodeVT = N->getValueType(0); // The node's value type
2678 MVT Op0VT = Op0.getValueType(); // The first operand's result
2679 SDValue Result; // Initially, empty result
2680 DebugLoc dl = N->getDebugLoc();
2682 switch (N->getOpcode()) {
// ISD::ADD combine (case label elided): fold adds into SPUindirect.
2685 SDValue Op1 = N->getOperand(1);
2687 if (Op0.getOpcode() == SPUISD::IndirectAddr
2688 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2689 // Normalize the operands to reduce repeated code
2690 SDValue IndirectArg = Op0, AddArg = Op1;
2692 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2697 if (isa<ConstantSDNode>(AddArg)) {
2698 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2699 SDValue IndOp1 = IndirectArg.getOperand(1);
2701 if (CN0->isNullValue()) {
2702 // (add (SPUindirect <arg>, <arg>), 0) ->
2703 // (SPUindirect <arg>, <arg>)
2705 #if !defined(NDEBUG)
2706 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2708 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2709 << "With: (SPUindirect <arg>, <arg>)\n";
2714 } else if (isa<ConstantSDNode>(IndOp1)) {
2715 // (add (SPUindirect <arg>, <const>), <const>) ->
2716 // (SPUindirect <arg>, <const + const>)
2717 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2718 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2719 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2721 #if !defined(NDEBUG)
2722 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2724 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2725 << "), " << CN0->getSExtValue() << ")\n"
2726 << "With: (SPUindirect <arg>, "
2727 << combinedConst << ")\n";
2731 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2732 IndirectArg, combinedValue);
// Extension combine: an extend of VEC2PREFSLOT with matching types is a
// no-op and can be dropped.
2738 case ISD::SIGN_EXTEND:
2739 case ISD::ZERO_EXTEND:
2740 case ISD::ANY_EXTEND: {
2741 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2742 // (any_extend (SPUextract_elt0 <arg>)) ->
2743 // (SPUextract_elt0 <arg>)
2744 // Types must match, however...
2745 #if !defined(NDEBUG)
2746 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2747 cerr << "\nReplace: ";
2750 Op0.getNode()->dump(&DAG);
// SPUindirect combine: simplify zero-offset forms.
2759 case SPUISD::IndirectAddr: {
2760 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2761 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2762 if (CN != 0 && CN->getZExtValue() == 0) {
2763 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2764 // (SPUaform <addr>, 0)
2766 DEBUG(cerr << "Replace: ");
2767 DEBUG(N->dump(&DAG));
2768 DEBUG(cerr << "\nWith: ");
2769 DEBUG(Op0.getNode()->dump(&DAG));
2770 DEBUG(cerr << "\n");
2774 } else if (Op0.getOpcode() == ISD::ADD) {
2775 SDValue Op1 = N->getOperand(1);
2776 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2777 // (SPUindirect (add <arg>, <arg>), 0) ->
2778 // (SPUindirect <arg>, <arg>)
2779 if (CN1->isNullValue()) {
2781 #if !defined(NDEBUG)
2782 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2784 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2785 << "With: (SPUindirect <arg>, <arg>)\n";
2789 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2790 Op0.getOperand(0), Op0.getOperand(1));
// Shift/rotate combines: a shift by zero is the identity; return operand 0.
2796 case SPUISD::SHLQUAD_L_BITS:
2797 case SPUISD::SHLQUAD_L_BYTES:
2798 case SPUISD::VEC_SHL:
2799 case SPUISD::VEC_SRL:
2800 case SPUISD::VEC_SRA:
2801 case SPUISD::ROTBYTES_LEFT: {
2802 SDValue Op1 = N->getOperand(1);
2804 // Kill degenerate vector shifts:
2805 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2806 if (CN->isNullValue()) {
// PREFSLOT2VEC combine: collapse round-trips through the preferred slot.
2812 case SPUISD::PREFSLOT2VEC: {
2813 switch (Op0.getOpcode()) {
2816 case ISD::ANY_EXTEND:
2817 case ISD::ZERO_EXTEND:
2818 case ISD::SIGN_EXTEND: {
2819 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2821 // but only if the SPUprefslot2vec and <arg> types match.
2822 SDValue Op00 = Op0.getOperand(0);
2823 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2824 SDValue Op000 = Op00.getOperand(0);
2825 if (Op000.getValueType() == NodeVT) {
2831 case SPUISD::VEC2PREFSLOT: {
2832 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2834 Result = Op0.getOperand(0);
2842 // Otherwise, return unchanged.
2844 if (Result.getNode()) {
2845 DEBUG(cerr << "\nReplace.SPU: ");
2846 DEBUG(N->dump(&DAG));
2847 DEBUG(cerr << "\nWith: ");
2848 DEBUG(Result.getNode()->dump(&DAG));
2849 DEBUG(cerr << "\n");
2856 //===----------------------------------------------------------------------===//
2857 // Inline Assembly Support
2858 //===----------------------------------------------------------------------===//
2860 /// getConstraintType - Given a constraint letter, return the type of
2861 /// constraint it is for this target.
/// getConstraintType - Classify a single-letter inline-asm constraint for
/// this target; the visible letters (case labels elided in this view) map to
/// C_RegisterClass, everything else defers to the base TargetLowering.
2862 SPUTargetLowering::ConstraintType
2863 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2864 if (ConstraintLetter.size() == 1) {
2865 switch (ConstraintLetter[0]) {
2872 return C_RegisterClass;
2875 return TargetLowering::getConstraintType(ConstraintLetter);
/// Map a single-letter inline-asm constraint (GCC RS6000-style letters) to an
/// SPU register class, choosing by the requested value type; unrecognized
/// constraints defer to the base TargetLowering implementation.
/// NOTE(review): the case labels and some VT checks are elided in this view.
2878 std::pair<unsigned, const TargetRegisterClass*>
2879 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2882 if (Constraint.size() == 1) {
2883 // GCC RS6000 Constraint Letters
2884 switch (Constraint[0]) {
2888 return std::make_pair(0U, SPU::R64CRegisterClass);
2889 return std::make_pair(0U, SPU::R32CRegisterClass);
// Floating-point constraint: pick the FP class matching the value type.
2892 return std::make_pair(0U, SPU::R32FPRegisterClass);
2893 else if (VT == MVT::f64)
2894 return std::make_pair(0U, SPU::R64FPRegisterClass);
2897 return std::make_pair(0U, SPU::GPRCRegisterClass);
2901 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2904 //! Compute used/known bits for a SPU operand
// Compute known-zero/known-one bits for SPU-specific nodes. The visible
// cases fall through without asserting anything (no bits are claimed known);
// the commented-out line suggests an earlier all-unknown default.
// NOTE(review): parameter lines (Mask, KnownZero/KnownOne) and the closing
// cases/brace are elided in this view of the file.
2906 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2910 const SelectionDAG &DAG,
2911 unsigned Depth ) const {
2913 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2915 switch (Op.getOpcode()) {
2917 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
// SPU pseudo-ops for which no bit information is computed:
2923 case SPUISD::PREFSLOT2VEC:
2924 case SPUISD::LDRESULT:
2925 case SPUISD::VEC2PREFSLOT:
2926 case SPUISD::SHLQUAD_L_BITS:
2927 case SPUISD::SHLQUAD_L_BYTES:
2928 case SPUISD::VEC_SHL:
2929 case SPUISD::VEC_SRL:
2930 case SPUISD::VEC_SRA:
2931 case SPUISD::VEC_ROTL:
2932 case SPUISD::VEC_ROTR:
2933 case SPUISD::ROTBYTES_LEFT:
2934 case SPUISD::SELECT_MASK:
// Report the number of known sign bits for SPU-specific nodes. For the
// visible case (label elided — presumably SPUISD::SELECT_MASK or similar) an
// i8/i16/i32 result is fully sign-replicated, hence the full bit width.
2941 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2942 unsigned Depth) const {
2943 switch (Op.getOpcode()) {
2948 MVT VT = Op.getValueType();
2950 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
2953 return VT.getSizeInBits();
2958 // LowerAsmOperandForConstraint
// Lower an inline-asm operand for a target constraint letter. SPU adds no
// special handling yet; everything is forwarded to the TargetLowering base
// implementation. NOTE(review): the hasMemory parameter line and the call's
// trailing arguments are elided in this view of the file.
2960 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
2961 char ConstraintLetter,
2963 std::vector<SDValue> &Ops,
2964 SelectionDAG &DAG) const {
2965 // Default, for the time being, to the base class handler
2966 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
2970 /// isLegalAddressImmediate - Return true if the integer value can be used
2971 /// as the offset of the target addressing mode.
2972 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
2973 const Type *Ty) const {
2974 // SPU's addresses are 256K:
2975 return (V > -(1 << 18) && V < (1 << 18) - 1);
2978 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2983 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
2984 // The SPU target isn't yet aware of offsets.