1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CallingConv.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/Constants.h"
28 #include "llvm/Function.h"
29 #include "llvm/Intrinsics.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetOptions.h"
38 // Used in getTargetNodeName() below
// Lazily-populated opcode -> printable-name table; filled in on first call
// to SPUTargetLowering::getTargetNodeName().
40 std::map<unsigned, const char *> node_names;
42 //! MVT mapping to useful data for Cell SPU
// Each entry records, for one MVT, the byte offset of that type's
// "preferred slot" within a 16-byte SPU register (used by the load/store
// lowering below to compute rotation amounts).
43 struct valtype_map_s {
45   const int prefslot_byte;
// NOTE(review): the table's entries are elided in this listing (source
// lines 49-58 not shown).
48 const valtype_map_s valtype_map[] = {
// Number of entries in valtype_map (classic sizeof-array idiom).
59 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear search of valtype_map for VT's entry; returns 0 when VT has no
// entry (the elided tail appears to print a diagnostic via cerr in that
// case — see the dangling cerr line below; confirm against full source).
61 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
62   const valtype_map_s *retval = 0;
64   for (size_t i = 0; i < n_valtype_map; ++i) {
65     if (valtype_map[i].valtype == VT) {
66       retval = valtype_map + i;
73     cerr << "getValueTypeMapEntry returns NULL for "
83 //! Expand a library call into an actual call DAG node
86  This code is taken from SelectionDAGLegalize, since it is not exposed as
87  part of the LLVM SelectionDAG API.
// Builds an argument list from Op's operands, resolves the RTLIB libcall
// symbol, and lowers it to a C-calling-convention call rooted at the
// function's entry node. Returns the call's result value (CallInfo.first);
// the chain (CallInfo.second) is dropped here.
// NOTE(review): the `Hi` reference parameter is never written in the
// visible lines — verify against the full source whether it is dead.
ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
92               bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
93   // The input chain to this libcall is the entry node of the function.
94   // Legalizing the call will automatically add the previous call to the
96   SDValue InChain = DAG.getEntryNode();
98   TargetLowering::ArgListTy Args;
99   TargetLowering::ArgListEntry Entry;
100   for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
101     MVT ArgVT = Op.getOperand(i).getValueType();
// ArgTy's consuming statement (presumably Entry.Ty = ArgTy, elided line
// 104) is not visible here — TODO confirm.
102     const Type *ArgTy = ArgVT.getTypeForMVT();
103     Entry.Node = Op.getOperand(i);
// Sign/zero-extension flags derive directly from the caller's isSigned.
105     Entry.isSExt = isSigned;
106     Entry.isZExt = !isSigned;
107     Args.push_back(Entry);
109   SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
112   // Splice the libcall in wherever FindInputOutputChains tells us to.
113   const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
114   std::pair<SDValue, SDValue> CallInfo =
115     TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
116                     CallingConv::C, false, Callee, Args, DAG,
117                     Op.getNode()->getDebugLoc());
// Only the value is returned; the output chain in CallInfo.second is
// discarded by this helper.
119   return CallInfo.first;
// Construct the Cell SPU target lowering: registers the scalar and vector
// register classes, declares per-type legalization actions (Legal /
// Custom / Expand / Promote) for loads, stores and arithmetic, names the
// f64-divide libcall, registers DAG-combine opcodes, and sets scheduling
// preference. NOTE(review): this listing is elided (embedded source line
// numbers are non-contiguous), so some initializer and statement lines
// are not shown.
123 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
124   : TargetLowering(TM),
127   // Fold away setcc operations if possible.
130   // Use _setjmp/_longjmp instead of setjmp/longjmp.
131   setUseUnderscoreSetJmp(true);
132   setUseUnderscoreLongJmp(true);
134   // Set RTLIB libcall names as used by SPU:
135   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
137   // Set up the SPU's register classes:
138   addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
139   addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
140   addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
141   addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
142   addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
143   addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
144   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
146   // SPU has no sign or zero extended loads for i1, i8, i16:
147   setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
148   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
149   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
151   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
152   setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
154   // SPU constant load actions are custom lowered:
155   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
156   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
158   // SPU's loads and stores have to be custom lowered:
// Integer types i8..i128 (exclusive upper bound: i128 itself excluded).
159   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
161     MVT VT = (MVT::SimpleValueType)sctype;
163     setOperationAction(ISD::LOAD, VT, Custom);
164     setOperationAction(ISD::STORE, VT, Custom);
165     setLoadExtAction(ISD::EXTLOAD, VT, Custom);
166     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
167     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
// Expand truncating stores from VT to every smaller integer type.
169     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
170       MVT StoreVT = (MVT::SimpleValueType) stype;
171       setTruncStoreAction(VT, StoreVT, Expand);
// Same treatment for the FP types (f32 only — f64 is the exclusive bound).
175   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
177     MVT VT = (MVT::SimpleValueType) sctype;
179     setOperationAction(ISD::LOAD, VT, Custom);
180     setOperationAction(ISD::STORE, VT, Custom);
182     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
183       MVT StoreVT = (MVT::SimpleValueType) stype;
184       setTruncStoreAction(VT, StoreVT, Expand);
188   // Expand the jumptable branches
189   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
190   setOperationAction(ISD::BR_CC, MVT::Other, Expand);
192   // Custom lower SELECT_CC for most cases, but expand by default
193   setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
194   setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
195   setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
196   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
197   setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
199   // SPU has no intrinsics for these particular operations:
200   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
202   // SPU has no SREM/UREM instructions
203   setOperationAction(ISD::SREM, MVT::i32, Expand);
204   setOperationAction(ISD::UREM, MVT::i32, Expand);
205   setOperationAction(ISD::SREM, MVT::i64, Expand);
206   setOperationAction(ISD::UREM, MVT::i64, Expand);
208   // We don't support sin/cos/sqrt/fmod
209   setOperationAction(ISD::FSIN , MVT::f64, Expand);
210   setOperationAction(ISD::FCOS , MVT::f64, Expand);
211   setOperationAction(ISD::FREM , MVT::f64, Expand);
212   setOperationAction(ISD::FSIN , MVT::f32, Expand);
213   setOperationAction(ISD::FCOS , MVT::f32, Expand);
214   setOperationAction(ISD::FREM , MVT::f32, Expand);
216   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
218   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
219   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
221   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
222   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
224   // SPU can do rotate right and left, so legalize it... but customize for i8
225   // because instructions don't exist.
227   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
229   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
230   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
231   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
233   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
234   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
235   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
237   // SPU has no native version of shift left/right for i8
238   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
239   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
240   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
242   // Make these operations legal and handle them during instruction selection:
243   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
244   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
245   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
247   // Custom lower i8, i32 and i64 multiplications
248   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
// NOTE(review): comment above says i32/i64 MUL is custom lowered, but the
// actions here are Legal (handled at instruction selection per the
// comment at line 242) — the header comment is stale.
249   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
250   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
252   // Need to custom handle (some) common i8, i64 math ops
253   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
254   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
255   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
256   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
258   // SPU does not have BSWAP. It does have i32 support CTLZ.
259   // CTPOP has to be custom lowered.
260   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
261   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
263   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
264   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
265   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
266   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
268   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
269   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
271   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
273   // SPU has a version of select that implements (a&~c)|(b&c), just like
274   // select ought to work:
275   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
276   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
277   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
278   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
280   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
281   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
282   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
283   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
284   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
286   // Custom lower i128 -> i64 truncates
287   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
289   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
290   // to expand to a libcall, hence the custom lowering:
291   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
// NOTE(review): this Custom action for FP_TO_UINT/i32 is overridden to
// Promote further down (original line 350) — the later call wins; one of
// the two should probably be removed.
292   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
294   // FDIV on SPU requires custom lowering
295   setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
297   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
298   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
299   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
300   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
301   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
302   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
303   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
304   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
305   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
307   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
308   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
309   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
310   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
312   // We cannot sextinreg(i1). Expand to shifts.
313   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
315   // Support label based line numbers.
316   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
317   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
319   // We want to legalize GlobalAddress and ConstantPool nodes into the
320   // appropriate instructions to materialize the address.
321   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
323     MVT VT = (MVT::SimpleValueType)sctype;
325     setOperationAction(ISD::GlobalAddress,  VT, Custom);
326     setOperationAction(ISD::ConstantPool,   VT, Custom);
327     setOperationAction(ISD::JumpTable,      VT, Custom);
330   // RET must be custom lowered, to meet ABI requirements
331   setOperationAction(ISD::RET,           MVT::Other, Custom);
333   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
334   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
336   // Use the default implementation.
337   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
338   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
339   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
340   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
341   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
342   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
343   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
345   // Cell SPU has instructions for converting between i64 and fp.
346   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
// NOTE(review): duplicates the SINT_TO_FP/i64 Custom action already set
// at original line 304 — harmless but redundant.
347   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
349   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
350   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
352   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
353   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
355   // First set operation action for all vector types to expand. Then we
356   // will selectively turn on ones that can be effectively codegen'd.
357   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
358   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
359   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
360   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
361   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
362   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
364   // "Odd size" vector classes that we're willing to support:
365   addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
// Per-vector-type legalization actions for every vector MVT.
367   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
368        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
369     MVT VT = (MVT::SimpleValueType)i;
371     // add/sub are legal for all supported vector VT's.
372     setOperationAction(ISD::ADD, VT, Legal);
373     setOperationAction(ISD::SUB, VT, Legal);
374     // mul has to be custom lowered.
// NOTE(review): comment/code mismatch — the comment says "custom lowered"
// but the action set is Legal; confirm which is intended.
375     setOperationAction(ISD::MUL, VT, Legal);
377     setOperationAction(ISD::AND, VT, Legal);
378     setOperationAction(ISD::OR, VT, Legal);
379     setOperationAction(ISD::XOR, VT, Legal);
380     setOperationAction(ISD::LOAD, VT, Legal);
381     setOperationAction(ISD::SELECT, VT, Legal);
382     setOperationAction(ISD::STORE, VT, Legal);
384     // These operations need to be expanded:
385     setOperationAction(ISD::SDIV, VT, Expand);
386     setOperationAction(ISD::SREM, VT, Expand);
387     setOperationAction(ISD::UDIV, VT, Expand);
388     setOperationAction(ISD::UREM, VT, Expand);
390     // Custom lower build_vector, constant pool spills, insert and
391     // extract vector elements:
392     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
393     setOperationAction(ISD::ConstantPool, VT, Custom);
394     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
395     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
396     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
397     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
// v16i8 logic ops override the generic Legal action set in the loop above.
400   setOperationAction(ISD::AND, MVT::v16i8, Custom);
401   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
402   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
403   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
405   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
407   setShiftAmountType(MVT::i32);
408   setBooleanContents(ZeroOrNegativeOneBooleanContent);
410   setStackPointerRegisterToSaveRestore(SPU::R1);
412   // We have target-specific dag combine patterns for the following nodes:
413   setTargetDAGCombine(ISD::ADD);
414   setTargetDAGCombine(ISD::ZERO_EXTEND);
415   setTargetDAGCombine(ISD::SIGN_EXTEND);
416   setTargetDAGCombine(ISD::ANY_EXTEND);
418   computeRegisterProperties();
420   // Set pre-RA register scheduler default to BURR, which produces slightly
421   // better code than the default (could also be TDRR, but TargetLowering.h
422   // needs a mod to support that model):
423   setSchedulingPreference(SchedulingForRegPressure);
// Return the printable name for an SPUISD target node opcode, or 0 when
// the opcode is unknown. The file-scope node_names map is populated
// lazily on the first call.
// NOTE(review): no synchronization is visible around the lazy fill of the
// global map — not safe if called concurrently; verify callers.
427 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
429   if (node_names.empty()) {
430     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
431     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
432     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
433     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
434     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
435     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
436     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
437     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
438     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
439     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
440     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
441     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
442     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
443     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
444     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
445     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
446     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
447     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
448     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
449     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
450     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
451     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
452             "SPUISD::ROTBYTES_LEFT_BITS";
453     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
454     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
455     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
456     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
457     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
// Lookup; 0 is the "unknown opcode" sentinel expected by callers.
460   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
462   return ((i != node_names.end()) ? i->second : 0);
465 //===----------------------------------------------------------------------===//
466 // Return the Cell SPU's SETCC result type
467 //===----------------------------------------------------------------------===//
469 MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
470   // i8, i16 and i32 are returned as-is; everything else maps to i32.
471   return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
474 //===----------------------------------------------------------------------===//
475 // Calling convention code:
476 //===----------------------------------------------------------------------===//
478 #include "SPUGenCallingConv.inc"
480 //===----------------------------------------------------------------------===//
481 // LowerOperation implementation
482 //===----------------------------------------------------------------------===//
484 /// Custom lower loads for CellSPU
486  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
487 within a 16-byte block, we have to rotate to extract the requested element.
489 For extending loads, we also want to ensure that the following sequence is
490 emitted, e.g. for MVT::f32 extending load to MVT::f64:
494 %2  v16i8,ch = rotate %1
495 %3  v4f8, ch = bitconvert %2
496 %4  f32 = vec2perfslot %3
497 %5  f64 = fp_extend %4
// NOTE(review): this listing is elided — the declarations of locals used
// below (CN, rotate, result, Flag, closing braces of several branches)
// are on lines not shown here.
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
502   LoadSDNode *LN = cast<LoadSDNode>(Op);
503   SDValue the_chain = LN->getChain();
504   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
505   MVT InVT = LN->getMemoryVT();
506   MVT OutVT = Op.getValueType();
507   ISD::LoadExtType ExtType = LN->getExtensionType();
508   unsigned alignment = LN->getAlignment();
// vtm supplies the preferred-slot byte offset for InVT; note
// getValueTypeMapEntry can return 0 — vtm is dereferenced below without a
// visible null check (the check may be on an elided line — TODO confirm).
509   const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
510   DebugLoc dl = Op.getDebugLoc();
// Only the UNINDEXED addressing mode is lowered; others fall through to
// the diagnostic at the bottom.
512   switch (LN->getAddressingMode()) {
513   case ISD::UNINDEXED: {
515     SDValue basePtr = LN->getBasePtr();
// --- 16-byte-aligned fast path: compute base pointer + rotation amount
518     if (alignment == 16) {
521       // Special cases for a known aligned load to simplify the base pointer
522       // and the rotation amount:
523       if (basePtr.getOpcode() == ISD::ADD
524           && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
525         // Known offset into basePtr
526         int64_t offset = CN->getSExtValue();
// Rotation = in-quadword offset minus the type's preferred-slot byte.
527         int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
532         rotate = DAG.getConstant(rotamt, MVT::i16);
534         // Simplify the base pointer for this case:
535         basePtr = basePtr.getOperand(0);
// Fold the quadword-aligned part of the offset back into the address.
536         if ((offset & ~0xf) > 0) {
537           basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
539                                 DAG.getConstant((offset & ~0xf), PtrVT));
541       } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
542                  || (basePtr.getOpcode() == SPUISD::IndirectAddr
543                      && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
544                      && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
545         // Plain aligned a-form address: rotate into preferred slot
546         // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
547         int64_t rotamt = -vtm->prefslot_byte;
550         rotate = DAG.getConstant(rotamt, MVT::i16);
552         // Offset the rotate amount by the basePtr and the preferred slot
554         int64_t rotamt = -vtm->prefslot_byte;
557         rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
559                              DAG.getConstant(rotamt, PtrVT));
// --- Unaligned path: normalize the address into an IndirectAddr form
562       // Unaligned load: must be more pessimistic about addressing modes:
563       if (basePtr.getOpcode() == ISD::ADD) {
564         MachineFunction &MF = DAG.getMachineFunction();
565         MachineRegisterInfo &RegInfo = MF.getRegInfo();
566         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
569         SDValue Op0 = basePtr.getOperand(0);
570         SDValue Op1 = basePtr.getOperand(1);
572         if (isa<ConstantSDNode>(Op1)) {
573           // Convert the (add <ptr>, <const>) to an indirect address contained
574           // in a register. Note that this is done because we need to avoid
575           // creating a 0(reg) d-form address due to the SPU's block loads.
576           basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
577           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
578           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
580           // Convert the (add <arg1>, <arg2>) to an indirect address, which
581           // will likely be lowered as a reg(reg) x-form address.
582           basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
// Fallback: wrap the pointer as IndirectAddr(base, 0).
585         basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
587                               DAG.getConstant(0, PtrVT));
590       // Offset the rotate amount by the basePtr and the preferred slot
592       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
594                            DAG.getConstant(-vtm->prefslot_byte, PtrVT));
597     // Re-emit as a v16i8 vector load
598     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
599                          LN->getSrcValue(), LN->getSrcValueOffset(),
600                          LN->isVolatile(), 16);
// Thread the new load's output chain forward.
603     the_chain = result.getValue(1);
605     // Rotate into the preferred slot:
606     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
607                          result.getValue(0), rotate);
609     // Convert the loaded v16i8 vector to the appropriate vector type
610     // specified by the operand:
611     MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
612     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
613                          DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
615     // Handle extending loads by extending the scalar result:
616     if (ExtType == ISD::SEXTLOAD) {
617       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
618     } else if (ExtType == ISD::ZEXTLOAD) {
619       result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
620     } else if (ExtType == ISD::EXTLOAD) {
621       unsigned NewOpc = ISD::ANY_EXTEND;
623       if (OutVT.isFloatingPoint())
624         NewOpc = ISD::FP_EXTEND;
626       result = DAG.getNode(NewOpc, dl, OutVT, result);
// Package value + chain as an LDRESULT node so both results are returned.
629     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
630     SDValue retops[2] = {
635     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
636                          retops, sizeof(retops) / sizeof(retops[0]));
// Unsupported addressing modes: report and (per elided lines) abort.
643   case ISD::LAST_INDEXED_MODE:
644     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
646     cerr << (unsigned) LN->getAddressingMode() << "\n";
654 /// Custom lower stores for CellSPU
656  All CellSPU stores are aligned to 16-byte boundaries, so for elements
657 within a 16-byte block, we have to generate a shuffle to insert the
658 requested element into its place, then store the resulting block.
// NOTE(review): elided listing — declarations of CN, Flag, result and
// several closing braces are on lines not shown here.
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
662   StoreSDNode *SN = cast<StoreSDNode>(Op);
663   SDValue Value = SN->getValue();
664   MVT VT = Value.getValueType();
// For truncating stores the in-memory type differs from the value type.
665   MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
666   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
667   DebugLoc dl = Op.getDebugLoc();
668   unsigned alignment = SN->getAlignment();
// Only UNINDEXED stores are lowered; others hit the diagnostic below.
670   switch (SN->getAddressingMode()) {
671   case ISD::UNINDEXED: {
672     // The vector type we really want to load from the 16-byte chunk.
673     MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
674         stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
676     SDValue alignLoadVec;
677     SDValue basePtr = SN->getBasePtr();
678     SDValue the_chain = SN->getChain();
679     SDValue insertEltOffs;
// --- 16-byte-aligned fast path: derive base pointer + insertion offset
681     if (alignment == 16) {
684       // Special cases for a known aligned load to simplify the base pointer
685       // and insertion byte:
686       if (basePtr.getOpcode() == ISD::ADD
687           && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
688         // Known offset into basePtr
689         int64_t offset = CN->getSExtValue();
691         // Simplify the base pointer for this case:
692         basePtr = basePtr.getOperand(0);
// Insertion point = in-quadword part of the constant offset.
693         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
695                                     DAG.getConstant((offset & 0xf), PtrVT));
// Quadword-aligned part folds back into the base address.
697         if ((offset & ~0xf) > 0) {
698           basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
700                                 DAG.getConstant((offset & ~0xf), PtrVT));
703         // Otherwise, assume it's at byte 0 of basePtr
704         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
706                                     DAG.getConstant(0, PtrVT));
// --- Unaligned path (mirrors LowerLOAD's address normalization)
709       // Unaligned load: must be more pessimistic about addressing modes:
710       if (basePtr.getOpcode() == ISD::ADD) {
711         MachineFunction &MF = DAG.getMachineFunction();
712         MachineRegisterInfo &RegInfo = MF.getRegInfo();
713         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
716         SDValue Op0 = basePtr.getOperand(0);
717         SDValue Op1 = basePtr.getOperand(1);
719         if (isa<ConstantSDNode>(Op1)) {
720           // Convert the (add <ptr>, <const>) to an indirect address contained
721           // in a register. Note that this is done because we need to avoid
722           // creating a 0(reg) d-form address due to the SPU's block loads.
723           basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
724           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
725           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
727           // Convert the (add <arg1>, <arg2>) to an indirect address, which
728           // will likely be lowered as a reg(reg) x-form address.
729           basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
732         basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
734                               DAG.getConstant(0, PtrVT));
737       // Insertion point is solely determined by basePtr's contents
738       insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
740                                   DAG.getConstant(0, PtrVT));
743     // Re-emit as a v16i8 vector load
// Load the containing quadword so the scalar can be shuffled into it.
744     alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
745                                SN->getSrcValue(), SN->getSrcValueOffset(),
746                                SN->isVolatile(), 16);
749     the_chain = alignLoadVec.getValue(1);
751     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
752     SDValue theValue = SN->getValue();
756         && (theValue.getOpcode() == ISD::AssertZext
757             || theValue.getOpcode() == ISD::AssertSext)) {
758       // Drill down and get the value for zero- and sign-extended
760       theValue = theValue.getOperand(0);
763     // If the base pointer is already a D-form address, then just create
764     // a new D-form address with a slot offset and the orignal base pointer.
765     // Otherwise generate a D-form address with the slot offset relative
766     // to the stack pointer, which is always aligned.
768     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
769       cerr << "CellSPU LowerSTORE: basePtr = ";
770       basePtr.getNode()->dump(&DAG);
// Build the shuffle mask for the insertion byte, scalarize the value,
// and SHUFB it into the loaded quadword.
775     SDValue insertEltOp =
776             DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
777     SDValue vectorizeOp =
778             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
780     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
781                          vectorizeOp, alignLoadVec,
782                          DAG.getNode(ISD::BIT_CONVERT, dl,
783                                      MVT::v4i32, insertEltOp));
// Store the merged quadword back using the alignment-load's memory info.
785     result = DAG.getStore(the_chain, dl, result, basePtr,
786                           LN->getSrcValue(), LN->getSrcValueOffset(),
787                           LN->isVolatile(), LN->getAlignment());
// Dead debug block (disabled by "#if 0 &&" — never compiled).
// NOTE(review): "¤tRoot" below looks like HTML-entity corruption of
// "&currentRoot"; harmless while the block is #if 0'd, but should be
// repaired if this code is ever re-enabled.
789 #if 0 && !defined(NDEBUG)
790     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
791       const SDValue ¤tRoot = DAG.getRoot();
794       cerr << "------- CellSPU:LowerStore result:\n";
797       DAG.setRoot(currentRoot);
808   case ISD::LAST_INDEXED_MODE:
// NOTE(review): message says "LowerLOAD" but this is LowerSTORE — likely
// copy/paste; fixing it changes a runtime string, out of scope for this
// documentation-only pass.
809     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
811     cerr << (unsigned) SN->getAddressingMode() << "\n";
819 //! Generate the address of a constant pool entry.
// Static relocation model only: small-memory targets get a single
// A-form address node; large-memory targets get Hi/Lo halves combined
// through an IndirectAddr. The elided tail asserts/aborts for non-static
// relocation models (see the dangling string literal below).
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
822   MVT PtrVT = Op.getValueType();
823   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
824   Constant *C = CP->getConstVal();
825   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
826   SDValue Zero = DAG.getConstant(0, PtrVT);
827   const TargetMachine &TM = DAG.getTarget();
829   if (TM.getRelocationModel() == Reloc::Static) {
830     if (!ST->usingLargeMem()) {
831       // Just return the SDValue with the constant pool address in it.
832       return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
834       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
835       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
836       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
841          "LowerConstantPool: Relocation model other than static"
846 //! Alternate entry point for generating the address of a constant pool entry
// Thin public wrapper: forwards to the file-static ::LowerConstantPool
// using the target machine's subtarget.
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
849   return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
// Generate the address of a jump table entry. Mirrors LowerConstantPool:
// static relocation model only — A-form address for small memory, Hi/Lo
// + IndirectAddr for large memory; non-static models hit the elided
// assert (see dangling string below).
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
854   MVT PtrVT = Op.getValueType();
855   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
856   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
857   SDValue Zero = DAG.getConstant(0, PtrVT);
858   const TargetMachine &TM = DAG.getTarget();
860   if (TM.getRelocationModel() == Reloc::Static) {
861     if (!ST->usingLargeMem()) {
862       return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
864       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
865       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
866       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
871          "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global. Same shape as LowerConstantPool /
// LowerJumpTable: static-reloc A-form for small memory, Hi/Lo halves via
// IndirectAddr for large memory; non-static models print the error below
// (continuation and abort are on elided lines).
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
877   MVT PtrVT = Op.getValueType();
878   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
879   GlobalValue *GV = GSDN->getGlobal();
880   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
881   const TargetMachine &TM = DAG.getTarget();
882   SDValue Zero = DAG.getConstant(0, PtrVT);
884   if (TM.getRelocationModel() == Reloc::Static) {
885     if (!ST->usingLargeMem()) {
886       return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
888       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
889       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
890       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
893     cerr << "LowerGlobalAddress: Relocation model other than static not "
902 //! Custom lower double precision floating point constants
// For f64: materialize the constant's bit pattern as an i64, splat it
// into a v2i64 BUILD_VECTOR, bitcast to v2f64, and extract the preferred
// slot. (Non-f64 handling, the assert's condition line, and the function
// tail are on elided lines.)
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
905   MVT VT = Op.getValueType();
907   if (VT == MVT::f64) {
908     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
911            "LowerConstantFP: Node is not ConstantFPSDNode");
// Reinterpret the double's bits as a 64-bit integer constant.
913     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
914     SDValue T = DAG.getConstant(dbits, MVT::i64);
915     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
916     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
917                        DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
// Lower ISD::FORMAL_ARGUMENTS: map each incoming argument either to a
// virtual register copied from the SPU argument registers, or to a load
// from a fixed stack slot once the argument registers are exhausted.
// For varargs functions, the remaining unused argument registers are
// spilled to the stack so va_arg can walk them.
924 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
926 MachineFunction &MF = DAG.getMachineFunction();
927 MachineFrameInfo *MFI = MF.getFrameInfo();
928 MachineRegisterInfo &RegInfo = MF.getRegInfo();
929 SmallVector<SDValue, 48> ArgValues;
930 SDValue Root = Op.getOperand(0);
931 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
932 DebugLoc dl = Op.getDebugLoc();
934 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
935 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
937 unsigned ArgOffset = SPUFrameInfo::minStackSize();
938 unsigned ArgRegIdx = 0;
939 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
941 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
943 // Add DAG nodes to load the arguments or copy them out of registers.
// NumValues - 1 skips the node's trailing chain result.
944 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
945 ArgNo != e; ++ArgNo) {
946 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
947 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
950 if (ArgRegIdx < NumArgRegs) {
// Pick the register class matching the argument's value type.
951 const TargetRegisterClass *ArgRegClass;
953 switch (ObjectVT.getSimpleVT()) {
955 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
956 << ObjectVT.getMVTString()
961 ArgRegClass = &SPU::R8CRegClass;
964 ArgRegClass = &SPU::R16CRegClass;
967 ArgRegClass = &SPU::R32CRegClass;
970 ArgRegClass = &SPU::R64CRegClass;
973 ArgRegClass = &SPU::GPRCRegClass;
976 ArgRegClass = &SPU::R32FPRegClass;
979 ArgRegClass = &SPU::R64FPRegClass;
987 ArgRegClass = &SPU::VECREGRegClass;
991 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
992 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
993 ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
996 // We need to load the argument to a virtual register if we determined
997 // above that we ran out of physical registers of the appropriate type
998 // or we're forced to do vararg
999 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1000 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1001 ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
1002 ArgOffset += StackSlotSize;
1005 ArgValues.push_back(ArgVal);
// Chain the next argument after this one's copy/load.
1007 Root = ArgVal.getOperand(0);
1012 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1013 // We will spill (79-3)+1 registers to the stack
1014 SmallVector<SDValue, 79-3+1> MemOps;
1016 // Create the frame slot
// Vararg spill: store each remaining argument register (as v16i8) into
// consecutive fixed stack slots; VarArgsFrameIndex ends at the last slot.
1018 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1019 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1020 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1021 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1022 SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
1023 Root = Store.getOperand(0);
1024 MemOps.push_back(Store);
1026 // Increment address by stack slot size for the next stored argument
1027 ArgOffset += StackSlotSize;
1029 if (!MemOps.empty())
1030 Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1031 &MemOps[0], MemOps.size());
1034 ArgValues.push_back(Root);
1036 // Return the new list of results.
1037 return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
1038 &ArgValues[0], ArgValues.size())
1041 /// isLSAAddress - Return the immediate to use if the specified
1042 /// value is representable as a LSA address.
// Returns null unless Op is a constant that is 4-byte aligned and fits in
// an 18-bit sign-extended immediate; on success, returns the word-scaled
// (>> 2) address as an i32 constant node.
1043 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1044 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
// NOTE(review): Addr is a 32-bit int holding getZExtValue() — assumes LSA
// addresses fit in 32 bits; confirm truncation of wider constants is benign.
1047 int Addr = C->getZExtValue();
1048 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1049 (Addr << 14 >> 14) != Addr)
1050 return 0; // Top 14 bits have to be sext of immediate.
1052 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// Lower an ISD::CALL node for SPU. Arguments go into the SPU argument
// registers while available, then into stack slots above the minimal
// linkage area. The callee address is rewritten to an SPU addressing form
// (PC-relative, A-form, or indirect depending on memory model), and return
// values are copied out of R3 (and R4 for an expanded two-part result).
1056 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1057 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1058 SDValue Chain = TheCall->getChain();
1059 SDValue Callee = TheCall->getCallee();
1060 unsigned NumOps = TheCall->getNumArgs();
1061 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1062 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1063 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1064 DebugLoc dl = TheCall->getDebugLoc();
1066 // Handy pointer type
1067 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1069 // Accumulate how many bytes are to be pushed on the stack, including the
1070 // linkage area, and parameter passing area. According to the SPU ABI,
1071 // we minimally need space for [LR] and [SP]
1072 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1074 // Set up a copy of the stack pointer for use loading and storing any
1075 // arguments that may not fit in the registers available for argument
1077 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1079 // Figure out which arguments are going to go in registers, and which in
1081 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1082 unsigned ArgRegIdx = 0;
1084 // Keep track of registers passing arguments
1085 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1086 // And the arguments passed on the stack
1087 SmallVector<SDValue, 8> MemOpChains;
1089 for (unsigned i = 0; i != NumOps; ++i) {
1090 SDValue Arg = TheCall->getArg(i);
1092 // PtrOff will be used to store the current argument to the stack if a
1093 // register cannot be found for it.
1094 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1095 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
// Each value-type group below follows the same pattern: use the next
// argument register if one remains, else store to the stack slot.
1097 switch (Arg.getValueType().getSimpleVT()) {
1098 default: assert(0 && "Unexpected ValueType for argument!");
1104 if (ArgRegIdx != NumArgRegs) {
1105 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1107 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1108 ArgOffset += StackSlotSize;
1113 if (ArgRegIdx != NumArgRegs) {
1114 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1116 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1117 ArgOffset += StackSlotSize;
1126 if (ArgRegIdx != NumArgRegs) {
1127 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1129 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1130 ArgOffset += StackSlotSize;
1136 // Update number of stack bytes actually used, insert a call sequence start
1137 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1138 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1141 if (!MemOpChains.empty()) {
1142 // Adjust the stack pointer for the stack arguments.
1143 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1144 &MemOpChains[0], MemOpChains.size());
1147 // Build a sequence of copy-to-reg nodes chained together with token chain
1148 // and flag operands which copy the outgoing args into the appropriate regs.
1150 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1151 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1152 RegsToPass[i].second, InFlag);
1153 InFlag = Chain.getValue(1);
1156 SmallVector<SDValue, 8> Ops;
1157 unsigned CallOpc = SPUISD::CALL;
1159 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1160 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1161 // node so that legalize doesn't hack it.
1162 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1163 GlobalValue *GV = G->getGlobal();
1164 MVT CalleeVT = Callee.getValueType();
1165 SDValue Zero = DAG.getConstant(0, PtrVT);
1166 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1168 if (!ST->usingLargeMem()) {
1169 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1170 // style calls, otherwise, external symbols are BRASL calls. This assumes
1171 // that declared/defined symbols are in the same compilation unit and can
1172 // be reached through PC-relative jumps.
1175 // This may be an unsafe assumption for JIT and really large compilation
1177 if (GV->isDeclaration()) {
1178 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1180 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1183 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1185 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1187 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1188 MVT CalleeVT = Callee.getValueType();
1189 SDValue Zero = DAG.getConstant(0, PtrVT);
1190 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1191 Callee.getValueType());
1193 if (!ST->usingLargeMem()) {
1194 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
1196 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
1198 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1199 // If this is an absolute destination address that appears to be a legal
1200 // local store address, use the munged value.
1201 Callee = SDValue(Dest, 0);
1204 Ops.push_back(Chain);
1205 Ops.push_back(Callee);
1207 // Add argument registers to the end of the list so that they are known live
1209 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1210 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1211 RegsToPass[i].second.getValueType()));
1213 if (InFlag.getNode())
1214 Ops.push_back(InFlag);
1215 // Returns a chain and a flag for retval copy to use.
1216 Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1217 &Ops[0], Ops.size());
1218 InFlag = Chain.getValue(1);
1220 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1221 DAG.getIntPtrConstant(0, true), InFlag);
1222 if (TheCall->getValueType(0) != MVT::Other)
1223 InFlag = Chain.getValue(1);
1225 SDValue ResultVals[3];
1226 unsigned NumResults = 0;
1228 // If the call has results, copy the values out of the ret val registers.
1229 switch (TheCall->getValueType(0).getSimpleVT()) {
1230 default: assert(0 && "Unexpected ret value!");
1231 case MVT::Other: break;
// Two i32 results: presumably an expanded i64 returned in R3/R4 —
// TODO confirm against RetCC_SPU.
1233 if (TheCall->getValueType(1) == MVT::i32) {
1234 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1235 MVT::i32, InFlag).getValue(1);
1236 ResultVals[0] = Chain.getValue(0);
1237 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1238 Chain.getValue(2)).getValue(1);
1239 ResultVals[1] = Chain.getValue(0);
1242 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1243 InFlag).getValue(1);
1244 ResultVals[0] = Chain.getValue(0);
1249 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
1250 InFlag).getValue(1);
1251 ResultVals[0] = Chain.getValue(0);
1255 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
1256 InFlag).getValue(1);
1257 ResultVals[0] = Chain.getValue(0);
1262 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
1263 InFlag).getValue(1);
1264 ResultVals[0] = Chain.getValue(0);
1273 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
1274 InFlag).getValue(1);
1275 ResultVals[0] = Chain.getValue(0);
1280 // If the function returns void, just return the chain.
1281 if (NumResults == 0)
1284 // Otherwise, merge everything together with a MERGE_VALUES node.
1285 ResultVals[NumResults++] = Chain;
1286 SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
1287 return Res.getValue(Op.getResNo());
// Lower ISD::RET: analyze the return values with RetCC_SPU, mark the
// result registers live-out (once per function), copy each returned value
// into its assigned register, and emit an SPUISD::RET_FLAG node (with the
// glue flag when values were copied).
1291 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1292 SmallVector<CCValAssign, 16> RVLocs;
1293 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1294 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1295 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1296 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1298 // If this is the first return lowered for this function, add the regs to the
1299 // liveout set for the function.
1300 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1301 for (unsigned i = 0; i != RVLocs.size(); ++i)
1302 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1305 SDValue Chain = Op.getOperand(0);
1308 // Copy the result values into the output registers.
// Operand i*2+1 is the i-th returned value (operands alternate value/flag).
1309 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1310 CCValAssign &VA = RVLocs[i];
1311 assert(VA.isRegLoc() && "Can only return in registers!");
1312 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1313 Flag = Chain.getValue(1);
1317 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1319 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1323 //===----------------------------------------------------------------------===//
1324 // Vector related lowering:
1325 //===----------------------------------------------------------------------===//
// Return the single ConstantSDNode that fills every non-undef element of
// the given BUILD_VECTOR, or null if the elements are not all the same
// constant (or all undef).
1327 static ConstantSDNode *
1328 getVecImm(SDNode *N) {
1329 SDValue OpVal(0, 0);
1331 // Check to see if this buildvec has a single non-undef value in its elements.
1332 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1333 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1334 if (OpVal.getNode() == 0)
1335 OpVal = N->getOperand(i);
1336 else if (OpVal != N->getOperand(i))
1340 if (OpVal.getNode() != 0) {
1341 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1346 return 0; // All UNDEF: use implicit def.; not Constant node
1349 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1350 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// For i64 element types the value is reduced to one 32-bit half;
// NOTE(review): upper/lower are computed but the rejection of mismatched
// halves is not visible here — confirm both halves must be equal.
1352 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1354 if (ConstantSDNode *CN = getVecImm(N)) {
1355 uint64_t Value = CN->getZExtValue();
1356 if (ValueType == MVT::i64) {
1357 uint64_t UValue = CN->getZExtValue();
1358 uint32_t upper = uint32_t(UValue >> 32);
1359 uint32_t lower = uint32_t(UValue);
1362 Value = Value >> 32;
// 0x3ffff == 2^18 - 1: unsigned 18-bit range check.
1364 if (Value <= 0x3ffff)
1365 return DAG.getTargetConstant(Value, ValueType);
1371 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1372 /// and the value fits into a signed 16-bit constant, and if so, return the
// Same 64-bit-halves reduction as get_vec_u18imm; NOTE(review): the
// upper/lower mismatch rejection is not visible here — confirm.
1374 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1376 if (ConstantSDNode *CN = getVecImm(N)) {
1377 int64_t Value = CN->getSExtValue();
1378 if (ValueType == MVT::i64) {
1379 uint64_t UValue = CN->getZExtValue();
1380 uint32_t upper = uint32_t(UValue >> 32);
1381 uint32_t lower = uint32_t(UValue);
1384 Value = Value >> 32;
// Signed 16-bit range: [-32768, 32767].
1386 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1387 return DAG.getTargetConstant(Value, ValueType);
1394 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1395 /// and the value fits into a signed 10-bit constant, and if so, return the
// Range check is delegated to isS10Constant; i64 reduces to one 32-bit
// half as in the other get_vec_* helpers.
1397 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1399 if (ConstantSDNode *CN = getVecImm(N)) {
1400 int64_t Value = CN->getSExtValue();
1401 if (ValueType == MVT::i64) {
1402 uint64_t UValue = CN->getZExtValue();
1403 uint32_t upper = uint32_t(UValue >> 32);
1404 uint32_t lower = uint32_t(UValue);
1407 Value = Value >> 32;
1409 if (isS10Constant(Value))
1410 return DAG.getTargetConstant(Value, ValueType);
1416 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1417 /// and the value fits into a signed 8-bit constant, and if so, return the
1420 /// @note: The incoming vector is v16i8 because that's the only way we can load
1421 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1423 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1425 if (ConstantSDNode *CN = getVecImm(N)) {
1426 int Value = (int) CN->getZExtValue();
// NOTE(review): `((short) Value >> 8)` sign-extends before shifting and is
// compared against `((short) Value & 0xff)`; a byte-equality test
// `((Value >> 8) & 0xff) == (Value & 0xff)` looks intended — confirm.
1427 if (ValueType == MVT::i16
1428 && Value <= 0xffff /* truncated from uint64_t */
1429 && ((short) Value >> 8) == ((short) Value & 0xff))
1430 return DAG.getTargetConstant(Value & 0xff, ValueType);
1431 else if (ValueType == MVT::i8
1432 && (Value & 0xff) == Value)
1433 return DAG.getTargetConstant(Value, ValueType);
1439 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1440 /// and the value fits into a signed 16-bit constant, and if so, return the
// Matches splat values whose low 16 bits are zero (ILHU loads the upper
// halfword); the returned immediate is the value shifted right 16.
1442 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1444 if (ConstantSDNode *CN = getVecImm(N)) {
1445 uint64_t Value = CN->getZExtValue();
// For i64, equality with the 32-bit mask also forces the upper 32 bits
// of Value to be zero.
1446 if ((ValueType == MVT::i32
1447 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1448 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1449 return DAG.getTargetConstant(Value >> 16, ValueType);
1455 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splat constant as an i32 target constant, or falls through
// if the vector is not a uniform constant.
1456 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1457 if (ConstantSDNode *CN = getVecImm(N)) {
1458 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1464 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
// NOTE(review): the `(unsigned)` cast truncates the 64-bit splat value to
// its low 32 bits before building an MVT::i64 target constant — this looks
// copy/pasted from the v4i32 variant; confirm constants with nonzero upper
// 32 bits are either impossible here or handled elsewhere.
1465 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1466 if (ConstantSDNode *CN = getVecImm(N)) {
1467 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1473 // If this is a vector of constants or undefs, get the bits. A bit in
1474 // UndefBits is set if the corresponding element of the vector is an
1475 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1476 // zero. Return true if this is not an array of constants, false if it is.
1478 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1479 uint64_t UndefBits[2]) {
1480 // Start with zero'd results.
1481 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
// All elements share one width; derive it from the first operand.
1483 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1484 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1485 SDValue OpVal = BV->getOperand(i);
1487 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1488 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1490 uint64_t EltBits = 0;
1491 if (OpVal.getOpcode() == ISD::UNDEF) {
// Record an all-ones mask of EltBitSize bits in UndefBits for this slot.
1492 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1493 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1495 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1496 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1497 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants contribute their raw bit patterns.
1498 const APFloat &apf = CN->getValueAPF();
1499 EltBits = (CN->getValueType(0) == MVT::f32
1500 ? FloatToBits(apf.convertToFloat())
1501 : DoubleToBits(apf.convertToDouble()));
1503 // Nonconstant element.
1507 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1510 //printf("%llx %llx %llx %llx\n",
1511 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1515 /// If this is a splat (repetition) of a value across the whole vector, return
1516 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1517 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1518 /// SplatSize = 1 byte.
// MinSplatBits bounds how far the halving descends: the search stops at the
// first level whose width is >= MinSplatBits, reporting that splat width.
1519 static bool isConstantSplat(const uint64_t Bits128[2],
1520 const uint64_t Undef128[2],
1522 uint64_t &SplatBits, uint64_t &SplatUndef,
1524 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1525 // the same as the lower 64-bits, ignoring undefs.
// Each level ORs the defined bits of both halves together (undef halves
// contribute zeros) and ANDs the undef masks.
1526 uint64_t Bits64 = Bits128[0] | Bits128[1];
1527 uint64_t Undef64 = Undef128[0] & Undef128[1];
1528 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1529 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1530 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1531 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1533 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1534 if (MinSplatBits < 64) {
1536 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1538 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1539 if (MinSplatBits < 32) {
1541 // If the top 16-bits are different than the lower 16-bits, ignoring
1542 // undefs, we have an i32 splat.
1543 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1544 if (MinSplatBits < 16) {
1545 // If the top 8-bits are different than the lower 8-bits, ignoring
1546 // undefs, we have an i16 splat.
1547 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1548 == ((Bits16 >> 8) & ~Undef16)) {
1549 // Otherwise, we have an 8-bit splat.
1550 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1551 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1557 SplatUndef = Undef16;
1564 SplatUndef = Undef32;
1570 SplatBits = Bits128[0];
1571 SplatUndef = Undef128[0];
1577 return false; // Can't be a splat if two pieces don't match.
1580 //! Lower a BUILD_VECTOR instruction creatively:
// Only constant splats are custom-lowered here; anything else returns an
// empty SDValue so the default expansion runs. FP splats are rebuilt as
// integer splats and bitcast, i8 splats are widened to i16, and v2i64
// splats are delegated to SPU::LowerSplat_v2i64.
1582 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1583 MVT VT = Op.getValueType();
1584 // If this is a vector of constants or undefs, get the bits. A bit in
1585 // UndefBits is set if the corresponding element of the vector is an
1586 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1588 uint64_t VectorBits[2];
1589 uint64_t UndefBits[2];
1590 uint64_t SplatBits, SplatUndef;
1592 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1593 || !isConstantSplat(VectorBits, UndefBits,
1594 VT.getVectorElementType().getSizeInBits(),
1595 SplatBits, SplatUndef, SplatSize))
1596 return SDValue(); // Not a constant vector, not a splat.
1598 switch (VT.getSimpleVT()) {
1600 cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1601 << VT.getMVTString()
1606 uint32_t Value32 = uint32_t(SplatBits);
1607 assert(SplatSize == 4
1608 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1609 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1610 SDValue T = DAG.getConstant(Value32, MVT::i32);
1611 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1612 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1616 uint64_t f64val = uint64_t(SplatBits);
1617 assert(SplatSize == 8
1618 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1619 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1620 SDValue T = DAG.getConstant(f64val, MVT::i64);
1621 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1622 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1626 // 8-bit constants have to be expanded to 16-bits
1627 unsigned short Value16 = SplatBits | (SplatBits << 8);
1629 for (int i = 0; i < 8; ++i)
1630 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1631 return DAG.getNode(ISD::BIT_CONVERT, VT,
1632 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// i16 splat: SplatSize decides whether the value is already 16 bits or an
// 8-bit splat duplicated into both bytes.
1635 unsigned short Value16;
1637 Value16 = (unsigned short) (SplatBits & 0xffff);
1639 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1640 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1642 for (int i = 0; i < 8; ++i) Ops[i] = T;
1643 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1646 unsigned int Value = SplatBits;
1647 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1648 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1651 unsigned int Value = SplatBits;
1652 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1653 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T);
1656 return SPU::LowerSplat_v2i64(VT, DAG, SplatBits);
// Build a v2i64 splat of SplatVal. If both 32-bit halves are equal, a plain
// v4i32 splat (matchable by IL/ILA et al.) bitcast to OpVT suffices.
// Otherwise, the halves are synthesized via a SHUFB whose control bytes
// select either real vector bytes or the generator patterns for 0x00,
// 0xff and 0x80 ("special" halves).
1664 SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
1665 uint32_t upper = uint32_t(SplatVal >> 32);
1666 uint32_t lower = uint32_t(SplatVal);
1668 if (upper == lower) {
1669 // Magic constant that can be matched by IL, ILA, et. al.
1670 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1671 return DAG.getNode(ISD::BIT_CONVERT, OpVT,
1672 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1673 Val, Val, Val, Val));
1677 SmallVector<SDValue, 16> ShufBytes;
1679 bool upper_special, lower_special;
1681 // NOTE: This code creates common-case shuffle masks that can be easily
1682 // detected as common expressions. It is not attempting to create highly
1683 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1685 // Detect if the upper or lower half is a special shuffle mask pattern:
1686 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1687 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1689 // Create lower vector if not a special pattern
1690 if (!lower_special) {
1691 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1692 LO32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
1693 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1694 LO32C, LO32C, LO32C, LO32C));
1697 // Create upper vector if not a special pattern
1698 if (!upper_special) {
1699 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1700 HI32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
1701 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1702 HI32C, HI32C, HI32C, HI32C));
1705 // If either upper or lower are special, then the two input operands are
1706 // the same (basically, one of them is a "don't care")
1711 if (lower_special && upper_special) {
1712 // Unhappy situation... both upper and lower are special, so punt with
1713 // a target constant:
1714 SDValue Zero = DAG.getConstant(0, MVT::i32);
1715 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Assemble the 16 SHUFB control bytes, one 32-bit word at a time; even
// words (i & 1) == 0 come from the upper half, odd words from the lower.
1719 for (int i = 0; i < 4; ++i) {
1721 for (int j = 0; j < 4; ++j) {
1723 bool process_upper, process_lower;
1725 process_upper = (upper_special && (i & 1) == 0);
1726 process_lower = (lower_special && (i & 1) == 1);
1728 if (process_upper || process_lower) {
// Special halves map to SHUFB generator codes: 0x80 -> zeros,
// 0xc0 -> 0xff, 0xe0 -> 0x80 (first byte only).
1729 if ((process_upper && upper == 0)
1730 || (process_lower && lower == 0))
1732 else if ((process_upper && upper == 0xffffffff)
1733 || (process_lower && lower == 0xffffffff))
1735 else if ((process_upper && upper == 0x80000000)
1736 || (process_lower && lower == 0x80000000))
1737 val |= (j == 0 ? 0xe0 : 0x80);
1739 val |= i * 4 + j + ((i & 1) * 16);
1742 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1745 return DAG.getNode(SPUISD::SHUFB, OpVT, HI32, LO32,
1746 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1747 &ShufBytes[0], ShufBytes.size()));
1751 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1752 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1753 /// permutation vector, V3, is monotonically increasing with one "exception"
1754 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1755 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1756 /// In either case, the net result is going to eventually invoke SHUFB to
1757 /// permute/shuffle the bytes from V1 and V2.
1759 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1760 /// control word for byte/halfword/word insertion. This takes care of a single
1761 /// element move from V2 into V1.
1763 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1764 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1765 SDValue V1 = Op.getOperand(0);
1766 SDValue V2 = Op.getOperand(1);
1767 SDValue PermMask = Op.getOperand(2);
1769 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1771 // If we have a single element being moved from V1 to V2, this can be handled
1772 // using the C*[DX] compute mask instructions, but the vector elements have
1773 // to be monotonically increasing with one exception element.
1774 MVT VecVT = V1.getValueType();
1775 MVT EltVT = VecVT.getVectorElementType();
1776 unsigned EltsFromV2 = 0;
1778 unsigned V2EltIdx0 = 0;
1779 unsigned CurrElt = 0;
1780 unsigned MaxElts = VecVT.getVectorNumElements();
1781 unsigned PrevElt = 0;
1783 bool monotonic = true;
// V2EltIdx0 is the first mask index that refers to V2 (set per element
// width in the elided branch bodies below).
1786 if (EltVT == MVT::i8) {
1788 } else if (EltVT == MVT::i16) {
1790 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1792 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1795 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask, classifying it as (a) monotonic with exactly one element
// taken from V2, or (b) a rotation of V1.
1797 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1798 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1799 unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
1802 if (SrcElt >= V2EltIdx0) {
1803 if (1 >= (++EltsFromV2)) {
1804 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1806 } else if (CurrElt != SrcElt) {
// Rotation detection: consecutive source elements, allowing wrap-around.
1814 if (PrevElt > 0 && SrcElt < MaxElts) {
1815 if ((PrevElt == SrcElt - 1)
1816 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1823 } else if (PrevElt == 0) {
1824 // First time through, need to keep track of previous element
1827 // This isn't a rotation, takes elements from vector 2
1834 if (EltsFromV2 == 1 && monotonic) {
1835 // Compute mask and shuffle
1836 MachineFunction &MF = DAG.getMachineFunction();
1837 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1838 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1839 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1840 // Initialize temporary register to 0
1841 SDValue InitTempReg =
1842 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1843 // Copy register's contents as index in SHUFFLE_MASK:
1844 SDValue ShufMaskOp =
1845 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1846 DAG.getTargetConstant(V2Elt, MVT::i32),
1847 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1848 // Use shuffle mask in SHUFB synthetic instruction:
1849 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1850 } else if (rotate) {
// Rotation amount in bytes for ROTBYTES_LEFT.
1851 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1853 return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
1854 V1, DAG.getConstant(rotamt, MVT::i16));
1856 // Convert the SHUFFLE_VECTOR mask's input element units to the
1858 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1860 SmallVector<SDValue, 16> ResultMask;
1861 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1863 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1866 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1868 for (unsigned j = 0; j < BytesPerElement; ++j) {
1869 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1874 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1875 &ResultMask[0], ResultMask.size());
1876 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower ISD::SCALAR_TO_VECTOR: a constant scalar becomes a BUILD_VECTOR of
// n copies (which later simplifies to a vector register load); any other
// scalar is moved into the vector's preferred slot with PREFSLOT2VEC.
1880 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1881 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1883 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1884 // For a constant, build the appropriate constant vector, which will
1885 // eventually simplify to a vector register load.
1887 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1888 SmallVector<SDValue, 16> ConstVecValues;
1892 // Create a constant vector:
// Element count and element type are derived from the result vector type.
1893 switch (Op.getValueType().getSimpleVT()) {
1894 default: assert(0 && "Unexpected constant value type in "
1895 "LowerSCALAR_TO_VECTOR");
1896 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1897 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1898 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1899 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1900 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1901 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1904 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1905 for (size_t j = 0; j < n_copies; ++j)
1906 ConstVecValues.push_back(CValue);
1908 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1909 &ConstVecValues[0], ConstVecValues.size());
1911 // Otherwise, copy the value from one register to another:
1912 switch (Op0.getValueType().getSimpleVT()) {
1913 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1920 return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
// Lower ISD::EXTRACT_VECTOR_ELT for Cell SPU.
// Two strategies, depending on whether the element index is a compile-time
// constant:
//   - constant index: either the element already lives in the register's
//     "preferred slot" (read it with VEC2PREFSLOT), or build a SHUFB mask
//     that moves the requested element into the preferred slot first;
//   - variable index: shift the requested element down to byte 0, replicate
//     it across the quadword, then read the preferred slot.
// NOTE: several lines of this function are elided in this excerpt (missing
// case labels, braces and the `retval` declaration), so comments below only
// describe what is visible.
1927 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1928 MVT VT = Op.getValueType();
1929 SDValue N = Op.getOperand(0);
1930 SDValue Elt = Op.getOperand(1);
1933 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1934 // Constant argument:
1935 int EltNo = (int) C->getZExtValue();
// Range-check the constant index against the element count implied by the
// element type (16 x i8, 8 x i16, 4 x i32, 2 x i64).
// NOTE(review): the i32/i64 assert strings are off by one — a rejected i32
// slot is > 3 (not "> 4") and a rejected i64 slot is > 1 (not "> 2").
1938 if (VT == MVT::i8 && EltNo >= 16)
1939 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1940 else if (VT == MVT::i16 && EltNo >= 8)
1941 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1942 else if (VT == MVT::i32 && EltNo >= 4)
1943 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1944 else if (VT == MVT::i64 && EltNo >= 2)
1945 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1947 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1948 // i32 and i64: Element 0 is the preferred slot
1949 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
// Not the preferred slot: build a 16-byte SHUFB control word that moves the
// requested element's bytes into the preferred-slot byte range.
1952 // Need to generate shuffle mask and extract:
1953 int prefslot_begin = -1, prefslot_end = -1;
// Byte offset of the requested element within the quadword.
1954 int elt_byte = EltNo * VT.getSizeInBits() / 8;
// Per-type preferred-slot byte range (case labels elided in this excerpt);
// the patterns visible are: a 1-byte slot ending at byte 3, a 2-byte slot
// (bytes 2-3), a 4-byte slot (bytes 0-3) and an 8-byte slot (bytes 0-7).
1956 switch (VT.getSimpleVT()) {
1958 assert(false && "Invalid value type!");
1960 prefslot_begin = prefslot_end = 3;
1964 prefslot_begin = 2; prefslot_end = 3;
1969 prefslot_begin = 0; prefslot_end = 3;
1974 prefslot_begin = 0; prefslot_end = 7;
1979 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1980 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Fill the 16 mask bytes: within the preferred slot, select the element's
// source bytes; past it, repeat the pattern (the result there is don't-care).
1982 unsigned int ShufBytes[16];
1983 for (int i = 0; i < 16; ++i) {
1984 // zero fill uppper part of preferred slot, don't care about the
1986 unsigned int mask_val;
1987 if (i <= prefslot_end) {
1989 ((i < prefslot_begin)
1991 : elt_byte + (i - prefslot_begin));
1993 ShufBytes[i] = mask_val;
1995 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
// Pack the 16 mask bytes into four big-endian i32 constants for the
// v4i32 BUILD_VECTOR that feeds SHUFB.
1998 SDValue ShufMask[4];
1999 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2000 unsigned bidx = i * 4;
2001 unsigned int bits = ((ShufBytes[bidx] << 24) |
2002 (ShufBytes[bidx+1] << 16) |
2003 (ShufBytes[bidx+2] << 8) |
2005 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2008 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2010 sizeof(ShufMask) / sizeof(ShufMask[0]));
// Shuffle the element into the preferred slot, then read it out.
2012 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2013 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2014 N, N, ShufMaskVec));
2016 // Variable index: Rotate the requested element into slot 0, then replicate
2017 // slot 0 across the vector
2018 MVT VecVT = N.getValueType();
2019 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2020 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2024 // Make life easier by making sure the index is zero-extended to i32
2025 if (Elt.getValueType() != MVT::i32)
2026 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
// Element index -> byte-shift amount: multiply by the element size in bytes
// (16 / num-elements), done as a left shift by log2 of that factor.
2028 // Scale the index to a bit/byte shift quantity
2030 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2031 unsigned scaleShift = scaleFactor.logBase2();
2034 if (scaleShift > 0) {
2035 // Scale the shift factor:
2036 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2037 DAG.getConstant(scaleShift, MVT::i32));
// Shift the quadword left by that many bytes so the requested element
// lands at byte 0.
2040 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2042 // Replicate the bytes starting at byte 0 across the entire vector (for
2043 // consistency with the notion of a unified register set)
// Build a splat mask whose pattern depends on the element width
// (case labels elided in this excerpt; note the typo "varable" in the
// diagnostic below).
2046 switch (VT.getSimpleVT()) {
2048 cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2052 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2053 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2058 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2059 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2065 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2066 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2072 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2073 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2074 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2075 loFactor, hiFactor);
// Splat the rotated element, then read the preferred slot.
2080 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2081 DAG.getNode(SPUISD::SHUFB, VecVT,
2082 vecShift, vecShift, replicate));
// Lower ISD::INSERT_VECTOR_ELT: requires a constant index. Builds an
// SPU SHUFFLE_MASK keyed off a stack-pointer-relative address (only the
// low bits of the address matter for the generated mask) and merges the
// scalar into the vector with SHUFB.
2088 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2089 SDValue VecOp = Op.getOperand(0);
2090 SDValue ValOp = Op.getOperand(1);
2091 SDValue IdxOp = Op.getOperand(2);
2092 MVT VT = Op.getValueType();
// NOTE(review): cast<> itself asserts on a type mismatch and never returns
// null, so the assert on the next line can never fire; dyn_cast<> would be
// needed for the check to be meaningful.
2094 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2095 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2097 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2098 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2099 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2100 DAG.getRegister(SPU::R1, PtrVT),
2101 DAG.getConstant(CN->getSExtValue(), PtrVT));
2102 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
// Shuffle the scalar (promoted to a vector) together with the original
// vector under the generated control mask.
2105 DAG.getNode(SPUISD::SHUFB, VT,
2106 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2108 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
// Lower i8 arithmetic that the SPU has no native 8-bit form for: promote
// both operands to i16, perform the operation (Opc) at i16, and truncate
// the result back to i8. The case labels of the switch are elided in this
// excerpt; each visible arm differs only in how operands are extended.
2113 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2114 const TargetLowering &TLI)
2116 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2117 MVT ShiftVT = TLI.getShiftAmountTy();
2119 assert(Op.getValueType() == MVT::i8);
2122 assert(0 && "Unhandled i8 math operator");
2126 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2128 SDValue N1 = Op.getOperand(1);
2129 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2130 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2131 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2132 DAG.getNode(Opc, MVT::i16, N0, N1));
2137 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2139 SDValue N1 = Op.getOperand(1);
2140 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2141 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2142 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2143 DAG.getNode(Opc, MVT::i16, N0, N1));
// This arm duplicates the low byte into the high byte (N0 | (N0 << 8))
// before the i16 operation — presumably the rotate arm, so an 8-bit rotate
// falls out of the 16-bit one; TODO confirm (case labels not visible here).
2147 SDValue N1 = Op.getOperand(1);
2149 N0 = (N0.getOpcode() != ISD::Constant
2150 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2151 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
// Normalize the shift amount to ShiftVT, extending or truncating as needed.
2153 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2156 N1 = (N1.getOpcode() != ISD::Constant
2157 ? DAG.getNode(N1Opc, ShiftVT, N1)
2158 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2159 TLI.getShiftAmountTy()));
2161 DAG.getNode(ISD::OR, MVT::i16, N0,
2162 DAG.getNode(ISD::SHL, MVT::i16,
2163 N0, DAG.getConstant(8, MVT::i32)));
2164 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2165 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Zero-extending arm (logical shift, presumably SRL/SHL — labels elided).
2169 SDValue N1 = Op.getOperand(1);
2171 N0 = (N0.getOpcode() != ISD::Constant
2172 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2173 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2175 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2178 N1 = (N1.getOpcode() != ISD::Constant
2179 ? DAG.getNode(N1Opc, ShiftVT, N1)
2180 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
2181 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2182 DAG.getNode(Opc, MVT::i16, N0, N1));
// Sign-extending arm (arithmetic shift, presumably SRA — labels elided).
2185 SDValue N1 = Op.getOperand(1);
2187 N0 = (N0.getOpcode() != ISD::Constant
2188 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2189 : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
2191 N1Opc = N1.getValueType().bitsLT(ShiftVT)
// NOTE(review): this arm sign-extends non-constant N1 but takes
// getZExtValue() for constant N1, unlike the constant-N0 path above which
// uses getSExtValue() — verify the mixed zero/sign extension is intended.
2194 N1 = (N1.getOpcode() != ISD::Constant
2195 ? DAG.getNode(N1Opc, ShiftVT, N1)
2196 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2198 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2199 DAG.getNode(Opc, MVT::i16, N0, N1));
// Final visible arm: both operands promoted via SIGN_EXTEND (presumably
// the multiply arm — labels elided).
2202 SDValue N1 = Op.getOperand(1);
// NOTE(review): getZExtValue() here vs. getSExtValue() in the parallel arm
// at original line 2189 — one of the two is likely inconsistent.
2204 N0 = (N0.getOpcode() != ISD::Constant
2205 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2206 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2208 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2209 N1 = (N1.getOpcode() != ISD::Constant
2210 ? DAG.getNode(N1Opc, MVT::i16, N1)
2211 : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
2213 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2214 DAG.getNode(Opc, MVT::i16, N0, N1));
2222 //! Generate the carry-generate shuffle mask.
2223 SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
2224 SmallVector<SDValue, 16 > ShufBytes;
// Create the shuffle mask for "rotating" the carry up one register slot
// once the carry is generated. (Comment fixed: this is the *carry* variant;
// the borrow wording was copy-pasted from getBorrowGenerateShufMask below.)
// The 0x80 control bytes select constant 0x00 in SHUFB — presumably per the
// SPU ISA shufb encoding; confirm against the ISA document.
2228 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2229 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2230 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2231 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2233 return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2234 &ShufBytes[0], ShufBytes.size());
2237 //! Generate the borrow-generate shuffle mask
2238 SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
2239 SmallVector<SDValue, 16 > ShufBytes;
2241 // Create the shuffle mask for "rotating" the borrow up one register slot
2242 // once the borrow is generated.
// Identical to the carry mask above except the filler control bytes are
// 0xc0 instead of 0x80 — presumably selecting constant 0xFF rather than
// 0x00 in SHUFB; confirm against the SPU ISA shufb encoding.
2243 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2244 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2245 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2246 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2248 return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2249 &ShufBytes[0], ShufBytes.size());
2252 //! Lower byte immediate operations for v16i8 vectors:
// If one operand of a v16i8 logical op (AND/OR/XOR) is a constant splat
// that fits in a byte, rewrite the BUILD_VECTOR operand as 16 target
// constants so the byte-immediate instruction forms (ANDBI/ORBI/XORBI)
// can be selected. Otherwise the op is returned unchanged.
2254 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2257 MVT VT = Op.getValueType();
// Locate the constant vector: try operand 0, looking through BIT_CONVERT;
// if it isn't a BUILD_VECTOR, swap and try operand 1.
2259 ConstVec = Op.getOperand(0);
2260 Arg = Op.getOperand(1);
2261 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2262 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2263 ConstVec = ConstVec.getOperand(0);
2265 ConstVec = Op.getOperand(1);
2266 Arg = Op.getOperand(0);
2267 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2268 ConstVec = ConstVec.getOperand(0);
2273 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2274 uint64_t VectorBits[2];
2275 uint64_t UndefBits[2];
2276 uint64_t SplatBits, SplatUndef;
// Only proceed when every element is the same constant (a splat).
2279 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2280 && isConstantSplat(VectorBits, UndefBits,
2281 VT.getVectorElementType().getSizeInBits(),
2282 SplatBits, SplatUndef, SplatSize)) {
// Low byte of the splat value becomes the immediate.
2284 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2285 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2287 // Turn the BUILD_VECTOR into a set of target constants:
2288 for (size_t i = 0; i < tcVecSize; ++i)
2291 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2292 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2296 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2297 // lowered. Return the operation, rather than a null SDValue.
2301 //! Custom lowering for CTPOP (count population)
2303 Custom lowering code that counts the number ones in the input
2304 operand. SPU has such an instruction, but it counts the number of
2305 ones per byte, which then have to be accumulated.
// Implementation: promote the scalar into a vector, apply SPU CNTB
// (per-byte popcount), extract the preferred slot, then fold the per-byte
// counts together with shift/add/mask sequences sized to the operand width.
// Switch case labels are elided in this excerpt; the three visible arms are
// the i8, i16 and i32 paths.
2307 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2308 MVT VT = Op.getValueType();
2309 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2311 switch (VT.getSimpleVT()) {
2313 assert(false && "Invalid value type!");
// i8: CNTB's single byte count is already the answer.
2315 SDValue N = Op.getOperand(0);
2316 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2318 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2319 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2321 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16: add the two byte-counts ((x >> 8) + x) and mask to the low nibbles.
2325 MachineFunction &MF = DAG.getMachineFunction();
2326 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2328 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2330 SDValue N = Op.getOperand(0);
2331 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2332 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2333 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2335 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2336 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2338 // CNTB_result becomes the chain to which all of the virtual registers
2339 // CNTB_reg, SUM1_reg become associated:
2340 SDValue CNTB_result =
2341 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
// Pin the intermediate in a virtual register so it can be read twice.
2343 SDValue CNTB_rescopy =
2344 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2346 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2348 return DAG.getNode(ISD::AND, MVT::i16,
2349 DAG.getNode(ISD::ADD, MVT::i16,
2350 DAG.getNode(ISD::SRL, MVT::i16,
// i32: two shift/add rounds (by 16 then by 8) followed by the 0xff mask.
2357 MachineFunction &MF = DAG.getMachineFunction();
2358 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2360 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2361 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2363 SDValue N = Op.getOperand(0);
2364 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2365 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2366 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2367 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2369 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2370 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2372 // CNTB_result becomes the chain to which all of the virtual registers
2373 // CNTB_reg, SUM1_reg become associated:
2374 SDValue CNTB_result =
2375 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2377 SDValue CNTB_rescopy =
2378 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// First round: fold the upper halfword's counts into the lower halfword.
2381 DAG.getNode(ISD::SRL, MVT::i32,
2382 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2385 DAG.getNode(ISD::ADD, MVT::i32,
2386 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2388 SDValue Sum1_rescopy =
2389 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Second round: fold the remaining two byte counts together.
2392 DAG.getNode(ISD::SRL, MVT::i32,
2393 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2396 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2397 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2399 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2409 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2411 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2412 All conversions to i64 are expanded to a libcall.
2414 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2415 SPUTargetLowering &TLI) {
2416 MVT OpVT = Op.getValueType();
2417 SDValue Op0 = Op.getOperand(0);
2418 MVT Op0VT = Op0.getValueType();
// Only f64->i32 and any->i64 need the runtime library; f32->i32 is native.
2420 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2421 || OpVT == MVT::i64) {
2422 // Convert f32 / f64 to i32 / i64 via libcall.
// Pick the signed or unsigned RTLIB entry based on the opcode.
// (Typo "Unexpectd" in the assert string is preserved verbatim.)
2424 (Op.getOpcode() == ISD::FP_TO_SINT)
2425 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2426 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2427 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2429 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2432 return Op; // return unmolested, legalized op
2435 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2437 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2438 All conversions from i64 are expanded to a libcall.
2440 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2441 SPUTargetLowering &TLI) {
2442 MVT OpVT = Op.getValueType();
2443 SDValue Op0 = Op.getOperand(0);
2444 MVT Op0VT = Op0.getValueType();
// Mirror of LowerFP_TO_INT: only i32->f64 and i64->any go through RTLIB.
2446 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2447 || Op0VT == MVT::i64) {
2448 // Convert i32, i64 to f64 via libcall:
2450 (Op.getOpcode() == ISD::SINT_TO_FP)
2451 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2452 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2453 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2455 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2458 return Op; // return unmolested, legalized
2461 //! Lower ISD::SETCC
2463 This handles MVT::f64 (double floating point) condition lowering
// Strategy: reinterpret the doubles as i64 bit patterns, convert from IEEE
// sign-magnitude to two's complement, and compare as integers. SETO/SETUO
// are answered from the lhs bits alone; ordered comparisons additionally
// AND in a not-NaN check for both operands.
2465 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2466 const TargetLowering &TLI) {
2467 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2468 DebugLoc dl = Op.getNode()->getDebugLoc();
2469 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2471 SDValue lhs = Op.getOperand(0);
2472 SDValue rhs = Op.getOperand(1);
2473 MVT lhsVT = lhs.getValueType();
// NOTE(review): assert string has a typo — should read "MVT::f64".
2474 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2476 MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2477 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2478 MVT IntVT(MVT::i64);
2480 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2481 // selected to a NOP:
// Split the lhs bit pattern into high 32 bits, |high| (sign cleared), and
// low 32 bits.
2482 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2484 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2485 DAG.getNode(ISD::SRL, dl, IntVT,
2486 i64lhs, DAG.getConstant(32, MVT::i32)));
2487 SDValue lhsHi32abs =
2488 DAG.getNode(ISD::AND, dl, MVT::i32,
2489 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2491 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2493 // SETO and SETUO only use the lhs operand:
2494 if (CC->get() == ISD::SETO) {
2495 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
// i.e. NOT(SETUO), built with XOR against all-ones.
2497 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2498 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2499 DAG.getSetCC(dl, ccResultVT,
2500 lhs, DAG.getConstantFP(0.0, lhsVT),
2502 DAG.getConstant(ccResultAllOnes, ccResultVT));
2503 } else if (CC->get() == ISD::SETUO) {
2504 // Evaluates to true if Op0 is [SQ]NaN
// NaN <=> (|hi| >= 0x7ff00000) AND (mantissa bits non-zero), per the
// visible comparison constants (some operands elided in this excerpt).
2505 return DAG.getNode(ISD::AND, dl, ccResultVT,
2506 DAG.getSetCC(dl, ccResultVT,
2508 DAG.getConstant(0x7ff00000, MVT::i32),
2510 DAG.getSetCC(dl, ccResultVT,
2512 DAG.getConstant(0, MVT::i32),
// Other conditions need both operands: split rhs the same way.
2516 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, rhs);
2518 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2519 DAG.getNode(ISD::SRL, dl, IntVT,
2520 i64rhs, DAG.getConstant(32, MVT::i32)));
2522 // If a value is negative, subtract from the sign magnitude constant:
2523 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2525 // Convert the sign-magnitude representation into 2's complement:
// Build an all-ones/all-zeros mask from the sign bit, then select either
// the converted (0x8000... - x) value or the raw bits.
2526 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2527 lhsHi32, DAG.getConstant(31, MVT::i32));
2528 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2530 DAG.getNode(ISD::SELECT, dl, IntVT,
2531 lhsSelectMask, lhsSignMag2TC, i64lhs);
2533 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2534 rhsHi32, DAG.getConstant(31, MVT::i32));
2535 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2537 DAG.getNode(ISD::SELECT, dl, IntVT,
2538 rhsSelectMask, rhsSignMag2TC, i64rhs);
// Map the FP condition code onto the equivalent signed-integer one
// (case labels elided in this excerpt).
2542 switch (CC->get()) {
2545 compareOp = ISD::SETEQ; break;
2548 compareOp = ISD::SETGT; break;
2551 compareOp = ISD::SETGE; break;
2554 compareOp = ISD::SETLT; break;
2557 compareOp = ISD::SETLE; break;
2560 compareOp = ISD::SETNE; break;
2562 cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
2568 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2569 (ISD::CondCode) compareOp);
// Low bit 3 clear on the enum value distinguishes ordered comparisons —
// presumably relying on ISD::CondCode's encoding; verify if that enum
// ever changes.
2571 if ((CC->get() & 0x8) == 0) {
2572 // Ordered comparison:
2573 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2574 lhs, DAG.getConstantFP(0.0, MVT::f64),
2576 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2577 rhs, DAG.getConstantFP(0.0, MVT::f64),
2579 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2581 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2587 //! Lower ISD::SELECT_CC
2589 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2592 \note Need to revisit this in the future: if the code path through the true
2593 and false value computations is longer than the latency of a branch (6
2594 cycles), then it would be more advantageous to branch and insert a new basic
2595 block and branch on the condition. However, this code does not make that
2596 assumption, given the simplisitc uses so far.
// Lowers SELECT_CC to SETCC feeding the SPU SELB (select-bits) node,
// avoiding a branch entirely.
2599 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2600 const TargetLowering &TLI) {
2601 MVT VT = Op.getValueType();
2602 SDValue lhs = Op.getOperand(0);
2603 SDValue rhs = Op.getOperand(1);
2604 SDValue trueval = Op.getOperand(2);
2605 SDValue falseval = Op.getOperand(3);
2606 SDValue condition = Op.getOperand(4);
2608 // NOTE: SELB's arguments: $rA, $rB, $mask
2610 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2611 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2612 // condition was true and 0s where the condition was false. Hence, the
2613 // arguments to SELB get reversed.
2615 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2616 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2617 // with another "cannot select select_cc" assert:
2619 SDValue compare = DAG.getNode(ISD::SETCC,
2620 TLI.getSetCCResultType(Op.getValueType()),
2621 lhs, rhs, condition);
// falseval first, trueval second — see the SELB operand note above.
2622 return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
2625 //! Custom lower ISD::TRUNCATE
// Only the i128 -> i64 case is custom lowered: a SHUFB selects the least
// significant doubleword of the quadword. Every other truncate is left
// untouched (SDValue() returned).
2626 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2628 MVT VT = Op.getValueType();
2629 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2630 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2632 SDValue Op0 = Op.getOperand(0);
2633 MVT Op0VT = Op0.getValueType();
2634 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2636 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2637 // Create shuffle mask, least significant doubleword of quadword
2638 unsigned maskHigh = 0x08090a0b;
2639 unsigned maskLow = 0x0c0d0e0f;
2640 // Use a shuffle to perform the truncation
2641 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2642 DAG.getConstant(maskHigh, MVT::i32),
2643 DAG.getConstant(maskLow, MVT::i32),
2644 DAG.getConstant(maskHigh, MVT::i32),
2645 DAG.getConstant(maskLow, MVT::i32));
// Promote the scalar into vector form, shuffle bytes 8-15 into the
// preferred slot, and read the result back out as a scalar.
2648 SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2650 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2651 PromoteScalar, PromoteScalar, shufMask);
2653 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2654 DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
2657 return SDValue(); // Leave the truncate unmolested
2660 //! Custom (target-specific) lowering entry point
2662 This is where LLVM's DAG selection process calls to do target-specific
// Dispatch table from ISD opcode to the Lower* helpers defined above.
// Most case labels and the default arm's abort are elided in this excerpt.
2666 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2668 unsigned Opc = (unsigned) Op.getOpcode();
2669 MVT VT = Op.getValueType();
// Default arm: report an opcode this target forgot to mark legal/expand.
2673 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2674 cerr << "Op.getOpcode() = " << Opc << "\n";
2675 cerr << "*Op.getNode():\n";
2676 Op.getNode()->dump();
2683 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2685 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2686 case ISD::ConstantPool:
2687 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2688 case ISD::GlobalAddress:
2689 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2690 case ISD::JumpTable:
2691 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2692 case ISD::ConstantFP:
2693 return LowerConstantFP(Op, DAG);
2694 case ISD::FORMAL_ARGUMENTS:
2695 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2697 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2699 return LowerRET(Op, DAG, getTargetMachine());
2701 // i8, i64 math ops:
2710 return LowerI8Math(Op, DAG, Opc, *this);
2714 case ISD::FP_TO_SINT:
2715 case ISD::FP_TO_UINT:
2716 return LowerFP_TO_INT(Op, DAG, *this);
2718 case ISD::SINT_TO_FP:
2719 case ISD::UINT_TO_FP:
2720 return LowerINT_TO_FP(Op, DAG, *this);
2722 // Vector-related lowering.
2723 case ISD::BUILD_VECTOR:
2724 return LowerBUILD_VECTOR(Op, DAG);
2725 case ISD::SCALAR_TO_VECTOR:
2726 return LowerSCALAR_TO_VECTOR(Op, DAG);
2727 case ISD::VECTOR_SHUFFLE:
2728 return LowerVECTOR_SHUFFLE(Op, DAG);
2729 case ISD::EXTRACT_VECTOR_ELT:
2730 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2731 case ISD::INSERT_VECTOR_ELT:
2732 return LowerINSERT_VECTOR_ELT(Op, DAG);
2734 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2738 return LowerByteImmed(Op, DAG);
2740 // Vector and i8 multiply:
2743 return LowerI8Math(Op, DAG, Opc, *this);
2746 return LowerCTPOP(Op, DAG);
2748 case ISD::SELECT_CC:
2749 return LowerSELECT_CC(Op, DAG, *this);
2752 return LowerSETCC(Op, DAG, *this);
2755 return LowerTRUNCATE(Op, DAG);
// Hook for replacing results of nodes with illegal result types. Currently
// only the visible diagnostic default arm exists; unhandled nodes are left
// unchanged (the switch body is largely elided in this excerpt).
2761 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2762 SmallVectorImpl<SDValue>&Results,
2766 unsigned Opc = (unsigned) N->getOpcode();
2767 MVT OpVT = N->getValueType(0);
2771 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2772 cerr << "Op.getOpcode() = " << Opc << "\n";
2773 cerr << "*Op.getNode():\n";
2781 /* Otherwise, return unchanged */
2784 //===----------------------------------------------------------------------===//
2785 // Target Optimization Hooks
2786 //===----------------------------------------------------------------------===//
// Target-specific DAG combines for SPU. Patterns handled below:
//   - (add (SPUindirect a, b), const) folding,
//   - extend of VEC2PREFSLOT with matching types,
//   - degenerate SPUindirect and vector-shift-by-zero elimination,
//   - PREFSLOT2VEC/VEC2PREFSLOT round-trip elimination.
// Several case labels, returns and braces are elided in this excerpt.
2789 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2792 TargetMachine &TM = getTargetMachine();
2794 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2795 SelectionDAG &DAG = DCI.DAG;
2796 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2797 MVT NodeVT = N->getValueType(0); // The node's value type
2798 MVT Op0VT = Op0.getValueType(); // The first operand's result
2799 SDValue Result; // Initially, empty result
2801 switch (N->getOpcode()) {
// ISD::ADD combining with SPUindirect addressing (case label elided).
2804 SDValue Op1 = N->getOperand(1);
2806 if (Op0.getOpcode() == SPUISD::IndirectAddr
2807 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2808 // Normalize the operands to reduce repeated code
2809 SDValue IndirectArg = Op0, AddArg = Op1;
2811 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2816 if (isa<ConstantSDNode>(AddArg)) {
2817 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2818 SDValue IndOp1 = IndirectArg.getOperand(1);
2820 if (CN0->isNullValue()) {
2821 // (add (SPUindirect <arg>, <arg>), 0) ->
2822 // (SPUindirect <arg>, <arg>)
2824 #if !defined(NDEBUG)
2825 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2827 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2828 << "With:    (SPUindirect <arg>, <arg>)\n";
2833 } else if (isa<ConstantSDNode>(IndOp1)) {
2834 // (add (SPUindirect <arg>, <const>), <const>) ->
2835 // (SPUindirect <arg>, <const + const>)
2836 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2837 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2838 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2840 #if !defined(NDEBUG)
2841 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2843 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2844 << "), " << CN0->getSExtValue() << ")\n"
2845 << "With:    (SPUindirect <arg>, "
2846 << combinedConst << ")\n";
2850 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2851 IndirectArg, combinedValue);
2857 case ISD::SIGN_EXTEND:
2858 case ISD::ZERO_EXTEND:
2859 case ISD::ANY_EXTEND: {
2860 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2861 // (any_extend (SPUextract_elt0 <arg>)) ->
2862 // (SPUextract_elt0 <arg>)
2863 // Types must match, however...
2864 #if !defined(NDEBUG)
2865 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2866 cerr << "\nReplace: ";
2869 Op0.getNode()->dump(&DAG);
2878 case SPUISD::IndirectAddr: {
// Small-memory model: (SPUindirect (SPUaform addr, 0), 0) -> the AFormAddr.
2879 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2880 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2881 if (CN != 0 && CN->getZExtValue() == 0) {
2882 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2883 // (SPUaform <addr>, 0)
2885 DEBUG(cerr << "Replace: ");
2886 DEBUG(N->dump(&DAG));
2887 DEBUG(cerr << "\nWith:    ");
2888 DEBUG(Op0.getNode()->dump(&DAG));
2889 DEBUG(cerr << "\n");
2893 } else if (Op0.getOpcode() == ISD::ADD) {
2894 SDValue Op1 = N->getOperand(1);
2895 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2896 // (SPUindirect (add <arg>, <arg>), 0) ->
2897 // (SPUindirect <arg>, <arg>)
2898 if (CN1->isNullValue()) {
2900 #if !defined(NDEBUG)
2901 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2903 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2904 << "With:    (SPUindirect <arg>, <arg>)\n";
2908 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2909 Op0.getOperand(0), Op0.getOperand(1));
// Shift/rotate by zero is a no-op; fold the shift away.
2915 case SPUISD::SHLQUAD_L_BITS:
2916 case SPUISD::SHLQUAD_L_BYTES:
2917 case SPUISD::VEC_SHL:
2918 case SPUISD::VEC_SRL:
2919 case SPUISD::VEC_SRA:
2920 case SPUISD::ROTBYTES_LEFT: {
2921 SDValue Op1 = N->getOperand(1);
2923 // Kill degenerate vector shifts:
2924 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2925 if (CN->isNullValue()) {
2931 case SPUISD::PREFSLOT2VEC: {
2932 switch (Op0.getOpcode()) {
2935 case ISD::ANY_EXTEND:
2936 case ISD::ZERO_EXTEND:
2937 case ISD::SIGN_EXTEND: {
2938 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2940 // but only if the SPUprefslot2vec and <arg> types match.
2941 SDValue Op00 = Op0.getOperand(0);
2942 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2943 SDValue Op000 = Op00.getOperand(0);
2944 if (Op000.getValueType() == NodeVT) {
2950 case SPUISD::VEC2PREFSLOT: {
2951 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2953 Result = Op0.getOperand(0);
2961 // Otherwise, return unchanged.
2963 if (Result.getNode()) {
2964 DEBUG(cerr << "\nReplace.SPU: ");
2965 DEBUG(N->dump(&DAG));
2966 DEBUG(cerr << "\nWith:        ");
2967 DEBUG(Result.getNode()->dump(&DAG));
2968 DEBUG(cerr << "\n");
2975 //===----------------------------------------------------------------------===//
2976 // Inline Assembly Support
2977 //===----------------------------------------------------------------------===//
2979 /// getConstraintType - Given a constraint letter, return the type of
2980 /// constraint it is for this target.
// Single-letter constraints recognized here map to register classes
// (the specific case labels are elided in this excerpt); everything else
// defers to the TargetLowering base implementation.
2981 SPUTargetLowering::ConstraintType
2982 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2983 if (ConstraintLetter.size() == 1) {
2984 switch (ConstraintLetter[0]) {
2991 return C_RegisterClass;
2994 return TargetLowering::getConstraintType(ConstraintLetter);
// Map a single-letter inline-asm constraint (plus the operand's value type)
// to a concrete SPU register class; falls back to the base class for
// anything unrecognized. Case labels and some conditions are elided in
// this excerpt.
2997 std::pair<unsigned, const TargetRegisterClass*>
2998 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3001 if (Constraint.size() == 1) {
3002 // GCC RS6000 Constraint Letters
3003 switch (Constraint[0]) {
3007 return std::make_pair(0U, SPU::R64CRegisterClass);
3008 return std::make_pair(0U, SPU::R32CRegisterClass);
3011 return std::make_pair(0U, SPU::R32FPRegisterClass);
3012 else if (VT == MVT::f64)
3013 return std::make_pair(0U, SPU::R64FPRegisterClass);
3016 return std::make_pair(0U, SPU::GPRCRegisterClass);
3020 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3023 //! Compute used/known bits for a SPU operand
// Reports known-zero/known-one bits for SPU-specific nodes. The listed
// SPUISD cases currently fall through without refining the known bits
// (the shared handling after the labels is elided in this excerpt).
3025 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3029 const SelectionDAG &DAG,
3030 unsigned Depth ) const {
3032 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3034 switch (Op.getOpcode()) {
3036 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3042 case SPUISD::PREFSLOT2VEC:
3043 case SPUISD::LDRESULT:
3044 case SPUISD::VEC2PREFSLOT:
3045 case SPUISD::SHLQUAD_L_BITS:
3046 case SPUISD::SHLQUAD_L_BYTES:
3047 case SPUISD::VEC_SHL:
3048 case SPUISD::VEC_SRL:
3049 case SPUISD::VEC_SRA:
3050 case SPUISD::VEC_ROTL:
3051 case SPUISD::VEC_ROTR:
3052 case SPUISD::ROTBYTES_LEFT:
3053 case SPUISD::SELECT_MASK:
// Report the number of known sign bits for SPU-specific nodes. For the
// visible case (label elided in this excerpt), i8/i16/i32 results report
// the full bit width as sign bits; other types hit the elided else-branch.
3060 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3061 unsigned Depth) const {
3062 switch (Op.getOpcode()) {
3067 MVT VT = Op.getValueType();
3069 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3072 return VT.getSizeInBits();
3077 // LowerAsmOperandForConstraint
// No SPU-specific inline-asm operand handling yet: delegate entirely to
// the TargetLowering base class.
3079 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3080 char ConstraintLetter,
3082 std::vector<SDValue> &Ops,
3083 SelectionDAG &DAG) const {
3084 // Default, for the time being, to the base class handler
3085 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3089 /// isLegalAddressImmediate - Return true if the integer value can be used
3090 /// as the offset of the target addressing mode.
3091 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3092 const Type *Ty) const {
3093 // SPU's addresses are 256K:
// Accept signed 18-bit-ish offsets: the open interval (-2^18, 2^18 - 1).
3094 return (V > -(1 << 18) && V < (1 << 18) - 1);
// GlobalValue overload — body not visible in this excerpt; presumably
// rejects globals as address immediates (TODO confirm against full source).
3097 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3102 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3103 // The SPU target isn't yet aware of offsets.