2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CallingConv.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/Constants.h"
28 #include "llvm/Function.h"
29 #include "llvm/Intrinsics.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include "llvm/Target/TargetOptions.h"
// Used in getTargetNodeName() below
// Lazily-populated table mapping SPUISD target-node opcodes to their
// printable names. NOTE(review): mutable file-scope state filled on first
// call to getTargetNodeName() — not thread-safe, though consistent with
// LLVM backend style of this era.
std::map<unsigned, const char *> node_names;
//! MVT mapping to useful data for Cell SPU
// Per-value-type data used by the custom load/store lowering code below.
struct valtype_map_s {
  // NOTE(review): the remaining members are on elided lines; the lookup in
  // getValueTypeMapEntry() compares a `.valtype` member against an MVT.
  // Byte offset of the value's "preferred slot" within a 16-byte SPU
  // quadword register (SPU keeps scalars in a fixed slot of a vector reg).
  const int prefslot_byte;
// Lookup table from MVT to preferred-slot data.
// NOTE(review): the initializer entries are on elided lines in this view.
const valtype_map_s valtype_map[] = {
// Number of entries in valtype_map (classic sizeof-array count idiom).
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Look up the valtype_map entry for VT. On a miss this reports a fatal
// error via llvm_report_error, so callers may assume a non-null result.
// NOTE(review): several lines (the null-check branch, `std::string msg;`,
// closing braces and the `return retval;`) are on elided lines in this view.
const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  const valtype_map_s *retval = 0;

  // Linear scan is fine: the table is small and this runs at lowering time.
  for (size_t i = 0; i < n_valtype_map; ++i) {
    if (valtype_map[i].valtype == VT) {
      retval = valtype_map + i;

  // Error path — reached when no entry matched (guard condition elided).
  raw_string_ostream Msg(msg);
  Msg << "getValueTypeMapEntry returns NULL for "
  llvm_report_error(Msg.str());
//! Expand a library call into an actual call DAG node
/*!
 This code is taken from SelectionDAGLegalize, since it is not exposed as
 part of the LLVM SelectionDAG API.

 Builds an argument list from Op's operands, resolves the RTLIB routine
 name, and lowers it to a real call via TLI.LowerCallTo, returning the
 call's result value.

 NOTE(review): the storage class / return type line, the body of the
 argument loop's closing brace, the `Entry.Ty` assignment, the `RetTy`
 declaration and several closing braces are on elided lines in this view.
 */
ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
              bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
  // The input chain to this libcall is the entry node of the function.
  // Legalizing the call will automatically add the previous call to the
  // dependence (comment continues on an elided line).
  SDValue InChain = DAG.getEntryNode();

  // Build the argument list from Op's operands, tagging each argument with
  // the requested signedness so the ABI extends it correctly.
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
    MVT ArgVT = Op.getOperand(i).getValueType();
    const Type *ArgTy = ArgVT.getTypeForMVT(*DAG.getContext());
    Entry.Node = Op.getOperand(i);
    Entry.isSExt = isSigned;
    Entry.isZExt = !isSigned;
    Args.push_back(Entry);

  // External symbol naming the runtime-library routine for LC.
  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),

  // Splice the libcall in wherever FindInputOutputChains tells us to.
  // (The `const Type *RetTy =` declaration begins on an elided line.)
  Op.getNode()->getValueType(0).getTypeForMVT(*DAG.getContext());
  std::pair<SDValue, SDValue> CallInfo =
    TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                    0, CallingConv::C, false, Callee, Args, DAG,

  // CallInfo.first is the call's result value; .second is the out-chain.
  return CallInfo.first;
/// SPUTargetLowering constructor — configures the SelectionDAG legalizer
/// for the Cell SPU: register classes per value type, load-extension and
/// truncating-store rules, per-operation actions (Legal / Promote /
/// Expand / Custom), libcall names, boolean contents, and the set of
/// target DAG combines.
/// NOTE(review): the remainder of the member-initializer list, the
/// constructor's opening/closing braces, and the increment expressions of
/// the `for` loops below are on elided lines in this view; visible code
/// tokens are reproduced unchanged.
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),

  // Fold away setcc operations if possible.
  // NOTE(review): the statement this comment describes is on an elided line.

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  // No extending FP loads either; legalizer splits them into load + extend.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  // Truncating stores from i128 must be broken down by the legalizer.
  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  // NOTE(review): the upper bound is exclusive, so MVT::i128 itself does
  // not receive these custom load/store actions here — confirm intended.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       /* loop increment is on an elided line */
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    // Expand truncating stores from VT to every narrower integer type.
    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);

  // Same custom lowering for the FP scalar types. NOTE(review): bound is
  // exclusive, so only f32 is covered by this loop — confirm intended.
  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       /* loop increment is on an elided line */
    MVT VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM, MVT::i8, Expand);
  setOperationAction(ISD::UREM, MVT::i8, Expand);
  setOperationAction(ISD::SDIV, MVT::i8, Expand);
  setOperationAction(ISD::UDIV, MVT::i8, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
  setOperationAction(ISD::SREM, MVT::i16, Expand);
  setOperationAction(ISD::UREM, MVT::i16, Expand);
  setOperationAction(ISD::SDIV, MVT::i16, Expand);
  setOperationAction(ISD::UDIV, MVT::i16, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIV, MVT::i64, Expand);
  setOperationAction(ISD::UDIV, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SREM, MVT::i128, Expand);
  setOperationAction(ISD::UREM, MVT::i128, Expand);
  setOperationAction(ISD::SDIV, MVT::i128, Expand);
  setOperationAction(ISD::UDIV, MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // where available — comment continues on an elided line)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  // the ISel.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8, i32 and i64 multiplications
  // NOTE(review): despite the comment, only i8 MUL is Custom here; i32 and
  // i64 are marked Legal (presumably matched during instruction selection).
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
  setOperationAction(ISD::MULHU, MVT::i8, Expand);
  setOperationAction(ISD::MULHS, MVT::i8, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU, MVT::i16, Expand);
  setOperationAction(ISD::MULHS, MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  setOperationAction(ISD::MULHS, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i64, Expand);
  setOperationAction(ISD::MULHS, MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8, Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP. It does have i32 support CTLZ.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);

  setOperationAction(ISD::CTTZ , MVT::i8, Expand);
  setOperationAction(ISD::CTTZ , MVT::i16, Expand);
  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);
  setOperationAction(ISD::CTTZ , MVT::i128, Expand);

  setOperationAction(ISD::CTLZ , MVT::i8, Promote);
  setOperationAction(ISD::CTLZ , MVT::i16, Promote);
  setOperationAction(ISD::CTLZ , MVT::i32, Legal);
  setOperationAction(ISD::CTLZ , MVT::i64, Expand);
  setOperationAction(ISD::CTLZ , MVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  // NOTE(review): FP_TO_SINT/i64 is set again further down (to Custom), and
  // FP_TO_UINT/i32 is set again (to Promote); in TargetLowering the last
  // setOperationAction call wins, so the settings here are overridden.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       /* loop increment is on an elided line */
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  // NOTE(review): these override the earlier Expand (FP_TO_SINT/i64) and
  // repeat the earlier Custom (SINT_TO_FP/i64) settings above; the
  // duplication is harmless but should be consolidated.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul has to be custom lowered.
    // NOTE(review): the action set here is Legal, contradicting the
    // preceding comment — confirm which is intended.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND, VT, Legal);
    setOperationAction(ISD::OR, VT, Legal);
    setOperationAction(ISD::XOR, VT, Legal);
    setOperationAction(ISD::LOAD, VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE, VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

  // v16i8 logical ops get custom handling (overrides the Legal set above).
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  // Shift amounts are i32; SETCC produces all-ones/all-zeros booleans.
  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
/// getTargetNodeName - Return a human-readable name for the given SPUISD
/// target-node opcode, or null when the opcode is unknown.
/// NOTE(review): the return type line (`const char *`) and the function's
/// opening/closing braces are on elided lines in this view.
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
  // Populate the file-scope opcode -> name table once, on first use.
  // NOTE(review): this lazy initialization is not thread-safe.
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";

  // Map lookup; null (0) signals an unrecognized opcode to the caller.
  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
/// getFunctionAlignment - Return the Log2 alignment of this function.
/// NOTE(review): the body (the returned alignment constant) and closing
/// brace are on elided lines in this view.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

// Returns VT itself for the small integer types (the SPU SETCC result can
// live in the same-width register class); all other types get an i32 result.
MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
  // i16 and i32 are valid SETCC result types
  // NOTE(review): the code also passes i8 through, despite this comment.
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
551 //===----------------------------------------------------------------------===//
552 // Calling convention code:
553 //===----------------------------------------------------------------------===//
555 #include "SPUGenCallingConv.inc"
557 //===----------------------------------------------------------------------===//
558 // LowerOperation implementation
559 //===----------------------------------------------------------------------===//
561 /// Custom lower loads for CellSPU
563 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
564 within a 16-byte block, we have to rotate to extract the requested element.
566 For extending loads, we also want to ensure that the following sequence is
567 emitted, e.g. for MVT::f32 extending load to MVT::f64:
571 %2 v16i8,ch = rotate %1
%3 v4f32, ch = bitconvert %2
%4 f32 = vec2prefslot %3
574 %5 f64 = fp_extend %4
// Custom lowering for LOAD on CellSPU. Every load is re-emitted as an
// aligned 16-byte (v16i8) block load, rotated left so the requested value
// lands in its preferred slot, converted back to the scalar type via
// VEC2PREFSLOT, extended if this was an extending load, and finally wrapped
// in an LDRESULT node carrying the output chain.
// NOTE(review): the storage class / return type line, declarations of
// `result`, `rotate`, `CN` and `Flag`, several `else` headers, `break`s and
// closing braces are on elided lines in this view.
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();       // type as stored in memory
  MVT OutVT = Op.getValueType();      // type the DAG expects to receive
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  // Preferred-slot byte offset for InVT (fatal error if VT is unmapped).
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue basePtr = LN->getBasePtr();

    if (alignment == 16) {
      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        // Rotate by the within-quadword offset, adjusted to land the value
        // in its preferred slot.
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          // Fold the 16-byte-aligned portion of the offset back into the
          // address. (The first IndirectAddr operand, `basePtr`, is on an
          // elided line.)
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                DAG.getConstant((offset & ~0xf), PtrVT));

      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // (final else branch — non-constant offset; header elided)
        // Offset the rotate amount by the basePtr and the preferred slot
        int64_t rotamt = -vtm->prefslot_byte;

        // (The first ADD operand is on an elided line.)
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             DAG.getConstant(rotamt, PtrVT));

      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          // Pin the computed address in a virtual register.
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);

          // (else branch; header elided)
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);

        // (non-ADD base pointer; header and first operand elided)
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              DAG.getConstant(0, PtrVT));

      // Offset the rotate amount by the basePtr and the preferred slot
      // (the first ADD operand is on an elided line)
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain to the new load's output chain.
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      // FP extending loads (e.g. f32 -> f64) need FP_EXTEND, not ANY_EXTEND.
      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);

    // Wrap the value in an LDRESULT node so both the result and the output
    // chain are returned. (retops initializers are on elided lines.)
    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));

  // Indexed addressing modes are unsupported: report a fatal error.
  // (Other case labels and the error-block opening are on elided lines;
  // `std::string msg;` is also elided.)
  case ISD::LAST_INDEXED_MODE:
    raw_string_ostream Msg(msg);
    Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    Msg << (unsigned) LN->getAddressingMode();
    llvm_report_error(Msg.str());
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.

 Implementation: load the containing 16-byte block, SHUFB the scalar value
 into the correct byte positions (mask from SHUFFLE_MASK of the insertion
 offset), then store the whole block back.

 NOTE(review): the storage class / return type line, declarations of
 `result`, `CN` and `Flag`, several `else` headers, `break`s and closing
 braces are on elided lines in this view.
 */
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  // For truncating stores, the narrower in-memory type drives the layout.
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        // Insertion point is the within-quadword byte offset.
        // (The first IndirectAddr operand is on an elided line.)
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          // Fold the 16-byte-aligned portion of the offset into the address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                DAG.getConstant((offset & ~0xf), PtrVT));

        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    DAG.getConstant(0, PtrVT));

      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          // Pin the computed address in a virtual register.
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);

          // (else branch; header elided)
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);

        // (non-ADD base pointer; header and first operand elided)
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              DAG.getConstant(0, PtrVT));

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  DAG.getConstant(0, PtrVT));

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    the_chain = alignLoadVec.getValue(1);

    // Keep the load node around: its memory-operand info is reused for the
    // final store below.
    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();

    // (guard condition's first clauses are on elided lines)
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities (comment continues on an elided line).
      theValue = theValue.getOperand(0);

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the orignal base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);

    // Build the shuffle mask that inserts the scalar into its byte slot,
    // vectorize the scalar, then SHUFB it into the loaded block.
    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    // Store the merged block back, reusing the block load's memory info.
    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      // NOTE(review): "¤tRoot" below is mojibake for "&currentRoot"
      // (an "&curr" sequence was eaten by an HTML-entity conversion).
      // Harmless while this block stays under "#if 0", but should be
      // repaired before re-enabling it.
      const SDValue ¤tRoot = DAG.getRoot();

      cerr << "------- CellSPU:LowerStore result:\n";

      DAG.setRoot(currentRoot);

  // Indexed addressing modes are unsupported: report a fatal error.
  // (Other case labels, the error-block opening and `std::string msg;`
  // are on elided lines.)
  case ISD::LAST_INDEXED_MODE:
    raw_string_ostream Msg(msg);
    // NOTE(review): copy-paste from LowerLOAD — this message should read
    // "LowerSTORE: Got a StoreSDNode with an addr mode other than ".
    Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    Msg << (unsigned) SN->getAddressingMode();
    llvm_report_error(Msg.str());
904 //! Generate the address of a constant pool entry.
906 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
907 MVT PtrVT = Op.getValueType();
908 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
909 Constant *C = CP->getConstVal();
910 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
911 SDValue Zero = DAG.getConstant(0, PtrVT);
912 const TargetMachine &TM = DAG.getTarget();
913 // FIXME there is no actual debug info here
914 DebugLoc dl = Op.getDebugLoc();
916 if (TM.getRelocationModel() == Reloc::Static) {
917 if (!ST->usingLargeMem()) {
918 // Just return the SDValue with the constant pool address in it.
919 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
921 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
922 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
923 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
927 llvm_unreachable("LowerConstantPool: Relocation model other than static"
932 //! Alternate entry point for generating the address of a constant pool entry
934 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
935 return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
939 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
940 MVT PtrVT = Op.getValueType();
941 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
942 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
943 SDValue Zero = DAG.getConstant(0, PtrVT);
944 const TargetMachine &TM = DAG.getTarget();
945 // FIXME there is no actual debug info here
946 DebugLoc dl = Op.getDebugLoc();
948 if (TM.getRelocationModel() == Reloc::Static) {
949 if (!ST->usingLargeMem()) {
950 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
952 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
953 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
954 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
958 llvm_unreachable("LowerJumpTable: Relocation model other than static"
964 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
965 MVT PtrVT = Op.getValueType();
966 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
967 GlobalValue *GV = GSDN->getGlobal();
968 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
969 const TargetMachine &TM = DAG.getTarget();
970 SDValue Zero = DAG.getConstant(0, PtrVT);
971 // FIXME there is no actual debug info here
972 DebugLoc dl = Op.getDebugLoc();
974 if (TM.getRelocationModel() == Reloc::Static) {
975 if (!ST->usingLargeMem()) {
976 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
978 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
979 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
980 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
983 llvm_report_error("LowerGlobalAddress: Relocation model other than static"
//! Custom lower double precision floating point constants.
//
// An f64 constant is materialized by splatting its raw bit pattern into a
// v2i64 vector, bitcasting to v2f64, and extracting the preferred slot.
// Other types fall through and return an empty SDValue (no custom lowering).
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    // Reinterpret the double's bits as an i64 and splat it across a vector;
    // LLVM won't load FP constants directly on SPU.
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}
// LowerFORMAL_ARGUMENTS - lower incoming function arguments for CellSPU:
// the first NumArgRegs arguments are copied out of the SPU argument registers
// into freshly created virtual registers; any remaining arguments are loaded
// from fixed stack slots at increasing ArgOffset. For vararg functions, the
// unused argument registers are spilled to the stack and the vararg area's
// frame index is recorded in VarArgsFrameIndex.
// NOTE(review): this block was damaged in extraction — structural lines
// (case labels, braces, else clauses) are missing and every line carries a
// stray leading line number. Code text preserved verbatim; restore from
// upstream LLVM lib/Target/CellSPU/SPUISelLowering.cpp before compiling.
1015 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
1017 MachineFunction &MF = DAG.getMachineFunction();
1018 MachineFrameInfo *MFI = MF.getFrameInfo();
1019 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1020 SmallVector<SDValue, 48> ArgValues;
1021 SDValue Root = Op.getOperand(0);
1022 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
1023 DebugLoc dl = Op.getDebugLoc();
1025 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1026 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1028 unsigned ArgOffset = SPUFrameInfo::minStackSize();
1029 unsigned ArgRegIdx = 0;
1030 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1032 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1034 // Add DAG nodes to load the arguments or copy them out of registers.
1035 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
1036 ArgNo != e; ++ArgNo) {
1037 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
1038 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1041 if (ArgRegIdx < NumArgRegs) {
1042 const TargetRegisterClass *ArgRegClass;
// Select the register class matching the argument's value type; unhandled
// types are a fatal error.
1044 switch (ObjectVT.getSimpleVT()) {
1047 raw_string_ostream Msg(msg);
1048 Msg << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
1049 << ObjectVT.getMVTString();
1050 llvm_report_error(Msg.str());
1053 ArgRegClass = &SPU::R8CRegClass;
1056 ArgRegClass = &SPU::R16CRegClass;
1059 ArgRegClass = &SPU::R32CRegClass;
1062 ArgRegClass = &SPU::R64CRegClass;
1065 ArgRegClass = &SPU::GPRCRegClass;
1068 ArgRegClass = &SPU::R32FPRegClass;
1071 ArgRegClass = &SPU::R64FPRegClass;
1079 ArgRegClass = &SPU::VECREGRegClass;
1083 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1084 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1085 ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
1088 // We need to load the argument to a virtual register if we determined
1089 // above that we ran out of physical registers of the appropriate type
1090 // or we're forced to do vararg
1091 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1092 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1093 ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
1094 ArgOffset += StackSlotSize;
1097 ArgValues.push_back(ArgVal);
1099 Root = ArgVal.getOperand(0);
1104 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1105 // We will spill (79-3)+1 registers to the stack
1106 SmallVector<SDValue, 79-3+1> MemOps;
1108 // Create the frame slot
// Vararg spill loop: store each remaining argument register (as v16i8) into
// its own fixed stack slot, chaining the stores through MemOps.
1110 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1111 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1112 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1113 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1114 SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
1115 Root = Store.getOperand(0);
1116 MemOps.push_back(Store);
1118 // Increment address by stack slot size for the next stored argument
1119 ArgOffset += StackSlotSize;
1121 if (!MemOps.empty())
1122 Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1123 &MemOps[0], MemOps.size());
1126 ArgValues.push_back(Root)
1128 // Return the new list of results.
1129 return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
1130 &ArgValues[0], ArgValues.size());
1133 /// isLSAAddress - Return the immediate to use if the specified
1134 /// value is representable as a LSA address.
1135 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1136 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1139 int Addr = C->getZExtValue();
1140 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1141 (Addr << 14 >> 14) != Addr)
1142 return 0; // Top 14 bits have to be sext of immediate.
1144 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// LowerCALL - lower an outgoing CellSPU call: assign arguments to the SPU
// argument registers (spilling overflow to stack slots below the linkage
// area), select the callee addressing form (PC-relative/A-form for small
// memory model, indirect for large memory, or a munged LSA immediate), emit
// the CALLSEQ_START/CALL/CALLSEQ_END sequence, and copy return values out of
// R3 (and R4 for the two-i32 case).
// NOTE(review): this block was damaged in extraction — case labels, braces
// and else clauses are missing and every line carries a stray leading line
// number. Code text preserved verbatim; restore from upstream LLVM
// lib/Target/CellSPU/SPUISelLowering.cpp before compiling.
1148 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1149 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1150 SDValue Chain = TheCall->getChain();
1151 SDValue Callee = TheCall->getCallee();
1152 unsigned NumOps = TheCall->getNumArgs();
1153 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1154 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1155 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1156 DebugLoc dl = TheCall->getDebugLoc();
1158 // Handy pointer type
1159 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1161 // Accumulate how many bytes are to be pushed on the stack, including the
1162 // linkage area, and parameter passing area. According to the SPU ABI,
1163 // we minimally need space for [LR] and [SP]
1164 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1166 // Set up a copy of the stack pointer for use loading and storing any
1167 // arguments that may not fit in the registers available for argument
1169 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1171 // Figure out which arguments are going to go in registers, and which in
1173 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1174 unsigned ArgRegIdx = 0;
1176 // Keep track of registers passing arguments
1177 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1178 // And the arguments passed on the stack
1179 SmallVector<SDValue, 8> MemOpChains;
// Argument distribution loop: each argument goes in the next free argument
// register, else it is stored to the stack at PtrOff.
1181 for (unsigned i = 0; i != NumOps; ++i) {
1182 SDValue Arg = TheCall->getArg(i);
1184 // PtrOff will be used to store the current argument to the stack if a
1185 // register cannot be found for it.
1186 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1187 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1189 switch (Arg.getValueType().getSimpleVT()) {
1190 default: llvm_unreachable("Unexpected ValueType for argument!");
1196 if (ArgRegIdx != NumArgRegs) {
1197 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1199 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1200 ArgOffset += StackSlotSize;
1205 if (ArgRegIdx != NumArgRegs) {
1206 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1208 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1209 ArgOffset += StackSlotSize;
1218 if (ArgRegIdx != NumArgRegs) {
1219 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1221 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1222 ArgOffset += StackSlotSize;
1228 // Update number of stack bytes actually used, insert a call sequence start
1229 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1230 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1233 if (!MemOpChains.empty()) {
1234 // Adjust the stack pointer for the stack arguments.
1235 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1236 &MemOpChains[0], MemOpChains.size());
1239 // Build a sequence of copy-to-reg nodes chained together with token chain
1240 // and flag operands which copy the outgoing args into the appropriate regs.
1242 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1243 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1244 RegsToPass[i].second, InFlag);
1245 InFlag = Chain.getValue(1);
1248 SmallVector<SDValue, 8> Ops;
1249 unsigned CallOpc = SPUISD::CALL;
1251 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1252 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1253 // node so that legalize doesn't hack it.
1254 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1255 GlobalValue *GV = G->getGlobal();
1256 MVT CalleeVT = Callee.getValueType();
1257 SDValue Zero = DAG.getConstant(0, PtrVT);
1258 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1260 if (!ST->usingLargeMem()) {
1261 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1262 // style calls, otherwise, external symbols are BRASL calls. This assumes
1263 // that declared/defined symbols are in the same compilation unit and can
1264 // be reached through PC-relative jumps.
1267 // This may be an unsafe assumption for JIT and really large compilation
1269 if (GV->isDeclaration()) {
1270 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1272 Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1275 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1277 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1279 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1280 MVT CalleeVT = Callee.getValueType();
1281 SDValue Zero = DAG.getConstant(0, PtrVT);
1282 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1283 Callee.getValueType());
1285 if (!ST->usingLargeMem()) {
1286 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1288 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1290 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1291 // If this is an absolute destination address that appears to be a legal
1292 // local store address, use the munged value.
1293 Callee = SDValue(Dest, 0);
1296 Ops.push_back(Chain);
1297 Ops.push_back(Callee);
1299 // Add argument registers to the end of the list so that they are known live
1301 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1302 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1303 RegsToPass[i].second.getValueType()));
1305 if (InFlag.getNode())
1306 Ops.push_back(InFlag);
1307 // Returns a chain and a flag for retval copy to use.
1308 Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1309 &Ops[0], Ops.size());
1310 InFlag = Chain.getValue(1);
1312 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1313 DAG.getIntPtrConstant(0, true), InFlag);
1314 if (TheCall->getValueType(0) != MVT::Other)
1315 InFlag = Chain.getValue(1);
1317 SDValue ResultVals[3];
1318 unsigned NumResults = 0;
1320 // If the call has results, copy the values out of the ret val registers.
1321 switch (TheCall->getValueType(0).getSimpleVT()) {
1322 default: llvm_unreachable("Unexpected ret value!");
1323 case MVT::Other: break;
// Two-i32 result case: second value comes back in R4, first in R3.
1325 if (TheCall->getValueType(1) == MVT::i32) {
1326 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1327 MVT::i32, InFlag).getValue(1);
1328 ResultVals[0] = Chain.getValue(0);
1329 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1330 Chain.getValue(2)).getValue(1);
1331 ResultVals[1] = Chain.getValue(0);
1334 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1335 InFlag).getValue(1);
1336 ResultVals[0] = Chain.getValue(0);
1341 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
1342 InFlag).getValue(1);
1343 ResultVals[0] = Chain.getValue(0);
1347 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
1348 InFlag).getValue(1);
1349 ResultVals[0] = Chain.getValue(0);
1354 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
1355 InFlag).getValue(1);
1356 ResultVals[0] = Chain.getValue(0);
1365 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
1366 InFlag).getValue(1);
1367 ResultVals[0] = Chain.getValue(0);
1372 // If the function returns void, just return the chain.
1373 if (NumResults == 0)
1376 // Otherwise, merge everything together with a MERGE_VALUES node.
1377 ResultVals[NumResults++] = Chain;
1378 SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
1379 return Res.getValue(Op.getResNo());
1383 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1384 SmallVector<CCValAssign, 16> RVLocs;
1385 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1386 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1387 DebugLoc dl = Op.getDebugLoc();
1388 CCState CCInfo(CC, isVarArg, TM, RVLocs, DAG.getContext());
1389 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1391 // If this is the first return lowered for this function, add the regs to the
1392 // liveout set for the function.
1393 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1394 for (unsigned i = 0; i != RVLocs.size(); ++i)
1395 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1398 SDValue Chain = Op.getOperand(0);
1401 // Copy the result values into the output registers.
1402 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1403 CCValAssign &VA = RVLocs[i];
1404 assert(VA.isRegLoc() && "Can only return in registers!");
1405 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1406 Op.getOperand(i*2+1), Flag);
1407 Flag = Chain.getValue(1);
1411 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1413 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1417 //===----------------------------------------------------------------------===//
1418 // Vector related lowering:
1419 //===----------------------------------------------------------------------===//
1421 static ConstantSDNode *
1422 getVecImm(SDNode *N) {
1423 SDValue OpVal(0, 0);
1425 // Check to see if this buildvec has a single non-undef value in its elements.
1426 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1427 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1428 if (OpVal.getNode() == 0)
1429 OpVal = N->getOperand(i);
1430 else if (OpVal != N->getOperand(i))
1434 if (OpVal.getNode() != 0) {
1435 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
///
/// For i64 splats, both 32-bit halves must be identical; the shared half is
/// then range-checked.
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      // Both 32-bit halves of an i64 splat must match.
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}
/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      // Both 32-bit halves of an i64 splat must match.
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
1510 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1511 /// and the value fits into a signed 8-bit constant, and if so, return the
1514 /// @note: The incoming vector is v16i8 because that's the only way we can load
1515 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1517 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1519 if (ConstantSDNode *CN = getVecImm(N)) {
1520 int Value = (int) CN->getZExtValue();
1521 if (ValueType == MVT::i16
1522 && Value <= 0xffff /* truncated from uint64_t */
1523 && ((short) Value >> 8) == ((short) Value & 0xff))
1524 return DAG.getTargetConstant(Value & 0xff, ValueType);
1525 else if (ValueType == MVT::i8
1526 && (Value & 0xff) == Value)
1527 return DAG.getTargetConstant(Value, ValueType);
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant shifted left 16 bits (the
/// ILHU immediate form: low 16 bits are zero), and if so, return the constant.
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}
1549 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1550 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1551 if (ConstantSDNode *CN = getVecImm(N)) {
1552 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1558 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1559 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1560 if (ConstantSDNode *CN = getVecImm(N)) {
1561 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
// LowerBUILD_VECTOR - lower a constant-splat BUILD_VECTOR to the cheapest
// SPU materialization: integer splats become BUILD_VECTORs of the splatted
// scalar (possibly widened, e.g. v16i8 -> v8i16), FP splats are built as
// integer splats then bitcast, and v2i64 splats are delegated to
// SPU::LowerV2I64Splat. Non-splat vectors return an empty SDValue.
// NOTE(review): this block was damaged in extraction — case labels and braces
// are missing and every line carries a stray leading line number. Code text
// preserved verbatim; restore from upstream LLVM before compiling.
1567 //! Lower a BUILD_VECTOR instruction creatively:
1569 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1570 MVT VT = Op.getValueType();
1571 MVT EltVT = VT.getVectorElementType();
1572 DebugLoc dl = Op.getDebugLoc();
1573 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1574 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1575 unsigned minSplatBits = EltVT.getSizeInBits();
1577 if (minSplatBits < 16)
1580 APInt APSplatBits, APSplatUndef;
1581 unsigned SplatBitSize;
1584 if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1585 HasAnyUndefs, minSplatBits)
1586 || minSplatBits < SplatBitSize)
1587 return SDValue(); // Wasn't a constant vector or splat exceeded min
1589 uint64_t SplatBits = APSplatBits.getZExtValue();
1591 switch (VT.getSimpleVT()) {
1594 raw_string_ostream Msg(msg);
1595 Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1596 << VT.getMVTString();
1597 llvm_report_error(Msg.str());
1601 uint32_t Value32 = uint32_t(SplatBits);
1602 assert(SplatBitSize == 32
1603 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1604 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1605 SDValue T = DAG.getConstant(Value32, MVT::i32);
1606 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1607 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1611 uint64_t f64val = uint64_t(SplatBits);
1612 assert(SplatBitSize == 64
1613 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1614 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1615 SDValue T = DAG.getConstant(f64val, MVT::i64);
1616 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1617 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1621 // 8-bit constants have to be expanded to 16-bits
1622 unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1623 SmallVector<SDValue, 8> Ops;
1625 Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1626 return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1627 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1630 unsigned short Value16 = SplatBits;
1631 SDValue T = DAG.getConstant(Value16, EltVT);
1632 SmallVector<SDValue, 8> Ops;
1635 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1638 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1639 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1642 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1643 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1646 return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
// LowerV2I64Splat - materialize a v2i64 splat of SplatVal. Three strategies,
// cheapest first: (1) if both 32-bit halves match, splat the half as v4i32
// (matchable by IL/ILA etc.) and bitcast; (2) if both halves are the "special"
// byte patterns 0 / 0xffffffff / 0x80000000, emit a plain BUILD_VECTOR that
// becomes a constant pool load; (3) otherwise build splats of each half and
// combine them with a SHUFB whose mask selects/synthesizes the right bytes.
// NOTE(review): this block was damaged in extraction — branch bodies and
// braces are missing and every line carries a stray leading line number.
// Code text preserved verbatim; restore from upstream LLVM before compiling.
1656 SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1658 uint32_t upper = uint32_t(SplatVal >> 32);
1659 uint32_t lower = uint32_t(SplatVal);
1661 if (upper == lower) {
1662 // Magic constant that can be matched by IL, ILA, et. al.
1663 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1664 return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1665 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1666 Val, Val, Val, Val));
1668 bool upper_special, lower_special;
1670 // NOTE: This code creates common-case shuffle masks that can be easily
1671 // detected as common expressions. It is not attempting to create highly
1672 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1674 // Detect if the upper or lower half is a special shuffle mask pattern:
1675 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1676 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1678 // Both upper and lower are special, lower to a constant pool load:
1679 if (lower_special && upper_special) {
1680 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1681 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1682 SplatValCN, SplatValCN);
1687 SmallVector<SDValue, 16> ShufBytes;
1690 // Create lower vector if not a special pattern
1691 if (!lower_special) {
1692 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1693 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1694 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1695 LO32C, LO32C, LO32C, LO32C));
1698 // Create upper vector if not a special pattern
1699 if (!upper_special) {
1700 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1701 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1702 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1703 HI32C, HI32C, HI32C, HI32C));
1706 // If either upper or lower are special, then the two input operands are
1707 // the same (basically, one of them is a "don't care")
// Build the 16-byte SHUFB control word one 32-bit lane at a time; special
// halves encode the shufb "generate 0x00/0xff/0x80" byte selectors.
1713 for (int i = 0; i < 4; ++i) {
1715 for (int j = 0; j < 4; ++j) {
1717 bool process_upper, process_lower;
1719 process_upper = (upper_special && (i & 1) == 0);
1720 process_lower = (lower_special && (i & 1) == 1);
1722 if (process_upper || process_lower) {
1723 if ((process_upper && upper == 0)
1724 || (process_lower && lower == 0))
1726 else if ((process_upper && upper == 0xffffffff)
1727 || (process_lower && lower == 0xffffffff))
1729 else if ((process_upper && upper == 0x80000000)
1730 || (process_lower && lower == 0x80000000))
1731 val |= (j == 0 ? 0xe0 : 0x80);
1733 val |= i * 4 + j + ((i & 1) * 16);
1736 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1739 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1740 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1741 &ShufBytes[0], ShufBytes.size()));
// NOTE(review): this block was damaged in extraction — branch bodies and
// braces are missing inside the mask-scanning loop and every line carries a
// stray leading line number. Code text preserved verbatim; restore from
// upstream LLVM before compiling.
1745 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1746 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1747 /// permutation vector, V3, is monotonically increasing with one "exception"
1748 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1749 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1750 /// In either case, the net result is going to eventually invoke SHUFB to
1751 /// permute/shuffle the bytes from V1 and V2.
1753 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1754 /// control word for byte/halfword/word insertion. This takes care of a single
1755 /// element move from V2 into V1.
1757 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1758 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1759 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1760 SDValue V1 = Op.getOperand(0);
1761 SDValue V2 = Op.getOperand(1);
1762 DebugLoc dl = Op.getDebugLoc();
1764 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1766 // If we have a single element being moved from V1 to V2, this can be handled
1767 // using the C*[DX] compute mask instructions, but the vector elements have
1768 // to be monotonically increasing with one exception element.
1769 MVT VecVT = V1.getValueType();
1770 MVT EltVT = VecVT.getVectorElementType();
1771 unsigned EltsFromV2 = 0;
1773 unsigned V2EltIdx0 = 0;
1774 unsigned CurrElt = 0;
1775 unsigned MaxElts = VecVT.getVectorNumElements();
1776 unsigned PrevElt = 0;
1778 bool monotonic = true;
// Establish V2EltIdx0 (first mask index that refers to V2) per element type.
1781 if (EltVT == MVT::i8) {
1783 } else if (EltVT == MVT::i16) {
1785 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1787 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1790 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the shuffle mask: count elements drawn from V2 (monotonic single
// insertion) and detect whole-vector rotations of V1.
1792 for (unsigned i = 0; i != MaxElts; ++i) {
1793 if (SVN->getMaskElt(i) < 0)
1796 unsigned SrcElt = SVN->getMaskElt(i);
1799 if (SrcElt >= V2EltIdx0) {
1800 if (1 >= (++EltsFromV2)) {
1801 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1803 } else if (CurrElt != SrcElt) {
1811 if (PrevElt > 0 && SrcElt < MaxElts) {
1812 if ((PrevElt == SrcElt - 1)
1813 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1820 } else if (PrevElt == 0) {
1821 // First time through, need to keep track of previous element
1824 // This isn't a rotation, takes elements from vector 2
1830 if (EltsFromV2 == 1 && monotonic) {
1831 // Compute mask and shuffle
1832 MachineFunction &MF = DAG.getMachineFunction();
1833 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1834 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1835 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1836 // Initialize temporary register to 0
1837 SDValue InitTempReg =
1838 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1839 // Copy register's contents as index in SHUFFLE_MASK:
1840 SDValue ShufMaskOp =
1841 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1842 DAG.getTargetConstant(V2Elt, MVT::i32),
1843 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1844 // Use shuffle mask in SHUFB synthetic instruction:
1845 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1847 } else if (rotate) {
1848 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1850 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1851 V1, DAG.getConstant(rotamt, MVT::i16));
1853 // Convert the SHUFFLE_VECTOR mask's input element units to the
1855 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1857 SmallVector<SDValue, 16> ResultMask;
1858 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1859 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1861 for (unsigned j = 0; j < BytesPerElement; ++j)
1862 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1865 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1866 &ResultMask[0], ResultMask.size());
1867 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1871 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1872 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1873 DebugLoc dl = Op.getDebugLoc();
1875 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1876 // For a constant, build the appropriate constant vector, which will
1877 // eventually simplify to a vector register load.
1879 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1880 SmallVector<SDValue, 16> ConstVecValues;
1884 // Create a constant vector:
1885 switch (Op.getValueType().getSimpleVT()) {
1886 default: llvm_unreachable("Unexpected constant value type in "
1887 "LowerSCALAR_TO_VECTOR");
1888 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1889 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1890 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1891 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1892 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1893 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1896 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1897 for (size_t j = 0; j < n_copies; ++j)
1898 ConstVecValues.push_back(CValue);
1900 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1901 &ConstVecValues[0], ConstVecValues.size());
1903 // Otherwise, copy the value from one register to another:
1904 switch (Op0.getValueType().getSimpleVT()) {
1905 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1912 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
// Lower ISD::EXTRACT_VECTOR_ELT for SPU.
// Constant indices: either read the preferred slot directly (element 0 of
// i32/i64) or build a SHUFB mask that rotates the requested element into
// the preferred slot. Variable indices: shift the requested element to
// byte 0, replicate it across the register, then read the preferred slot.
1919 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1920 MVT VT = Op.getValueType();
1921 SDValue N = Op.getOperand(0);
1922 SDValue Elt = Op.getOperand(1);
1923 DebugLoc dl = Op.getDebugLoc();
1926 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1927 // Constant argument:
1928 int EltNo = (int) C->getZExtValue();
// Sanity-check the index against the lane count for each element type.
1931 if (VT == MVT::i8 && EltNo >= 16)
1932 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1933 else if (VT == MVT::i16 && EltNo >= 8)
1934 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1935 else if (VT == MVT::i32 && EltNo >= 4)
1936 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1937 else if (VT == MVT::i64 && EltNo >= 2)
1938 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1940 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1941 // i32 and i64: Element 0 is the preferred slot
1942 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1945 // Need to generate shuffle mask and extract:
1946 int prefslot_begin = -1, prefslot_end = -1;
1947 int elt_byte = EltNo * VT.getSizeInBits() / 8;
// The preferred slot's byte range within the quadword depends on the
// element width (case labels for each MVT are elided here).
1949 switch (VT.getSimpleVT()) {
1951 assert(false && "Invalid value type!");
1953 prefslot_begin = prefslot_end = 3;
1957 prefslot_begin = 2; prefslot_end = 3;
1962 prefslot_begin = 0; prefslot_end = 3;
1967 prefslot_begin = 0; prefslot_end = 7;
1972 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1973 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build a 16-byte shuffle mask that copies the requested element's bytes
// into the preferred slot; bytes past the slot repeat the slot pattern.
1975 unsigned int ShufBytes[16];
1976 for (int i = 0; i < 16; ++i) {
1977 // zero fill uppper part of preferred slot, don't care about the
1979 unsigned int mask_val;
1980 if (i <= prefslot_end) {
1982 ((i < prefslot_begin)
1984 : elt_byte + (i - prefslot_begin));
1986 ShufBytes[i] = mask_val;
1988 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
// Pack the 16 mask bytes into four i32 constants for a v4i32 mask vector.
1991 SDValue ShufMask[4];
1992 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1993 unsigned bidx = i * 4;
1994 unsigned int bits = ((ShufBytes[bidx] << 24) |
1995 (ShufBytes[bidx+1] << 16) |
1996 (ShufBytes[bidx+2] << 8) |
1998 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2001 SDValue ShufMaskVec =
2002 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2003 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
2005 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2006 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2007 N, N, ShufMaskVec));
2009 // Variable index: Rotate the requested element into slot 0, then replicate
2010 // slot 0 across the vector
2011 MVT VecVT = N.getValueType();
2012 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2013 llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2017 // Make life easier by making sure the index is zero-extended to i32
2018 if (Elt.getValueType() != MVT::i32)
2019 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2021 // Scale the index to a bit/byte shift quantity
// scaleFactor = bytes per element (16 / lane count); its log2 is the
// left-shift needed to turn the element index into a byte offset.
2023 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2024 unsigned scaleShift = scaleFactor.logBase2();
2027 if (scaleShift > 0) {
2028 // Scale the shift factor:
2029 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2030 DAG.getConstant(scaleShift, MVT::i32));
// Shift the quadword left by the byte offset so the requested element
// lands at byte 0.
2033 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2035 // Replicate the bytes starting at byte 0 across the entire vector (for
2036 // consistency with the notion of a unified register set)
// The replication shuffle mask depends on the element width; each case
// repeats the byte pattern of one element across all 16 bytes.
2039 switch (VT.getSimpleVT()) {
2041 llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2045 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2046 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2047 factor, factor, factor, factor);
2051 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2052 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2053 factor, factor, factor, factor);
2058 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2059 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2060 factor, factor, factor, factor);
2065 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2066 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2067 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2068 loFactor, hiFactor, loFactor, hiFactor);
2073 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2074 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2075 vecShift, vecShift, replicate));
// Lower ISD::INSERT_VECTOR_ELT for SPU (constant index only).
// Builds a SHUFFLE_MASK keyed off a stack-pointer-relative address and uses
// SHUFB to merge the scalar (promoted to a vector) into the target vector.
2081 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2082 SDValue VecOp = Op.getOperand(0);
2083 SDValue ValOp = Op.getOperand(1);
2084 SDValue IdxOp = Op.getOperand(2);
2085 DebugLoc dl = Op.getDebugLoc();
2086 MVT VT = Op.getValueType();
// The index must be a compile-time constant; variable indices are not
// handled by this path.
2088 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2089 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2091 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2092 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2093 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2094 DAG.getRegister(SPU::R1, PtrVT),
2095 DAG.getConstant(CN->getSExtValue(), PtrVT));
2096 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
// SHUFB merges ValOp (as a vector) with VecOp under the generated mask.
2099 DAG.getNode(SPUISD::SHUFB, dl, VT,
2100 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2102 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
// Lower i8 arithmetic/shift operations that SPU has no native i8 form for.
// The common strategy: extend operands to i16, perform the i16 operation,
// then truncate back to i8. Sign- vs zero-extension varies per operator
// (the case labels themselves are elided between the visible lines).
2107 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2108 const TargetLowering &TLI)
2110 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2111 DebugLoc dl = Op.getDebugLoc();
2112 MVT ShiftVT = TLI.getShiftAmountTy();
2114 assert(Op.getValueType() == MVT::i8);
2117 llvm_unreachable("Unhandled i8 math operator");
2121 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2123 SDValue N1 = Op.getOperand(1);
2124 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2125 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2126 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2127 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2132 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2134 SDValue N1 = Op.getOperand(1);
2135 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2136 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2137 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2138 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
// Rotate-style case: zero-extend the value, normalize the shift amount
// to ShiftVT, and replicate the low byte into the high byte so a 16-bit
// rotate produces the 8-bit rotate result in the low byte.
2142 SDValue N1 = Op.getOperand(1);
2143 MVT N1VT = N1.getValueType();
2145 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2146 if (!N1VT.bitsEq(ShiftVT)) {
2147 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2150 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2153 // Replicate lower 8-bits into upper 8:
2155 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2156 DAG.getNode(ISD::SHL, dl, MVT::i16,
2157 N0, DAG.getConstant(8, MVT::i32)));
2159 // Truncate back down to i8
2160 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2161 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
// Logical-shift case: zero-extend the value and shift amount.
2165 SDValue N1 = Op.getOperand(1);
2166 MVT N1VT = N1.getValueType();
2168 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2169 if (!N1VT.bitsEq(ShiftVT)) {
2170 unsigned N1Opc = ISD::ZERO_EXTEND;
2172 if (N1.getValueType().bitsGT(ShiftVT))
2173 N1Opc = ISD::TRUNCATE;
2175 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2178 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2179 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
// Arithmetic-shift case: sign-extend the value, normalize shift amount.
2182 SDValue N1 = Op.getOperand(1);
2183 MVT N1VT = N1.getValueType();
2185 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2186 if (!N1VT.bitsEq(ShiftVT)) {
2187 unsigned N1Opc = ISD::SIGN_EXTEND;
2189 if (N1VT.bitsGT(ShiftVT))
2190 N1Opc = ISD::TRUNCATE;
2191 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2194 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2195 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
// Multiply-style case: sign-extend both operands to i16.
2198 SDValue N1 = Op.getOperand(1);
2200 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2201 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2202 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2203 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2211 //! Lower byte immediate operations for v16i8 vectors:
// Lower v16i8 logical ops (AND/OR/XOR) whose second operand is a constant
// splat, so they can select to SPU's byte-immediate forms (ANDBI/ORBI/XORBI).
// Normalizes operand order so the constant vector is identified on either
// side, looks through BIT_CONVERT, and rebuilds the constant as a splat of
// the low 8 bits. Falls through (returning the original op) when no splat
// is found -- the operation is still legal without the immediate form.
2213 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2216 MVT VT = Op.getValueType();
2217 DebugLoc dl = Op.getDebugLoc();
2219 ConstVec = Op.getOperand(0);
2220 Arg = Op.getOperand(1);
2221 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2222 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2223 ConstVec = ConstVec.getOperand(0);
// Operand 0 wasn't the constant; try the other operand order.
2225 ConstVec = Op.getOperand(1);
2226 Arg = Op.getOperand(0);
2227 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2228 ConstVec = ConstVec.getOperand(0);
2233 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2234 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2235 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2237 APInt APSplatBits, APSplatUndef;
2238 unsigned SplatBitSize;
2240 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2242 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2243 HasAnyUndefs, minSplatBits)
2244 && minSplatBits <= SplatBitSize) {
2245 uint64_t SplatBits = APSplatBits.getZExtValue();
// Rebuild the constant as 16 copies of the low byte (target constant,
// so it stays an immediate during selection).
2246 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2248 SmallVector<SDValue, 16> tcVec;
2249 tcVec.assign(16, tc);
2250 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2251 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2255 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2256 // lowered. Return the operation, rather than a null SDValue.
2260 //! Custom lowering for CTPOP (count population)
2262 Custom lowering code that counts the number ones in the input
2263 operand. SPU has such an instruction, but it counts the number of
2264 ones per byte, which then have to be accumulated.
// Lower ISD::CTPOP via SPU's CNTB instruction, which counts ones per byte;
// the per-byte counts are then accumulated with shifts and adds. The i8
// case needs no accumulation; i16 folds one byte; i32 folds three bytes.
2266 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2267 MVT VT = Op.getValueType();
2268 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2269 DebugLoc dl = Op.getDebugLoc();
2271 switch (VT.getSimpleVT()) {
2273 assert(false && "Invalid value type!");
// i8: CNTB's single byte count IS the answer; just extract element 0.
2275 SDValue N = Op.getOperand(0);
2276 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2278 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2279 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2281 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
// i16: add the high byte's count into the low byte, mask to 0x0f.
2285 MachineFunction &MF = DAG.getMachineFunction();
2286 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2288 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2290 SDValue N = Op.getOperand(0);
2291 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2292 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2293 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2295 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2296 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2298 // CNTB_result becomes the chain to which all of the virtual registers
2299 // CNTB_reg, SUM1_reg become associated:
2300 SDValue CNTB_result =
2301 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2303 SDValue CNTB_rescopy =
2304 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2306 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2308 return DAG.getNode(ISD::AND, dl, MVT::i16,
2309 DAG.getNode(ISD::ADD, dl, MVT::i16,
2310 DAG.getNode(ISD::SRL, dl, MVT::i16,
// i32: two shift+add rounds fold all four byte counts into the low byte,
// then mask to 0xff.
2317 MachineFunction &MF = DAG.getMachineFunction();
2318 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2320 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2321 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2323 SDValue N = Op.getOperand(0);
2324 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2325 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2326 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2327 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2329 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2330 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2332 // CNTB_result becomes the chain to which all of the virtual registers
2333 // CNTB_reg, SUM1_reg become associated:
2334 SDValue CNTB_result =
2335 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2337 SDValue CNTB_rescopy =
2338 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
// Round 1: add the upper halfword's counts into the lower halfword.
2341 DAG.getNode(ISD::SRL, dl, MVT::i32,
2342 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2346 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2347 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2349 SDValue Sum1_rescopy =
2350 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
// Round 2: add the remaining high byte's count into the low byte.
2353 DAG.getNode(ISD::SRL, dl, MVT::i32,
2354 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2357 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2358 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2360 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2370 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2372 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2373 All conversions to i64 are expanded to a libcall.
// Lower ISD::FP_TO_SINT / FP_TO_UINT.
// f32->i32 is natively supported and (per the elided fall-through) passes
// through unchanged; f64->i32 and any ->i64 conversion expand to a runtime
// library call.
2375 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2376 SPUTargetLowering &TLI) {
2377 MVT OpVT = Op.getValueType();
2378 SDValue Op0 = Op.getOperand(0);
2379 MVT Op0VT = Op0.getValueType();
2381 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2382 || OpVT == MVT::i64) {
2383 // Convert f32 / f64 to i32 / i64 via libcall.
// Pick the signed or unsigned runtime conversion routine to match the
// opcode being lowered.
2385 (Op.getOpcode() == ISD::FP_TO_SINT)
2386 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2387 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2388 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2390 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2396 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2398 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2399 All conversions from i64 are expanded to a libcall.
// Lower ISD::SINT_TO_FP / UINT_TO_FP.
// i32->f32 is natively supported and (per the elided fall-through) passes
// through unchanged; i32->f64 and any conversion from i64 expand to a
// runtime library call.
2401 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2402 SPUTargetLowering &TLI) {
2403 MVT OpVT = Op.getValueType();
2404 SDValue Op0 = Op.getOperand(0);
2405 MVT Op0VT = Op0.getValueType();
2407 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2408 || Op0VT == MVT::i64) {
2409 // Convert i32, i64 to f64 via libcall:
// Pick the signed or unsigned runtime conversion routine to match the
// opcode being lowered.
2411 (Op.getOpcode() == ISD::SINT_TO_FP)
2412 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2413 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2414 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2416 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2422 //! Lower ISD::SETCC
2424 This handles MVT::f64 (double floating point) condition lowering
// Lower ISD::SETCC for MVT::f64 operands.
// SPU lacks native f64 compares, so the doubles are bit-cast to i64 and
// converted from sign-magnitude to two's complement, after which an integer
// compare gives the right ordering. SETO/SETUO are handled specially via
// NaN checks; ordered predicates AND in "neither operand is NaN".
2426 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2427 const TargetLowering &TLI) {
2428 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2429 DebugLoc dl = Op.getDebugLoc();
2430 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2432 SDValue lhs = Op.getOperand(0);
2433 SDValue rhs = Op.getOperand(1);
2434 MVT lhsVT = lhs.getValueType();
2435 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2437 MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2438 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2439 MVT IntVT(MVT::i64);
2441 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2442 // selected to a NOP:
2443 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
// Extract the high 32 bits of the lhs double (sign + exponent + mantissa
// top) and an absolute-value variant with the sign bit cleared.
2445 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2446 DAG.getNode(ISD::SRL, dl, IntVT,
2447 i64lhs, DAG.getConstant(32, MVT::i32)));
2448 SDValue lhsHi32abs =
2449 DAG.getNode(ISD::AND, dl, MVT::i32,
2450 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2452 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2454 // SETO and SETUO only use the lhs operand:
2455 if (CC->get() == ISD::SETO) {
2456 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2458 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2459 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2460 DAG.getSetCC(dl, ccResultVT,
2461 lhs, DAG.getConstantFP(0.0, lhsVT),
2463 DAG.getConstant(ccResultAllOnes, ccResultVT));
2464 } else if (CC->get() == ISD::SETUO) {
2465 // Evaluates to true if Op0 is [SQ]NaN
// NaN test on the integer representation: exponent all ones combined
// with a nonzero-mantissa check (comparison codes are elided).
2466 return DAG.getNode(ISD::AND, dl, ccResultVT,
2467 DAG.getSetCC(dl, ccResultVT,
2469 DAG.getConstant(0x7ff00000, MVT::i32),
2471 DAG.getSetCC(dl, ccResultVT,
2473 DAG.getConstant(0, MVT::i32),
2477 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2479 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2480 DAG.getNode(ISD::SRL, dl, IntVT,
2481 i64rhs, DAG.getConstant(32, MVT::i32)));
2483 // If a value is negative, subtract from the sign magnitude constant:
2484 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2486 // Convert the sign-magnitude representation into 2's complement:
// For each operand: build an all-ones mask when the sign bit is set
// (arithmetic shift of the high word) and select the converted value.
2487 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2488 lhsHi32, DAG.getConstant(31, MVT::i32));
2489 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2491 DAG.getNode(ISD::SELECT, dl, IntVT,
2492 lhsSelectMask, lhsSignMag2TC, i64lhs);
2494 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2495 rhsHi32, DAG.getConstant(31, MVT::i32));
2496 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2498 DAG.getNode(ISD::SELECT, dl, IntVT,
2499 rhsSelectMask, rhsSignMag2TC, i64rhs);
// Map the floating-point predicate to the equivalent signed integer
// predicate on the two's-complement representations.
2503 switch (CC->get()) {
2506 compareOp = ISD::SETEQ; break;
2509 compareOp = ISD::SETGT; break;
2512 compareOp = ISD::SETGE; break;
2515 compareOp = ISD::SETLT; break;
2518 compareOp = ISD::SETLE; break;
2521 compareOp = ISD::SETNE; break;
2523 llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2527 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2528 (ISD::CondCode) compareOp);
// Bit 3 of the condcode distinguishes unordered predicates; for ordered
// ones, also require both operands to be non-NaN.
2530 if ((CC->get() & 0x8) == 0) {
2531 // Ordered comparison:
2532 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2533 lhs, DAG.getConstantFP(0.0, MVT::f64),
2535 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2536 rhs, DAG.getConstantFP(0.0, MVT::f64),
2538 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2540 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2546 //! Lower ISD::SELECT_CC
2548 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2551 \note Need to revisit this in the future: if the code path through the true
2552 and false value computations is longer than the latency of a branch (6
2553 cycles), then it would be more advantageous to branch and insert a new basic
2554 block and branch on the condition. However, this code does not make that
2555 assumption, given the simplisitc uses so far.
// Lower ISD::SELECT_CC as a SETCC followed by the SPU SELB (select-bits)
// node, avoiding a branch entirely.
2558 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2559 const TargetLowering &TLI) {
2560 MVT VT = Op.getValueType();
2561 SDValue lhs = Op.getOperand(0);
2562 SDValue rhs = Op.getOperand(1);
2563 SDValue trueval = Op.getOperand(2);
2564 SDValue falseval = Op.getOperand(3);
2565 SDValue condition = Op.getOperand(4);
2566 DebugLoc dl = Op.getDebugLoc();
2568 // NOTE: SELB's arguments: $rA, $rB, $mask
2570 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2571 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2572 // condition was true and 0s where the condition was false. Hence, the
2573 // arguments to SELB get reversed.
2575 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2576 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2577 // with another "cannot select select_cc" assert:
2579 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2580 TLI.getSetCCResultType(Op.getValueType()),
2581 lhs, rhs, condition);
// falseval/trueval are deliberately swapped relative to ISD::SELECT; see
// the SELB mask-polarity note above.
2582 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2585 //! Custom lower ISD::TRUNCATE
// Custom lower ISD::TRUNCATE. Only i128 -> i64 is handled: a SHUFB with a
// fixed byte mask moves the least-significant doubleword of the quadword
// into the preferred slot. All other truncates are left for the default
// legalization (SDValue() is returned).
2586 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2588 // Type to truncate to
2589 MVT VT = Op.getValueType();
2590 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2591 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2592 DebugLoc dl = Op.getDebugLoc();
2594 // Type to truncate from
2595 SDValue Op0 = Op.getOperand(0);
2596 MVT Op0VT = Op0.getValueType();
2598 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2599 // Create shuffle mask, least significant doubleword of quadword
2600 unsigned maskHigh = 0x08090a0b;
2601 unsigned maskLow = 0x0c0d0e0f;
2602 // Use a shuffle to perform the truncation
2603 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2604 DAG.getConstant(maskHigh, MVT::i32),
2605 DAG.getConstant(maskLow, MVT::i32),
2606 DAG.getConstant(maskHigh, MVT::i32),
2607 DAG.getConstant(maskLow, MVT::i32));
2609 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2610 Op0, Op0, shufMask);
2612 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2615 return SDValue(); // Leave the truncate unmolested
2618 //! Custom (target-specific) lowering entry point
2620 This is where LLVM's DAG selection process calls to do target-specific
// Target-specific lowering entry point: dispatches each custom-lowered
// opcode to the corresponding Lower* helper. Unhandled opcodes dump the
// node and abort (the default case's brace structure is partially elided).
2624 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2626 unsigned Opc = (unsigned) Op.getOpcode();
2627 MVT VT = Op.getValueType();
// Default/unknown opcode: report and abort -- reaching here is a bug.
2632 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2633 cerr << "Op.getOpcode() = " << Opc << "\n";
2634 cerr << "*Op.getNode():\n";
2635 Op.getNode()->dump();
2637 llvm_unreachable(0);
2643 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2645 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2646 case ISD::ConstantPool:
2647 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2648 case ISD::GlobalAddress:
2649 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2650 case ISD::JumpTable:
2651 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2652 case ISD::ConstantFP:
2653 return LowerConstantFP(Op, DAG);
2654 case ISD::FORMAL_ARGUMENTS:
2655 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2657 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2659 return LowerRET(Op, DAG, getTargetMachine());
2661 // i8, i64 math ops:
2670 return LowerI8Math(Op, DAG, Opc, *this);
2674 case ISD::FP_TO_SINT:
2675 case ISD::FP_TO_UINT:
2676 return LowerFP_TO_INT(Op, DAG, *this);
2678 case ISD::SINT_TO_FP:
2679 case ISD::UINT_TO_FP:
2680 return LowerINT_TO_FP(Op, DAG, *this);
2682 // Vector-related lowering.
2683 case ISD::BUILD_VECTOR:
2684 return LowerBUILD_VECTOR(Op, DAG);
2685 case ISD::SCALAR_TO_VECTOR:
2686 return LowerSCALAR_TO_VECTOR(Op, DAG);
2687 case ISD::VECTOR_SHUFFLE:
2688 return LowerVECTOR_SHUFFLE(Op, DAG);
2689 case ISD::EXTRACT_VECTOR_ELT:
2690 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2691 case ISD::INSERT_VECTOR_ELT:
2692 return LowerINSERT_VECTOR_ELT(Op, DAG);
2694 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2698 return LowerByteImmed(Op, DAG);
2700 // Vector and i8 multiply:
2703 return LowerI8Math(Op, DAG, Opc, *this);
2706 return LowerCTPOP(Op, DAG);
2708 case ISD::SELECT_CC:
2709 return LowerSELECT_CC(Op, DAG, *this);
2712 return LowerSETCC(Op, DAG, *this);
2715 return LowerTRUNCATE(Op, DAG);
// Replace illegal-typed node results during type legalization. Currently a
// stub: unhandled opcodes dump diagnostics; otherwise results are returned
// unchanged (interior of the switch is elided here).
2721 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2722 SmallVectorImpl<SDValue>&Results,
2726 unsigned Opc = (unsigned) N->getOpcode();
2727 MVT OpVT = N->getValueType(0);
2731 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2732 cerr << "Op.getOpcode() = " << Opc << "\n";
2733 cerr << "*Op.getNode():\n";
2741 /* Otherwise, return unchanged */
2744 //===----------------------------------------------------------------------===//
2745 // Target Optimization Hooks
2746 //===----------------------------------------------------------------------===//
// Target-specific DAG combines for SPU. Simplifies SPUISD::IndirectAddr
// address arithmetic, drops redundant extends of VEC2PREFSLOT results,
// kills degenerate (zero-amount) vector shifts/rotates, and collapses
// PREFSLOT2VEC/VEC2PREFSLOT round trips.
2749 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2752 TargetMachine &TM = getTargetMachine();
2754 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2755 SelectionDAG &DAG = DCI.DAG;
2756 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2757 MVT NodeVT = N->getValueType(0); // The node's value type
2758 MVT Op0VT = Op0.getValueType(); // The first operand's result
2759 SDValue Result; // Initially, empty result
2760 DebugLoc dl = N->getDebugLoc();
2762 switch (N->getOpcode()) {
// ISD::ADD involving an IndirectAddr: fold constants into the address.
2765 SDValue Op1 = N->getOperand(1);
2767 if (Op0.getOpcode() == SPUISD::IndirectAddr
2768 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2769 // Normalize the operands to reduce repeated code
2770 SDValue IndirectArg = Op0, AddArg = Op1;
2772 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2777 if (isa<ConstantSDNode>(AddArg)) {
2778 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2779 SDValue IndOp1 = IndirectArg.getOperand(1);
2781 if (CN0->isNullValue()) {
2782 // (add (SPUindirect <arg>, <arg>), 0) ->
2783 // (SPUindirect <arg>, <arg>)
2785 #if !defined(NDEBUG)
2786 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2788 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2789 << "With: (SPUindirect <arg>, <arg>)\n";
2794 } else if (isa<ConstantSDNode>(IndOp1)) {
2795 // (add (SPUindirect <arg>, <const>), <const>) ->
2796 // (SPUindirect <arg>, <const + const>)
2797 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2798 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2799 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2801 #if !defined(NDEBUG)
2802 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2804 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2805 << "), " << CN0->getSExtValue() << ")\n"
2806 << "With: (SPUindirect <arg>, "
2807 << combinedConst << ")\n";
2811 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2812 IndirectArg, combinedValue);
2818 case ISD::SIGN_EXTEND:
2819 case ISD::ZERO_EXTEND:
2820 case ISD::ANY_EXTEND: {
2821 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2822 // (any_extend (SPUextract_elt0 <arg>)) ->
2823 // (SPUextract_elt0 <arg>)
2824 // Types must match, however...
2825 #if !defined(NDEBUG)
2826 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2827 cerr << "\nReplace: ";
2830 Op0.getNode()->dump(&DAG);
2839 case SPUISD::IndirectAddr: {
// (SPUindirect (SPUaform <addr>, 0), 0) simplifies to the A-form address
// alone when not using the large-memory model.
2840 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2841 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2842 if (CN != 0 && CN->getZExtValue() == 0) {
2843 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2844 // (SPUaform <addr>, 0)
2846 DEBUG(cerr << "Replace: ");
2847 DEBUG(N->dump(&DAG));
2848 DEBUG(cerr << "\nWith: ");
2849 DEBUG(Op0.getNode()->dump(&DAG));
2850 DEBUG(cerr << "\n");
2854 } else if (Op0.getOpcode() == ISD::ADD) {
2855 SDValue Op1 = N->getOperand(1);
2856 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2857 // (SPUindirect (add <arg>, <arg>), 0) ->
2858 // (SPUindirect <arg>, <arg>)
2859 if (CN1->isNullValue()) {
2861 #if !defined(NDEBUG)
2862 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2864 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2865 << "With: (SPUindirect <arg>, <arg>)\n";
2869 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2870 Op0.getOperand(0), Op0.getOperand(1));
2876 case SPUISD::SHLQUAD_L_BITS:
2877 case SPUISD::SHLQUAD_L_BYTES:
2878 case SPUISD::VEC_SHL:
2879 case SPUISD::VEC_SRL:
2880 case SPUISD::VEC_SRA:
2881 case SPUISD::ROTBYTES_LEFT: {
2882 SDValue Op1 = N->getOperand(1);
2884 // Kill degenerate vector shifts:
2885 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2886 if (CN->isNullValue()) {
2892 case SPUISD::PREFSLOT2VEC: {
2893 switch (Op0.getOpcode()) {
2896 case ISD::ANY_EXTEND:
2897 case ISD::ZERO_EXTEND:
2898 case ISD::SIGN_EXTEND: {
2899 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2901 // but only if the SPUprefslot2vec and <arg> types match.
2902 SDValue Op00 = Op0.getOperand(0);
2903 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2904 SDValue Op000 = Op00.getOperand(0);
2905 if (Op000.getValueType() == NodeVT) {
2911 case SPUISD::VEC2PREFSLOT: {
2912 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2914 Result = Op0.getOperand(0);
2922 // Otherwise, return unchanged.
2924 if (Result.getNode()) {
2925 DEBUG(cerr << "\nReplace.SPU: ");
2926 DEBUG(N->dump(&DAG));
2927 DEBUG(cerr << "\nWith: ");
2928 DEBUG(Result.getNode()->dump(&DAG));
2929 DEBUG(cerr << "\n");
2936 //===----------------------------------------------------------------------===//
2937 // Inline Assembly Support
2938 //===----------------------------------------------------------------------===//
2940 /// getConstraintType - Given a constraint letter, return the type of
2941 /// constraint it is for this target.
// Classify single-letter inline-asm constraints; the recognized letters
// (case labels elided) map to register-class constraints, everything else
// defers to the TargetLowering default.
2942 SPUTargetLowering::ConstraintType
2943 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2944 if (ConstraintLetter.size() == 1) {
2945 switch (ConstraintLetter[0]) {
2952 return C_RegisterClass;
2955 return TargetLowering::getConstraintType(ConstraintLetter);
// Map a single-letter inline-asm constraint (GCC RS6000-style letters) plus
// the operand's value type to a concrete SPU register class. Unrecognized
// constraints defer to the TargetLowering default.
2958 std::pair<unsigned, const TargetRegisterClass*>
2959 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2962 if (Constraint.size() == 1) {
2963 // GCC RS6000 Constraint Letters
2964 switch (Constraint[0]) {
// Integer constraints: 64-bit vs 32-bit register class depending on VT
// (the VT test between these two returns is elided).
2968 return std::make_pair(0U, SPU::R64CRegisterClass);
2969 return std::make_pair(0U, SPU::R32CRegisterClass);
// Floating-point constraint: pick the FP class matching f32/f64.
2972 return std::make_pair(0U, SPU::R32FPRegisterClass);
2973 else if (VT == MVT::f64)
2974 return std::make_pair(0U, SPU::R64FPRegisterClass);
2977 return std::make_pair(0U, SPU::GPRCRegisterClass);
2981 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2984 //! Compute used/known bits for a SPU operand
// Compute known-zero/known-one bits for SPU-specific nodes. Most SPU nodes
// are listed but contribute no information (fallthrough bodies elided); the
// commented-out default shows the "know nothing" result.
2986 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2990 const SelectionDAG &DAG,
2991 unsigned Depth ) const {
2993 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
2995 switch (Op.getOpcode()) {
2997 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3003 case SPUISD::PREFSLOT2VEC:
3004 case SPUISD::LDRESULT:
3005 case SPUISD::VEC2PREFSLOT:
3006 case SPUISD::SHLQUAD_L_BITS:
3007 case SPUISD::SHLQUAD_L_BYTES:
3008 case SPUISD::VEC_SHL:
3009 case SPUISD::VEC_SRL:
3010 case SPUISD::VEC_SRA:
3011 case SPUISD::VEC_ROTL:
3012 case SPUISD::VEC_ROTR:
3013 case SPUISD::ROTBYTES_LEFT:
3014 case SPUISD::SELECT_MASK:
// Report the number of known sign bits for SPU-specific nodes; for the
// handled case (opcode elided), small integer types report their full
// width as sign bits.
3021 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3022 unsigned Depth) const {
3023 switch (Op.getOpcode()) {
3028 MVT VT = Op.getValueType();
3030 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3033 return VT.getSizeInBits();
3038 // LowerAsmOperandForConstraint
// LowerAsmOperandForConstraint: no SPU-specific handling yet; simply
// forwards to the TargetLowering base-class implementation.
3040 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3041 char ConstraintLetter,
3043 std::vector<SDValue> &Ops,
3044 SelectionDAG &DAG) const {
3045 // Default, for the time being, to the base class handler
3046 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3050 /// isLegalAddressImmediate - Return true if the integer value can be used
3051 /// as the offset of the target addressing mode.
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
// Accepts the open interval (-2^18, 2^18 - 1), matching SPU's 18-bit
// (256K) addressing range noted below.
3052 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3053 const Type *Ty) const {
3054 // SPU's addresses are 256K:
3055 return (V > -(1 << 18) && V < (1 << 18) - 1);
3058 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3063 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3064 // The SPU target isn't yet aware of offsets.