//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;
  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString() << "\n";
      abort();
    }
#endif

    return retval;
  }
  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an A-form
    address.
   */
  bool isMemoryOperand(const SDValue &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::AFormAddr);
  }

  //! Predicate that returns true if the operand is an indirect target
  bool isIndirectOperand(const SDValue &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::Register
            || Opc == SPUISD::LDRESULT);
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // Initialize libcalls:
  setLibcallName(RTLIB::MUL_I64, "__muldi3");

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD,  MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8,   MVT::i8, Custom);
  setTruncStoreAction(MVT::i16,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i32,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i64,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadExtAction(ISD::EXTLOAD,  MVT::i16, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  // Custom lower BRCOND for i8 to "promote" the result to i16
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);

  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        the .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL, MVT::i64, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Custom);
  setOperationAction(ISD::MUL, MVT::i64, Expand);   // libcall

  // SMUL_LOHI, UMUL_LOHI
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
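  // (For readers unfamiliar with the ISA: the selb instruction computes the
  // bitwise merge rt = (ra & ~rc) | (rb & rc), so a boolean condition
  // expanded to an all-ones/all-zeros mask selects directly, with no branch.)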
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);

  // Zero extension and sign extension for i64 have to be
  // custom legalized
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , VT, Legal);
    setOperationAction(ISD::SUB , VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , VT, Custom);

    setOperationAction(ISD::AND   , VT, Legal);
    setOperationAction(ISD::OR    , VT, Legal);
    setOperationAction(ISD::XOR   , VT, Legal);
    setOperationAction(ISD::LOAD  , VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE , VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set other properties:
  setSchedulingPreference(SchedulingForLatency);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT_CHAINED]
                                              = "SPUISD::VEC2PREFSLOT_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
      "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
      "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
  MVT VT = Op.getValueType();
  return (VT.isInteger() ? VT : MVT(MVT::i32));
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Aligned load common code for CellSPU
/*!
  \param[in] Op The SelectionDAG load or store operand
  \param[in] DAG The selection DAG
  \param[in] ST CellSPU subtarget information structure
  \param[in] LSN The load/store base node (a LoadSDNode or StoreSDNode)
  \param[in,out] alignment Caller initializes this to the load or store node's
  value from getAlignment(), may be updated while generating the aligned load
  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
  offset (a multiple of 16, i.e., offset modulo 16 == 0)
  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
  offset of the preferred slot within the chunk (offset modulo 16 != 0)
  \param[in,out] VT Caller initializes this value type to the load or store
  node's loaded or stored value type; may be updated if an i1-extended load or
  store is encountered
  \param[out] was16aligned true if the base pointer had 16-byte alignment,
  otherwise false. Can help to determine if the chunk needs to be rotated.

  Both load and store lowering load a block of data aligned on a 16-byte
  boundary. This is the common aligned load code shared between both.
 */
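//
// Worked example (illustrative): for an i32 access at base+0x1234, alignOffs
// is set to the full constant offset 0x1234, the realigned pointer uses
// alignOffs & ~0xf (base+0x1230), and prefSlotOffs becomes
// (0x1234 & 0xf) - prefslot_byte(i32) = 4 - 0 = 4, so rotating the 16-byte
// chunk left by 4 bytes lands the value in the preferred slot (bytes 0-3).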
static SDValue
AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            LSBaseSDNode *LSN,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT &VT, bool &was16aligned)
{
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDValue basePtr = LSN->getBasePtr();
  SDValue chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    SDValue Op1 = basePtr.getNode()->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));

      alignOffs = (int) CN->getZExtValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDValue APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();
        }
      }
    } else {
      alignOffs = 0;
      prefSlotOffs = -vtm->prefslot_byte;
    }
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
    basePtr = DAG.getRegister(SPU::R1, VT);
  } else {
    alignOffs = 0;
    prefSlotOffs = -vtm->prefslot_byte;
  }

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            basePtr,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
    }

    // Emit the vector load:
    was16aligned = true;
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);
  }

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));
  }

  // Add the offset
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
}

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for
 elements within a 16-byte block, we have to rotate to extract the requested
 element.
 */
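//
// For example (illustrative): an i32 sitting at bytes 4..7 of its 16-byte
// chunk is moved into the preferred slot (bytes 0..3) by rotating the whole
// quadword left by 4 bytes; the scalar is then peeled out of the vector with
// the VEC2PREFSLOT nodes below.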
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT VT = LN->getMemoryVT();
  MVT OpVT = Op.getNode()->getValueType(0);
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  SDValue Ops[8];

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int offset, rotamt;
    bool was16aligned;
    SDValue result =
      AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT,
                  was16aligned);

    if (result.getNode() == 0)
      return result;

    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

      Ops[0] = the_chain;
      Ops[1] = result;
      if (was16aligned) {
        Ops[2] = DAG.getConstant(rotamt, MVT::i16);
      } else {
        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));
      }

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);
    }

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      SDVTList scalarvts;
      MVT vecVT = MVT::v16i8;

      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
      if (OpVT == VT)
        vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
      else
        vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));

      Ops[0] = the_chain;
      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::VEC2PREFSLOT_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);
    } else {
      // Handle the sign and zero-extending loads for i1 and i8:
      unsigned NewOpC;

      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
      } else {
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);
      }

      result = DAG.getNode(NewOpC, OpVT, result);
    }

    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
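//
// In other words, a scalar store becomes a read-modify-write of the whole
// quadword: load the aligned 16-byte chunk, SHUFB the new element into the
// right byte lanes (using an insertion mask from SHUFFLE_MASK, which selects
// to one of the c[bhwd]d instructions), then store the chunk back.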
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;
    bool was16aligned;

    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.getNode() == 0)
      return alignLoadVec;

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue basePtr = LN->getBasePtr();
    SDValue the_chain = alignLoadVec.getValue(1);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // stores
      theValue = theValue.getOperand(0);
    }

    SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDValue insertEltPtr;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
    DEBUG(basePtr.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");

    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
        (basePtr.getOpcode() == ISD::ADD
         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
      insertEltPtr = basePtr;
    } else {
      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
    }

    SDValue insertEltOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
    SDValue vectorizeOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
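//
// Sketch of the idea: the constant is splatted into both halves of a v2i64
// (BUILD_VECTOR T, T) so that it can be materialized by the vector constant
// formation instructions, and VEC2PREFSLOT then reads the scalar back out of
// the preferred slot.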
static SDValue
LowerConstant(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::i64) {
    ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
    SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
  }

  return SDValue();
}

//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
static SDValue
LowerBRCOND(SDValue Op, SelectionDAG &DAG)
{
  SDValue Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();
  MVT CondNVT;

  if (CondVT == MVT::i8) {
    CondNVT = MVT::i16;
    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       Op.getOperand(0),
                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
                       Op.getOperand(2));
  } else
    return SDValue();                // Unchanged
}

static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value, for expansion of llvm.va_start.
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
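///
/// (Illustrative example: a constant address like 0x1000 is word-aligned and
/// sign-extends from 18 bits, so it is encodable; the returned immediate is
/// the word index 0x1000 >> 2 = 0x400.)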
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||      // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;                 // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}

static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into
      // BRSL-style calls; otherwise, external symbols become BRASL calls.
      // This assumes that declared/defined symbols are in the same
      // compilation unit and can be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());
}

static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
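///
/// (For instance, a v8i16 splat of 0x1212 passes the test: its high byte
/// equals its low byte, so the value can be rematerialized from the single
/// byte 0x12.)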
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                   /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
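//
// (Illustrative example: the v4i32 vector <1, 1, 1, 1> packs to
// VectorBits[0] == VectorBits[1] == 0x0000000100000001 with both UndefBits
// words zero; an undef element instead sets the matching 32-bit field of
// UndefBits and leaves that field of VectorBits zero.)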
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDValue OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);

  return false;
}

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            unsigned MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            unsigned &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {

      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {

          // If the top 16-bits are the same as the lower 16-bits (ignoring
          // undefs), keep narrowing; otherwise we have an i32 splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8-bits are the same as the lower 8-bits (ignoring
              // undefs), we have an 8-bit splat; otherwise an i16 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  unsigned SplatSize;

  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          VT.getVectorElementType().getSizeInBits(),
                          SplatBits, SplatUndef, SplatSize))
    return SDValue();   // Not a constant vector, not a splat.

  switch (VT.getSimpleVT()) {
  default:
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (upper == lower) {
      // Magic constant that can be matched by IL, ILA, et. al.
      SDValue Val = DAG.getTargetConstant(val, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
    } else {
      SDValue LO32;
      SDValue HI32;
      SmallVector<SDValue, 16> ShufBytes;
      SDValue Result;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.
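      //
      // (Background, for readers unfamiliar with shufb: a control byte of
      // the form 10xxxxxx produces 0x00, 110xxxxx produces 0xff, and
      // 111xxxxx produces 0x80 in the corresponding result byte, which is
      // why the loop below ORs in 0x80, 0xc0 or 0xe0 for the "special"
      // upper/lower word patterns.)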
1662 // Detect if the upper or lower half is a special shuffle mask pattern:
1663 upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1664 lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1666 // Create lower vector if not a special pattern
1667 if (!lower_special) {
1668 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1669 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1670 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1671 LO32C, LO32C, LO32C, LO32C));
1674 // Create upper vector if not a special pattern
1675 if (!upper_special) {
1676 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1677 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1678 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1679 HI32C, HI32C, HI32C, HI32C));
1682 // If either upper or lower are special, then the two input operands are
1683 // the same (basically, one of them is a "don't care")
1688 if (lower_special && upper_special) {
1689 // Unhappy situation... both upper and lower are special, so punt with
1690 // a target constant:
1691 SDValue Zero = DAG.getConstant(0, MVT::i32);
1692 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
      for (int i = 0; i < 4; ++i) {
        uint64_t val = 0;
        for (int j = 0; j < 4; ++j) {
          bool process_upper, process_lower;
          val <<= 8;
          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val |= 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val |= 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val |= (j == 0 ? 0xe0 : 0x80);
          } else
            val |= i * 4 + j + ((i & 1) * 16);
        }

        ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     &ShufBytes[0], ShufBytes.size()));
    }
  }
  }

  return SDValue();
}
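// Note on the mask bytes built above (background from the SPU ISA, on which
// this lowering relies): a shufb control byte normally selects one byte from
// the 32-byte concatenation of the two source registers, but three special
// encodings generate constants instead: 0b10xxxxxx (e.g. 0x80) yields 0x00,
// 0b110xxxxx (e.g. 0xc0) yields 0xff, and 0b111xxxxx (e.g. 0xe0) yields 0x80.
// As a sketch, splatting the v2i64 constant 0x00000000ffffffff takes the
// "both special" path: the mask words alternate 0x80808080 and 0xc0c0c0c0,
// so the entire result is synthesized by the mask itself and no real source
// bytes are selected at all.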
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
///
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
/// generate a control word for byte/halfword/word insertion. This takes care
/// of a single element move from V2 into V1.
///
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
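///
/// Example (illustrative, not from the original comment): for v4i32, the
/// permutation mask (0, 1, 6, 3) is monotonic except for element 2, which
/// pulls word 2 from V2 (mask indices 4-7 refer to V2), so it qualifies for
/// the SHUFFLE_MASK path. A mask such as (3, 2, 1, 0) breaks monotonicity
/// and falls through to the general shufb lowering below.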
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDValue PermMask = Op.getOperand(2);

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT EltVT = V1.getValueType().getVectorElementType();
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDValue InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in SHUFFLE_MASK:
    SDValue ShufMaskOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the
    // actual bytes.
    unsigned BytesPerElement = EltVT.getSizeInBits()/8;

    SmallVector<SDValue, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();

      for (unsigned j = 0; j < BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
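// Worked example of the element-to-byte expansion above (illustrative):
// for a v4i32 mask (1, 0, 3, 2), BytesPerElement is 4, so the emitted
// v16i8 shufb mask is (4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11): each
// element index expands into the run of byte indices it covers.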
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);                   // Op0 = the scalar

  if (Op0.getNode()->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;
    MVT VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
    }
  }

  return SDValue();
}
static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
  switch (Op.getValueType().getSimpleVT()) {
  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << Op.getValueType().getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::v4i32: {
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
  }
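  // Why this works (a sketch of the decomposition; not from the original
  // comments): the SPU multiplies 16 x 16 -> 32 bits. Writing each 32-bit
  // lane as a = aH*2^16 + aL and b = bH*2^16 + bL:
  //
  //   a*b mod 2^32 = aL*bL + ((aH*bL + bH*aL) << 16)
  //
  // MPYU (mpyu) supplies aL*bL, and each MPYH (mpyh) supplies one of the
  // shifted cross terms, (aH*bL) << 16; the aH*bH term vanishes mod 2^32.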
  case MVT::v8i16: {
    // Multiply two v8i16 vectors (pipeline friendly version):
    // a) multiply lower halves, mask off upper 16-bit of 32-bit product
    // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
    // c) Use SELB to select upper and lower halves from the intermediate results
    //
    // NOTE: We really want to move the SELECT_MASK to earlier to actually get
    // the dual-issue. This code does manage to do this, even if it's a little
    // on the wacky side.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    SDValue Chain = Op.getOperand(0);
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

    SDValue FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i16)));

    SDValue HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDValue HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }
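  // Background on the 0xcccc constant (not from the original comments):
  // SELECT_MASK lowers to fsmb, which fans each of the 16 mask bits out to
  // one result byte, most-significant bit first, so 0xcccc (1100110011001100)
  // expands to 0xff bytes at positions 0-1, 4-5, 8-9 and 12-13, i.e. the
  // upper halfword of every 32-bit lane. Those are exactly the bytes SELB
  // must take from the shifted MPYHH product; the remaining bytes come from
  // the low-half MPY product.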
  case MVT::v16i8: {
    // This M00sE is N@stI! (apologies to Monty Python)
    //
    // SPU doesn't know how to do any 8-bit multiplication, so the solution
    // is to break it all apart, sign extend, and reassemble the various
    // intermediate products.
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    SDValue c8 = DAG.getConstant(8, MVT::i32);
    SDValue c16 = DAG.getConstant(16, MVT::i32);

    SDValue LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    SDValue LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i16));

    SDValue LoProdParts =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getNode(SPUISD::SELB, MVT::v8i16,
                              LLProd, LHProd, FSMBmask));

    SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    SDValue LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  LoProdParts,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    SDValue rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDValue rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDValue HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDValue HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA,
                                          MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA,
                                          MVT::v4i32, rBH, c8)));

    SDValue HHProd =
      DAG.getNode(SPUISD::SELB, MVT::v8i16,
                  HLProd,
                  DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
                  FSMBmask);

    SDValue HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }
  }

  return SDValue();
}
static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);
  MVT VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  // (Floating Interpolate (FP Reciprocal Estimate B))
  SDValue BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDValue AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What's the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()

  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}
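// The expression above is one Newton-Raphson refinement step spelled out as
// a DAG (a sketch of the algebra, not from the original comments): with
// r = fi(B, frest(B)) ~= 1/B and q = A*r as the initial quotient, the
// returned value is
//
//   q + r*(A - B*q)
//
// The residual A - B*q measures the error in q, and scaling it by the
// reciprocal estimate roughly doubles the number of correct mantissa bits,
// compensating for frest's limited (roughly 12-bit) estimate precision.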
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  SDValue retval;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
    // Constant argument:
    int EltNo = (int) C->getZExtValue();

    // sanity checks:
    if (VT == MVT::i8 && EltNo >= 16)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
    else if (VT == MVT::i16 && EltNo >= 8)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
    else if (VT == MVT::i64 && EltNo >= 2)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
      // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
    }

    // Need to generate shuffle mask and extract:
    int prefslot_begin = -1, prefslot_end = -1;
    int elt_byte = EltNo * VT.getSizeInBits() / 8;

    switch (VT.getSimpleVT()) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8: {
      prefslot_begin = prefslot_end = 3;
      break;
    }
    case MVT::i16: {
      prefslot_begin = 2; prefslot_end = 3;
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      prefslot_begin = 0; prefslot_end = 3;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }
    }

    assert(prefslot_begin != -1 && prefslot_end != -1 &&
           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

    unsigned int ShufBytes[16];
    for (int i = 0; i < 16; ++i) {
      // zero fill upper part of preferred slot, don't care about the
      // rest (undefined)
      unsigned int mask_val;
      if (i <= prefslot_end) {
        mask_val =
          ((i < prefslot_begin)
           ? 0x80
           : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }

    SDValue ShufMask[4];
    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
      unsigned bidx = i * 4;
      unsigned int bits = ((ShufBytes[bidx] << 24) |
                           (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
    }

    SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                      &ShufMask[0],
                                      sizeof(ShufMask) / sizeof(ShufMask[0]));

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                     N, N, ShufMaskVec));
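    // Worked example (illustrative): extracting i16 element 5 gives
    // elt_byte = 10 with preferred slot bytes 2-3, so ShufBytes becomes
    // { 0x80, 0x80, 10, 11, 0x80, 0x80, 10, 11, ... }: source bytes 10-11
    // land in the preferred halfword, 0x80 control bytes zero fill the rest
    // of the word, and the pattern simply repeats for the "don't care"
    // slots.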
  } else {
    // Variable index: Rotate the requested element into slot 0, then replicate
    // slot 0 across the vector
    MVT VecVT = N.getValueType();
    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
      cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
      abort();
    }

    // Make life easier by making sure the index is zero-extended to i32
    if (Elt.getValueType() != MVT::i32)
      Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);

    // Scale the index to a bit/byte shift quantity
    APInt scaleFactor =
      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
      // Scale the shift factor:
      Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
                        DAG.getConstant(scaleShift, MVT::i32));
    }

    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);

    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set)
    SDValue replicate;

    switch (VT.getSimpleVT()) {
    default:
      cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
      abort();
      /*NOTREACHED*/
    case MVT::i8: {
      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                              factor, factor);
      break;
    }
    case MVT::i16: {
      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                              factor, factor);
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                              factor, factor);
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
                              loFactor, hiFactor);
      break;
    }
    }

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(SPUISD::SHUFB, VecVT,
                                     vecShift, vecShift, replicate));
  }

  return retval;
}
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  MVT VT = Op.getValueType();

  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $sp ($1) because it's always 16-byte aligned and it's available:
  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(CN->getSExtValue(), PtrVT));
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);

  SDValue result =
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
                DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));

  return result;
}
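// Background on the $sp trick above (a sketch of the underlying ISA
// behavior): SHUFFLE_MASK is selected to one of the c[bhwd]d "generate
// controls for insertion" instructions, which compute a shufb control word
// purely from the low bits of their address operand; no load or store
// happens. Since $sp is always 16-byte aligned, ($sp + index) carries the
// element offset in those low bits, making the stack pointer a cheap,
// always-available base for materializing the insertion mask used by the
// SHUFB above.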
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
{
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i32)
            ? ISD::ZERO_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i32, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i32));
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
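  // Why the OR/SHL above implements an 8-bit rotate (sketch): duplicating
  // the byte into both halves of an i16 (e.g. 0xAB becomes 0xABAB) makes a
  // 16-bit rotate behave like an 8-bit one in the low byte. For example,
  // rotl(0xAB, 4) via 0xABAB rotated left by 4 gives 0xBABA, and the final
  // TRUNCATE keeps 0xBA, exactly the 8-bit rotation.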
  case ISD::SRL:
  case ISD::SHL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i16)
            ? ISD::ZERO_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i16)
            ? ISD::SIGN_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i16)
            ? ISD::SIGN_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  }

  return SDValue();
}
static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
{
  MVT VT = Op.getValueType();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  SDValue Op0 = Op.getOperand(0);

  switch (Opc) {
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND: {
    MVT Op0VT = Op0.getValueType();
    MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));

    assert(Op0VT == MVT::i32
           && "CellSPU: Zero/sign extending something other than i32");

    DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");

    SDValue PromoteScalar =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);

    if (Opc != ISD::SIGN_EXTEND) {
      // Use a shuffle to zero extend the i32 to i64 directly:
      SDValue shufMask =
        DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
                    DAG.getConstant(0x80808080, MVT::i32),
                    DAG.getConstant(0x00010203, MVT::i32),
                    DAG.getConstant(0x80808080, MVT::i32),
                    DAG.getConstant(0x08090a0b, MVT::i32));
      SDValue zextShuffle =
        DAG.getNode(SPUISD::SHUFB, Op0VecVT,
                    PromoteScalar, PromoteScalar, shufMask);

      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
    } else {
      // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
      // right and propagate the sign bit) instruction.
      SDValue RotQuad =
        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
                    PromoteScalar, DAG.getConstant(4, MVT::i32));
      SDValue SignQuad =
        DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
                    PromoteScalar, DAG.getConstant(32, MVT::i32));
      SDValue SelMask =
        DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
                    DAG.getConstant(0xf0f0, MVT::i16));
      SDValue CombineQuad =
        DAG.getNode(SPUISD::SELB, Op0VecVT,
                    SignQuad, RotQuad, SelMask);

      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
    }
  }
  case ISD::ADD: {
    // Turn operands into vectors to satisfy type checking (shufb works on
    // vectors)
    SDValue Op0 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
    SDValue Op1 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the carry up one register slot
    // once the carry is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));

    SDValue CarryGen =
      DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedCarry =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  CarryGen, CarryGen,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedCarry));
  }
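  // How the 64-bit add works (sketch, not from the original comments):
  // CARRY_GENERATE (cg) produces a 0/1 carry-out for each 32-bit word of
  // Op0 + Op1, and ADD_EXTENDED (addx) adds two words plus an incoming
  // carry. The shufb mask above moves each doubleword's low-word carry
  // (source bytes 4-7 and 12-15) up into the high-word slot and zero fills
  // the low-word slots (0x80 control bytes), so the high word absorbs the
  // carry out of the low word and each doubleword gets a full 64-bit add.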
  case ISD::SUB: {
    // Turn operands into vectors to satisfy type checking (shufb works on
    // vectors)
    SDValue Op0 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
    SDValue Op1 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the borrow up one register slot
    // once the borrow is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));

    SDValue BorrowGen =
      DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedBorrow =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  BorrowGen, BorrowGen,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedBorrow));
  }
  case ISD::SHL: {
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftAmtVT = ShiftAmt.getValueType();
    SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
    SDValue MaskLower =
      DAG.getNode(SPUISD::SELB, VecVT,
                  Op0Vec,
                  DAG.getConstant(0, VecVT),
                  DAG.getNode(SPUISD::SELECT_MASK, VecVT,
                              DAG.getConstant(0xff00ULL, MVT::i16)));
    SDValue ShiftAmtBytes =
      DAG.getNode(ISD::SRL, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(3, ShiftAmtVT));
    SDValue ShiftAmtBits =
      DAG.getNode(ISD::AND, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(7, ShiftAmtVT));

    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
                                   DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
                                               MaskLower, ShiftAmtBytes),
                                   ShiftAmtBits));
  }
  case ISD::SRL: {
    MVT VT = Op.getValueType();
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftAmtVT = ShiftAmt.getValueType();
    SDValue ShiftAmtBytes =
      DAG.getNode(ISD::SRL, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(3, ShiftAmtVT));
    SDValue ShiftAmtBits =
      DAG.getNode(ISD::AND, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(7, ShiftAmtVT));

    return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
                       DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
                                   Op0, ShiftAmtBytes),
                       ShiftAmtBits);
  }
  case ISD::SRA: {
    // Promote Op0 to vector
    SDValue Op0 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftVT = ShiftAmt.getValueType();

    // Negate variable shift amounts
    if (!isa<ConstantSDNode>(ShiftAmt)) {
      ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
                             DAG.getConstant(0, ShiftVT), ShiftAmt);
    }

    SDValue UpperHalfSign =
      DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
                                          Op0, DAG.getConstant(31, MVT::i32))));
    SDValue UpperHalfSignMask =
      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
    SDValue UpperLowerMask =
      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
                  DAG.getConstant(0xff00, MVT::i16));
    SDValue UpperLowerSelect =
      DAG.getNode(SPUISD::SELB, MVT::v2i64,
                  UpperHalfSignMask, Op0, UpperLowerMask);
    SDValue RotateLeftBytes =
      DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
                  UpperLowerSelect, ShiftAmt);
    SDValue RotateLeftBits =
      DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
                  RotateLeftBytes, ShiftAmt);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       RotateLeftBits);
  }
  }

  return SDValue();
}
//! Lower byte immediate operations for v16i8 vectors:
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  SDValue ConstVec;
  SDValue Arg;
  MVT VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           VT.getVectorElementType().getSizeInBits(),
                           SplatBits, SplatUndef, SplatSize)) {
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      SDValue tcVec[16];
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
  return Op;
}
//! Lower i32 multiplication
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
                        unsigned Opc) {
  switch (VT.getSimpleVT()) {
  default:
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << Op.getValueType().getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::i32: {
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);

    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
  }
  }

  return SDValue();
}
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  switch (VT.getSimpleVT()) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8: {
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }
  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDValue Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

    SDValue Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    SDValue Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDValue Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDValue();
}
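// Sketch of the i32 accumulation above (illustrative): cntb leaves one
// per-byte ones count in each byte, e.g. 0x01020304 for byte counts
// (1,2,3,4). The first shift/add folds the upper halfword onto the lower
// (0x01020304 + 0x00000102 = 0x01020406), the second folds the remaining
// two bytes (+ 0x00010204 = 0x0103060a), and the final AND with 0xff
// extracts the total: 0x0a = 10 = 1+2+3+4. Garbage accumulated in the
// upper bytes is masked away.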
//! Lower ISD::SELECT_CC
/*!
  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
  SELB instruction.

  \note Need to revisit this in the future: if the code path through the true
  and false value computations is longer than the latency of a branch (6
  cycles), then it would be more advantageous to branch and insert a new basic
  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
*/
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:

  SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
}
//! Custom (target-specific) lowering entry point
/*!
  This is where LLVM's DAG selection process calls to do target-specific
  lowering of nodes.
 */
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
  unsigned Opc = (unsigned) Op.getOpcode();
  MVT VT = Op.getValueType();

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    Op.getNode()->dump();
    abort();
    /*NOTREACHED*/
  }
  case ISD::LOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::Constant:
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::BRCOND:
    return LowerBRCOND(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8, i64 math ops:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA: {
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc);
    else if (VT == MVT::i64)
      return LowerI64Math(Op, DAG, Opc);
    break;
  }
  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (VT.isVector())
      return LowerVectorMUL(Op, DAG);
    else if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc);
    else
      return LowerMUL(Op, DAG, VT, Opc);

  case ISD::FDIV:
    if (VT == MVT::f32 || VT == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
    // This is probably a libcall
    else if (Op.getValueType() == MVT::f64)
      return LowerFDIVf64(Op, DAG);
    else
      assert(0 && "Calling FDIV on unsupported MVT");

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);

  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  }

  return SDValue();
}
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG)
{
  unsigned Opc = (unsigned) N->getOpcode();
  MVT OpVT = N->getValueType(0);

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    N->dump();
    abort();
    /*NOTREACHED*/
  }
  }

  /* Otherwise, return unchanged */
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
#endif
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  MVT NodeVT = N->getValueType(0);      // The node's value type
  MVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result

  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
      SDValue Op01 = Op0.getOperand(1);
      if (Op01.getOpcode() == ISD::Constant
          || Op01.getOpcode() == ISD::TargetConstant) {
        // (add <const>, (SPUindirect <arg>, <const>)) ->
        // (SPUindirect <arg>, <const + const>)
        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
        SDValue combinedConst =
          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);

        DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
                   << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
        DEBUG(cerr << "With: (SPUindirect <arg>, "
                   << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
        return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
                           Op0.getOperand(0), combinedConst);
      }
    } else if (isa<ConstantSDNode>(Op0)
               && Op1.getOpcode() == SPUISD::IndirectAddr) {
      SDValue Op11 = Op1.getOperand(1);
      if (Op11.getOpcode() == ISD::Constant
          || Op11.getOpcode() == ISD::TargetConstant) {
        // (add (SPUindirect <arg>, <const>), <const>) ->
        // (SPUindirect <arg>, <const + const>)
        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
        SDValue combinedConst =
          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);

        DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
                   << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
        DEBUG(cerr << "With: (SPUindirect <arg>, "
                   << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");

        return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
                           Op1.getOperand(0), combinedConst);
      }
    }
    break;
  }
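  // Example of the fold above (illustrative): (add (SPUindirect $sp, 32), 16)
  // becomes (SPUindirect $sp, 48), so a single d-form address can absorb
  // both constants instead of emitting a separate add.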
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
      DEBUG(cerr << "Replace: ");
      DEBUG(N->dump(&DAG));
      DEBUG(cerr << "\nWith:    ");
      DEBUG(Op0.getNode()->dump(&DAG));
      DEBUG(cerr << "\n");

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
      if (CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");

        return Op0;
      }
    }
    break;
  }
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTQUAD_RZ_BYTES:
  case SPUISD::ROTQUAD_RZ_BITS: {
    SDValue Op1 = N->getOperand(1);

    if (isa<ConstantSDNode>(Op1)) {
      // Kill degenerate vector shifts:
      ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
      if (CN->getZExtValue() == 0) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PROMOTE_SCALAR: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUpromote_scalar (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUpromote_scalar and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUpromote_scalar (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
  if (Result.getNode()) {
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
  }

  return Result;
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    case 'b':
    case 'r':
    case 'f':
    case 'd':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
#endif

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;

#if 0
  case CALL:
  case SHUFB:
  case SHUFFLE_MASK:
  case CNTB:
#endif

  case SPUISD::PROMOTE_SCALAR: {
    SDValue Op0 = Op.getOperand(0);
    MVT Op0VT = Op0.getValueType();
    unsigned Op0VTBits = Op0VT.getSizeInBits();
    uint64_t InMask = Op0VT.getIntegerVTBitMask();
    KnownZero |= APInt(Op0VTBits, ~InMask, false);
    KnownOne |= APInt(Op0VTBits, InMask, false);
    break;
  }

  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::VEC2PREFSLOT_CHAINED: {
    MVT OpVT = Op.getValueType();
    unsigned OpVTBits = OpVT.getSizeInBits();
    uint64_t InMask = OpVT.getIntegerVTBitMask();
    KnownZero |= APInt(OpVTBits, ~InMask, false);
    KnownOne |= APInt(OpVTBits, InMask, false);
    break;
  }

#if 0
  case EXTRACT_I1_ZEXT:
  case EXTRACT_I1_SEXT:
  case EXTRACT_I8_ZEXT:
  case EXTRACT_I8_SEXT:
  case MPY:
  case MPYU:
  case MPYH:
  case MPYHH:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTQUAD_RZ_BYTES:
  case SPUISD::ROTQUAD_RZ_BITS:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::ROTBYTES_LEFT_CHAINED:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
  case SPUISD::FPInterp:
  case SPUISD::FPRecipEst:
  case SPUISD::SEXT32TO64:
#endif
  }
}
// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                bool hasMemory,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
                                               Ops, DAG);
}
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // SPU's addresses are 256K:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}
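// Note (illustrative): (1 << 18) is 262144, so the test above accepts
// offsets in roughly the signed 18-bit range that covers the SPU's 256KB
// local store; e.g. V = 262142 is legal while V = 262144 is not.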
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}

bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}