1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
38 std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
41 struct valtype_map_s {
43 const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
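// The "preferred slot" is the byte range within a 128-bit SPU register where
// scalar values of a given type are expected to reside (e.g., bytes 0..3 for a
// 32-bit value, byte 3 for an 8-bit value); prefslot_byte records the first
// byte of that slot and is used below to adjust rotation amounts.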
59 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
71 cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form address.
88 bool isMemoryOperand(const SDValue &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDValue &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // SPU has no sign or zero extended loads for i1, i8, i16:
134 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
135 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
136 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
137 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
138 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
139 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
140 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
143 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
144 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
145 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
147 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
148 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
149 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
152 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
153 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
154 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
156 // SPU constant load actions are custom lowered:
157 setOperationAction(ISD::Constant, MVT::i64, Custom);
158 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
159 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
161 // SPU's loads and stores have to be custom lowered:
162 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
164 MVT VT = (MVT::SimpleValueType)sctype;
166 setOperationAction(ISD::LOAD, VT, Custom);
167 setOperationAction(ISD::STORE, VT, Custom);
170 // Custom lower BRCOND for i1, i8 to "promote" the condition to
171 // i32 and i16, respectively.
172 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
174 // Expand the jumptable branches
175 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
176 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
177 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
179 // SPU has no intrinsics for these particular operations:
180 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
182 // SPU has no SREM/UREM instructions
183 setOperationAction(ISD::SREM, MVT::i32, Expand);
184 setOperationAction(ISD::UREM, MVT::i32, Expand);
185 setOperationAction(ISD::SREM, MVT::i64, Expand);
186 setOperationAction(ISD::UREM, MVT::i64, Expand);
188 // We don't support sin/cos/sqrt/fmod
189 setOperationAction(ISD::FSIN , MVT::f64, Expand);
190 setOperationAction(ISD::FCOS , MVT::f64, Expand);
191 setOperationAction(ISD::FREM , MVT::f64, Expand);
192 setOperationAction(ISD::FLOG , MVT::f64, Expand);
193 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
194 setOperationAction(ISD::FLOG10,MVT::f64, Expand);
195 setOperationAction(ISD::FEXP , MVT::f64, Expand);
196 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
197 setOperationAction(ISD::FSIN , MVT::f32, Expand);
198 setOperationAction(ISD::FCOS , MVT::f32, Expand);
199 setOperationAction(ISD::FREM , MVT::f32, Expand);
200 setOperationAction(ISD::FLOG , MVT::f32, Expand);
201 setOperationAction(ISD::FLOG2, MVT::f32, Expand);
202 setOperationAction(ISD::FLOG10,MVT::f32, Expand);
203 setOperationAction(ISD::FEXP , MVT::f32, Expand);
204 setOperationAction(ISD::FEXP2, MVT::f32, Expand);
206 // SPU has no hardware square root instruction; expand FSQRT
207 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
208 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
210 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
211 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
213 // SPU can do rotate right and left, so legalize it... but customize for i8
214 // because instructions don't exist.
216 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
218 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
219 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
220 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
222 setOperationAction(ISD::ROTL, MVT::i32, Legal);
223 setOperationAction(ISD::ROTL, MVT::i16, Legal);
224 setOperationAction(ISD::ROTL, MVT::i8, Custom);
225 // SPU has no native version of shift left/right for i8
226 setOperationAction(ISD::SHL, MVT::i8, Custom);
227 setOperationAction(ISD::SRL, MVT::i8, Custom);
228 setOperationAction(ISD::SRA, MVT::i8, Custom);
229 // And SPU needs custom lowering for shift left/right for i64
230 setOperationAction(ISD::SHL, MVT::i64, Custom);
231 setOperationAction(ISD::SRL, MVT::i64, Custom);
232 setOperationAction(ISD::SRA, MVT::i64, Custom);
234 // Custom lower i8, i32 and i64 multiplications
235 setOperationAction(ISD::MUL, MVT::i8, Custom);
236 setOperationAction(ISD::MUL, MVT::i32, Custom);
237 setOperationAction(ISD::MUL, MVT::i64, Custom);
239 // Need to custom handle (some) common i8, i64 math ops
240 setOperationAction(ISD::ADD, MVT::i64, Custom);
241 setOperationAction(ISD::SUB, MVT::i8, Custom);
242 setOperationAction(ISD::SUB, MVT::i64, Custom);
244 // SPU does not have BSWAP, but it does support CTLZ for i32.
245 // CTPOP has to be custom lowered.
246 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
247 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
249 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
250 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
251 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
252 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
254 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
255 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
257 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
259 // SPU has a version of select that implements (a&~c)|(b&c), just like
260 // select ought to work:
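// For example, selb with a = 0x00FF, b = 0xAB12 and mask c = 0xFF00 computes
// (a & ~c) | (b & c) = 0x00FF | 0xAB00 = 0xABFF: bits are taken from b where
// the mask is 1 and from a where it is 0.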
261 setOperationAction(ISD::SELECT, MVT::i1, Promote);
262 setOperationAction(ISD::SELECT, MVT::i8, Legal);
263 setOperationAction(ISD::SELECT, MVT::i16, Legal);
264 setOperationAction(ISD::SELECT, MVT::i32, Legal);
265 setOperationAction(ISD::SELECT, MVT::i64, Expand);
267 setOperationAction(ISD::SETCC, MVT::i1, Promote);
268 setOperationAction(ISD::SETCC, MVT::i8, Legal);
269 setOperationAction(ISD::SETCC, MVT::i16, Legal);
270 setOperationAction(ISD::SETCC, MVT::i32, Legal);
271 setOperationAction(ISD::SETCC, MVT::i64, Expand);
273 // Zero extension and sign extension for i64 have to be custom lowered.
275 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
276 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
277 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
279 // SPU has a legal FP -> signed INT instruction
280 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
281 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
282 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
283 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
285 // FDIV on SPU requires custom lowering
286 setOperationAction(ISD::FDIV, MVT::f32, Custom);
287 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
289 // SPU has [U|S]INT_TO_FP
290 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
291 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
292 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
293 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
294 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
295 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
296 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
297 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
299 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
300 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
301 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
302 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
304 // We cannot sextinreg(i1). Expand to shifts.
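// (Expansion rewrites sext_inreg(x, i1) as a shift pair, e.g. (x << 31) >> 31
// with an arithmetic right shift for an i32 value.)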
305 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
307 // Support label based line numbers.
308 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
309 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
311 // We want to legalize GlobalAddress and ConstantPool nodes into the
312 // appropriate instructions to materialize the address.
313 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
315 MVT VT = (MVT::SimpleValueType)sctype;
317 setOperationAction(ISD::GlobalAddress, VT, Custom);
318 setOperationAction(ISD::ConstantPool, VT, Custom);
319 setOperationAction(ISD::JumpTable, VT, Custom);
322 // RET must be custom lowered, to meet ABI requirements
323 setOperationAction(ISD::RET, MVT::Other, Custom);
325 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
326 setOperationAction(ISD::VASTART , MVT::Other, Custom);
328 // Use the default implementation.
329 setOperationAction(ISD::VAARG , MVT::Other, Expand);
330 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
331 setOperationAction(ISD::VAEND , MVT::Other, Expand);
332 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
333 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
334 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
335 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
337 // Cell SPU has instructions for converting between i64 and fp.
338 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
339 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
341 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
342 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
344 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
345 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
347 // First set operation action for all vector types to expand. Then we
348 // will selectively turn on ones that can be effectively codegen'd.
349 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
350 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
351 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
352 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
353 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
354 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
356 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
357 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
358 MVT VT = (MVT::SimpleValueType)i;
360 // add/sub are legal for all supported vector VT's.
361 setOperationAction(ISD::ADD , VT, Legal);
362 setOperationAction(ISD::SUB , VT, Legal);
363 // mul has to be custom lowered.
364 setOperationAction(ISD::MUL , VT, Custom);
366 setOperationAction(ISD::AND , VT, Legal);
367 setOperationAction(ISD::OR , VT, Legal);
368 setOperationAction(ISD::XOR , VT, Legal);
369 setOperationAction(ISD::LOAD , VT, Legal);
370 setOperationAction(ISD::SELECT, VT, Legal);
371 setOperationAction(ISD::STORE, VT, Legal);
373 // These operations need to be expanded:
374 setOperationAction(ISD::SDIV, VT, Expand);
375 setOperationAction(ISD::SREM, VT, Expand);
376 setOperationAction(ISD::UDIV, VT, Expand);
377 setOperationAction(ISD::UREM, VT, Expand);
378 setOperationAction(ISD::FDIV, VT, Custom);
380 // Custom lower build_vector, constant pool spills, insert and
381 // extract vector elements:
382 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
383 setOperationAction(ISD::ConstantPool, VT, Custom);
384 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
385 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
386 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
387 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
390 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
391 setOperationAction(ISD::AND, MVT::v16i8, Custom);
392 setOperationAction(ISD::OR, MVT::v16i8, Custom);
393 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
394 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
396 setShiftAmountType(MVT::i32);
397 setSetCCResultContents(ZeroOrOneSetCCResult);
399 setStackPointerRegisterToSaveRestore(SPU::R1);
401 // We have target-specific dag combine patterns for the following nodes:
402 setTargetDAGCombine(ISD::ADD);
403 setTargetDAGCombine(ISD::ZERO_EXTEND);
404 setTargetDAGCombine(ISD::SIGN_EXTEND);
405 setTargetDAGCombine(ISD::ANY_EXTEND);
407 computeRegisterProperties();
411 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
413 if (node_names.empty()) {
414 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
415 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
416 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
417 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
418 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
419 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
420 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
421 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
422 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
423 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
424 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
425 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
426 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
427 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
428 = "SPUISD::EXTRACT_ELT0_CHAINED";
429 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
430 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
431 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
432 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
433 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
434 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
435 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
436 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
437 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
438 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
439 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
440 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
441 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
442 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
443 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
444 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
445 "SPUISD::ROTQUAD_RZ_BYTES";
446 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
447 "SPUISD::ROTQUAD_RZ_BITS";
448 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
449 "SPUISD::ROTBYTES_RIGHT_S";
450 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
451 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
452 "SPUISD::ROTBYTES_LEFT_CHAINED";
453 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
454 "SPUISD::ROTBYTES_LEFT_BITS";
455 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
456 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
457 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
458 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
459 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
460 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
461 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
462 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
463 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
466 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
468 return ((i != node_names.end()) ? i->second : 0);
471 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
472 MVT VT = Op.getValueType();
479 //===----------------------------------------------------------------------===//
480 // Calling convention code:
481 //===----------------------------------------------------------------------===//
483 #include "SPUGenCallingConv.inc"
485 //===----------------------------------------------------------------------===//
486 // LowerOperation implementation
487 //===----------------------------------------------------------------------===//
489 /// Aligned load common code for CellSPU
491 \param[in] Op The SelectionDAG load or store operand
492 \param[in] DAG The selection DAG
493 \param[in] ST CellSPU subtarget information structure
494 \param[in,out] alignment Caller initializes this to the load or store node's
495 value from getAlignment(), may be updated while generating the aligned load
496 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
497 offset (divisible by 16, modulo 16 == 0)
498 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
499 offset of the preferred slot (modulo 16 != 0)
500 \param[in,out] VT Caller initializes this value type to the load or store
501 node's loaded or stored value type; may be updated if an i1-extended load or store is encountered.
503 \param[out] was16aligned true if the base pointer had 16-byte alignment,
504 otherwise false. Can help to determine if the chunk needs to be rotated.
506 Both load and store lowering load a block of data aligned on a 16-byte
507 boundary. This is the common aligned load code shared between both.
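For example (illustrative numbers): an i32 load from base + 6 sets alignOffs
to 6 and prefSlotOffs to 6 - 0 = 6; the 16-byte chunk at base (alignOffs & ~0xf
== 0) is loaded, and the caller later rotates it so that bytes 6..9 land in the
i32 preferred slot (bytes 0..3).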
510 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
512 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
513 MVT &VT, bool &was16aligned)
515 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
516 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
517 SDValue basePtr = LSN->getBasePtr();
518 SDValue chain = LSN->getChain();
520 if (basePtr.getOpcode() == ISD::ADD) {
521 SDValue Op1 = basePtr.getNode()->getOperand(1);
523 if (Op1.getOpcode() == ISD::Constant
524 || Op1.getOpcode() == ISD::TargetConstant) {
525 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
527 alignOffs = (int) CN->getZExtValue();
528 prefSlotOffs = (int) (alignOffs & 0xf);
530 // Adjust the rotation amount to ensure that the final result ends up in
531 // the preferred slot:
532 prefSlotOffs -= vtm->prefslot_byte;
533 basePtr = basePtr.getOperand(0);
535 // Loading from memory, can we adjust alignment?
536 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
537 SDValue APtr = basePtr.getOperand(0);
538 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
539 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
540 alignment = GSDN->getGlobal()->getAlignment();
545 prefSlotOffs = -vtm->prefslot_byte;
547 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
548 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
549 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
550 prefSlotOffs = (int) (alignOffs & 0xf);
551 prefSlotOffs -= vtm->prefslot_byte;
552 basePtr = DAG.getRegister(SPU::R1, VT);
555 prefSlotOffs = -vtm->prefslot_byte;
558 if (alignment == 16) {
559 // Realign the base pointer as a D-Form address:
560 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
561 basePtr = DAG.getNode(ISD::ADD, PtrVT,
563 DAG.getConstant((alignOffs & ~0xf), PtrVT));
566 // Emit the vector load:
568 return DAG.getLoad(MVT::v16i8, chain, basePtr,
569 LSN->getSrcValue(), LSN->getSrcValueOffset(),
570 LSN->isVolatile(), 16);
573 // Unaligned load or we're using the "large memory" model, which means that
574 // we have to be very pessimistic:
575 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
576 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
577 DAG.getConstant(0, PtrVT));
581 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
582 DAG.getConstant((alignOffs & ~0xf), PtrVT));
583 was16aligned = false;
584 return DAG.getLoad(MVT::v16i8, chain, basePtr,
585 LSN->getSrcValue(), LSN->getSrcValueOffset(),
586 LSN->isVolatile(), 16);
589 /// Custom lower loads for CellSPU
591 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
592 within a 16-byte block, we have to rotate to extract the requested element.
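When the chunk could not be loaded through a 16-byte aligned address, the
rotation amount is derived from the base pointer itself (basePtr + rotamt)
rather than from a constant; see the !was16aligned path below.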
595 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
596 LoadSDNode *LN = cast<LoadSDNode>(Op);
597 SDValue the_chain = LN->getChain();
598 MVT VT = LN->getMemoryVT();
599 MVT OpVT = Op.getNode()->getValueType(0);
600 ISD::LoadExtType ExtType = LN->getExtensionType();
601 unsigned alignment = LN->getAlignment();
604 switch (LN->getAddressingMode()) {
605 case ISD::UNINDEXED: {
609 AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);
611 if (result.getNode() == 0)
614 the_chain = result.getValue(1);
615 // Rotate the chunk if necessary
618 if (rotamt != 0 || !was16aligned) {
619 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
624 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
626 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
627 LoadSDNode *LN1 = cast<LoadSDNode>(result);
628 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
629 DAG.getConstant(rotamt, PtrVT));
632 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
633 the_chain = result.getValue(1);
636 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
638 MVT vecVT = MVT::v16i8;
640 // Convert the loaded v16i8 vector to the appropriate vector type
641 // specified by the operand:
644 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
646 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
649 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
650 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
651 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
652 the_chain = result.getValue(1);
654 // Handle the sign and zero-extending loads for i1 and i8:
657 if (ExtType == ISD::SEXTLOAD) {
658 NewOpC = (OpVT == MVT::i1
659 ? SPUISD::EXTRACT_I1_SEXT
660 : SPUISD::EXTRACT_I8_SEXT);
662 assert(ExtType == ISD::ZEXTLOAD);
663 NewOpC = (OpVT == MVT::i1
664 ? SPUISD::EXTRACT_I1_ZEXT
665 : SPUISD::EXTRACT_I8_ZEXT);
668 result = DAG.getNode(NewOpC, OpVT, result);
671 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
672 SDValue retops[2] = {
677 result = DAG.getNode(SPUISD::LDRESULT, retvts,
678 retops, sizeof(retops) / sizeof(retops[0]));
685 case ISD::LAST_INDEXED_MODE:
686 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
688 cerr << (unsigned) LN->getAddressingMode() << "\n";
696 /// Custom lower stores for CellSPU
698 All CellSPU stores are aligned to 16-byte boundaries, so for elements
699 within a 16-byte block, we have to generate a shuffle to insert the
700 requested element into its place, then store the resulting block.
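In outline this is a read-modify-write sequence: the enclosing 16-byte chunk is
loaded via AlignedLoad, SPUISD::INSERT_MASK (which selects to one of the
cbd/chd/cwd/cdd instructions) builds the insertion control word, SHUFB merges
the scalar into the chunk, and the updated chunk is stored back.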
703 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
704 StoreSDNode *SN = cast<StoreSDNode>(Op);
705 SDValue Value = SN->getValue();
706 MVT VT = Value.getValueType();
707 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
708 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
709 unsigned alignment = SN->getAlignment();
711 switch (SN->getAddressingMode()) {
712 case ISD::UNINDEXED: {
713 int chunk_offset, slot_offset;
716 // The vector type we really want to load from the 16-byte chunk, except
717 // in the case of MVT::i1, which has to be v16i8.
718 MVT vecVT, stVecVT = MVT::v16i8;
721 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
722 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
724 SDValue alignLoadVec =
725 AlignedLoad(Op, DAG, ST, SN, alignment,
726 chunk_offset, slot_offset, VT, was16aligned);
728 if (alignLoadVec.getNode() == 0)
731 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
732 SDValue basePtr = LN->getBasePtr();
733 SDValue the_chain = alignLoadVec.getValue(1);
734 SDValue theValue = SN->getValue();
738 && (theValue.getOpcode() == ISD::AssertZext
739 || theValue.getOpcode() == ISD::AssertSext)) {
740 // Drill down and get the value for zero- and sign-extended quantities.
742 theValue = theValue.getOperand(0);
747 SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
748 SDValue insertEltPtr;
751 // If the base pointer is already a D-form address, then just create
752 // a new D-form address with a slot offset and the original base pointer.
753 // Otherwise generate a D-form address with the slot offset relative
754 // to the stack pointer, which is always aligned.
755 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
756 DEBUG(basePtr.getNode()->dump(&DAG));
759 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
760 (basePtr.getOpcode() == ISD::ADD
761 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
762 insertEltPtr = basePtr;
764 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
767 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
768 result = DAG.getNode(SPUISD::SHUFB, vecVT,
769 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
771 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
773 result = DAG.getStore(the_chain, result, basePtr,
774 LN->getSrcValue(), LN->getSrcValueOffset(),
775 LN->isVolatile(), LN->getAlignment());
784 case ISD::LAST_INDEXED_MODE:
785 cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
787 cerr << (unsigned) SN->getAddressingMode() << "\n";
795 /// Generate the address of a constant pool entry.
797 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
798 MVT PtrVT = Op.getValueType();
799 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
800 Constant *C = CP->getConstVal();
801 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
802 SDValue Zero = DAG.getConstant(0, PtrVT);
803 const TargetMachine &TM = DAG.getTarget();
805 if (TM.getRelocationModel() == Reloc::Static) {
806 if (!ST->usingLargeMem()) {
807 // Just return the SDValue with the constant pool address in it.
808 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
810 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
811 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
812 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
817 "LowerConstantPool: Relocation model other than static"
823 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
824 MVT PtrVT = Op.getValueType();
825 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
826 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
827 SDValue Zero = DAG.getConstant(0, PtrVT);
828 const TargetMachine &TM = DAG.getTarget();
830 if (TM.getRelocationModel() == Reloc::Static) {
831 if (!ST->usingLargeMem()) {
832 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
834 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
835 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
836 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
841 "LowerJumpTable: Relocation model other than static not supported.");
846 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
847 MVT PtrVT = Op.getValueType();
848 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
849 GlobalValue *GV = GSDN->getGlobal();
850 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
851 const TargetMachine &TM = DAG.getTarget();
852 SDValue Zero = DAG.getConstant(0, PtrVT);
854 if (TM.getRelocationModel() == Reloc::Static) {
855 if (!ST->usingLargeMem()) {
856 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
858 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
859 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
860 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
863 cerr << "LowerGlobalAddress: Relocation model other than static not "
872 //! Custom lower i64 integer constants
874 This code inserts all of the necessary juggling that needs to occur to load
875 a 64-bit constant into a register.
878 LowerConstant(SDValue Op, SelectionDAG &DAG) {
879 MVT VT = Op.getValueType();
880 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
882 if (VT == MVT::i64) {
883 SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
884 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
885 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
887 cerr << "LowerConstant: unhandled constant type "
897 //! Custom lower double precision floating point constants
899 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
900 MVT VT = Op.getValueType();
901 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
904 "LowerConstantFP: Node is not ConstantFPSDNode");
906 if (VT == MVT::f64) {
907 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
908 return DAG.getNode(ISD::BIT_CONVERT, VT,
909 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
915 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
917 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
919 SDValue Cond = Op.getOperand(1);
920 MVT CondVT = Cond.getValueType();
923 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
924 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
925 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
927 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
930 return SDValue(); // Unchanged
934 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
936 MachineFunction &MF = DAG.getMachineFunction();
937 MachineFrameInfo *MFI = MF.getFrameInfo();
938 MachineRegisterInfo &RegInfo = MF.getRegInfo();
939 SmallVector<SDValue, 8> ArgValues;
940 SDValue Root = Op.getOperand(0);
941 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
943 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
944 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
946 unsigned ArgOffset = SPUFrameInfo::minStackSize();
947 unsigned ArgRegIdx = 0;
948 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
950 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
952 // Add DAG nodes to load the arguments or copy them out of registers.
953 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
954 ArgNo != e; ++ArgNo) {
956 bool needsLoad = false;
957 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
958 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
960 switch (ObjectVT.getSimpleVT()) {
962 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
963 << ObjectVT.getMVTString()
968 if (!isVarArg && ArgRegIdx < NumArgRegs) {
969 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
970 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
971 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
978 if (!isVarArg && ArgRegIdx < NumArgRegs) {
979 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
980 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
981 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
988 if (!isVarArg && ArgRegIdx < NumArgRegs) {
989 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
990 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
991 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
998 if (!isVarArg && ArgRegIdx < NumArgRegs) {
999 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
1000 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
1008 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1009 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
1010 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1011 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
1018 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1019 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1020 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1021 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
1033 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1034 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1035 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1036 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1044 // We need to load the argument to a virtual register if we determined above
1045 // that we ran out of physical registers of the appropriate type
1047 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1048 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1049 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1050 ArgOffset += StackSlotSize;
1053 ArgValues.push_back(ArgVal);
1056 // If the function takes a variable number of arguments, make a frame index for
1057 // the start of the first vararg value... for expansion of llvm.va_start.
1059 VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1061 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1062 // If this function is vararg, store any remaining integer argument regs to
1063 // their spots on the stack so that they may be loaded by dereferencing the
1064 // result of va_next.
1065 SmallVector<SDValue, 8> MemOps;
1066 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1067 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1068 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1069 SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1070 SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1071 MemOps.push_back(Store);
1072 // Increment the address by four for the next argument to store
1073 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1074 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1076 if (!MemOps.empty())
1077 Root = DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
1080 ArgValues.push_back(Root);
1082 // Return the new list of results.
1083 return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1087 /// isLSAAddress - Return the immediate to use if the specified
1088 /// value is representable as an LSA address.
1089 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1090 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1093 int Addr = C->getZExtValue();
1094 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1095 (Addr << 14 >> 14) != Addr)
1096 return 0; // Top 14 bits have to be sext of immediate.
1098 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
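// For example, 0x00010000 is word aligned and fits in an 18-bit signed
// immediate (its top 14 bits are sign extension), so it is returned as the
// immediate 0x4000, i.e. the address divided by 4.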
1103 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1104 SDValue Chain = Op.getOperand(0);
1106 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
1107 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue() != 0;
1109 SDValue Callee = Op.getOperand(4);
1110 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1111 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1112 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1113 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1115 // Handy pointer type
1116 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1118 // Accumulate how many bytes are to be pushed on the stack, including the
1119 // linkage area and parameter passing area. According to the SPU ABI,
1120 // we minimally need space for [LR] and [SP]
1121 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1123 // Set up a copy of the stack pointer for use loading and storing any
1124 // arguments that may not fit in the registers available for argument
1126 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1128 // Figure out which arguments are going to go in registers, and which in
1130 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1131 unsigned ArgRegIdx = 0;
1133 // Keep track of registers passing arguments
1134 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1135 // And the arguments passed on the stack
1136 SmallVector<SDValue, 8> MemOpChains;
1138 for (unsigned i = 0; i != NumOps; ++i) {
1139 SDValue Arg = Op.getOperand(5+2*i);
1141 // PtrOff will be used to store the current argument to the stack if a
1142 // register cannot be found for it.
1143 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1144 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1146 switch (Arg.getValueType().getSimpleVT()) {
1147 default: assert(0 && "Unexpected ValueType for argument!");
1151 if (ArgRegIdx != NumArgRegs) {
1152 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1154 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1155 ArgOffset += StackSlotSize;
1160 if (ArgRegIdx != NumArgRegs) {
1161 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1163 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1164 ArgOffset += StackSlotSize;
1171 if (ArgRegIdx != NumArgRegs) {
1172 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1174 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1175 ArgOffset += StackSlotSize;
1181 // Update number of stack bytes actually used, insert a call sequence start
1182 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1183 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1185 if (!MemOpChains.empty()) {
1186 // Adjust the stack pointer for the stack arguments.
1187 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1188 &MemOpChains[0], MemOpChains.size());
1191 // Build a sequence of copy-to-reg nodes chained together with token chain
1192 // and flag operands which copy the outgoing args into the appropriate regs.
1194 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1195 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1197 InFlag = Chain.getValue(1);
1200 SmallVector<SDValue, 8> Ops;
1201 unsigned CallOpc = SPUISD::CALL;
1203 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1204 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1205 // node so that legalize doesn't hack it.
1206 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1207 GlobalValue *GV = G->getGlobal();
1208 MVT CalleeVT = Callee.getValueType();
1209 SDValue Zero = DAG.getConstant(0, PtrVT);
1210 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1212 if (!ST->usingLargeMem()) {
1213 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1214 // style calls, otherwise, external symbols are BRASL calls. This assumes
1215 // that declared/defined symbols are in the same compilation unit and can
1216 // be reached through PC-relative jumps.
1219 // This may be an unsafe assumption for JIT and really large compilation units.
1221 if (GV->isDeclaration()) {
1222 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1224 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1227 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1229 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1231 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1232 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1233 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1234 // If this is an absolute destination address that appears to be a legal
1235 // local store address, use the munged value.
1236 Callee = SDValue(Dest, 0);
1239 Ops.push_back(Chain);
1240 Ops.push_back(Callee);
1242 // Add argument registers to the end of the list so that they are known live into the call.
1244 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1245 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1246 RegsToPass[i].second.getValueType()));
1248 if (InFlag.getNode())
1249 Ops.push_back(InFlag);
1250 // Returns a chain and a flag for retval copy to use.
1251 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1252 &Ops[0], Ops.size());
1253 InFlag = Chain.getValue(1);
1255 Chain = DAG.getCALLSEQ_END(Chain,
1256 DAG.getConstant(NumStackBytes, PtrVT),
1257 DAG.getConstant(0, PtrVT),
1259 if (Op.getNode()->getValueType(0) != MVT::Other)
1260 InFlag = Chain.getValue(1);
1262 SDValue ResultVals[3];
1263 unsigned NumResults = 0;
1265 // If the call has results, copy the values out of the ret val registers.
1266 switch (Op.getNode()->getValueType(0).getSimpleVT()) {
1267 default: assert(0 && "Unexpected ret value!");
1268 case MVT::Other: break;
1270 if (Op.getNode()->getValueType(1) == MVT::i32) {
1271 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1272 ResultVals[0] = Chain.getValue(0);
1273 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1274 Chain.getValue(2)).getValue(1);
1275 ResultVals[1] = Chain.getValue(0);
1278 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1279 ResultVals[0] = Chain.getValue(0);
1284 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1285 ResultVals[0] = Chain.getValue(0);
1290 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.getNode()->getValueType(0),
1291 InFlag).getValue(1);
1292 ResultVals[0] = Chain.getValue(0);
1300 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.getNode()->getValueType(0),
1301 InFlag).getValue(1);
1302 ResultVals[0] = Chain.getValue(0);
1307 // If the function returns void, just return the chain.
1308 if (NumResults == 0)
1311 // Otherwise, merge everything together with a MERGE_VALUES node.
1312 ResultVals[NumResults++] = Chain;
1313 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1314 return Res.getValue(Op.getResNo());
1318 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1319 SmallVector<CCValAssign, 16> RVLocs;
1320 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1321 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1322 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1323 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1325 // If this is the first return lowered for this function, add the regs to the
1326 // liveout set for the function.
1327 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1328 for (unsigned i = 0; i != RVLocs.size(); ++i)
1329 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1332 SDValue Chain = Op.getOperand(0);
1335 // Copy the result values into the output registers.
1336 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1337 CCValAssign &VA = RVLocs[i];
1338 assert(VA.isRegLoc() && "Can only return in registers!");
1339 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1340 Flag = Chain.getValue(1);
1344 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1346 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1350 //===----------------------------------------------------------------------===//
1351 // Vector related lowering:
1352 //===----------------------------------------------------------------------===//
1354 static ConstantSDNode *
1355 getVecImm(SDNode *N) {
1356 SDValue OpVal(0, 0);
1358 // Check to see if this buildvec has a single non-undef value in its elements.
1359 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1360 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1361 if (OpVal.getNode() == 0)
1362 OpVal = N->getOperand(i);
1363 else if (OpVal != N->getOperand(i))
1367 if (OpVal.getNode() != 0) {
1368 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1373 return 0; // All UNDEF: use an implicit def; not a Constant node
1376 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1377 /// and the value fits into an unsigned 18-bit constant, and if so, return the constant.
1379 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1381 if (ConstantSDNode *CN = getVecImm(N)) {
1382 uint64_t Value = CN->getZExtValue();
1383 if (ValueType == MVT::i64) {
1384 uint64_t UValue = CN->getZExtValue();
1385 uint32_t upper = uint32_t(UValue >> 32);
1386 uint32_t lower = uint32_t(UValue);
1389 Value = Value >> 32;
1391 if (Value <= 0x3ffff)
1392 return DAG.getConstant(Value, ValueType);
1398 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1399 /// and the value fits into a signed 16-bit constant, and if so, return the
1401 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1403 if (ConstantSDNode *CN = getVecImm(N)) {
1404 int64_t Value = CN->getSignExtended();
1405 if (ValueType == MVT::i64) {
1406 uint64_t UValue = CN->getZExtValue();
1407 uint32_t upper = uint32_t(UValue >> 32);
1408 uint32_t lower = uint32_t(UValue);
1411 Value = Value >> 32;
1413 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1414 return DAG.getConstant(Value, ValueType);
1421 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1422 /// and the value fits into a signed 10-bit constant, and if so, return the
1424 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1426 if (ConstantSDNode *CN = getVecImm(N)) {
1427 int64_t Value = CN->getSignExtended();
1428 if (ValueType == MVT::i64) {
1429 uint64_t UValue = CN->getZExtValue();
1430 uint32_t upper = uint32_t(UValue >> 32);
1431 uint32_t lower = uint32_t(UValue);
1434 Value = Value >> 32;
1436 if (isS10Constant(Value))
1437 return DAG.getConstant(Value, ValueType);
1443 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1444 /// and the value fits into a signed 8-bit constant, and if so, return the
1447 /// @note: The incoming vector is v16i8 because that's the only way we can load
1448 /// constant vectors. Thus, we test to see if the upper and lower bytes are the same value.
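/// For example, an i16 splat value of 0x4A4A satisfies ((short) Value >> 8) ==
/// ((short) Value & 0xff) and is returned as the 8-bit immediate 0x4A.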
1450 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1452 if (ConstantSDNode *CN = getVecImm(N)) {
1453 int Value = (int) CN->getZExtValue();
1454 if (ValueType == MVT::i16
1455 && Value <= 0xffff /* truncated from uint64_t */
1456 && ((short) Value >> 8) == ((short) Value & 0xff))
1457 return DAG.getConstant(Value & 0xff, ValueType);
1458 else if (ValueType == MVT::i8
1459 && (Value & 0xff) == Value)
1460 return DAG.getConstant(Value, ValueType);
1466 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1467 /// and the value fits into a signed 16-bit constant, and if so, return the constant.
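/// For example, a v4i32 splat of 0x12340000 satisfies (Value & 0xffff0000) ==
/// Value and yields the immediate 0x1234, suitable for an ilhu (immediate load
/// halfword upper) instruction.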
1469 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1471 if (ConstantSDNode *CN = getVecImm(N)) {
1472 uint64_t Value = CN->getZExtValue();
1473 if ((ValueType == MVT::i32
1474 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1475 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1476 return DAG.getConstant(Value >> 16, ValueType);
1482 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1483 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1484 if (ConstantSDNode *CN = getVecImm(N)) {
1485 return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i32);
1491 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1492 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1493 if (ConstantSDNode *CN = getVecImm(N)) {
1494 return DAG.getConstant(CN->getZExtValue(), MVT::i64);
1500 // If this is a vector of constants or undefs, get the bits. A bit in
1501 // UndefBits is set if the corresponding element of the vector is an
1502 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1503 // zero. Return true if this is not an array of constants, false if it is.
1505 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1506 uint64_t UndefBits[2]) {
1507 // Start with zero'd results.
1508 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1510 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1511 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1512 SDValue OpVal = BV->getOperand(i);
1514 unsigned PartNo = i >= e/2; // In the upper half (upper 64 bits)?
1515 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1517 uint64_t EltBits = 0;
1518 if (OpVal.getOpcode() == ISD::UNDEF) {
1519 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1520 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1522 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1523 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1524 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1525 const APFloat &apf = CN->getValueAPF();
1526 EltBits = (CN->getValueType(0) == MVT::f32
1527 ? FloatToBits(apf.convertToFloat())
1528 : DoubleToBits(apf.convertToDouble()));
1530 // Nonconstant element.
1534 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1537 //printf("%llx %llx %llx %llx\n",
1538 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1542 /// If this is a splat (repetition) of a value across the whole vector, return
1543 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1544 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1545 /// SplatSize = 1 byte.
1546 static bool isConstantSplat(const uint64_t Bits128[2],
1547 const uint64_t Undef128[2],
1549 uint64_t &SplatBits, uint64_t &SplatUndef,
1551 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1552 // the same as the lower 64-bits, ignoring undefs.
1553 uint64_t Bits64 = Bits128[0] | Bits128[1];
1554 uint64_t Undef64 = Undef128[0] & Undef128[1];
1555 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1556 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1557 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1558 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1560 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1561 if (MinSplatBits < 64) {
1563 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1565 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1566 if (MinSplatBits < 32) {
1568 // If the top 16 bits match the lower 16 bits (ignoring undefs), the splat
1569 // may be narrower still; otherwise we have an i32 splat.
1570 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1571 if (MinSplatBits < 16) {
1572 // If the top 8 bits match the lower 8 bits (ignoring undefs), we have
1573 // an i8 splat; otherwise it is an i16 splat.
1574 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1575 == ((Bits16 >> 8) & ~Undef16)) {
1576 // We have an 8-bit splat.
1577 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1578 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1584 SplatUndef = Undef16;
1591 SplatUndef = Undef32;
1597 SplatBits = Bits128[0];
1598 SplatUndef = Undef128[0];
1604 return false; // Can't be a splat if two pieces don't match.
1607 // If this is a case we can't handle, return null and let the default
1608 // expansion code take care of it. If we CAN select this case, and if it
1609 // selects to a single instruction, return Op. Otherwise, if we can codegen
1610 // this case more efficiently than a constant pool load, lower it to the
1611 // sequence of ops that should be used.
1612 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1613 MVT VT = Op.getValueType();
1614 // If this is a vector of constants or undefs, get the bits. A bit in
1615 // UndefBits is set if the corresponding element of the vector is an
1616 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1618 uint64_t VectorBits[2];
1619 uint64_t UndefBits[2];
1620 uint64_t SplatBits, SplatUndef;
1622 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1623 || !isConstantSplat(VectorBits, UndefBits,
1624 VT.getVectorElementType().getSizeInBits(),
1625 SplatBits, SplatUndef, SplatSize))
1626 return SDValue(); // Not a constant vector, not a splat.
1628 switch (VT.getSimpleVT()) {
1631 uint32_t Value32 = SplatBits;
1632 assert(SplatSize == 4
1633 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1634 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1635 SDValue T = DAG.getConstant(Value32, MVT::i32);
1636 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1637 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1641 uint64_t f64val = SplatBits;
1642 assert(SplatSize == 8
1643 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1644 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1645 SDValue T = DAG.getConstant(f64val, MVT::i64);
1646 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1647 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1651 // 8-bit constants have to be expanded to 16-bits
1652 unsigned short Value16 = SplatBits | (SplatBits << 8);
1654 for (int i = 0; i < 8; ++i)
1655 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1656 return DAG.getNode(ISD::BIT_CONVERT, VT,
1657 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1660 unsigned short Value16;
1662 Value16 = (unsigned short) (SplatBits & 0xffff);
1664 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1665 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1667 for (int i = 0; i < 8; ++i) Ops[i] = T;
1668 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1671 unsigned int Value = SplatBits;
1672 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1673 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1676 uint64_t val = SplatBits;
1677 uint32_t upper = uint32_t(val >> 32);
1678 uint32_t lower = uint32_t(val);
1680 if (upper == lower) {
1681 // Magic constant that can be matched by IL, ILA, et al.
1682 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1683 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1687 SmallVector<SDValue, 16> ShufBytes;
1689 bool upper_special, lower_special;
1691 // NOTE: This code creates common-case shuffle masks that can be easily
1692 // detected as common expressions. It is not attempting to create highly
1693 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
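// (In a SHUFB control word, the byte values 0x80, 0xc0 and 0xe0 produce the
// constants 0x00, 0xff and 0x80 respectively, which is why those three
// patterns are cheap to generate.)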
1695 // Detect if the upper or lower half is a special shuffle mask pattern:
1696 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1697 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1699 // Create lower vector if not a special pattern
1700 if (!lower_special) {
1701 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1702 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1703 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1704 LO32C, LO32C, LO32C, LO32C));
1707 // Create upper vector if not a special pattern
1708 if (!upper_special) {
1709 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1710 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1711 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1712 HI32C, HI32C, HI32C, HI32C));
1715 // If either upper or lower are special, then the two input operands are
1716 // the same (basically, one of them is a "don't care")
1721 if (lower_special && upper_special) {
1722 // Unhappy situation... both upper and lower are special, so punt with
1723 // a zero vector for both shuffle inputs:
1724 SDValue Zero = DAG.getConstant(0, MVT::i32);
1725 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1729 for (int i = 0; i < 4; ++i) {
1731 for (int j = 0; j < 4; ++j) {
1733 bool process_upper, process_lower;
1735 process_upper = (upper_special && (i & 1) == 0);
1736 process_lower = (lower_special && (i & 1) == 1);
1738 if (process_upper || process_lower) {
1739 if ((process_upper && upper == 0)
1740 || (process_lower && lower == 0))
1742 else if ((process_upper && upper == 0xffffffff)
1743 || (process_lower && lower == 0xffffffff))
1745 else if ((process_upper && upper == 0x80000000)
1746 || (process_lower && lower == 0x80000000))
1747 val |= (j == 0 ? 0xe0 : 0x80);
1749 val |= i * 4 + j + ((i & 1) * 16);
1752 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1755 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1756 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1757 &ShufBytes[0], ShufBytes.size()));
1765 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1766 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1767 /// permutation vector, V3, is monotonically increasing with one "exception"
1768 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1769 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1770 /// In either case, the net result is going to eventually invoke SHUFB to
1771 /// permute/shuffle the bytes from V1 and V2.
1773 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1774 /// generate the control word for byte/halfword/word insertion. This takes
1775 /// care of a single element move from V2 into V1.
1777 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
1778 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1779 SDValue V1 = Op.getOperand(0);
1780 SDValue V2 = Op.getOperand(1);
1781 SDValue PermMask = Op.getOperand(2);
1783 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1785 // If we have a single element being moved from V2 into V1, this can be handled
1786 // using the C*[DX] compute mask instructions, but the vector elements have
1787 // to be monotonically increasing with one exception element.
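// For example, a v4i32 mask of <0, 1, 6, 3> is monotonic with a single
// element (mask index 6, i.e. element 2 of V2) drawn from V2 and can use the
// compute-mask path; a mask drawing two elements from V2, such as
// <0, 5, 6, 3>, falls through to the generic SHUFB lowering below.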
1788 MVT EltVT = V1.getValueType().getVectorElementType();
1789 unsigned EltsFromV2 = 0;
1791 unsigned V2EltIdx0 = 0;
1792 unsigned CurrElt = 0;
1793 bool monotonic = true;
1794 if (EltVT == MVT::i8)
1796 else if (EltVT == MVT::i16)
1798 else if (EltVT == MVT::i32)
1801 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1803 for (unsigned i = 0, e = PermMask.getNumOperands();
1804 EltsFromV2 <= 1 && monotonic && i != e;
1807 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1810 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1812 if (SrcElt >= V2EltIdx0) {
1814 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1815 } else if (CurrElt != SrcElt) {
1822 if (EltsFromV2 == 1 && monotonic) {
1823 // Compute mask and shuffle
1824 MachineFunction &MF = DAG.getMachineFunction();
1825 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1826 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1827 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1828 // Initialize temporary register to 0
1829 SDValue InitTempReg =
1830 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1831 // Copy register's contents as index in INSERT_MASK:
1832 SDValue ShufMaskOp =
1833 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1834 DAG.getTargetConstant(V2Elt, MVT::i32),
1835 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1836 // Use shuffle mask in SHUFB synthetic instruction:
1837 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1839 // Convert the SHUFFLE_VECTOR mask's input element units to the
1840 // actual bytes.
1841 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1843 SmallVector<SDValue, 16> ResultMask;
1844 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1846 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1849 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1851 for (unsigned j = 0; j < BytesPerElement; ++j) {
1852 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1857 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1858 &ResultMask[0], ResultMask.size());
1859 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1863 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1864 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1866 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1867 // For a constant, build the appropriate constant vector, which will
1868 // eventually simplify to a vector register load.
1870 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1871 SmallVector<SDValue, 16> ConstVecValues;
1875 // Create a constant vector:
1876 switch (Op.getValueType().getSimpleVT()) {
1877 default: assert(0 && "Unexpected constant value type in "
1878 "LowerSCALAR_TO_VECTOR");
1879 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1880 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1881 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1882 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1883 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1884 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1887 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1888 for (size_t j = 0; j < n_copies; ++j)
1889 ConstVecValues.push_back(CValue);
1891 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1892 &ConstVecValues[0], ConstVecValues.size());
1894 // Otherwise, copy the value from one register to another:
1895 switch (Op0.getValueType().getSimpleVT()) {
1896 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1903 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1910 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1911 switch (Op.getValueType().getSimpleVT()) {
1913 cerr << "CellSPU: Unknown vector multiplication, got "
1914 << Op.getValueType().getMVTString()
1920 SDValue rA = Op.getOperand(0);
1921 SDValue rB = Op.getOperand(1);
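// SPU only has 16 x 16 -> 32 bit multiplies, so (roughly) each 32-bit product
// is assembled as lo(a)*lo(b) + ((hi(a)*lo(b)) << 16) + ((hi(b)*lo(a)) << 16)
// modulo 2^32: MPYU forms the first term and the two MPYH nodes supply the
// two shifted cross terms.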
1922 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1923 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1924 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1925 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1927 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1931 // Multiply two v8i16 vectors (pipeline friendly version):
1932 // a) multiply lower halves, mask off the upper 16 bits of each 32-bit product
1933 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1934 // c) Use SELB to select upper and lower halves from the intermediate results
1936 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1937 // dual-issue. This code does manage to do this, even if it's a little on
1938 // the wacky side.
1940 MachineFunction &MF = DAG.getMachineFunction();
1941 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1942 SDValue Chain = Op.getOperand(0);
1943 SDValue rA = Op.getOperand(0);
1944 SDValue rB = Op.getOperand(1);
1945 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1946 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1949 DAG.getCopyToReg(Chain, FSMBIreg,
1950 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1951 DAG.getConstant(0xcccc, MVT::i16)));
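// (SELECT_MASK expands each bit of the 16-bit immediate into a 0x00/0xFF
// byte; 0xcccc therefore marks the two upper bytes of every 32-bit word,
// which is where the shifted MPYHH products are merged in by SELB below.)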
1954 DAG.getCopyToReg(FSMBOp, HiProdReg,
1955 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1957 SDValue HHProd_v4i32 =
1958 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1959 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1961 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1962 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1963 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1964 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1966 DAG.getConstant(16, MVT::i16))),
1967 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1970 // This M00sE is N@stI! (apologies to Monty Python)
1972 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1973 // is to break it all apart, sign extend, and reassemble the various
1974 // intermediate products.
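// Very roughly: each byte lane is multiplied as a 16-bit quantity (after the
// VEC_SRA sign extensions), shifted back into its byte position, and the
// lanes are recombined with SELECT_MASK/SELB byte masks; the low and high
// halves of each word are finally OR'd together at the end.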
1976 SDValue rA = Op.getOperand(0);
1977 SDValue rB = Op.getOperand(1);
1978 SDValue c8 = DAG.getConstant(8, MVT::i32);
1979 SDValue c16 = DAG.getConstant(16, MVT::i32);
1982 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1983 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1984 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1986 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1988 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1991 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1992 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1994 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1995 DAG.getConstant(0x2222, MVT::i16));
1997 SDValue LoProdParts =
1998 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1999 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2000 LLProd, LHProd, FSMBmask));
2002 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
2005 DAG.getNode(ISD::AND, MVT::v4i32,
2007 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2008 LoProdMask, LoProdMask,
2009 LoProdMask, LoProdMask));
2012 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2013 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2016 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2017 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2020 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2021 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2022 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2025 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2026 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2027 DAG.getNode(SPUISD::VEC_SRA,
2028 MVT::v4i32, rAH, c8)),
2029 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2030 DAG.getNode(SPUISD::VEC_SRA,
2031 MVT::v4i32, rBH, c8)));
2034 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2036 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2040 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2042 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2043 DAG.getNode(ISD::OR, MVT::v4i32,
2051 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2052 MachineFunction &MF = DAG.getMachineFunction();
2053 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2055 SDValue A = Op.getOperand(0);
2056 SDValue B = Op.getOperand(1);
2057 MVT VT = Op.getValueType();
2059 unsigned VRegBR, VRegC;
2061 if (VT == MVT::f32) {
2062 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2063 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2065 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2066 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2068 // TODO: make sure we're feeding FPInterp the right arguments
2069 // Right now: fi B, frest(B)
2072 // (Floating Interpolate (FP Reciprocal Estimate B))
2074 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2075 DAG.getNode(SPUISD::FPInterp, VT, B,
2076 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
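// This is a reciprocal-estimate division with one refinement step: BRcpl is
// roughly 1/B (FPRecipEst refined by FPInterp), and the final value is
// computed as A*BRcpl + BRcpl*(A - B*(A*BRcpl)), which cancels most of the
// estimate's remaining error.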
2078 // Computes A * BRcpl and stores it in a temporary register
2080 DAG.getCopyToReg(BRcpl, VRegC,
2081 DAG.getNode(ISD::FMUL, VT, A,
2082 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2083 // What's the Chain variable do? It's magic!
2084 // TODO: set Chain = Op(0).getEntryNode()
2086 return DAG.getNode(ISD::FADD, VT,
2087 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2088 DAG.getNode(ISD::FMUL, VT,
2089 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2090 DAG.getNode(ISD::FSUB, VT, A,
2091 DAG.getNode(ISD::FMUL, VT, B,
2092 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2095 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2096 MVT VT = Op.getValueType();
2097 SDValue N = Op.getOperand(0);
2098 SDValue Elt = Op.getOperand(1);
2099 SDValue ShufMask[16];
2100 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2102 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2104 int EltNo = (int) C->getZExtValue();
2107 if (VT == MVT::i8 && EltNo >= 16)
2108 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2109 else if (VT == MVT::i16 && EltNo >= 8)
2110 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2111 else if (VT == MVT::i32 && EltNo >= 4)
2112 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2113 else if (VT == MVT::i64 && EltNo >= 2)
2114 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2116 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2117 // i32 and i64: Element 0 is the preferred slot
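// (The "preferred slot" is the leftmost word of the 128-bit register, where
// the SPU keeps scalar values; element 0 of an i32/i64 vector already sits
// there, so no shuffle is required.)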
2118 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2121 // Need to generate shuffle mask and extract:
2122 int prefslot_begin = -1, prefslot_end = -1;
2123 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2125 switch (VT.getSimpleVT()) {
2127 assert(false && "Invalid value type!");
2129 prefslot_begin = prefslot_end = 3;
2133 prefslot_begin = 2; prefslot_end = 3;
2137 prefslot_begin = 0; prefslot_end = 3;
2141 prefslot_begin = 0; prefslot_end = 7;
2146 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2147 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2149 for (int i = 0; i < 16; ++i) {
2150 // zero fill upper part of preferred slot, don't care about the
2151 // rest.
2152 unsigned int mask_val;
2154 if (i <= prefslot_end) {
2156 ((i < prefslot_begin)
2158 : elt_byte + (i - prefslot_begin));
2160 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2162 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2165 SDValue ShufMaskVec =
2166 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2168 sizeof(ShufMask) / sizeof(ShufMask[0]));
2170 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2171 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2172 N, N, ShufMaskVec));
2176 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2177 SDValue VecOp = Op.getOperand(0);
2178 SDValue ValOp = Op.getOperand(1);
2179 SDValue IdxOp = Op.getOperand(2);
2180 MVT VT = Op.getValueType();
2182 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2183 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2185 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2186 // Use $2 because it's always 16-byte aligned and it's available:
2187 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
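// The C*D-form instructions that INSERT_MASK selects to only look at the low
// four bits of their address operand when choosing the target slot, so adding
// the element offset to the always 16-byte-aligned $2 effectively picks the
// slot without touching memory.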
2190 DAG.getNode(SPUISD::SHUFB, VT,
2191 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2193 DAG.getNode(SPUISD::INSERT_MASK, VT,
2194 DAG.getNode(ISD::ADD, PtrVT,
2196 DAG.getConstant(CN->getZExtValue(),
2202 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2204 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2206 assert(Op.getValueType() == MVT::i8);
2209 assert(0 && "Unhandled i8 math operator");
2213 // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
2214 // the result.
2215 SDValue N1 = Op.getOperand(1);
2216 N0 = (N0.getOpcode() != ISD::Constant
2217 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2218 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2220 N1 = (N1.getOpcode() != ISD::Constant
2221 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2222 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2224 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2225 DAG.getNode(Opc, MVT::i16, N0, N1));
2229 SDValue N1 = Op.getOperand(1);
2231 N0 = (N0.getOpcode() != ISD::Constant
2232 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2233 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2235 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2238 N1 = (N1.getOpcode() != ISD::Constant
2239 ? DAG.getNode(N1Opc, MVT::i16, N1)
2240 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2243 DAG.getNode(ISD::OR, MVT::i16, N0,
2244 DAG.getNode(ISD::SHL, MVT::i16,
2245 N0, DAG.getConstant(8, MVT::i16)));
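// (Replicating the byte into both halves of the i16, i.e. (N0 << 8) | N0,
// lets a 16-bit rotate stand in for an 8-bit rotate once the result is
// truncated back to i8.)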
2246 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2247 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2251 SDValue N1 = Op.getOperand(1);
2253 N0 = (N0.getOpcode() != ISD::Constant
2254 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2255 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2257 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2260 N1 = (N1.getOpcode() != ISD::Constant
2261 ? DAG.getNode(N1Opc, MVT::i16, N1)
2262 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2264 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2265 DAG.getNode(Opc, MVT::i16, N0, N1));
2268 SDValue N1 = Op.getOperand(1);
2270 N0 = (N0.getOpcode() != ISD::Constant
2271 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2272 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2274 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2277 N1 = (N1.getOpcode() != ISD::Constant
2278 ? DAG.getNode(N1Opc, MVT::i16, N1)
2279 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2281 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2282 DAG.getNode(Opc, MVT::i16, N0, N1));
2285 SDValue N1 = Op.getOperand(1);
2287 N0 = (N0.getOpcode() != ISD::Constant
2288 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2289 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2291 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2292 N1 = (N1.getOpcode() != ISD::Constant
2293 ? DAG.getNode(N1Opc, MVT::i16, N1)
2294 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2296 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2297 DAG.getNode(Opc, MVT::i16, N0, N1));
2305 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2307 MVT VT = Op.getValueType();
2308 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2310 SDValue Op0 = Op.getOperand(0);
2313 case ISD::ZERO_EXTEND:
2314 case ISD::SIGN_EXTEND:
2315 case ISD::ANY_EXTEND: {
2316 MVT Op0VT = Op0.getValueType();
2317 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2319 assert(Op0VT == MVT::i32
2320 && "CellSPU: Zero/sign extending something other than i32");
2321 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2323 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2324 ? SPUISD::ROTBYTES_RIGHT_S
2325 : SPUISD::ROTQUAD_RZ_BYTES);
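// Sketch of what follows: the i32 is promoted into a vector register's
// preferred slot, the quadword is rotated right by 4 bytes with sign or zero
// fill (depending on NewOpc), and the correctly extended i64 is then read
// back out of the preferred slot with EXTRACT_ELT0.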
2326 SDValue PromoteScalar =
2327 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2329 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2330 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2331 DAG.getNode(NewOpc, Op0VecVT,
2333 DAG.getConstant(4, MVT::i32))));
2337 // Turn operands into vectors to satisfy type checking (shufb works on
2338 // vectors)
2340 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2342 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2343 SmallVector<SDValue, 16> ShufBytes;
2345 // Create the shuffle mask for "rotating" the carry up one register slot
2346 // once the carry is generated.
2347 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2348 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2349 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2350 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2353 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2354 SDValue ShiftedCarry =
2355 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2357 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2358 &ShufBytes[0], ShufBytes.size()));
2360 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2361 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2362 Op0, Op1, ShiftedCarry));
2366 // Turn operands into vectors to satisfy type checking (shufb works on
2367 // vectors)
2369 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2371 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2372 SmallVector<SDValue, 16> ShufBytes;
2374 // Create the shuffle mask for "rotating" the borrow up one register slot
2375 // once the borrow is generated.
2376 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2377 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2378 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2379 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2382 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2383 SDValue ShiftedBorrow =
2384 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2385 BorrowGen, BorrowGen,
2386 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2387 &ShufBytes[0], ShufBytes.size()));
2389 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2390 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2391 Op0, Op1, ShiftedBorrow));
2395 SDValue ShiftAmt = Op.getOperand(1);
2396 MVT ShiftAmtVT = ShiftAmt.getValueType();
2397 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2399 DAG.getNode(SPUISD::SELB, VecVT,
2401 DAG.getConstant(0, VecVT),
2402 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2403 DAG.getConstant(0xff00ULL, MVT::i16)));
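// SPU shifts quadwords in two steps, so the shift amount is split into a
// byte count (amt >> 3) for SHLQUAD_L_BYTES and a residual bit count
// (amt & 7) for SHLQUAD_L_BITS; the SELECT_MASK/SELB above first masks one
// half of the quadword to zero so stray bytes are not shifted into the
// result.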
2404 SDValue ShiftAmtBytes =
2405 DAG.getNode(ISD::SRL, ShiftAmtVT,
2407 DAG.getConstant(3, ShiftAmtVT));
2408 SDValue ShiftAmtBits =
2409 DAG.getNode(ISD::AND, ShiftAmtVT,
2411 DAG.getConstant(7, ShiftAmtVT));
2413 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2414 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2415 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2416 MaskLower, ShiftAmtBytes),
2421 MVT VT = Op.getValueType();
2422 SDValue ShiftAmt = Op.getOperand(1);
2423 MVT ShiftAmtVT = ShiftAmt.getValueType();
2424 SDValue ShiftAmtBytes =
2425 DAG.getNode(ISD::SRL, ShiftAmtVT,
2427 DAG.getConstant(3, ShiftAmtVT));
2428 SDValue ShiftAmtBits =
2429 DAG.getNode(ISD::AND, ShiftAmtVT,
2431 DAG.getConstant(7, ShiftAmtVT));
2433 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2434 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2435 Op0, ShiftAmtBytes),
2440 // Promote Op0 to vector
2442 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2443 SDValue ShiftAmt = Op.getOperand(1);
2444 MVT ShiftVT = ShiftAmt.getValueType();
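// Roughly: one half of the quadword is pre-filled with sign bits (derived
// via VEC_SRA by 31 feeding SELECT_MASK/SELB), and the arithmetic right
// shift is then carried out as a left rotate by the negated amount, so that
// sign bytes are what rotate in from the other end.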
2446 // Negate variable shift amounts
2447 if (!isa<ConstantSDNode>(ShiftAmt)) {
2448 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2449 DAG.getConstant(0, ShiftVT), ShiftAmt);
2452 SDValue UpperHalfSign =
2453 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2454 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2455 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2456 Op0, DAG.getConstant(31, MVT::i32))));
2457 SDValue UpperHalfSignMask =
2458 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2459 SDValue UpperLowerMask =
2460 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2461 DAG.getConstant(0xff00, MVT::i16));
2462 SDValue UpperLowerSelect =
2463 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2464 UpperHalfSignMask, Op0, UpperLowerMask);
2465 SDValue RotateLeftBytes =
2466 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2467 UpperLowerSelect, ShiftAmt);
2468 SDValue RotateLeftBits =
2469 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2470 RotateLeftBytes, ShiftAmt);
2472 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2480 //! Lower byte immediate operations for v16i8 vectors:
2482 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2485 MVT VT = Op.getValueType();
2487 ConstVec = Op.getOperand(0);
2488 Arg = Op.getOperand(1);
2489 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2490 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2491 ConstVec = ConstVec.getOperand(0);
2493 ConstVec = Op.getOperand(1);
2494 Arg = Op.getOperand(0);
2495 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2496 ConstVec = ConstVec.getOperand(0);
2501 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2502 uint64_t VectorBits[2];
2503 uint64_t UndefBits[2];
2504 uint64_t SplatBits, SplatUndef;
2507 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2508 && isConstantSplat(VectorBits, UndefBits,
2509 VT.getVectorElementType().getSizeInBits(),
2510 SplatBits, SplatUndef, SplatSize)) {
2512 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2513 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2515 // Turn the BUILD_VECTOR into a set of target constants:
2516 for (size_t i = 0; i < tcVecSize; ++i)
2519 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2520 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2523 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2524 // lowered. Return the operation rather than a null SDValue.
2528 //! Lower i32 multiplication
2529 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2531 switch (VT.getSimpleVT()) {
2533 cerr << "CellSPU: Unknown LowerMUL value type, got "
2534 << Op.getValueType().getMVTString()
2540 SDValue rA = Op.getOperand(0);
2541 SDValue rB = Op.getOperand(1);
2543 return DAG.getNode(ISD::ADD, MVT::i32,
2544 DAG.getNode(ISD::ADD, MVT::i32,
2545 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2546 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2547 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2554 //! Custom lowering for CTPOP (count population)
2556 Custom lowering code that counts the number of ones in the input
2557 operand. SPU has such an instruction, but it counts the number of
2558 ones per byte, which then have to be accumulated.
2560 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2561 MVT VT = Op.getValueType();
2562 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
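// CNTB produces a per-byte population count; the cases below only differ in
// how many byte counts get folded together: i8 uses the count directly, i16
// adds its two byte counts (note an all-ones i16 yields a count of 16, which
// the 0x0f mask below appears unable to represent), and i32 folds four byte
// counts with two shift-and-add rounds before masking with 0xff.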
2564 switch (VT.getSimpleVT()) {
2566 assert(false && "Invalid value type!");
2568 SDValue N = Op.getOperand(0);
2569 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2571 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2572 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2574 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2578 MachineFunction &MF = DAG.getMachineFunction();
2579 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2581 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2583 SDValue N = Op.getOperand(0);
2584 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2585 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2586 SDValue Shift1 = DAG.getConstant(8, MVT::i16);
2588 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2589 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2591 // CNTB_result becomes the chain to which the virtual register
2592 // CNTB_reg becomes associated:
2593 SDValue CNTB_result =
2594 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2596 SDValue CNTB_rescopy =
2597 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2599 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2601 return DAG.getNode(ISD::AND, MVT::i16,
2602 DAG.getNode(ISD::ADD, MVT::i16,
2603 DAG.getNode(ISD::SRL, MVT::i16,
2610 MachineFunction &MF = DAG.getMachineFunction();
2611 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2613 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2614 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2616 SDValue N = Op.getOperand(0);
2617 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2618 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2619 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2620 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2622 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2623 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2625 // CNTB_result becomes the chain to which all of the virtual registers
2626 // CNTB_reg, SUM1_reg become associated:
2627 SDValue CNTB_result =
2628 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2630 SDValue CNTB_rescopy =
2631 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2634 DAG.getNode(ISD::SRL, MVT::i32,
2635 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2638 DAG.getNode(ISD::ADD, MVT::i32,
2639 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2641 SDValue Sum1_rescopy =
2642 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2645 DAG.getNode(ISD::SRL, MVT::i32,
2646 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2649 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2650 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2652 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2662 /// LowerOperation - Provide custom lowering hooks for some operations.
2665 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2667 unsigned Opc = (unsigned) Op.getOpcode();
2668 MVT VT = Op.getValueType();
2672 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2673 cerr << "Op.getOpcode() = " << Opc << "\n";
2674 cerr << "*Op.getNode():\n";
2675 Op.getNode()->dump();
2681 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2683 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2684 case ISD::ConstantPool:
2685 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2686 case ISD::GlobalAddress:
2687 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2688 case ISD::JumpTable:
2689 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2691 return LowerConstant(Op, DAG);
2692 case ISD::ConstantFP:
2693 return LowerConstantFP(Op, DAG);
2695 return LowerBRCOND(Op, DAG);
2696 case ISD::FORMAL_ARGUMENTS:
2697 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2699 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2701 return LowerRET(Op, DAG, getTargetMachine());
2704 // i8, i64 math ops:
2705 case ISD::ZERO_EXTEND:
2706 case ISD::SIGN_EXTEND:
2707 case ISD::ANY_EXTEND:
2716 return LowerI8Math(Op, DAG, Opc);
2717 else if (VT == MVT::i64)
2718 return LowerI64Math(Op, DAG, Opc);
2722 // Vector-related lowering.
2723 case ISD::BUILD_VECTOR:
2724 return LowerBUILD_VECTOR(Op, DAG);
2725 case ISD::SCALAR_TO_VECTOR:
2726 return LowerSCALAR_TO_VECTOR(Op, DAG);
2727 case ISD::VECTOR_SHUFFLE:
2728 return LowerVECTOR_SHUFFLE(Op, DAG);
2729 case ISD::EXTRACT_VECTOR_ELT:
2730 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2731 case ISD::INSERT_VECTOR_ELT:
2732 return LowerINSERT_VECTOR_ELT(Op, DAG);
2734 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2738 return LowerByteImmed(Op, DAG);
2740 // Vector and i8 multiply:
2743 return LowerVectorMUL(Op, DAG);
2744 else if (VT == MVT::i8)
2745 return LowerI8Math(Op, DAG, Opc);
2747 return LowerMUL(Op, DAG, VT, Opc);
2750 if (VT == MVT::f32 || VT == MVT::v4f32)
2751 return LowerFDIVf32(Op, DAG);
2752 // else if (Op.getValueType() == MVT::f64)
2753 // return LowerFDIVf64(Op, DAG);
2755 assert(0 && "Calling FDIV on unsupported MVT");
2758 return LowerCTPOP(Op, DAG);
2764 //===----------------------------------------------------------------------===//
2765 // Target Optimization Hooks
2766 //===----------------------------------------------------------------------===//
2769 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2772 TargetMachine &TM = getTargetMachine();
2774 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2775 SelectionDAG &DAG = DCI.DAG;
2776 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2777 SDValue Result; // Initially, NULL result
2779 switch (N->getOpcode()) {
2782 SDValue Op1 = N->getOperand(1);
2784 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2785 SDValue Op01 = Op0.getOperand(1);
2786 if (Op01.getOpcode() == ISD::Constant
2787 || Op01.getOpcode() == ISD::TargetConstant) {
2788 // (add <const>, (SPUindirect <arg>, <const>)) ->
2789 // (SPUindirect <arg>, <const + const>)
2790 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2791 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2792 SDValue combinedConst =
2793 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2794 Op0.getValueType());
2796 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2797 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2798 DEBUG(cerr << "With: (SPUindirect <arg>, "
2799 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2800 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2801 Op0.getOperand(0), combinedConst);
2803 } else if (isa<ConstantSDNode>(Op0)
2804 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2805 SDValue Op11 = Op1.getOperand(1);
2806 if (Op11.getOpcode() == ISD::Constant
2807 || Op11.getOpcode() == ISD::TargetConstant) {
2808 // (add (SPUindirect <arg>, <const>), <const>) ->
2809 // (SPUindirect <arg>, <const + const>)
2810 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2811 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2812 SDValue combinedConst =
2813 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2814 Op0.getValueType());
2816 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2817 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2818 DEBUG(cerr << "With: (SPUindirect <arg>, "
2819 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2821 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2822 Op1.getOperand(0), combinedConst);
2827 case ISD::SIGN_EXTEND:
2828 case ISD::ZERO_EXTEND:
2829 case ISD::ANY_EXTEND: {
2830 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2831 N->getValueType(0) == Op0.getValueType()) {
2832 // (any_extend (SPUextract_elt0 <arg>)) ->
2833 // (SPUextract_elt0 <arg>)
2834 // Types must match, however...
2835 DEBUG(cerr << "Replace: ");
2836 DEBUG(N->dump(&DAG));
2837 DEBUG(cerr << "\nWith: ");
2838 DEBUG(Op0.getNode()->dump(&DAG));
2839 DEBUG(cerr << "\n");
2845 case SPUISD::IndirectAddr: {
2846 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2847 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2848 if (CN->getZExtValue() == 0) {
2849 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2850 // (SPUaform <addr>, 0)
2852 DEBUG(cerr << "Replace: ");
2853 DEBUG(N->dump(&DAG));
2854 DEBUG(cerr << "\nWith: ");
2855 DEBUG(Op0.getNode()->dump(&DAG));
2856 DEBUG(cerr << "\n");
2863 case SPUISD::SHLQUAD_L_BITS:
2864 case SPUISD::SHLQUAD_L_BYTES:
2865 case SPUISD::VEC_SHL:
2866 case SPUISD::VEC_SRL:
2867 case SPUISD::VEC_SRA:
2868 case SPUISD::ROTQUAD_RZ_BYTES:
2869 case SPUISD::ROTQUAD_RZ_BITS: {
2870 SDValue Op1 = N->getOperand(1);
2872 if (isa<ConstantSDNode>(Op1)) {
2873 // Kill degenerate vector shifts:
2874 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2876 if (CN->getZExtValue() == 0) {
2882 case SPUISD::PROMOTE_SCALAR: {
2883 switch (Op0.getOpcode()) {
2886 case ISD::ANY_EXTEND:
2887 case ISD::ZERO_EXTEND:
2888 case ISD::SIGN_EXTEND: {
2889 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2890 // <arg>
2891 // but only if the SPUpromote_scalar and <arg> types match.
2892 SDValue Op00 = Op0.getOperand(0);
2893 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2894 SDValue Op000 = Op00.getOperand(0);
2895 if (Op000.getValueType() == N->getValueType(0)) {
2901 case SPUISD::EXTRACT_ELT0: {
2902 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2903 // <arg>
2904 Result = Op0.getOperand(0);
2911 // Otherwise, return unchanged.
2913 if (Result.getNode()) {
2914 DEBUG(cerr << "\nReplace.SPU: ");
2915 DEBUG(N->dump(&DAG));
2916 DEBUG(cerr << "\nWith: ");
2917 DEBUG(Result.getNode()->dump(&DAG));
2918 DEBUG(cerr << "\n");
2925 //===----------------------------------------------------------------------===//
2926 // Inline Assembly Support
2927 //===----------------------------------------------------------------------===//
2929 /// getConstraintType - Given a constraint letter, return the type of
2930 /// constraint it is for this target.
2931 SPUTargetLowering::ConstraintType
2932 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2933 if (ConstraintLetter.size() == 1) {
2934 switch (ConstraintLetter[0]) {
2941 return C_RegisterClass;
2944 return TargetLowering::getConstraintType(ConstraintLetter);
2947 std::pair<unsigned, const TargetRegisterClass*>
2948 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2951 if (Constraint.size() == 1) {
2952 // GCC RS6000 Constraint Letters
2953 switch (Constraint[0]) {
2957 return std::make_pair(0U, SPU::R64CRegisterClass);
2958 return std::make_pair(0U, SPU::R32CRegisterClass);
2961 return std::make_pair(0U, SPU::R32FPRegisterClass);
2962 else if (VT == MVT::f64)
2963 return std::make_pair(0U, SPU::R64FPRegisterClass);
2966 return std::make_pair(0U, SPU::GPRCRegisterClass);
2970 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2973 //! Compute used/known bits for a SPU operand
2975 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2979 const SelectionDAG &DAG,
2980 unsigned Depth ) const {
2982 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2985 switch (Op.getOpcode()) {
2987 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2997 case SPUISD::PROMOTE_SCALAR: {
2998 SDValue Op0 = Op.getOperand(0);
2999 MVT Op0VT = Op0.getValueType();
3000 unsigned Op0VTBits = Op0VT.getSizeInBits();
3001 uint64_t InMask = Op0VT.getIntegerVTBitMask();
3002 KnownZero |= APInt(Op0VTBits, ~InMask, false);
3003 KnownOne |= APInt(Op0VTBits, InMask, false);
3007 case SPUISD::LDRESULT:
3008 case SPUISD::EXTRACT_ELT0:
3009 case SPUISD::EXTRACT_ELT0_CHAINED: {
3010 MVT OpVT = Op.getValueType();
3011 unsigned OpVTBits = OpVT.getSizeInBits();
3012 uint64_t InMask = OpVT.getIntegerVTBitMask();
3013 KnownZero |= APInt(OpVTBits, ~InMask, false);
3014 KnownOne |= APInt(OpVTBits, InMask, false);
3019 case EXTRACT_I1_ZEXT:
3020 case EXTRACT_I1_SEXT:
3021 case EXTRACT_I8_ZEXT:
3022 case EXTRACT_I8_SEXT:
3027 case SPUISD::SHLQUAD_L_BITS:
3028 case SPUISD::SHLQUAD_L_BYTES:
3029 case SPUISD::VEC_SHL:
3030 case SPUISD::VEC_SRL:
3031 case SPUISD::VEC_SRA:
3032 case SPUISD::VEC_ROTL:
3033 case SPUISD::VEC_ROTR:
3034 case SPUISD::ROTQUAD_RZ_BYTES:
3035 case SPUISD::ROTQUAD_RZ_BITS:
3036 case SPUISD::ROTBYTES_RIGHT_S:
3037 case SPUISD::ROTBYTES_LEFT:
3038 case SPUISD::ROTBYTES_LEFT_CHAINED:
3039 case SPUISD::SELECT_MASK:
3041 case SPUISD::FPInterp:
3042 case SPUISD::FPRecipEst:
3043 case SPUISD::SEXT32TO64:
3048 // LowerAsmOperandForConstraint
3050 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3051 char ConstraintLetter,
3052 std::vector<SDValue> &Ops,
3053 SelectionDAG &DAG) const {
3054 // Default, for the time being, to the base class handler
3055 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3058 /// isLegalAddressImmediate - Return true if the integer value can be used
3059 /// as the offset of the target addressing mode.
3060 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3061 const Type *Ty) const {
3062 // SPU's local store is 256K, so address offsets fit within 18 bits:
3063 return (V > -(1 << 18) && V < (1 << 18) - 1);
3066 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {