1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
38 std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
41 struct valtype_map_s {
  const MVT valtype;
43 const int prefslot_byte;
};

46 const valtype_map_s valtype_map[] = {
  { MVT::i1, 3 }, { MVT::i8, 3 }, { MVT::i16, 2 }, { MVT::i32, 0 },
  { MVT::f32, 0 }, { MVT::i64, 0 }, { MVT::f64, 0 }, { MVT::i128, 0 }
};
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
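// Every scalar value on SPU lives in a 128-bit register; its "preferred slot" is
// the byte range the scalar occupies within the first word of that register
// (byte 3 for i1/i8, bytes 2-3 for i16, bytes 0-3 for i32/f32, bytes 0-7 for
// i64/f64). prefslot_byte records that byte offset for each value type.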
59 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
      break;
    }
  }

  if (retval == 0)
71 cerr << "getValueTypeMapEntry returns NULL for " << VT.getMVTString() << "\n";

  return retval;
}
81 //! Predicate that returns true if operand is a memory target
/*!
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form address.
 */
88 bool isMemoryOperand(const SDValue &Op)
{
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
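// (A-form addresses are absolute local-store addresses encoded directly in the
// instruction, in contrast to D-form [register + immediate offset] and X-form
// [register + register] addresses, which go through the indirect paths below.)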
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDValue &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // Initialize libcalls:
134 setLibcallName(RTLIB::MUL_I64, "__muldi3");
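// The SPU multiplier only works on 16-bit inputs natively, so a full 64x64-bit
// multiply is handed to the __muldi3 library routine; ISD::MUL for MVT::i64 is
// expanded to this libcall below.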
136 // SPU has no sign or zero extended loads for i1, i8, i16:
137 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
138 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
139 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
141 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
142 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
143 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
144 setTruncStoreAction(MVT::i8, MVT::i8, Custom);
145 setTruncStoreAction(MVT::i16, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
147 setTruncStoreAction(MVT::i64, MVT::i8, Custom);
148 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
150 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
151 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
152 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
154 // SPU constant load actions are custom lowered:
155 setOperationAction(ISD::Constant, MVT::i64, Custom);
156 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
157 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
159 // SPU's loads and stores have to be custom lowered:
160 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
162 MVT VT = (MVT::SimpleValueType)sctype;
164 setOperationAction(ISD::LOAD, VT, Custom);
165 setOperationAction(ISD::STORE, VT, Custom);
168 // Custom lower BRCOND for i8 so the condition is "promoted" to i16
169 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
171 // Expand the jumptable branches
172 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
173 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
174 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
176 // SPU has no intrinsics for these particular operations:
177 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
179 // SPU has no SREM/UREM instructions
180 setOperationAction(ISD::SREM, MVT::i32, Expand);
181 setOperationAction(ISD::UREM, MVT::i32, Expand);
182 setOperationAction(ISD::SREM, MVT::i64, Expand);
183 setOperationAction(ISD::UREM, MVT::i64, Expand);
185 // We don't support sin/cos/sqrt/fmod
186 setOperationAction(ISD::FSIN , MVT::f64, Expand);
187 setOperationAction(ISD::FCOS , MVT::f64, Expand);
188 setOperationAction(ISD::FREM , MVT::f64, Expand);
189 setOperationAction(ISD::FSIN , MVT::f32, Expand);
190 setOperationAction(ISD::FCOS , MVT::f32, Expand);
191 setOperationAction(ISD::FREM , MVT::f32, Expand);
193 // SPU has no hardware square root; expand FSQRT
194 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
195 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
197 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
198 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
200 // SPU can do rotate right and left, so legalize it... but customize for i8
201 // because instructions don't exist.
203 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
// the .td files.
205 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
206 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
207 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
209 setOperationAction(ISD::ROTL, MVT::i32, Legal);
210 setOperationAction(ISD::ROTL, MVT::i16, Legal);
211 setOperationAction(ISD::ROTL, MVT::i8, Custom);
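// (The hardware rotate instructions operate on 32-bit words and on whole
// quadwords, so i8 rotates have to be synthesized during custom lowering.)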
213 // SPU has no native version of shift left/right for i8
214 setOperationAction(ISD::SHL, MVT::i8, Custom);
215 setOperationAction(ISD::SRL, MVT::i8, Custom);
216 setOperationAction(ISD::SRA, MVT::i8, Custom);
218 // SPU needs custom lowering for shift left/right for i64
219 setOperationAction(ISD::SHL, MVT::i64, Custom);
220 setOperationAction(ISD::SRL, MVT::i64, Custom);
221 setOperationAction(ISD::SRA, MVT::i64, Custom);
223 // Custom lower i8, i32 and i64 multiplications
224 setOperationAction(ISD::MUL, MVT::i8, Custom);
225 setOperationAction(ISD::MUL, MVT::i32, Custom);
226 setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
228 // SMUL_LOHI, UMUL_LOHI
229 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
230 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
231 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
232 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
234 // Need to custom handle (some) common i8, i64 math ops
235 setOperationAction(ISD::ADD, MVT::i64, Custom);
236 setOperationAction(ISD::SUB, MVT::i8, Custom);
237 setOperationAction(ISD::SUB, MVT::i64, Custom);
239 // SPU does not have BSWAP, but it does support CTLZ for i32.
240 // CTPOP has to be custom lowered.
241 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
242 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
244 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
245 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
246 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
247 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
249 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
250 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
252 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
254 // SPU has a version of select that implements (a&~c)|(b&c), just like
255 // select ought to work:
256 setOperationAction(ISD::SELECT, MVT::i8, Legal);
257 setOperationAction(ISD::SELECT, MVT::i16, Legal);
258 setOperationAction(ISD::SELECT, MVT::i32, Legal);
259 setOperationAction(ISD::SELECT, MVT::i64, Expand);
261 setOperationAction(ISD::SETCC, MVT::i8, Legal);
262 setOperationAction(ISD::SETCC, MVT::i16, Legal);
263 setOperationAction(ISD::SETCC, MVT::i32, Legal);
264 setOperationAction(ISD::SETCC, MVT::i64, Expand);
266 // Zero extension and sign extension for i64 have to be
// custom lowered.
268 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
269 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
270 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
272 // SPU has a legal FP -> signed INT instruction
273 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
274 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
275 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
276 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
278 // FDIV on SPU requires custom lowering
279 setOperationAction(ISD::FDIV, MVT::f32, Custom);
280 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
282 // SPU has [U|S]INT_TO_FP
283 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
284 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
285 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
286 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
287 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
288 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
289 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
290 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
292 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
293 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
294 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
295 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
297 // We cannot sextinreg(i1). Expand to shifts.
298 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
300 // Support label based line numbers.
301 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
302 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
304 // We want to legalize GlobalAddress and ConstantPool nodes into the
305 // appropriate instructions to materialize the address.
306 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
308 MVT VT = (MVT::SimpleValueType)sctype;
310 setOperationAction(ISD::GlobalAddress, VT, Custom);
311 setOperationAction(ISD::ConstantPool, VT, Custom);
312 setOperationAction(ISD::JumpTable, VT, Custom);
315 // RET must be custom lowered, to meet ABI requirements
316 setOperationAction(ISD::RET, MVT::Other, Custom);
318 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
319 setOperationAction(ISD::VASTART , MVT::Other, Custom);
321 // Use the default implementation.
322 setOperationAction(ISD::VAARG , MVT::Other, Expand);
323 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
324 setOperationAction(ISD::VAEND , MVT::Other, Expand);
325 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
326 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
327 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
328 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
330 // Cell SPU has instructions for converting between i64 and fp.
331 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
332 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
334 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
335 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
337 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
338 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
340 // Set up the SPU's vector register classes. Then, for each vector type,
341 // selectively enable the operations that can be effectively codegen'd.
342 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
343 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
344 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
345 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
346 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
347 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
349 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
350 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
351 MVT VT = (MVT::SimpleValueType)i;
353 // add/sub are legal for all supported vector VT's.
354 setOperationAction(ISD::ADD , VT, Legal);
355 setOperationAction(ISD::SUB , VT, Legal);
356 // mul has to be custom lowered.
357 setOperationAction(ISD::MUL , VT, Custom);
359 setOperationAction(ISD::AND , VT, Legal);
360 setOperationAction(ISD::OR , VT, Legal);
361 setOperationAction(ISD::XOR , VT, Legal);
362 setOperationAction(ISD::LOAD , VT, Legal);
363 setOperationAction(ISD::SELECT, VT, Legal);
364 setOperationAction(ISD::STORE, VT, Legal);
366 // These operations need to be expanded:
367 setOperationAction(ISD::SDIV, VT, Expand);
368 setOperationAction(ISD::SREM, VT, Expand);
369 setOperationAction(ISD::UDIV, VT, Expand);
370 setOperationAction(ISD::UREM, VT, Expand);
371 setOperationAction(ISD::FDIV, VT, Custom);
373 // Custom lower build_vector, constant pool spills, insert and
374 // extract vector elements:
375 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
376 setOperationAction(ISD::ConstantPool, VT, Custom);
377 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
378 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
379 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
380 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
383 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
384 setOperationAction(ISD::AND, MVT::v16i8, Custom);
385 setOperationAction(ISD::OR, MVT::v16i8, Custom);
386 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
387 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
389 setShiftAmountType(MVT::i32);
390 setSetCCResultContents(ZeroOrOneSetCCResult);
392 setStackPointerRegisterToSaveRestore(SPU::R1);
394 // We have target-specific dag combine patterns for the following nodes:
395 setTargetDAGCombine(ISD::ADD);
396 setTargetDAGCombine(ISD::ZERO_EXTEND);
397 setTargetDAGCombine(ISD::SIGN_EXTEND);
398 setTargetDAGCombine(ISD::ANY_EXTEND);
400 computeRegisterProperties();
const char *
404 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
406 if (node_names.empty()) {
407 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
408 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
409 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
410 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
411 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
412 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
413 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
414 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
415 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
416 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
417 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
418 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
419 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
420 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
421 = "SPUISD::EXTRACT_ELT0_CHAINED";
422 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
423 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
424 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
425 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
426 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
427 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
428 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
429 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
430 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
431 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
432 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
433 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
434 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
435 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
436 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
437 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
438 "SPUISD::ROTQUAD_RZ_BYTES";
439 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
440 "SPUISD::ROTQUAD_RZ_BITS";
441 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
442 "SPUISD::ROTBYTES_RIGHT_S";
443 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
444 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
445 "SPUISD::ROTBYTES_LEFT_CHAINED";
446 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
447 "SPUISD::ROTBYTES_LEFT_BITS";
448 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
449 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
450 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
451 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
452 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
453 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
454 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
455 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
456 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
459 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
461 return ((i != node_names.end()) ? i->second : 0);
464 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
465 MVT VT = Op.getValueType();
466 return (VT.isInteger() ? VT : MVT(MVT::i32));
469 //===----------------------------------------------------------------------===//
470 // Calling convention code:
471 //===----------------------------------------------------------------------===//
473 #include "SPUGenCallingConv.inc"
475 //===----------------------------------------------------------------------===//
476 // LowerOperation implementation
477 //===----------------------------------------------------------------------===//
479 /// Aligned load common code for CellSPU
/*!
481 \param[in] Op The SelectionDAG load or store operand
482 \param[in] DAG The selection DAG
483 \param[in] ST CellSPU subtarget information structure
484 \param[in,out] alignment Caller initializes this to the load or store node's
485 value from getAlignment(); may be updated while generating the aligned load
486 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
487 offset (a multiple of 16)
488 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
489 offset of the preferred slot within the 16-byte chunk
490 \param[in,out] VT Caller initializes this value type to the load or store
491 node's loaded or stored value type; may be updated if an i1-extended load or
 store is encountered
493 \param[out] was16aligned true if the base pointer had 16-byte alignment,
494 otherwise false. Can help to determine if the chunk needs to be rotated.

496 Both load and store lowering load a block of data aligned on a 16-byte
497 boundary. This is the common aligned load code shared between both.
 */
static SDValue
500 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            LSBaseSDNode *LSN,
502 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
503 MVT &VT, bool &was16aligned)
505 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
506 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
507 SDValue basePtr = LSN->getBasePtr();
508 SDValue chain = LSN->getChain();
510 if (basePtr.getOpcode() == ISD::ADD) {
511 SDValue Op1 = basePtr.getNode()->getOperand(1);
513 if (Op1.getOpcode() == ISD::Constant
514 || Op1.getOpcode() == ISD::TargetConstant) {
515 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
517 alignOffs = (int) CN->getZExtValue();
518 prefSlotOffs = (int) (alignOffs & 0xf);
520 // Adjust the rotation amount to ensure that the final result ends up in
521 // the preferred slot:
522 prefSlotOffs -= vtm->prefslot_byte;
523 basePtr = basePtr.getOperand(0);
525 // Loading from memory, can we adjust alignment?
526 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
527 SDValue APtr = basePtr.getOperand(0);
528 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
529 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
530 alignment = GSDN->getGlobal()->getAlignment();
535 prefSlotOffs = -vtm->prefslot_byte;
537 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
538 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
539 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
540 prefSlotOffs = (int) (alignOffs & 0xf);
541 prefSlotOffs -= vtm->prefslot_byte;
542 basePtr = DAG.getRegister(SPU::R1, VT);
545 prefSlotOffs = -vtm->prefslot_byte;
548 if (alignment == 16) {
549 // Realign the base pointer as a D-Form address:
550 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
551 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
553 DAG.getConstant((alignOffs & ~0xf), PtrVT));
    }

556 // Emit the vector load:
    was16aligned = true;
558 return DAG.getLoad(MVT::v16i8, chain, basePtr,
559 LSN->getSrcValue(), LSN->getSrcValueOffset(),
560 LSN->isVolatile(), 16);
563 // Unaligned load or we're using the "large memory" model, which means that
564 // we have to be very pessimistic:
565 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
566 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
567 DAG.getConstant(0, PtrVT));
571 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
572 DAG.getConstant((alignOffs & ~0xf), PtrVT));
573 was16aligned = false;
574 return DAG.getLoad(MVT::v16i8, chain, basePtr,
575 LSN->getSrcValue(), LSN->getSrcValueOffset(),
576 LSN->isVolatile(), 16);
579 /// Custom lower loads for CellSPU
/*!
581 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
582 within a 16-byte block, we have to rotate to extract the requested element.
 */
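// In outline, a scalar load from address A becomes roughly:
//   chunk  = (v16i8 load of the 16-byte block containing A)
//   chunk  = ROTBYTES_LEFT_CHAINED(chunk, rotamt)  // bring the value into the
//                                                  // preferred slot
//   result = EXTRACT_ELT0_CHAINED(bitconvert of chunk to the matching vector type)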
static SDValue
585 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
586 LoadSDNode *LN = cast<LoadSDNode>(Op);
587 SDValue the_chain = LN->getChain();
588 MVT VT = LN->getMemoryVT();
589 MVT OpVT = Op.getNode()->getValueType(0);
590 ISD::LoadExtType ExtType = LN->getExtensionType();
591 unsigned alignment = LN->getAlignment();
594 switch (LN->getAddressingMode()) {
595 case ISD::UNINDEXED: {
    int offset, rotamt;
    bool was16aligned;
    SDValue result =
599 AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);
601 if (result.getNode() == 0)
604 the_chain = result.getValue(1);
605 // Rotate the chunk if necessary
608 if (rotamt != 0 || !was16aligned) {
609 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
      SDValue Ops[3];
      Ops[0] = the_chain;
      Ops[1] = result;

      if (was16aligned) {
614 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
      } else {
616 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
617 LoadSDNode *LN1 = cast<LoadSDNode>(result);
618 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
619 DAG.getConstant(rotamt, PtrVT));
      }

622 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
623 the_chain = result.getValue(1);
626 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
628 MVT vecVT = MVT::v16i8;
630 // Convert the loaded v16i8 vector to the appropriate vector type
631 // specified by the operand:
634 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
636 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
639 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
640 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
641 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
642 the_chain = result.getValue(1);
644 // Handle the sign and zero-extending loads for i1 and i8:
647 if (ExtType == ISD::SEXTLOAD) {
648 NewOpC = (OpVT == MVT::i1
649 ? SPUISD::EXTRACT_I1_SEXT
650 : SPUISD::EXTRACT_I8_SEXT);
652 assert(ExtType == ISD::ZEXTLOAD);
653 NewOpC = (OpVT == MVT::i1
654 ? SPUISD::EXTRACT_I1_ZEXT
655 : SPUISD::EXTRACT_I8_ZEXT);
658 result = DAG.getNode(NewOpC, OpVT, result);
661 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
662 SDValue retops[2] = { result, the_chain };
667 result = DAG.getNode(SPUISD::LDRESULT, retvts,
668 retops, sizeof(retops) / sizeof(retops[0]));
675 case ISD::LAST_INDEXED_MODE:
676 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
        "UNINDEXED\n";
678 cerr << (unsigned) LN->getAddressingMode() << "\n";
686 /// Custom lower stores for CellSPU
/*!
688 All CellSPU stores are aligned to 16-byte boundaries, so for elements
689 within a 16-byte block, we have to generate a shuffle to insert the
690 requested element into its place, then store the resulting block.
 */
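// In outline, a scalar store of value V to address A becomes roughly:
//   chunk = AlignedLoad of the 16-byte block containing A
//   mask  = INSERT_MASK (selected as one of the c*d instructions) for A's slot
//   chunk = SHUFB(scalar_to_vector(V), chunk, mask)
//   store the rebuilt chunk back to the block's base address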
static SDValue
693 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
694 StoreSDNode *SN = cast<StoreSDNode>(Op);
695 SDValue Value = SN->getValue();
696 MVT VT = Value.getValueType();
697 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
698 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
699 unsigned alignment = SN->getAlignment();
701 switch (SN->getAddressingMode()) {
702 case ISD::UNINDEXED: {
703 int chunk_offset, slot_offset;
    bool was16aligned;
706 // The vector type we really want to load from the 16-byte chunk.
707 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
708 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
710 SDValue alignLoadVec =
711 AlignedLoad(Op, DAG, ST, SN, alignment,
712 chunk_offset, slot_offset, VT, was16aligned);
714 if (alignLoadVec.getNode() == 0)
717 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
718 SDValue basePtr = LN->getBasePtr();
719 SDValue the_chain = alignLoadVec.getValue(1);
720 SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
724 && (theValue.getOpcode() == ISD::AssertZext
725 || theValue.getOpcode() == ISD::AssertSext)) {
726 // Drill down and get the value for zero- and sign-extended
      // stores:
728 theValue = theValue.getOperand(0);
    }
733 SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
734 SDValue insertEltPtr;
736 // If the base pointer is already a D-form address, then just create
737 // a new D-form address with a slot offset and the original base pointer.
738 // Otherwise generate a D-form address with the slot offset relative
739 // to the stack pointer, which is always aligned.
740 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
741 DEBUG(basePtr.getNode()->dump(&DAG));
744 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
745 (basePtr.getOpcode() == ISD::ADD
746 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
747 insertEltPtr = basePtr;
749 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
752 SDValue insertEltOp =
753 DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
754 SDValue vectorizeOp =
755 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
757 result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec,
758 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
760 result = DAG.getStore(the_chain, result, basePtr,
761 LN->getSrcValue(), LN->getSrcValueOffset(),
762 LN->isVolatile(), LN->getAlignment());
764 #if 0 && defined(NDEBUG)
765 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
766 const SDValue &currentRoot = DAG.getRoot();
769 cerr << "------- CellSPU:LowerStore result:\n";
772 DAG.setRoot(currentRoot);
783 case ISD::LAST_INDEXED_MODE:
784 cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
        "UNINDEXED\n";
786 cerr << (unsigned) SN->getAddressingMode() << "\n";
794 /// Generate the address of a constant pool entry.
static SDValue
796 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
797 MVT PtrVT = Op.getValueType();
798 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
799 Constant *C = CP->getConstVal();
800 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
801 SDValue Zero = DAG.getConstant(0, PtrVT);
802 const TargetMachine &TM = DAG.getTarget();
804 if (TM.getRelocationModel() == Reloc::Static) {
805 if (!ST->usingLargeMem()) {
806 // Just return the SDValue with the constant pool address in it.
807 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
809 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
810 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
811 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
816 "LowerConstantPool: Relocation model other than static"
static SDValue
822 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
823 MVT PtrVT = Op.getValueType();
824 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
825 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
826 SDValue Zero = DAG.getConstant(0, PtrVT);
827 const TargetMachine &TM = DAG.getTarget();
829 if (TM.getRelocationModel() == Reloc::Static) {
830 if (!ST->usingLargeMem()) {
831 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
833 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
834 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
835 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
840 "LowerJumpTable: Relocation model other than static not supported.");
static SDValue
845 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
846 MVT PtrVT = Op.getValueType();
847 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
848 GlobalValue *GV = GSDN->getGlobal();
849 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
850 const TargetMachine &TM = DAG.getTarget();
851 SDValue Zero = DAG.getConstant(0, PtrVT);
853 if (TM.getRelocationModel() == Reloc::Static) {
854 if (!ST->usingLargeMem()) {
855 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
857 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
858 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
859 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
862 cerr << "LowerGlobalAddress: Relocation model other than static not "
871 //! Custom lower i64 integer constants
/*!
873 This code inserts all of the necessary juggling that needs to occur to load
874 a 64-bit constant into a register.
 */
static SDValue
877 LowerConstant(SDValue Op, SelectionDAG &DAG) {
878 MVT VT = Op.getValueType();
879 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
881 if (VT == MVT::i64) {
882 SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
883 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
884 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
886 cerr << "LowerConstant: unhandled constant type "
896 //! Custom lower double precision floating point constants
static SDValue
898 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
899 MVT VT = Op.getValueType();
900 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
903 "LowerConstantFP: Node is not ConstantFPSDNode");
905 if (VT == MVT::f64) {
906 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
907 return DAG.getNode(ISD::BIT_CONVERT, VT,
908 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
914 //! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
static SDValue
916 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
{
918 SDValue Cond = Op.getOperand(1);
919 MVT CondVT = Cond.getValueType();

922 if (CondVT == MVT::i8) {
    MVT CondNVT = MVT::i16;
924 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       Op.getOperand(0),
926 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
                       Op.getOperand(2));
  } else
929 return SDValue(); // Unchanged
}
static SDValue
933 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
935 MachineFunction &MF = DAG.getMachineFunction();
936 MachineFrameInfo *MFI = MF.getFrameInfo();
937 MachineRegisterInfo &RegInfo = MF.getRegInfo();
938 SmallVector<SDValue, 48> ArgValues;
939 SDValue Root = Op.getOperand(0);
940 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
942 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
943 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
945 unsigned ArgOffset = SPUFrameInfo::minStackSize();
946 unsigned ArgRegIdx = 0;
947 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
949 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
951 // Add DAG nodes to load the arguments or copy them out of registers.
952 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
953 ArgNo != e; ++ArgNo) {
954 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
955 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;
958 if (ArgRegIdx < NumArgRegs) {
959 const TargetRegisterClass *ArgRegClass;
961 switch (ObjectVT.getSimpleVT()) {
963 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
964 << ObjectVT.getMVTString()
969 ArgRegClass = &SPU::R8CRegClass;
972 ArgRegClass = &SPU::R16CRegClass;
975 ArgRegClass = &SPU::R32CRegClass;
978 ArgRegClass = &SPU::R64CRegClass;
981 ArgRegClass = &SPU::R32FPRegClass;
984 ArgRegClass = &SPU::R64FPRegClass;
992 ArgRegClass = &SPU::VECREGRegClass;
996 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
997 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
998 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1001 // We need to load the argument to a virtual register if we determined
1002 // above that we ran out of physical registers of the appropriate type
1003 // or we're forced to do vararg
1004 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1005 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1006 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1007 ArgOffset += StackSlotSize;
1010 ArgValues.push_back(ArgVal);
1012 Root = ArgVal.getOperand(0);
1017 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1018 // We will spill (79-3)+1 registers to the stack
1019 SmallVector<SDValue, 79-3+1> MemOps;
1021 // Create the frame slot
1023 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1024 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1025 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1026 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1027 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1028 Root = Store.getOperand(0);
1029 MemOps.push_back(Store);
1031 // Increment address by stack slot size for the next stored argument
1032 ArgOffset += StackSlotSize;
1034 if (!MemOps.empty())
1035 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1038 ArgValues.push_back(Root);
1040 // Return the new list of results.
1041 return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
                            ArgValues.size());
}
1045 /// isLSAAddress - Return the immediate to use if the specified
1046 /// value is representable as a LSA address.
1047 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1048 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

1051 int Addr = C->getZExtValue();
1052 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1053 (Addr << 14 >> 14) != Addr)
1054 return 0; // Top 14 bits have to be sext of immediate.
1056 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
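// e.g. an address of 0x1FFFC is word aligned and fits in a signed 18-bit
// immediate, so it yields the constant 0x7FFF; 0x1FFFD (not word aligned) and
// 0x40000 (outside the signed 18-bit range) both yield 0.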
static SDValue
1061 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1062 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1063 SDValue Chain = TheCall->getChain();
1064 SDValue Callee = TheCall->getCallee();
1065 unsigned NumOps = TheCall->getNumArgs();
1066 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1067 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1068 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1070 // Handy pointer type
1071 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1073 // Accumulate how many bytes are to be pushed on the stack, including the
1074 // linkage area, and parameter passing area. According to the SPU ABI,
1075 // we minimally need space for [LR] and [SP]
1076 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1078 // Set up a copy of the stack pointer for use loading and storing any
1079 // arguments that may not fit in the registers available for argument
// passing.
1081 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1083 // Figure out which arguments are going to go in registers, and which in
// memory.
1085 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1086 unsigned ArgRegIdx = 0;
1088 // Keep track of registers passing arguments
1089 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1090 // And the arguments passed on the stack
1091 SmallVector<SDValue, 8> MemOpChains;
1093 for (unsigned i = 0; i != NumOps; ++i) {
1094 SDValue Arg = TheCall->getArg(i);
1096 // PtrOff will be used to store the current argument to the stack if a
1097 // register cannot be found for it.
1098 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1099 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1101 switch (Arg.getValueType().getSimpleVT()) {
1102 default: assert(0 && "Unexpected ValueType for argument!");
1106 if (ArgRegIdx != NumArgRegs) {
1107 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1109 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1110 ArgOffset += StackSlotSize;
1115 if (ArgRegIdx != NumArgRegs) {
1116 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1118 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1119 ArgOffset += StackSlotSize;
1126 if (ArgRegIdx != NumArgRegs) {
1127 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1129 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1130 ArgOffset += StackSlotSize;
1136 // Update number of stack bytes actually used, insert a call sequence start
1137 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1138 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1141 if (!MemOpChains.empty()) {
1142 // Adjust the stack pointer for the stack arguments.
1143 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1144 &MemOpChains[0], MemOpChains.size());
1147 // Build a sequence of copy-to-reg nodes chained together with token chain
1148 // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
1150 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1151 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
1153 InFlag = Chain.getValue(1);
  }
1156 SmallVector<SDValue, 8> Ops;
1157 unsigned CallOpc = SPUISD::CALL;
1159 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1160 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1161 // node so that legalize doesn't hack it.
1162 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1163 GlobalValue *GV = G->getGlobal();
1164 MVT CalleeVT = Callee.getValueType();
1165 SDValue Zero = DAG.getConstant(0, PtrVT);
1166 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1168 if (!ST->usingLargeMem()) {
1169 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1170 // style calls, otherwise, external symbols are BRASL calls. This assumes
1171 // that declared/defined symbols are in the same compilation unit and can
1172 // be reached through PC-relative jumps.
1175 // This may be an unsafe assumption for JIT and really large compilation
      // units.
1177 if (GV->isDeclaration()) {
1178 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1180 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1183 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1185 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1187 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1188 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1189 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1190 // If this is an absolute destination address that appears to be a legal
1191 // local store address, use the munged value.
1192 Callee = SDValue(Dest, 0);
1195 Ops.push_back(Chain);
1196 Ops.push_back(Callee);
1198 // Add argument registers to the end of the list so that they are known live
  // into the call.
1200 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1201 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1202 RegsToPass[i].second.getValueType()));
1204 if (InFlag.getNode())
1205 Ops.push_back(InFlag);
1206 // Returns a chain and a flag for retval copy to use.
1207 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1208 &Ops[0], Ops.size());
1209 InFlag = Chain.getValue(1);
1211 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1212 DAG.getIntPtrConstant(0, true), InFlag);
1213 if (TheCall->getValueType(0) != MVT::Other)
1214 InFlag = Chain.getValue(1);
1216 SDValue ResultVals[3];
1217 unsigned NumResults = 0;
1219 // If the call has results, copy the values out of the ret val registers.
1220 switch (TheCall->getValueType(0).getSimpleVT()) {
1221 default: assert(0 && "Unexpected ret value!");
1222 case MVT::Other: break;
1224 if (TheCall->getValueType(1) == MVT::i32) {
1225 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1226 ResultVals[0] = Chain.getValue(0);
1227 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1228 Chain.getValue(2)).getValue(1);
1229 ResultVals[1] = Chain.getValue(0);
1232 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1233 ResultVals[0] = Chain.getValue(0);
1238 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1239 ResultVals[0] = Chain.getValue(0);
1244 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1245 InFlag).getValue(1);
1246 ResultVals[0] = Chain.getValue(0);
1254 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1255 InFlag).getValue(1);
1256 ResultVals[0] = Chain.getValue(0);
1261 // If the function returns void, just return the chain.
1262 if (NumResults == 0)
    return Chain;
1265 // Otherwise, merge everything together with a MERGE_VALUES node.
1266 ResultVals[NumResults++] = Chain;
1267 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1268 return Res.getValue(Op.getResNo());
static SDValue
1272 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1273 SmallVector<CCValAssign, 16> RVLocs;
1274 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1275 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1276 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1277 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1279 // If this is the first return lowered for this function, add the regs to the
1280 // liveout set for the function.
1281 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1282 for (unsigned i = 0; i != RVLocs.size(); ++i)
1283 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1286 SDValue Chain = Op.getOperand(0);
  SDValue Flag;
1289 // Copy the result values into the output registers.
1290 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1291 CCValAssign &VA = RVLocs[i];
1292 assert(VA.isRegLoc() && "Can only return in registers!");
1293 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1294 Flag = Chain.getValue(1);
  if (Flag.getNode())
1298 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
1300 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}
1304 //===----------------------------------------------------------------------===//
1305 // Vector related lowering:
1306 //===----------------------------------------------------------------------===//
1308 static ConstantSDNode *
1309 getVecImm(SDNode *N) {
1310 SDValue OpVal(0, 0);
1312 // Check to see if this buildvec has a single non-undef value in its elements.
1313 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1314 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1315 if (OpVal.getNode() == 0)
1316 OpVal = N->getOperand(i);
1317 else if (OpVal != N->getOperand(i))
      return 0;
  }
1321 if (OpVal.getNode() != 0) {
1322 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

1327 return 0; // All UNDEF: use implicit def.; not Constant node
}
1330 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1331 /// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
1333 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
1335 if (ConstantSDNode *CN = getVecImm(N)) {
1336 uint64_t Value = CN->getZExtValue();
1337 if (ValueType == MVT::i64) {
1338 uint64_t UValue = CN->getZExtValue();
1339 uint32_t upper = uint32_t(UValue >> 32);
1340 uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
1343 Value = Value >> 32;
    }
1345 if (Value <= 0x3ffff)
1346 return DAG.getTargetConstant(Value, ValueType);
1352 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1353 /// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
1355 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
1357 if (ConstantSDNode *CN = getVecImm(N)) {
1358 int64_t Value = CN->getSExtValue();
1359 if (ValueType == MVT::i64) {
1360 uint64_t UValue = CN->getZExtValue();
1361 uint32_t upper = uint32_t(UValue >> 32);
1362 uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
1365 Value = Value >> 32;
    }
1367 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1368 return DAG.getTargetConstant(Value, ValueType);
1375 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1376 /// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
1378 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
1380 if (ConstantSDNode *CN = getVecImm(N)) {
1381 int64_t Value = CN->getSExtValue();
1382 if (ValueType == MVT::i64) {
1383 uint64_t UValue = CN->getZExtValue();
1384 uint32_t upper = uint32_t(UValue >> 32);
1385 uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
1388 Value = Value >> 32;
    }
1390 if (isS10Constant(Value))
1391 return DAG.getTargetConstant(Value, ValueType);
1397 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1398 /// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
1401 /// @note: The incoming vector is v16i8 because that's the only way we can load
1402 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same.
1404 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
1406 if (ConstantSDNode *CN = getVecImm(N)) {
1407 int Value = (int) CN->getZExtValue();
1408 if (ValueType == MVT::i16
1409 && Value <= 0xffff /* truncated from uint64_t */
1410 && ((short) Value >> 8) == ((short) Value & 0xff))
1411 return DAG.getTargetConstant(Value & 0xff, ValueType);
1412 else if (ValueType == MVT::i8
1413 && (Value & 0xff) == Value)
1414 return DAG.getTargetConstant(Value, ValueType);
1420 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1421 /// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
1423 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
1425 if (ConstantSDNode *CN = getVecImm(N)) {
1426 uint64_t Value = CN->getZExtValue();
1427 if ((ValueType == MVT::i32
1428 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1429 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1430 return DAG.getTargetConstant(Value >> 16, ValueType);
1436 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1437 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1438 if (ConstantSDNode *CN = getVecImm(N)) {
1439 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1445 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1446 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1447 if (ConstantSDNode *CN = getVecImm(N)) {
1448 return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
1454 // If this is a vector of constants or undefs, get the bits. A bit in
1455 // UndefBits is set if the corresponding element of the vector is an
1456 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1457 // zero. Return true if this is not an array of constants, false if it is.
1459 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1460 uint64_t UndefBits[2]) {
1461 // Start with zero'd results.
1462 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1464 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1465 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1466 SDValue OpVal = BV->getOperand(i);
1468 unsigned PartNo = i >= e/2; // In the upper 64 bits?
1469 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1471 uint64_t EltBits = 0;
1472 if (OpVal.getOpcode() == ISD::UNDEF) {
1473 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1474 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1476 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1477 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1478 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1479 const APFloat &apf = CN->getValueAPF();
1480 EltBits = (CN->getValueType(0) == MVT::f32
1481 ? FloatToBits(apf.convertToFloat())
1482 : DoubleToBits(apf.convertToDouble()));
1484 // Nonconstant element.
1488 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1491 //printf("%llx %llx %llx %llx\n",
1492 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1496 /// If this is a splat (repetition) of a value across the whole vector, return
1497 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1498 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1499 /// SplatSize = 1 byte.
1500 static bool isConstantSplat(const uint64_t Bits128[2],
1501 const uint64_t Undef128[2],
1503 uint64_t &SplatBits, uint64_t &SplatUndef,
1505 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1506 // the same as the lower 64-bits, ignoring undefs.
1507 uint64_t Bits64 = Bits128[0] | Bits128[1];
1508 uint64_t Undef64 = Undef128[0] & Undef128[1];
1509 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1510 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1511 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1512 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
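// Each step above folds two halves together (ORing the value bits, ANDing the
// undef masks) so that progressively smaller splat granularities can be
// recognized by the checks below.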
1514 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1515 if (MinSplatBits < 64) {
1517 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
1519 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1520 if (MinSplatBits < 32) {
1522 // If the top 16-bits are different than the lower 16-bits, ignoring
1523 // undefs, we have an i32 splat.
1524 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1525 if (MinSplatBits < 16) {
1526 // If the top 8-bits are different than the lower 8-bits, ignoring
1527 // undefs, we have an i16 splat.
1528 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1529 == ((Bits16 >> 8) & ~Undef16)) {
1530 // Otherwise, we have an 8-bit splat.
1531 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1532 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1538 SplatUndef = Undef16;
1545 SplatUndef = Undef32;
1551 SplatBits = Bits128[0];
1552 SplatUndef = Undef128[0];
1558 return false; // Can't be a splat if two pieces don't match.
1561 // If this is a case we can't handle, return null and let the default
1562 // expansion code take care of it. If we CAN select this case, and if it
1563 // selects to a single instruction, return Op. Otherwise, if we can codegen
1564 // this case more efficiently than a constant pool load, lower it to the
1565 // sequence of ops that should be used.
1566 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1567 MVT VT = Op.getValueType();
1568 // If this is a vector of constants or undefs, get the bits. A bit in
1569 // UndefBits is set if the corresponding element of the vector is an
1570 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1572 uint64_t VectorBits[2];
1573 uint64_t UndefBits[2];
1574 uint64_t SplatBits, SplatUndef;
1576 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1577 || !isConstantSplat(VectorBits, UndefBits,
1578 VT.getVectorElementType().getSizeInBits(),
1579 SplatBits, SplatUndef, SplatSize))
1580 return SDValue(); // Not a constant vector, not a splat.
1582 switch (VT.getSimpleVT()) {
1585 uint32_t Value32 = SplatBits;
1586 assert(SplatSize == 4
1587 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1588 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1589 SDValue T = DAG.getConstant(Value32, MVT::i32);
1590 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1591 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1595 uint64_t f64val = SplatBits;
1596 assert(SplatSize == 8
1597 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1598 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1599 SDValue T = DAG.getConstant(f64val, MVT::i64);
1600 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1601 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1605 // 8-bit constants have to be expanded to 16-bits
1606 unsigned short Value16 = SplatBits | (SplatBits << 8);
1608 for (int i = 0; i < 8; ++i)
1609 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1610 return DAG.getNode(ISD::BIT_CONVERT, VT,
1611 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1614 unsigned short Value16;
    if (SplatSize == 2)
1616 Value16 = (unsigned short) (SplatBits & 0xffff);
    else
1618 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1619 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1621 for (int i = 0; i < 8; ++i) Ops[i] = T;
1622 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1625 unsigned int Value = SplatBits;
1626 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1627 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1630 uint64_t val = SplatBits;
1631 uint32_t upper = uint32_t(val >> 32);
1632 uint32_t lower = uint32_t(val);
1634 if (upper == lower) {
1635 // Magic constant that can be matched by IL, ILA, et al.:
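// (il loads a sign-extended 16-bit immediate into each word slot, ilhu/iohl
// compose arbitrary 32-bit words, and ila takes an 18-bit unsigned immediate;
// instruction selection picks whichever form fits this splat value.)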
1636 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1637 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1641 SmallVector<SDValue, 16> ShufBytes;
1643 bool upper_special, lower_special;
1645 // NOTE: This code creates common-case shuffle masks that can be easily
1646 // detected as common expressions. It is not attempting to create highly
1647 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1649 // Detect if the upper or lower half is a special shuffle mask pattern:
1650 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1651 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
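// These three word values are special because shufb can produce them directly
// from its control word: a control byte of the form 10xxxxxx yields 0x00,
// 110xxxxx yields 0xff, and 111xxxxx yields 0x80, so no source vector is
// needed for those lanes.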
1653 // Create lower vector if not a special pattern
1654 if (!lower_special) {
1655 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1656 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1657 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1658 LO32C, LO32C, LO32C, LO32C));
1661 // Create upper vector if not a special pattern
1662 if (!upper_special) {
1663 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1664 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1665 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1666 HI32C, HI32C, HI32C, HI32C));
1669 // If either upper or lower are special, then the two input operands are
1670 // the same (basically, one of them is a "don't care")
1675 if (lower_special && upper_special) {
1676 // Unhappy situation... both upper and lower are special, so punt with
1677 // a target constant:
1678 SDValue Zero = DAG.getConstant(0, MVT::i32);
1679 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1683 for (int i = 0; i < 4; ++i) {
1685 for (int j = 0; j < 4; ++j) {
1687 bool process_upper, process_lower;
1689 process_upper = (upper_special && (i & 1) == 0);
1690 process_lower = (lower_special && (i & 1) == 1);
1692 if (process_upper || process_lower) {
1693 if ((process_upper && upper == 0)
1694 || (process_lower && lower == 0))
1696 else if ((process_upper && upper == 0xffffffff)
1697 || (process_lower && lower == 0xffffffff))
1699 else if ((process_upper && upper == 0x80000000)
1700 || (process_lower && lower == 0x80000000))
1701 val |= (j == 0 ? 0xe0 : 0x80);
1703 val |= i * 4 + j + ((i & 1) * 16);
1706 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1709 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1710 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1711 &ShufBytes[0], ShufBytes.size()));
1719 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1720 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1721 /// permutation vector, V3, is monotonically increasing with one "exception"
1722 /// element, e.g., (0, 1, _, 3). If this is the case, then generate an
1723 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1724 /// In either case, the net result is going to eventually invoke SHUFB to
1725 /// permute/shuffle the bytes from V1 and V2.
1727 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1728 /// generate a control word for byte/halfword/word insertion. This takes care
1729 /// of a single element move from V2 into V1.
1731 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
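/// Illustrative examples (editorial note): for a v4i32 mask such as
/// (0, 1, 6, 3) the indices taken from V1 are monotonically increasing and
/// exactly one index (6, i.e. element 2 of V2) comes from V2, so the cheaper
/// C*D/INSERT_MASK path below applies.  A mask such as (3, 2, 1, 0) is not
/// monotonic, so the general constant-mask SHUFB path is used instead.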
1732 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1733 SDValue V1 = Op.getOperand(0);
1734 SDValue V2 = Op.getOperand(1);
1735 SDValue PermMask = Op.getOperand(2);
1737 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1739 // If we have a single element being moved from V1 to V2, this can be handled
1740 // using the C*[DX] compute mask instructions, but the vector elements have
1741 // to be monotonically increasing with one exception element.
1742 MVT EltVT = V1.getValueType().getVectorElementType();
1743 unsigned EltsFromV2 = 0;
1745 unsigned V2EltIdx0 = 0;
1746 unsigned CurrElt = 0;
1747 bool monotonic = true;
1748 if (EltVT == MVT::i8)
1750 else if (EltVT == MVT::i16)
1752 else if (EltVT == MVT::i32)
1755 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1757 for (unsigned i = 0, e = PermMask.getNumOperands();
1758 EltsFromV2 <= 1 && monotonic && i != e;
1761 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1764 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1766 if (SrcElt >= V2EltIdx0) {
1768 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1769 } else if (CurrElt != SrcElt) {
1776 if (EltsFromV2 == 1 && monotonic) {
1777 // Compute mask and shuffle
1778 MachineFunction &MF = DAG.getMachineFunction();
1779 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1780 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1781 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1782 // Initialize temporary register to 0
1783 SDValue InitTempReg =
1784 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1785 // Copy register's contents as index in INSERT_MASK:
1786 SDValue ShufMaskOp =
1787 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1788 DAG.getTargetConstant(V2Elt, MVT::i32),
1789 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1790 // Use shuffle mask in SHUFB synthetic instruction:
1791 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1793 // Convert the SHUFFLE_VECTOR mask's input element units to actual byte indices.
1795 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1797 SmallVector<SDValue, 16> ResultMask;
1798 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1800 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1803 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1805 for (unsigned j = 0; j < BytesPerElement; ++j) {
1806 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1811 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1812 &ResultMask[0], ResultMask.size());
1813 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
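  // Illustrative example (editorial sketch): for v4i32, BytesPerElement is 4,
  // so a mask entry SrcElt == 2 expands to byte indices 8, 9, 10, 11 in the
  // v16i8 permute mask handed to SHUFB.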
1817 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1818 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1820 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1821 // For a constant, build the appropriate constant vector, which will
1822 // eventually simplify to a vector register load.
1824 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1825 SmallVector<SDValue, 16> ConstVecValues;
1829 // Create a constant vector:
1830 switch (Op.getValueType().getSimpleVT()) {
1831 default: assert(0 && "Unexpected constant value type in "
1832 "LowerSCALAR_TO_VECTOR");
1833 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1834 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1835 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1836 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1837 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1838 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1841 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1842 for (size_t j = 0; j < n_copies; ++j)
1843 ConstVecValues.push_back(CValue);
1845 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1846 &ConstVecValues[0], ConstVecValues.size());
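    // Illustrative example (editorial sketch): (scalar_to_vector (i32 7)) with
    // result type v4i32 becomes (build_vector 7, 7, 7, 7) here, which then
    // simplifies to a single vector constant load (cf. the splat handling in
    // LowerBUILD_VECTOR above).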
1848 // Otherwise, copy the value from one register to another:
1849 switch (Op0.getValueType().getSimpleVT()) {
1850 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1857 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1864 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1865 switch (Op.getValueType().getSimpleVT()) {
1867 cerr << "CellSPU: Unknown vector multiplication, got "
1868 << Op.getValueType().getMVTString()
1874 SDValue rA = Op.getOperand(0);
1875 SDValue rB = Op.getOperand(1);
1876 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1877 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1878 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1879 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1881 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1885 // Multiply two v8i16 vectors (pipeline friendly version):
1886 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1887 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1888 // c) Use SELB to select upper and lower halves from the intermediate results
1890 // NOTE: We really want to move the SELECT_MASK earlier to actually get the
1891 // dual-issue; this code does manage that, even if it is a little convoluted.
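// Editorial sketch (not part of the lowering): each 32-bit word of the inputs
// packs two 16-bit lanes (hi:lo).  Assuming MPY forms lo(rA)*lo(rB) and MPYHH
// forms hi(rA)*hi(rB) per word, the merged word is roughly
//   (((hi(rA) * hi(rB)) & 0xffff) << 16) | ((lo(rA) * lo(rB)) & 0xffff)
// i.e. the low 16 bits of each lane product; the 0xcccc SELECT_MASK drives
// SELB to take the upper halfword from the shifted MPYHH result and the lower
// halfword from the MPY result.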
1894 MachineFunction &MF = DAG.getMachineFunction();
1895 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1896 SDValue Chain = Op.getOperand(0);
1897 SDValue rA = Op.getOperand(0);
1898 SDValue rB = Op.getOperand(1);
1899 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1900 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1903 DAG.getCopyToReg(Chain, FSMBIreg,
1904 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1905 DAG.getConstant(0xcccc, MVT::i16)));
1908 DAG.getCopyToReg(FSMBOp, HiProdReg,
1909 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1911 SDValue HHProd_v4i32 =
1912 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1913 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1915 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1916 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1917 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1918 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1920 DAG.getConstant(16, MVT::i16))),
1921 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1924 // This M00sE is N@stI! (apologies to Monty Python)
1926 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1927 // is to break it all apart, sign extend, and reassemble the various
1928 // intermediate products.
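// Editorial note: for 8-bit lanes only the low byte of each product is
// needed, i.e. result = (a * b) & 0xff, so every byte can be computed with a
// 16-bit multiply of the extended bytes followed by masking; the MPY /
// VEC_SRA / VEC_SHL / SELB sequence below pieces those byte products together
// lane by lane.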
1930 SDValue rA = Op.getOperand(0);
1931 SDValue rB = Op.getOperand(1);
1932 SDValue c8 = DAG.getConstant(8, MVT::i32);
1933 SDValue c16 = DAG.getConstant(16, MVT::i32);
1936 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1937 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1938 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1940 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1942 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1945 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1946 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1948 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1949 DAG.getConstant(0x2222, MVT::i16));
1951 SDValue LoProdParts =
1952 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1953 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1954 LLProd, LHProd, FSMBmask));
1956 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1959 DAG.getNode(ISD::AND, MVT::v4i32,
1961 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1962 LoProdMask, LoProdMask,
1963 LoProdMask, LoProdMask));
1966 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1967 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1970 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1971 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1974 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1975 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1976 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1979 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1980 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1981 DAG.getNode(SPUISD::VEC_SRA,
1982 MVT::v4i32, rAH, c8)),
1983 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1984 DAG.getNode(SPUISD::VEC_SRA,
1985 MVT::v4i32, rBH, c8)));
1988 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1990 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
1994 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
1996 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
1997 DAG.getNode(ISD::OR, MVT::v4i32,
2005 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2006 MachineFunction &MF = DAG.getMachineFunction();
2007 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2009 SDValue A = Op.getOperand(0);
2010 SDValue B = Op.getOperand(1);
2011 MVT VT = Op.getValueType();
2013 unsigned VRegBR, VRegC;
2015 if (VT == MVT::f32) {
2016 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2017 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2019 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2020 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2022 // TODO: make sure we're feeding FPInterp the right arguments
2023 // Right now: fi B, frest(B)
2026 // (Floating Interpolate (FP Reciprocal Estimate B))
2028 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2029 DAG.getNode(SPUISD::FPInterp, VT, B,
2030 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2032 // Computes A * BRcpl and stores in a temporary register
2034 DAG.getCopyToReg(BRcpl, VRegC,
2035 DAG.getNode(ISD::FMUL, VT, A,
2036 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2037 // What does the Chain variable do? It's magic!
2038 // TODO: set Chain = Op(0).getEntryNode()
2040 return DAG.getNode(ISD::FADD, VT,
2041 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2042 DAG.getNode(ISD::FMUL, VT,
2043 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2044 DAG.getNode(ISD::FSUB, VT, A,
2045 DAG.getNode(ISD::FMUL, VT, B,
2046 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
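  // Editorial sketch of the math above, assuming BRcpl ~= 1/B from the
  // estimate-and-interpolate (FPRecipEst/FPInterp) pair:
  //   q      = A * BRcpl                 // first-cut quotient
  //   result = q + BRcpl * (A - B * q)   // one refinement step
  // If BRcpl were exactly 1/B the correction term would vanish; otherwise it
  // cancels the first-order error in the estimate.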
2049 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2050 MVT VT = Op.getValueType();
2051 SDValue N = Op.getOperand(0);
2052 SDValue Elt = Op.getOperand(1);
2053 SDValue ShufMask[16];
2054 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2056 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2058 int EltNo = (int) C->getZExtValue();
2061 if (VT == MVT::i8 && EltNo >= 16)
2062 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2063 else if (VT == MVT::i16 && EltNo >= 8)
2064 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2065 else if (VT == MVT::i32 && EltNo >= 4)
2066 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2067 else if (VT == MVT::i64 && EltNo >= 2)
2068 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2070 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2071 // i32 and i64: Element 0 is the preferred slot
2072 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2075 // Need to generate shuffle mask and extract:
2076 int prefslot_begin = -1, prefslot_end = -1;
2077 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2079 switch (VT.getSimpleVT()) {
2081 assert(false && "Invalid value type!");
2083 prefslot_begin = prefslot_end = 3;
2087 prefslot_begin = 2; prefslot_end = 3;
2092 prefslot_begin = 0; prefslot_end = 3;
2097 prefslot_begin = 0; prefslot_end = 7;
2102 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2103 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2105 for (int i = 0; i < 16; ++i) {
2106 // Zero-fill the upper part of the preferred slot; don't care about the rest.
2108 unsigned int mask_val;
2109 if (i <= prefslot_end) {
2111 ((i < prefslot_begin)
2113 : elt_byte + (i - prefslot_begin));
2115 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2117 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2120 SDValue ShufMaskVec =
2121 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2123 sizeof(ShufMask) / sizeof(ShufMask[0]));
2125 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2126 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2127 N, N, ShufMaskVec));
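  // Illustrative example (editorial sketch): extracting i32 element 2 from a
  // v4i32 gives elt_byte == 8 and a preferred slot of bytes 0..3, so the first
  // four mask bytes become 8, 9, 10, 11; the remaining mask bytes repeat that
  // pattern, and SHUFB rotates the requested word into the preferred slot
  // where EXTRACT_ELT0 can read it.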
2131 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2132 SDValue VecOp = Op.getOperand(0);
2133 SDValue ValOp = Op.getOperand(1);
2134 SDValue IdxOp = Op.getOperand(2);
2135 MVT VT = Op.getValueType();
2137 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2138 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2140 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2141 // Use $2 because it's always 16-byte aligned and it's available:
2142 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2145 DAG.getNode(SPUISD::SHUFB, VT,
2146 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2148 DAG.getNode(SPUISD::INSERT_MASK, VT,
2149 DAG.getNode(ISD::ADD, PtrVT,
2151 DAG.getConstant(CN->getZExtValue(),
2157 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2159 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2161 assert(Op.getValueType() == MVT::i8);
2164 assert(0 && "Unhandled i8 math operator");
2168 // 8-bit subtraction: Promote the arguments up to 16 bits and truncate the result.
2170 SDValue N1 = Op.getOperand(1);
2171 N0 = (N0.getOpcode() != ISD::Constant
2172 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2173 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2175 N1 = (N1.getOpcode() != ISD::Constant
2176 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2177 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2179 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2180 DAG.getNode(Opc, MVT::i16, N0, N1));
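    // Editorial example: an i8 subtract is performed as a 16-bit subtract of
    // the sign-extended operands and then truncated; the low 8 bits of the
    // wider difference equal the 8-bit difference regardless of how the
    // operands were extended.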
2184 SDValue N1 = Op.getOperand(1);
2186 N0 = (N0.getOpcode() != ISD::Constant
2187 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2188 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2190 N1Opc = N1.getValueType().bitsLT(MVT::i32)
2193 N1 = (N1.getOpcode() != ISD::Constant
2194 ? DAG.getNode(N1Opc, MVT::i32, N1)
2195 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2198 DAG.getNode(ISD::OR, MVT::i16, N0,
2199 DAG.getNode(ISD::SHL, MVT::i16,
2200 N0, DAG.getConstant(8, MVT::i32)));
2201 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2202 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2206 SDValue N1 = Op.getOperand(1);
2208 N0 = (N0.getOpcode() != ISD::Constant
2209 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2210 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2212 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2215 N1 = (N1.getOpcode() != ISD::Constant
2216 ? DAG.getNode(N1Opc, MVT::i16, N1)
2217 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2219 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2220 DAG.getNode(Opc, MVT::i16, N0, N1));
2223 SDValue N1 = Op.getOperand(1);
2225 N0 = (N0.getOpcode() != ISD::Constant
2226 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2227 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2229 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2232 N1 = (N1.getOpcode() != ISD::Constant
2233 ? DAG.getNode(N1Opc, MVT::i16, N1)
2234 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2236 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2237 DAG.getNode(Opc, MVT::i16, N0, N1));
2240 SDValue N1 = Op.getOperand(1);
2242 N0 = (N0.getOpcode() != ISD::Constant
2243 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2244 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2246 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2247 N1 = (N1.getOpcode() != ISD::Constant
2248 ? DAG.getNode(N1Opc, MVT::i16, N1)
2249 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2251 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2252 DAG.getNode(Opc, MVT::i16, N0, N1));
2260 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2262 MVT VT = Op.getValueType();
2263 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2265 SDValue Op0 = Op.getOperand(0);
2268 case ISD::ZERO_EXTEND:
2269 case ISD::SIGN_EXTEND:
2270 case ISD::ANY_EXTEND: {
2271 MVT Op0VT = Op0.getValueType();
2272 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2274 assert(Op0VT == MVT::i32
2275 && "CellSPU: Zero/sign extending something other than i32");
2276 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2278 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2279 ? SPUISD::ROTBYTES_RIGHT_S
2280 : SPUISD::ROTQUAD_RZ_BYTES);
2281 SDValue PromoteScalar =
2282 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2284 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2285 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2286 DAG.getNode(NewOpc, Op0VecVT,
2288 DAG.getConstant(4, MVT::i32))));
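    // Editorial sketch: the i32 source sits in the preferred slot (bytes 0..3)
    // of its vector.  Rotating the quadword right by 4 bytes moves it into
    // bytes 4..7, i.e. the low half of the first i64 lane, while the bytes
    // rotated in from the left supply the zero (ROTQUAD_RZ_BYTES) or sign
    // (ROTBYTES_RIGHT_S) fill for the high half.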
2292 // Turn operands into vectors to satisfy type checking (shufb works on vectors).
2295 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2297 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2298 SmallVector<SDValue, 16> ShufBytes;
2300 // Create the shuffle mask for "rotating" the carry up one register slot
2301 // once the carry is generated.
2302 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2303 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2304 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2305 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
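    // Editorial note on this mask: in SHUFB control words, bytes of the form
    // 0x80 produce a zero byte, so (0x04050607, 0x80808080, 0x0c0d0e0f,
    // 0x80808080) moves the 32-bit carry word of each i64 lane (bytes 4..7
    // and 12..15) up into the high word of its lane and zeroes the rest,
    // which is where the extended add below consumes it.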
2308 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2309 SDValue ShiftedCarry =
2310 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2312 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2313 &ShufBytes[0], ShufBytes.size()));
2315 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2316 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2317 Op0, Op1, ShiftedCarry));
2321 // Turn operands into vectors to satisfy type checking (shufb works on vectors).
2324 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2326 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2327 SmallVector<SDValue, 16> ShufBytes;
2329 // Create the shuffle mask for "rotating" the borrow up one register slot
2330 // once the borrow is generated.
2331 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2332 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2333 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2334 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2337 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2338 SDValue ShiftedBorrow =
2339 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2340 BorrowGen, BorrowGen,
2341 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2342 &ShufBytes[0], ShufBytes.size()));
2344 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2345 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2346 Op0, Op1, ShiftedBorrow));
2350 SDValue ShiftAmt = Op.getOperand(1);
2351 MVT ShiftAmtVT = ShiftAmt.getValueType();
2352 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2354 DAG.getNode(SPUISD::SELB, VecVT,
2356 DAG.getConstant(0, VecVT),
2357 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2358 DAG.getConstant(0xff00ULL, MVT::i16)));
2359 SDValue ShiftAmtBytes =
2360 DAG.getNode(ISD::SRL, ShiftAmtVT,
2362 DAG.getConstant(3, ShiftAmtVT));
2363 SDValue ShiftAmtBits =
2364 DAG.getNode(ISD::AND, ShiftAmtVT,
2366 DAG.getConstant(7, ShiftAmtVT));
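    // Editorial example: a shift amount decomposes as (amt >> 3) whole bytes
    // plus (amt & 7) residual bits, e.g. a shift by 35 becomes a 4-byte
    // quadword shift followed by a 3-bit shift (35 == 4*8 + 3).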
2368 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2369 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2370 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2371 MaskLower, ShiftAmtBytes),
2376 MVT VT = Op.getValueType();
2377 SDValue ShiftAmt = Op.getOperand(1);
2378 MVT ShiftAmtVT = ShiftAmt.getValueType();
2379 SDValue ShiftAmtBytes =
2380 DAG.getNode(ISD::SRL, ShiftAmtVT,
2382 DAG.getConstant(3, ShiftAmtVT));
2383 SDValue ShiftAmtBits =
2384 DAG.getNode(ISD::AND, ShiftAmtVT,
2386 DAG.getConstant(7, ShiftAmtVT));
2388 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2389 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2390 Op0, ShiftAmtBytes),
2395 // Promote Op0 to vector
2397 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2398 SDValue ShiftAmt = Op.getOperand(1);
2399 MVT ShiftVT = ShiftAmt.getValueType();
2401 // Negate variable shift amounts
2402 if (!isa<ConstantSDNode>(ShiftAmt)) {
2403 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2404 DAG.getConstant(0, ShiftVT), ShiftAmt);
2407 SDValue UpperHalfSign =
2408 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2409 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2410 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2411 Op0, DAG.getConstant(31, MVT::i32))));
2412 SDValue UpperHalfSignMask =
2413 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2414 SDValue UpperLowerMask =
2415 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2416 DAG.getConstant(0xff00, MVT::i16));
2417 SDValue UpperLowerSelect =
2418 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2419 UpperHalfSignMask, Op0, UpperLowerMask);
2420 SDValue RotateLeftBytes =
2421 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2422 UpperLowerSelect, ShiftAmt);
2423 SDValue RotateLeftBits =
2424 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2425 RotateLeftBytes, ShiftAmt);
2427 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2435 //! Lower byte immediate operations for v16i8 vectors:
2437 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2440 MVT VT = Op.getValueType();
2442 ConstVec = Op.getOperand(0);
2443 Arg = Op.getOperand(1);
2444 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2445 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2446 ConstVec = ConstVec.getOperand(0);
2448 ConstVec = Op.getOperand(1);
2449 Arg = Op.getOperand(0);
2450 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2451 ConstVec = ConstVec.getOperand(0);
2456 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2457 uint64_t VectorBits[2];
2458 uint64_t UndefBits[2];
2459 uint64_t SplatBits, SplatUndef;
2462 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2463 && isConstantSplat(VectorBits, UndefBits,
2464 VT.getVectorElementType().getSizeInBits(),
2465 SplatBits, SplatUndef, SplatSize)) {
2467 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2468 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2470 // Turn the BUILD_VECTOR into a set of target constants:
2471 for (size_t i = 0; i < tcVecSize; ++i)
2474 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2475 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
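      // Editorial example: (or v16i8 X, (build_vector 0x0f, ..., 0x0f)) is
      // rewritten here with sixteen TargetConstant 0x0f operands, which lets
      // the ORBI-style byte-immediate patterns match the splat directly
      // instead of materializing the constant vector in a register.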
2478 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2479 // lowered. Return the operation, rather than a null SDValue.
2483 //! Lower i32 multiplication
2484 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2486 switch (VT.getSimpleVT()) {
2488 cerr << "CellSPU: Unknown LowerMUL value type, got "
2489 << Op.getValueType().getMVTString()
2495 SDValue rA = Op.getOperand(0);
2496 SDValue rB = Op.getOperand(1);
2498 return DAG.getNode(ISD::ADD, MVT::i32,
2499 DAG.getNode(ISD::ADD, MVT::i32,
2500 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2501 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2502 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
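    // Editorial sketch of the arithmetic, with a = ah*2^16 + al and
    // b = bh*2^16 + bl:
    //   a*b mod 2^32 = al*bl + ((ah*bl + bh*al) << 16)
    // MPYU supplies al*bl as a full 32-bit product, while MPYH(a,b) and
    // MPYH(b,a) supply the two cross terms already shifted into the upper
    // halfword; the ah*bh term only affects bits >= 32 and is dropped.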
2509 //! Custom lowering for CTPOP (count population)
2511 Custom lowering code that counts the number of ones in the input
2512 operand. SPU has such an instruction, but it counts the number of
2513 ones per byte, which then have to be accumulated.
2515 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2516 MVT VT = Op.getValueType();
2517 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2519 switch (VT.getSimpleVT()) {
2521 assert(false && "Invalid value type!");
2523 SDValue N = Op.getOperand(0);
2524 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2526 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2527 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2529 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2533 MachineFunction &MF = DAG.getMachineFunction();
2534 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2536 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2538 SDValue N = Op.getOperand(0);
2539 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2540 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2541 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2543 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2544 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2546 // CNTB_result becomes the chain to which the virtual register
2547 // CNTB_reg becomes associated:
2548 SDValue CNTB_result =
2549 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2551 SDValue CNTB_rescopy =
2552 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2554 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2556 return DAG.getNode(ISD::AND, MVT::i16,
2557 DAG.getNode(ISD::ADD, MVT::i16,
2558 DAG.getNode(ISD::SRL, MVT::i16,
2565 MachineFunction &MF = DAG.getMachineFunction();
2566 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2568 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2569 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2571 SDValue N = Op.getOperand(0);
2572 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2573 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2574 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2575 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2577 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2578 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2580 // CNTB_result becomes the chain to which all of the virtual registers
2581 // CNTB_reg, SUM1_reg become associated:
2582 SDValue CNTB_result =
2583 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2585 SDValue CNTB_rescopy =
2586 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2589 DAG.getNode(ISD::SRL, MVT::i32,
2590 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2593 DAG.getNode(ISD::ADD, MVT::i32,
2594 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2596 SDValue Sum1_rescopy =
2597 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2600 DAG.getNode(ISD::SRL, MVT::i32,
2601 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2604 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2605 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2607 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
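      // Editorial sketch: CNTB leaves one per-byte population count in each
      // byte, so for a word whose byte counts are c3,c2,c1,c0 the sequence
      //   t   = x + (x >> 16)       // folds c3,c2 onto c1,c0
      //   t   = t + (t >> 8)        // folds the remaining pair
      //   res = t & 0xff            // c3+c2+c1+c0, at most 32
      // accumulates all four counts into the low byte.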
2617 /// LowerOperation - Provide custom lowering hooks for some operations.
2620 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2622 unsigned Opc = (unsigned) Op.getOpcode();
2623 MVT VT = Op.getValueType();
2627 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2628 cerr << "Op.getOpcode() = " << Opc << "\n";
2629 cerr << "*Op.getNode():\n";
2630 Op.getNode()->dump();
2636 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2638 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2639 case ISD::ConstantPool:
2640 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2641 case ISD::GlobalAddress:
2642 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2643 case ISD::JumpTable:
2644 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2646 return LowerConstant(Op, DAG);
2647 case ISD::ConstantFP:
2648 return LowerConstantFP(Op, DAG);
2650 return LowerBRCOND(Op, DAG);
2651 case ISD::FORMAL_ARGUMENTS:
2652 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2654 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2656 return LowerRET(Op, DAG, getTargetMachine());
2659 // i8, i64 math ops:
2660 case ISD::ZERO_EXTEND:
2661 case ISD::SIGN_EXTEND:
2662 case ISD::ANY_EXTEND:
2671 return LowerI8Math(Op, DAG, Opc);
2672 else if (VT == MVT::i64)
2673 return LowerI64Math(Op, DAG, Opc);
2677 // Vector-related lowering.
2678 case ISD::BUILD_VECTOR:
2679 return LowerBUILD_VECTOR(Op, DAG);
2680 case ISD::SCALAR_TO_VECTOR:
2681 return LowerSCALAR_TO_VECTOR(Op, DAG);
2682 case ISD::VECTOR_SHUFFLE:
2683 return LowerVECTOR_SHUFFLE(Op, DAG);
2684 case ISD::EXTRACT_VECTOR_ELT:
2685 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2686 case ISD::INSERT_VECTOR_ELT:
2687 return LowerINSERT_VECTOR_ELT(Op, DAG);
2689 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2693 return LowerByteImmed(Op, DAG);
2695 // Vector and i8 multiply:
2698 return LowerVectorMUL(Op, DAG);
2699 else if (VT == MVT::i8)
2700 return LowerI8Math(Op, DAG, Opc);
2702 return LowerMUL(Op, DAG, VT, Opc);
2705 if (VT == MVT::f32 || VT == MVT::v4f32)
2706 return LowerFDIVf32(Op, DAG);
2707 // else if (Op.getValueType() == MVT::f64)
2708 // return LowerFDIVf64(Op, DAG);
2710 assert(0 && "Calling FDIV on unsupported MVT");
2713 return LowerCTPOP(Op, DAG);
2719 SDNode *SPUTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG)
2722 unsigned Opc = (unsigned) N->getOpcode();
2723 MVT OpVT = N->getValueType(0);
2727 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2728 cerr << "Op.getOpcode() = " << Opc << "\n";
2729 cerr << "*Op.getNode():\n";
2737 // Otherwise, return unchanged.
2741 //===----------------------------------------------------------------------===//
2742 // Target Optimization Hooks
2743 //===----------------------------------------------------------------------===//
2746 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2749 TargetMachine &TM = getTargetMachine();
2751 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2752 SelectionDAG &DAG = DCI.DAG;
2753 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2754 SDValue Result; // Initially, NULL result
2756 switch (N->getOpcode()) {
2759 SDValue Op1 = N->getOperand(1);
2761 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2762 SDValue Op01 = Op0.getOperand(1);
2763 if (Op01.getOpcode() == ISD::Constant
2764 || Op01.getOpcode() == ISD::TargetConstant) {
2765 // (add (SPUindirect <arg>, <const>), <const>) ->
2766 // (SPUindirect <arg>, <const + const>)
2767 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2768 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2769 SDValue combinedConst =
2770 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2771 Op0.getValueType());
2773 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2774 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2775 DEBUG(cerr << "With: (SPUindirect <arg>, "
2776 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2777 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2778 Op0.getOperand(0), combinedConst);
2780 } else if (isa<ConstantSDNode>(Op0)
2781 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2782 SDValue Op11 = Op1.getOperand(1);
2783 if (Op11.getOpcode() == ISD::Constant
2784 || Op11.getOpcode() == ISD::TargetConstant) {
2785 // (add <const>, (SPUindirect <arg>, <const>)) ->
2786 // (SPUindirect <arg>, <const + const>)
2787 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2788 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2789 SDValue combinedConst =
2790 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2791 Op0.getValueType());
2793 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2794 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2795 DEBUG(cerr << "With: (SPUindirect <arg>, "
2796 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2798 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2799 Op1.getOperand(0), combinedConst);
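        // Editorial example: (add (SPUindirect X, 16), 4) -- or the commuted
        // form handled by this branch -- is rewritten to (SPUindirect X, 20),
        // folding both immediates into a single displacement.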
2804 case ISD::SIGN_EXTEND:
2805 case ISD::ZERO_EXTEND:
2806 case ISD::ANY_EXTEND: {
2807 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2808 N->getValueType(0) == Op0.getValueType()) {
2809 // (any_extend (SPUextract_elt0 <arg>)) ->
2810 // (SPUextract_elt0 <arg>)
2811 // Types must match, however...
2812 DEBUG(cerr << "Replace: ");
2813 DEBUG(N->dump(&DAG));
2814 DEBUG(cerr << "\nWith: ");
2815 DEBUG(Op0.getNode()->dump(&DAG));
2816 DEBUG(cerr << "\n");
2822 case SPUISD::IndirectAddr: {
2823 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2824 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2825 if (CN->getZExtValue() == 0) {
2826 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2827 // (SPUaform <addr>, 0)
2829 DEBUG(cerr << "Replace: ");
2830 DEBUG(N->dump(&DAG));
2831 DEBUG(cerr << "\nWith: ");
2832 DEBUG(Op0.getNode()->dump(&DAG));
2833 DEBUG(cerr << "\n");
2840 case SPUISD::SHLQUAD_L_BITS:
2841 case SPUISD::SHLQUAD_L_BYTES:
2842 case SPUISD::VEC_SHL:
2843 case SPUISD::VEC_SRL:
2844 case SPUISD::VEC_SRA:
2845 case SPUISD::ROTQUAD_RZ_BYTES:
2846 case SPUISD::ROTQUAD_RZ_BITS: {
2847 SDValue Op1 = N->getOperand(1);
2849 if (isa<ConstantSDNode>(Op1)) {
2850 // Kill degenerate vector shifts:
2851 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2853 if (CN->getZExtValue() == 0) {
2859 case SPUISD::PROMOTE_SCALAR: {
2860 switch (Op0.getOpcode()) {
2863 case ISD::ANY_EXTEND:
2864 case ISD::ZERO_EXTEND:
2865 case ISD::SIGN_EXTEND: {
2866 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) -> <arg>,
2868 // but only if the SPUpromote_scalar and <arg> types match.
2869 SDValue Op00 = Op0.getOperand(0);
2870 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2871 SDValue Op000 = Op00.getOperand(0);
2872 if (Op000.getValueType() == N->getValueType(0)) {
2878 case SPUISD::EXTRACT_ELT0: {
2879 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) -> <arg>
2881 Result = Op0.getOperand(0);
2888 // Otherwise, return unchanged.
2890 if (Result.getNode()) {
2891 DEBUG(cerr << "\nReplace.SPU: ");
2892 DEBUG(N->dump(&DAG));
2893 DEBUG(cerr << "\nWith: ");
2894 DEBUG(Result.getNode()->dump(&DAG));
2895 DEBUG(cerr << "\n");
2902 //===----------------------------------------------------------------------===//
2903 // Inline Assembly Support
2904 //===----------------------------------------------------------------------===//
2906 /// getConstraintType - Given a constraint letter, return the type of
2907 /// constraint it is for this target.
2908 SPUTargetLowering::ConstraintType
2909 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2910 if (ConstraintLetter.size() == 1) {
2911 switch (ConstraintLetter[0]) {
2918 return C_RegisterClass;
2921 return TargetLowering::getConstraintType(ConstraintLetter);
2924 std::pair<unsigned, const TargetRegisterClass*>
2925 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2928 if (Constraint.size() == 1) {
2929 // GCC-style register constraint letters (adapted from the RS6000 backend)
2930 switch (Constraint[0]) {
2934 return std::make_pair(0U, SPU::R64CRegisterClass);
2935 return std::make_pair(0U, SPU::R32CRegisterClass);
2938 return std::make_pair(0U, SPU::R32FPRegisterClass);
2939 else if (VT == MVT::f64)
2940 return std::make_pair(0U, SPU::R64FPRegisterClass);
2943 return std::make_pair(0U, SPU::GPRCRegisterClass);
2947 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2950 //! Compute used/known bits for a SPU operand
2952 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2956 const SelectionDAG &DAG,
2957 unsigned Depth ) const {
2959 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2962 switch (Op.getOpcode()) {
2964 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2974 case SPUISD::PROMOTE_SCALAR: {
2975 SDValue Op0 = Op.getOperand(0);
2976 MVT Op0VT = Op0.getValueType();
2977 unsigned Op0VTBits = Op0VT.getSizeInBits();
2978 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2979 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2980 KnownOne |= APInt(Op0VTBits, InMask, false);
2984 case SPUISD::LDRESULT:
2985 case SPUISD::EXTRACT_ELT0:
2986 case SPUISD::EXTRACT_ELT0_CHAINED: {
2987 MVT OpVT = Op.getValueType();
2988 unsigned OpVTBits = OpVT.getSizeInBits();
2989 uint64_t InMask = OpVT.getIntegerVTBitMask();
2990 KnownZero |= APInt(OpVTBits, ~InMask, false);
2991 KnownOne |= APInt(OpVTBits, InMask, false);
2996 case EXTRACT_I1_ZEXT:
2997 case EXTRACT_I1_SEXT:
2998 case EXTRACT_I8_ZEXT:
2999 case EXTRACT_I8_SEXT:
3004 case SPUISD::SHLQUAD_L_BITS:
3005 case SPUISD::SHLQUAD_L_BYTES:
3006 case SPUISD::VEC_SHL:
3007 case SPUISD::VEC_SRL:
3008 case SPUISD::VEC_SRA:
3009 case SPUISD::VEC_ROTL:
3010 case SPUISD::VEC_ROTR:
3011 case SPUISD::ROTQUAD_RZ_BYTES:
3012 case SPUISD::ROTQUAD_RZ_BITS:
3013 case SPUISD::ROTBYTES_RIGHT_S:
3014 case SPUISD::ROTBYTES_LEFT:
3015 case SPUISD::ROTBYTES_LEFT_CHAINED:
3016 case SPUISD::SELECT_MASK:
3018 case SPUISD::FPInterp:
3019 case SPUISD::FPRecipEst:
3020 case SPUISD::SEXT32TO64:
3025 // LowerAsmOperandForConstraint
3027 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3028 char ConstraintLetter,
3030 std::vector<SDValue> &Ops,
3031 SelectionDAG &DAG) const {
3032 // Default, for the time being, to the base class handler
3033 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3037 /// isLegalAddressImmediate - Return true if the integer value can be used
3038 /// as the offset of the target addressing mode.
3039 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3040 const Type *Ty) const {
3041 // SPU addresses cover the 256K local store:
3042 return (V > -(1 << 18) && V < (1 << 18) - 1);
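  // Editorial example: with 1 << 18 == 262144, offsets such as 131072 pass
  // this check, while 262144 (and anything more negative than -262143) is
  // rejected.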
3045 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3050 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3051 // The SPU target isn't yet aware of offsets.