1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
38 std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
41 struct valtype_map_s {
43 const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
59 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
// Result pointer; stays 0 unless a matching table entry is found below.
60 const valtype_map_s *retval = 0;
// Linear scan over the n_valtype_map entries of valtype_map, comparing each
// entry's valtype field against VT.
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
// Diagnostic for the "no entry found" case.
// NOTE(review): the condition guarding this output and the function's return
// statement are not visible in this excerpt -- confirm against the full file.
71 cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
88 bool isMemoryOperand(const SDOperand &Op)
// Read the opcode once, then test it against every address-like opcode:
// the generic ISD address nodes (global, global TLS, jump table, constant
// pool, external symbol), their Target* counterparts, and SPUISD::AFormAddr.
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
//! (i.e., its opcode is ISD::Register or SPUISD::LDRESULT).
105 bool isIndirectOperand(const SDOperand &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // SPU has no sign or zero extended loads for i1, i8, i16:
134 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
135 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
136 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
137 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
138 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
139 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
140 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
143 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
144 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
145 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
147 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
148 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
149 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
152 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
153 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
154 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
156 // SPU constant load actions are custom lowered:
157 setOperationAction(ISD::Constant, MVT::i64, Custom);
158 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
159 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
161 // SPU's loads and stores have to be custom lowered:
162 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
164 MVT VT = (MVT::SimpleValueType)sctype;
166 setOperationAction(ISD::LOAD, VT, Custom);
167 setOperationAction(ISD::STORE, VT, Custom);
170 // Custom lower BRCOND for i1, i8 to "promote" the result to
171 // i32 and i16, respectively.
172 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
174 // Expand the jumptable branches
175 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
176 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
177 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
179 // SPU has no intrinsics for these particular operations:
180 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
182 // SPU has no SREM/UREM instructions
183 setOperationAction(ISD::SREM, MVT::i32, Expand);
184 setOperationAction(ISD::UREM, MVT::i32, Expand);
185 setOperationAction(ISD::SREM, MVT::i64, Expand);
186 setOperationAction(ISD::UREM, MVT::i64, Expand);
188 // We don't support sin/cos/sqrt/fmod
189 setOperationAction(ISD::FSIN , MVT::f64, Expand);
190 setOperationAction(ISD::FCOS , MVT::f64, Expand);
191 setOperationAction(ISD::FREM , MVT::f64, Expand);
192 setOperationAction(ISD::FSIN , MVT::f32, Expand);
193 setOperationAction(ISD::FCOS , MVT::f32, Expand);
194 setOperationAction(ISD::FREM , MVT::f32, Expand);
196 // Hardware square root is not used: FSQRT is always expanded for f64 and f32
197 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
198 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
200 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
201 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
203 // SPU can do rotate right and left, so legalize it... but customize for i8
204 // because instructions don't exist.
205 setOperationAction(ISD::ROTR, MVT::i32, Legal);
206 setOperationAction(ISD::ROTR, MVT::i16, Legal);
207 setOperationAction(ISD::ROTR, MVT::i8, Custom);
208 setOperationAction(ISD::ROTL, MVT::i32, Legal);
209 setOperationAction(ISD::ROTL, MVT::i16, Legal);
210 setOperationAction(ISD::ROTL, MVT::i8, Custom);
211 // SPU has no native version of shift left/right for i8
212 setOperationAction(ISD::SHL, MVT::i8, Custom);
213 setOperationAction(ISD::SRL, MVT::i8, Custom);
214 setOperationAction(ISD::SRA, MVT::i8, Custom);
215 // And SPU needs custom lowering for shift left/right for i64
216 setOperationAction(ISD::SHL, MVT::i64, Custom);
217 setOperationAction(ISD::SRL, MVT::i64, Custom);
218 setOperationAction(ISD::SRA, MVT::i64, Custom);
220 // Custom lower i32 multiplications
221 setOperationAction(ISD::MUL, MVT::i32, Custom);
223 // Need to custom handle (some) common i8, i64 math ops
224 setOperationAction(ISD::ADD, MVT::i64, Custom);
225 setOperationAction(ISD::SUB, MVT::i8, Custom);
226 setOperationAction(ISD::SUB, MVT::i64, Custom);
227 setOperationAction(ISD::MUL, MVT::i8, Custom);
229 // SPU does not have BSWAP. It does support CTLZ for i32.
230 // CTPOP has to be custom lowered.
231 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
232 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
234 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
235 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
236 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
237 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
239 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
240 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
242 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
244 // SPU has a version of select that implements (a&~c)|(b&c), just like
245 // select ought to work:
246 setOperationAction(ISD::SELECT, MVT::i1, Promote);
247 setOperationAction(ISD::SELECT, MVT::i8, Legal);
248 setOperationAction(ISD::SELECT, MVT::i16, Legal);
249 setOperationAction(ISD::SELECT, MVT::i32, Legal);
250 setOperationAction(ISD::SELECT, MVT::i64, Expand);
252 setOperationAction(ISD::SETCC, MVT::i1, Promote);
253 setOperationAction(ISD::SETCC, MVT::i8, Legal);
254 setOperationAction(ISD::SETCC, MVT::i16, Legal);
255 setOperationAction(ISD::SETCC, MVT::i32, Legal);
256 setOperationAction(ISD::SETCC, MVT::i64, Expand);
258 // Zero extension and sign extension for i64 have to be
260 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
261 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
262 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
264 // SPU has a legal FP -> signed INT instruction
265 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
266 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
267 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
268 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
270 // FDIV on SPU requires custom lowering
271 setOperationAction(ISD::FDIV, MVT::f32, Custom);
272 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
274 // SPU has [U|S]INT_TO_FP
275 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
276 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
277 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
278 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
279 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
280 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
281 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
282 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
284 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
285 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
286 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
287 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
289 // We cannot sextinreg(i1). Expand to shifts.
290 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
292 // Support label based line numbers.
293 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
294 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
296 // We want to legalize GlobalAddress and ConstantPool nodes into the
297 // appropriate instructions to materialize the address.
298 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
300 MVT VT = (MVT::SimpleValueType)sctype;
302 setOperationAction(ISD::GlobalAddress, VT, Custom);
303 setOperationAction(ISD::ConstantPool, VT, Custom);
304 setOperationAction(ISD::JumpTable, VT, Custom);
307 // RET must be custom lowered, to meet ABI requirements
308 setOperationAction(ISD::RET, MVT::Other, Custom);
310 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
311 setOperationAction(ISD::VASTART , MVT::Other, Custom);
313 // Use the default implementation.
314 setOperationAction(ISD::VAARG , MVT::Other, Expand);
315 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
316 setOperationAction(ISD::VAEND , MVT::Other, Expand);
317 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
318 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
319 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
320 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
322 // Cell SPU has instructions for converting between i64 and fp.
323 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
324 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
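326 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT.
// NOTE(review): i32 FP_TO_UINT was marked Legal earlier in this constructor;
// presumably this later call overrides it -- confirm which action is intended.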
327 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
329 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
330 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
332 // First set operation action for all vector types to expand. Then we
333 // will selectively turn on ones that can be effectively codegen'd.
334 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
335 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
336 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
337 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
338 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
339 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
341 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
342 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
343 MVT VT = (MVT::SimpleValueType)i;
345 // add/sub are legal for all supported vector VT's.
346 setOperationAction(ISD::ADD , VT, Legal);
347 setOperationAction(ISD::SUB , VT, Legal);
348 // mul has to be custom lowered.
349 setOperationAction(ISD::MUL , VT, Custom);
351 setOperationAction(ISD::AND , VT, Legal);
352 setOperationAction(ISD::OR , VT, Legal);
353 setOperationAction(ISD::XOR , VT, Legal);
354 setOperationAction(ISD::LOAD , VT, Legal);
355 setOperationAction(ISD::SELECT, VT, Legal);
356 setOperationAction(ISD::STORE, VT, Legal);
358 // These operations need to be expanded:
359 setOperationAction(ISD::SDIV, VT, Expand);
360 setOperationAction(ISD::SREM, VT, Expand);
361 setOperationAction(ISD::UDIV, VT, Expand);
362 setOperationAction(ISD::UREM, VT, Expand);
363 setOperationAction(ISD::FDIV, VT, Custom);
365 // Custom lower build_vector, constant pool spills, insert and
366 // extract vector elements:
367 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
368 setOperationAction(ISD::ConstantPool, VT, Custom);
369 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
370 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
371 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
372 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
375 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
376 setOperationAction(ISD::AND, MVT::v16i8, Custom);
377 setOperationAction(ISD::OR, MVT::v16i8, Custom);
378 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
379 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
381 setShiftAmountType(MVT::i32);
382 setSetCCResultContents(ZeroOrOneSetCCResult);
384 setStackPointerRegisterToSaveRestore(SPU::R1);
386 // We have target-specific dag combine patterns for the following nodes:
387 setTargetDAGCombine(ISD::ADD);
388 setTargetDAGCombine(ISD::ZERO_EXTEND);
389 setTargetDAGCombine(ISD::SIGN_EXTEND);
390 setTargetDAGCombine(ISD::ANY_EXTEND);
392 computeRegisterProperties();
396 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
// Maps a target-specific SPUISD opcode to its printable name.
// The file-level node_names map is filled on the first call (while it is still
// empty); afterwards Opcode is looked up and 0 is returned if it has no entry.
398 if (node_names.empty()) {
399 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
400 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
401 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
402 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
403 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
404 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
405 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
406 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
407 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
408 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
409 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
410 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
411 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
412 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
413 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
414 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
415 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
416 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
417 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
418 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
419 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
420 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
421 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
422 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
423 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
424 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
425 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
426 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
427 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
428 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
429 "SPUISD::ROTQUAD_RZ_BYTES";
430 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
431 "SPUISD::ROTQUAD_RZ_BITS";
432 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
433 "SPUISD::ROTBYTES_RIGHT_S";
434 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
435 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
436 "SPUISD::ROTBYTES_LEFT_CHAINED";
437 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
438 "SPUISD::ROTBYTES_LEFT_BITS";
439 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
440 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
441 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
442 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
443 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
444 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
445 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
446 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
447 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Look the opcode up; an opcode without an entry yields 0 rather than a name.
450 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
452 return ((i != node_names.end()) ? i->second : 0);
455 MVT SPUTargetLowering::getSetCCResultType(const SDOperand &Op) const {
456 MVT VT = Op.getValueType();
463 //===----------------------------------------------------------------------===//
464 // Calling convention code:
465 //===----------------------------------------------------------------------===//
467 #include "SPUGenCallingConv.inc"
469 //===----------------------------------------------------------------------===//
470 // LowerOperation implementation
471 //===----------------------------------------------------------------------===//
473 /// Aligned load common code for CellSPU
475 \param[in] Op The SelectionDAG load or store operand
476 \param[in] DAG The selection DAG
477 \param[in] ST CellSPU subtarget information structure
478 \param[in,out] alignment Caller initializes this to the load or store node's
479 value from getAlignment(), may be updated while generating the aligned load
480 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
481 offset (divisible by 16, modulo 16 == 0)
482 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
483 offset of the preferred slot (modulo 16 != 0)
484 \param[in,out] VT Caller initializes this value type to the load or store
485 node's loaded or stored value type; may be updated if an i1-extended load or
487 \param[out] was16aligned true if the base pointer had 16-byte alignment,
488 otherwise false. Can help to determine if the chunk needs to be rotated.
490 Both load and store lowering load a block of data aligned on a 16-byte
491 boundary. This is the common aligned load code shared between both.
494 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
496 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
497 MVT &VT, bool &was16aligned)
499 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
500 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
501 SDOperand basePtr = LSN->getBasePtr();
502 SDOperand chain = LSN->getChain();
504 if (basePtr.getOpcode() == ISD::ADD) {
505 SDOperand Op1 = basePtr.Val->getOperand(1);
507 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
508 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
510 alignOffs = (int) CN->getValue();
511 prefSlotOffs = (int) (alignOffs & 0xf);
513 // Adjust the rotation amount to ensure that the final result ends up in
514 // the preferred slot:
515 prefSlotOffs -= vtm->prefslot_byte;
516 basePtr = basePtr.getOperand(0);
518 // Loading from memory, can we adjust alignment?
519 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
520 SDOperand APtr = basePtr.getOperand(0);
521 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
522 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
523 alignment = GSDN->getGlobal()->getAlignment();
528 prefSlotOffs = -vtm->prefslot_byte;
530 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
531 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
532 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
533 prefSlotOffs = (int) (alignOffs & 0xf);
534 prefSlotOffs -= vtm->prefslot_byte;
535 basePtr = DAG.getRegister(SPU::R1, VT);
538 prefSlotOffs = -vtm->prefslot_byte;
541 if (alignment == 16) {
542 // Realign the base pointer as a D-Form address:
543 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
544 basePtr = DAG.getNode(ISD::ADD, PtrVT,
546 DAG.getConstant((alignOffs & ~0xf), PtrVT));
549 // Emit the vector load:
551 return DAG.getLoad(MVT::v16i8, chain, basePtr,
552 LSN->getSrcValue(), LSN->getSrcValueOffset(),
553 LSN->isVolatile(), 16);
556 // Unaligned load or we're using the "large memory" model, which means that
557 // we have to be very pessimistic:
558 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
559 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
563 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
564 DAG.getConstant((alignOffs & ~0xf), PtrVT));
565 was16aligned = false;
566 return DAG.getLoad(MVT::v16i8, chain, basePtr,
567 LSN->getSrcValue(), LSN->getSrcValueOffset(),
568 LSN->isVolatile(), 16);
571 /// Custom lower loads for CellSPU
573 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
574 within a 16-byte block, we have to rotate to extract the requested element.
577 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
578 LoadSDNode *LN = cast<LoadSDNode>(Op);
579 SDOperand the_chain = LN->getChain();
580 MVT VT = LN->getMemoryVT();
581 MVT OpVT = Op.Val->getValueType(0);
582 ISD::LoadExtType ExtType = LN->getExtensionType();
583 unsigned alignment = LN->getAlignment();
586 switch (LN->getAddressingMode()) {
587 case ISD::UNINDEXED: {
591 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
596 the_chain = result.getValue(1);
597 // Rotate the chunk if necessary
600 if (rotamt != 0 || !was16aligned) {
601 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
606 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
608 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
609 LoadSDNode *LN1 = cast<LoadSDNode>(result);
610 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
611 DAG.getConstant(rotamt, PtrVT));
614 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
615 the_chain = result.getValue(1);
618 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
620 MVT vecVT = MVT::v16i8;
622 // Convert the loaded v16i8 vector to the appropriate vector type
623 // specified by the operand:
626 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
628 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
631 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
632 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
633 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
634 the_chain = result.getValue(1);
636 // Handle the sign and zero-extending loads for i1 and i8:
639 if (ExtType == ISD::SEXTLOAD) {
640 NewOpC = (OpVT == MVT::i1
641 ? SPUISD::EXTRACT_I1_SEXT
642 : SPUISD::EXTRACT_I8_SEXT);
644 assert(ExtType == ISD::ZEXTLOAD);
645 NewOpC = (OpVT == MVT::i1
646 ? SPUISD::EXTRACT_I1_ZEXT
647 : SPUISD::EXTRACT_I8_ZEXT);
650 result = DAG.getNode(NewOpC, OpVT, result);
653 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
654 SDOperand retops[2] = {
659 result = DAG.getNode(SPUISD::LDRESULT, retvts,
660 retops, sizeof(retops) / sizeof(retops[0]));
667 case ISD::LAST_INDEXED_MODE:
668 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
670 cerr << (unsigned) LN->getAddressingMode() << "\n";
678 /// Custom lower stores for CellSPU
680 All CellSPU stores are aligned to 16-byte boundaries, so for elements
681 within a 16-byte block, we have to generate a shuffle to insert the
682 requested element into its place, then store the resulting block.
685 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
686 StoreSDNode *SN = cast<StoreSDNode>(Op);
687 SDOperand Value = SN->getValue();
688 MVT VT = Value.getValueType();
689 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
690 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
691 unsigned alignment = SN->getAlignment();
693 switch (SN->getAddressingMode()) {
694 case ISD::UNINDEXED: {
695 int chunk_offset, slot_offset;
698 // The vector type we really want to load from the 16-byte chunk, except
699 // in the case of MVT::i1, which has to be v16i8.
700 MVT vecVT, stVecVT = MVT::v16i8;
703 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
704 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
706 SDOperand alignLoadVec =
707 AlignedLoad(Op, DAG, ST, SN, alignment,
708 chunk_offset, slot_offset, VT, was16aligned);
710 if (alignLoadVec.Val == 0)
713 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
714 SDOperand basePtr = LN->getBasePtr();
715 SDOperand the_chain = alignLoadVec.getValue(1);
716 SDOperand theValue = SN->getValue();
720 && (theValue.getOpcode() == ISD::AssertZext
721 || theValue.getOpcode() == ISD::AssertSext)) {
722 // Drill down and get the value for zero- and sign-extended
724 theValue = theValue.getOperand(0);
729 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
730 SDOperand insertEltPtr;
731 SDOperand insertEltOp;
733 // If the base pointer is already a D-form address, then just create
734 // a new D-form address with a slot offset and the original base pointer.
735 // Otherwise generate a D-form address with the slot offset relative
736 // to the stack pointer, which is always aligned.
737 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
738 DEBUG(basePtr.Val->dump(&DAG));
741 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
742 (basePtr.getOpcode() == ISD::ADD
743 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
744 insertEltPtr = basePtr;
746 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
749 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
750 result = DAG.getNode(SPUISD::SHUFB, vecVT,
751 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
753 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
755 result = DAG.getStore(the_chain, result, basePtr,
756 LN->getSrcValue(), LN->getSrcValueOffset(),
757 LN->isVolatile(), LN->getAlignment());
766 case ISD::LAST_INDEXED_MODE:
767 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
769 cerr << (unsigned) SN->getAddressingMode() << "\n";
777 /// Generate the address of a constant pool entry.
/// Under the static relocation model the result is an SPUISD::AFormAddr node
/// when the subtarget is not using large memory; otherwise it is an
/// SPUISD::IndirectAddr node built from SPUISD::Hi and SPUISD::Lo parts.
779 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
780 MVT PtrVT = Op.getValueType();
781 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
782 Constant *C = CP->getConstVal();
// Target constant-pool node for the same constant, keeping its alignment.
783 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
784 SDOperand Zero = DAG.getConstant(0, PtrVT);
785 const TargetMachine &TM = DAG.getTarget();
787 if (TM.getRelocationModel() == Reloc::Static) {
788 if (!ST->usingLargeMem()) {
789 // Just return the SDOperand with the constant pool address in it.
790 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
// Reached only when the early return above was not taken: combine the
// Hi and Lo parts of the address into an indirect address node.
792 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
793 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
794 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
// Message for relocation models other than static (the statement this string
// belongs to begins outside this excerpt).
799 "LowerConstantPool: Relocation model other than static not supported.");
804 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// Generate the address of a jump table. Under the static relocation model the
// result is an SPUISD::AFormAddr node when the subtarget is not using large
// memory; otherwise an SPUISD::IndirectAddr node built from Hi and Lo parts.
805 MVT PtrVT = Op.getValueType();
806 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
// Target jump-table node for the same table index.
807 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
808 SDOperand Zero = DAG.getConstant(0, PtrVT);
809 const TargetMachine &TM = DAG.getTarget();
811 if (TM.getRelocationModel() == Reloc::Static) {
812 if (!ST->usingLargeMem()) {
813 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
// Reached only when the early return above was not taken.
815 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
816 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
817 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
// Message for relocation models other than static (the statement this string
// belongs to begins outside this excerpt).
822 "LowerJumpTable: Relocation model other than static not supported.");
827 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// Generate the address of a global value. Under the static relocation model
// the result is an SPUISD::AFormAddr node when the subtarget is not using
// large memory; otherwise an SPUISD::IndirectAddr node built from Hi/Lo parts.
828 MVT PtrVT = Op.getValueType();
829 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
830 GlobalValue *GV = GSDN->getGlobal();
// Target global-address node for the same global, carrying over its offset.
831 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
832 const TargetMachine &TM = DAG.getTarget();
833 SDOperand Zero = DAG.getConstant(0, PtrVT);
835 if (TM.getRelocationModel() == Reloc::Static) {
836 if (!ST->usingLargeMem()) {
837 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
// Reached only when the early return above was not taken.
839 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
840 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
841 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
// Diagnostic for relocation models other than static (the rest of this
// statement lies outside this excerpt).
844 cerr << "LowerGlobalAddress: Relocation model other than static not "
853 //! Custom lower i64 integer constants
855 This code inserts all of the necessary juggling that needs to occur to load
856 a 64-bit constant into a register.
859 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
860 MVT VT = Op.getValueType();
861 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
863 if (VT == MVT::i64) {
// Re-create the value as an i64 constant, use it for both elements of a
// v2i64 BUILD_VECTOR, and wrap that vector in an SPUISD::EXTRACT_ELT0 node
// of the original value type.
864 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
865 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
866 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// Diagnostic for value types other than i64 (the rest of this statement lies
// outside this excerpt).
868 cerr << "LowerConstant: unhandled constant type "
878 //! Custom lower double precision floating point constants
880 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
881 MVT VT = Op.getValueType();
882 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
// Message of a check on the node kind (the statement this string belongs to
// begins outside this excerpt).
885 "LowerConstantFP: Node is not ConstantFPSDNode");
887 if (VT == MVT::f64) {
// Take the raw bit pattern of the double, lower it through LowerConstant as
// an i64 constant, and BIT_CONVERT the result back to the f64 value type.
888 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
889 return DAG.getNode(ISD::BIT_CONVERT, VT,
890 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
896 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
898 LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
// Operand 1 of the BRCOND node is the branch condition.
900 SDOperand Cond = Op.getOperand(1);
901 MVT CondVT = Cond.getValueType();
904 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
// i1 conditions widen to i32, i8 conditions widen to i16.
905 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
// Rebuild the BRCOND with the condition zero-extended to the wider type.
906 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
908 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
// Any other condition type: return an empty SDOperand to leave the node as is.
911 return SDOperand(); // Unchanged
915 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
// Lowers a FORMAL_ARGUMENTS node into per-argument values.
//
// Op                 the FORMAL_ARGUMENTS node; operand 0 is the incoming
//                    chain and operand 2 is the vararg flag. Every result of
//                    the node except the last is an argument value.
// DAG                the selection DAG being built.
// VarArgsFrameIndex  output; receives the frame index created for the start
//                    of the vararg area.
//
// For a non-vararg function, each argument is copied out of the next argument
// register (through a fresh virtual register of the class matching its type)
// while registers remain. Arguments that need a load are read from a fixed
// stack object at ArgOffset. The result is a MERGE_VALUES node of all argument
// values followed by the chain.
917 MachineFunction &MF = DAG.getMachineFunction();
918 MachineFrameInfo *MFI = MF.getFrameInfo();
919 MachineRegisterInfo &RegInfo = MF.getRegInfo();
920 SmallVector<SDOperand, 8> ArgValues;
921 SDOperand Root = Op.getOperand(0);
922 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
924 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
925 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Stack-passed arguments start at the minimum stack size and advance by one
// stack slot per argument.
927 unsigned ArgOffset = SPUFrameInfo::minStackSize();
928 unsigned ArgRegIdx = 0;
929 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
931 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
933 // Add DAG nodes to load the arguments or copy them out of registers.
934 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
936 bool needsLoad = false;
937 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
938 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
// Dispatch on the argument's simple value type; each handled type uses its
// own register class for the incoming copy.
940 switch (ObjectVT.getSimpleVT()) {
942 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
943 << ObjectVT.getMVTString()
// i8 argument: R8C register class.
948 if (!isVarArg && ArgRegIdx < NumArgRegs) {
949 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
950 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
951 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
// i16 argument: R16C register class.
958 if (!isVarArg && ArgRegIdx < NumArgRegs) {
959 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
960 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
961 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
// i32 argument: R32C register class.
968 if (!isVarArg && ArgRegIdx < NumArgRegs) {
969 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
970 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
971 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
// i64 argument: R64C register class.
978 if (!isVarArg && ArgRegIdx < NumArgRegs) {
979 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
980 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
981 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
// f32 argument: R32FP register class.
988 if (!isVarArg && ArgRegIdx < NumArgRegs) {
989 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
990 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
991 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
// f64 argument: R64FP register class.
998 if (!isVarArg && ArgRegIdx < NumArgRegs) {
999 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1000 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// Vector argument: VECREG register class, copied with the argument's own type.
1013 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1014 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1015 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1016 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1024 // We need to load the argument to a virtual register if we determined above
1025 // that we ran out of physical registers of the appropriate type
1027 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1028 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1029 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1030 ArgOffset += StackSlotSize;
1033 ArgValues.push_back(ArgVal);
1036 // If the function takes variable number of arguments, make a frame index for
1037 // the start of the first vararg value... for expansion of llvm.va_start.
1039 VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1041 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1042 // If this function is vararg, store any remaining integer argument regs to
1043 // their spots on the stack so that they may be loaded by dereferencing the
1044 // result of va_next.
1045 SmallVector<SDOperand, 8> MemOps;
1046 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1047 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1048 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1049 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1050 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1051 MemOps.push_back(Store);
1052 // Increment the address by the size of a pointer for the next argument to store
1053 SDOperand PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1054 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Join all register spill stores into a single chain.
1056 if (!MemOps.empty())
1057 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
// The chain is the final merged value, after all argument values.
1060 ArgValues.push_back(Root);
1062 // Return the new list of results.
1063 std::vector<MVT> RetVT(Op.Val->value_begin(),
1064 Op.Val->value_end());
1065 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1068 /// isLSAAddress - Return the immediate to use if the specified
1069 /// value is representable as a LSA address.
/// Returns 0 when the constant is not a multiple of four or does not survive
/// the shift-left-14 / shift-right-14 round trip (i.e. does not fit in a
/// sign-extended 18-bit field, assuming a 32-bit int). Otherwise returns the
/// node of an i32 constant holding the address shifted right by two.
1070 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1071 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1074 int Addr = C->getValue();
1075 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1076 (Addr << 14 >> 14) != Addr)
1077 return 0; // Top 14 bits have to be sext of immediate.
// The two implicit low zero bits are dropped from the encoded immediate.
1079 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1084 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// Lowers a CALL node to SPUISD::CALL plus the surrounding call sequence.
//
// Operand layout read below: 0 = chain, 2 = vararg flag, 3 = tail-call flag,
// 4 = callee, and argument i at operand 5 + 2*i.
//
// Steps:
//   1. Assign each outgoing argument to the next argument register, or store
//      it at StackPtr + ArgOffset once the registers are exhausted.
//   2. Open the call sequence with the number of argument bytes on the stack.
//   3. Chain CopyToReg nodes (linked by a flag) for the register arguments.
//   4. Rewrite the callee: global addresses become A-form, PC-relative or
//      indirect address nodes depending on the subtarget's large-memory mode
//      and whether the global is only a declaration.
//   5. Emit the call, close the call sequence, copy results out of R3 (and R4
//      for a two-i32 result) and merge them with the chain.
//
// ST supplies usingLargeMem(), which selects the addressing form in step 4.
1085 SDOperand Chain = Op.getOperand(0);
1087 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1088 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1090 SDOperand Callee = Op.getOperand(4);
// Each argument occupies two operands, hence the division by two.
1091 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1092 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1093 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1094 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1096 // Handy pointer type
1097 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1099 // Accumulate how many bytes are to be pushed on the stack, including the
1100 // linkage area, and parameter passing area. According to the SPU ABI,
1101 // we minimally need space for [LR] and [SP]
1102 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1104 // Set up a copy of the stack pointer for use loading and storing any
1105 // arguments that may not fit in the registers available for argument
1107 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1109 // Figure out which arguments are going to go in registers, and which in
1111 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1112 unsigned ArgRegIdx = 0;
1114 // Keep track of registers passing arguments
1115 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1116 // And the arguments passed on the stack
1117 SmallVector<SDOperand, 8> MemOpChains;
1119 for (unsigned i = 0; i != NumOps; ++i) {
1120 SDOperand Arg = Op.getOperand(5+2*i);
1122 // PtrOff will be used to store the current argument to the stack if a
1123 // register cannot be found for it.
1124 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1125 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1127 switch (Arg.getValueType().getSimpleVT()) {
1128 default: assert(0 && "Unexpected ValueType for argument!");
// Each handled type group follows the same pattern: take the next argument
// register if one is left, otherwise store to the stack and advance one slot.
1132 if (ArgRegIdx != NumArgRegs) {
1133 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1135 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1136 ArgOffset += StackSlotSize;
1141 if (ArgRegIdx != NumArgRegs) {
1142 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1144 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1145 ArgOffset += StackSlotSize;
1152 if (ArgRegIdx != NumArgRegs) {
1153 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1155 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1156 ArgOffset += StackSlotSize;
1162 // Update number of stack bytes actually used, insert a call sequence start
1163 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1164 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1166 if (!MemOpChains.empty()) {
1167 // Adjust the stack pointer for the stack arguments.
// The argument stores are joined into one chain with a TokenFactor.
1168 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1169 &MemOpChains[0], MemOpChains.size());
1172 // Build a sequence of copy-to-reg nodes chained together with token chain
1173 // and flag operands which copy the outgoing args into the appropriate regs.
1175 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1176 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1178 InFlag = Chain.getValue(1);
1181 std::vector<MVT> NodeTys;
1182 NodeTys.push_back(MVT::Other); // Returns a chain
1183 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1185 SmallVector<SDOperand, 8> Ops;
1186 unsigned CallOpc = SPUISD::CALL;
1188 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1189 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1190 // node so that legalize doesn't hack it.
1191 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1192 GlobalValue *GV = G->getGlobal();
1193 MVT CalleeVT = Callee.getValueType();
1194 SDOperand Zero = DAG.getConstant(0, PtrVT);
1195 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1197 if (!ST->usingLargeMem()) {
1198 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1199 // style calls, otherwise, external symbols are BRASL calls. This assumes
1200 // that declared/defined symbols are in the same compilation unit and can
1201 // be reached through PC-relative jumps.
1204 // This may be an unsafe assumption for JIT and really large compilation
1206 if (GV->isDeclaration()) {
1207 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1209 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1212 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1214 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
// NOTE(review): this re-creates a plain ExternalSymbol node, not the
// TargetExternalSymbol the comment above describes -- confirm this is intended.
1216 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1217 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1218 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1219 // If this is an absolute destination address that appears to be a legal
1220 // local store address, use the munged value.
1221 Callee = SDOperand(Dest, 0);
1224 Ops.push_back(Chain);
1225 Ops.push_back(Callee);
1227 // Add argument registers to the end of the list so that they are known live
1229 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1230 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1231 RegsToPass[i].second.getValueType()));
1234 Ops.push_back(InFlag);
1235 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1236 InFlag = Chain.getValue(1);
1238 Chain = DAG.getCALLSEQ_END(Chain,
1239 DAG.getConstant(NumStackBytes, PtrVT),
1240 DAG.getConstant(0, PtrVT),
// Only calls that produce a value need the flag from CALLSEQ_END.
1242 if (Op.Val->getValueType(0) != MVT::Other)
1243 InFlag = Chain.getValue(1);
1245 SDOperand ResultVals[3];
1246 unsigned NumResults = 0;
1249 // If the call has results, copy the values out of the ret val registers.
1250 switch (Op.Val->getValueType(0).getSimpleVT()) {
1251 default: assert(0 && "Unexpected ret value!");
1252 case MVT::Other: break;
// Two i32 results: the first is read from R4 and the second from R3.
1254 if (Op.Val->getValueType(1) == MVT::i32) {
1255 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1256 ResultVals[0] = Chain.getValue(0);
1257 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1258 Chain.getValue(2)).getValue(1);
1259 ResultVals[1] = Chain.getValue(0);
1261 NodeTys.push_back(MVT::i32);
1263 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1264 ResultVals[0] = Chain.getValue(0);
1267 NodeTys.push_back(MVT::i32);
1270 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1271 ResultVals[0] = Chain.getValue(0);
1273 NodeTys.push_back(MVT::i64);
1277 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1278 InFlag).getValue(1);
1279 ResultVals[0] = Chain.getValue(0);
1281 NodeTys.push_back(Op.Val->getValueType(0));
1288 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1289 InFlag).getValue(1);
1290 ResultVals[0] = Chain.getValue(0);
1292 NodeTys.push_back(Op.Val->getValueType(0));
1296 NodeTys.push_back(MVT::Other);
1298 // If the function returns void, just return the chain.
1299 if (NumResults == 0)
1302 // Otherwise, merge everything together with a MERGE_VALUES node.
1303 ResultVals[NumResults++] = Chain;
1304 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1305 ResultVals, NumResults);
// Hand back the result number the caller originally asked for.
1306 return Res.getValue(Op.ResNo);
1310 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
// Lowers a RET node to SPUISD::RET_FLAG.
//
// The return values are assigned locations with the RetCC_SPU calling
// convention; each returned operand (operand i*2+1) is copied into its
// assigned register, with the copies linked through a flag. RET_FLAG is built
// with the flag operand in one path and without it in the other.
1311 SmallVector<CCValAssign, 16> RVLocs;
1312 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1313 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1314 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1315 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1317 // If this is the first return lowered for this function, add the regs to the
1318 // liveout set for the function.
1319 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1320 for (unsigned i = 0; i != RVLocs.size(); ++i)
1321 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1324 SDOperand Chain = Op.getOperand(0);
1327 // Copy the result values into the output registers.
1328 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1329 CCValAssign &VA = RVLocs[i];
1330 assert(VA.isRegLoc() && "Can only return in registers!");
1331 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1332 Flag = Chain.getValue(1);
1336 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1338 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1342 //===----------------------------------------------------------------------===//
1343 // Vector related lowering:
1344 //===----------------------------------------------------------------------===//
// getVecImm - Scan the operands of build-vector node N, skipping UNDEF
// operands, for a single value shared by every defined operand. When that
// value is a ConstantSDNode it is presumably what gets returned (callers test
// the result as a ConstantSDNode pointer). Returns 0 when every operand is
// UNDEF or the shared value is not a constant node.
1346 static ConstantSDNode *
1347 getVecImm(SDNode *N) {
1348 SDOperand OpVal(0, 0);
1350 // Check to see if this buildvec has a single non-undef value in its elements.
1351 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1352 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1354 OpVal = N->getOperand(i);
1355 else if (OpVal != N->getOperand(i))
1359 if (OpVal.Val != 0) {
1360 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1365 return 0; // All UNDEF: use implicit def.; not Constant node
1368 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1369 /// and the value fits into an unsigned 18-bit constant, and if so, return the
/// The accepted range is 0 .. 0x3ffff. For an i64 ValueType the upper 32 bits
/// of the splat value (Value >> 32) are what is range-checked and returned.
1371 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1373 if (ConstantSDNode *CN = getVecImm(N)) {
1374 uint64_t Value = CN->getValue();
1375 if (ValueType == MVT::i64) {
// Split the 64-bit splat value into its two 32-bit halves.
1376 uint64_t UValue = CN->getValue();
1377 uint32_t upper = uint32_t(UValue >> 32);
1378 uint32_t lower = uint32_t(UValue);
1381 Value = Value >> 32;
1383 if (Value <= 0x3ffff)
1384 return DAG.getConstant(Value, ValueType);
1390 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1391 /// and the value fits into a signed 16-bit constant, and if so, return the
/// The accepted range is -32768 .. 32767, tested on the sign-extended splat
/// value. For an i64 ValueType the upper 32 bits (Value >> 32, an arithmetic
/// shift of the signed value) are what is range-checked and returned.
1393 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1395 if (ConstantSDNode *CN = getVecImm(N)) {
1396 int64_t Value = CN->getSignExtended();
1397 if (ValueType == MVT::i64) {
// Split the 64-bit splat value into its two 32-bit halves.
1398 uint64_t UValue = CN->getValue();
1399 uint32_t upper = uint32_t(UValue >> 32);
1400 uint32_t lower = uint32_t(UValue);
1403 Value = Value >> 32;
1405 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1406 return DAG.getConstant(Value, ValueType);
1413 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1414 /// and the value fits into a signed 10-bit constant, and if so, return the
/// The range test is delegated to isS10Constant() on the sign-extended splat
/// value. For an i64 ValueType the upper 32 bits (Value >> 32) are what is
/// tested and returned.
1416 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1418 if (ConstantSDNode *CN = getVecImm(N)) {
1419 int64_t Value = CN->getSignExtended();
1420 if (ValueType == MVT::i64) {
// Split the 64-bit splat value into its two 32-bit halves.
1421 uint64_t UValue = CN->getValue();
1422 uint32_t upper = uint32_t(UValue >> 32);
1423 uint32_t lower = uint32_t(UValue);
1426 Value = Value >> 32;
1428 if (isS10Constant(Value))
1429 return DAG.getConstant(Value, ValueType);
1435 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1436 /// and the value fits into a signed 8-bit constant, and if so, return the
1439 /// @note: The incoming vector is v16i8 because that's the only way we can load
1440 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// For an i16 ValueType the low byte is returned when both bytes of the
/// 16-bit value compare equal; for an i8 ValueType the value is returned when
/// it already fits in the low eight bits.
1442 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1444 if (ConstantSDNode *CN = getVecImm(N)) {
1445 int Value = (int) CN->getValue();
// NOTE(review): (short) Value >> 8 shifts a sign-extended value, while
// (short) Value & 0xff is never negative, so this i16 test appears to succeed
// only when the repeated byte is below 0x80 -- confirm that is intended.
1446 if (ValueType == MVT::i16
1447 && Value <= 0xffff /* truncated from uint64_t */
1448 && ((short) Value >> 8) == ((short) Value & 0xff))
1449 return DAG.getConstant(Value & 0xff, ValueType);
1450 else if (ValueType == MVT::i8
1451 && (Value & 0xff) == Value)
1452 return DAG.getConstant(Value, ValueType);
1458 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1459 /// and the value fits into a signed 16-bit constant, and if so, return the
/// More precisely, the code below accepts the value when its low 16 bits are
/// zero, i.e. it equals itself masked with 0xffff0000 (for i64 this also
/// requires the upper 32 bits to be zero). The returned constant is the value
/// shifted right by 16.
1461 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1463 if (ConstantSDNode *CN = getVecImm(N)) {
1464 uint64_t Value = CN->getValue();
1465 if ((ValueType == MVT::i32
1466 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1467 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1468 return DAG.getConstant(Value >> 16, ValueType);
1474 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
/// When N has a single shared constant operand value (see getVecImm), that
/// value is truncated to 32 bits and returned as an i32 constant.
1475 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1476 if (ConstantSDNode *CN = getVecImm(N)) {
1477 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1483 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
/// When N has a single shared constant operand value (see getVecImm), it is
/// returned as an i64 constant.
/// NOTE(review): the (unsigned) cast keeps only the low 32 bits of the value
/// before it is re-emitted as i64 -- confirm this truncation is intended.
1484 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1485 if (ConstantSDNode *CN = getVecImm(N)) {
1486 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1492 // If this is a vector of constants or undefs, get the bits. A bit in
1493 // UndefBits is set if the corresponding element of the vector is an
1494 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1495 // zero. Return true if this is not an array of constants, false if it is.
//
// Both outputs are two uint64_t words. The first half of the operands is
// packed into word 0 and the second half into word 1; within a word, the
// lowest-numbered element takes the most significant slot. Integer and
// floating point constants are both accepted; FP elements contribute their
// raw bit pattern.
1497 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1498 uint64_t UndefBits[2]) {
1499 // Start with zero'd results.
1500 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1502 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1503 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1504 SDOperand OpVal = BV->getOperand(i);
1506 unsigned PartNo = i >= e/2; // Which of the two 64-bit words (0 or 1)?
1507 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1509 uint64_t EltBits = 0;
1510 if (OpVal.getOpcode() == ISD::UNDEF) {
// Mark every bit of this element's slot as undefined.
1511 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1512 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1514 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
// Mask the integer constant down to the element width.
1515 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1516 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1517 const APFloat &apf = CN->getValueAPF();
1518 EltBits = (CN->getValueType(0) == MVT::f32
1519 ? FloatToBits(apf.convertToFloat())
1520 : DoubleToBits(apf.convertToDouble()));
1522 // Nonconstant element.
1526 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1529 //printf("%llx %llx %llx %llx\n",
1530 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1534 /// If this is a splat (repetition) of a value across the whole vector, return
1535 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1536 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1537 /// SplatSize = 1 byte.
///
/// Bits128 / Undef128 are the packed value and undef words, as produced by
/// GetConstantBuildVectorBits. The search narrows from 64 to 32 to 16 to 8
/// bits and stops descending once the width would drop below MinSplatBits.
/// Returns false when the two 64-bit halves disagree on defined bits.
1538 static bool isConstantSplat(const uint64_t Bits128[2],
1539 const uint64_t Undef128[2],
1541 uint64_t &SplatBits, uint64_t &SplatUndef,
1543 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1544 // the same as the lower 64-bits, ignoring undefs.
// Each narrower candidate ORs the value halves together and ANDs the undef
// halves together, so a bit stays undefined only if it is undefined in both.
1545 uint64_t Bits64 = Bits128[0] | Bits128[1];
1546 uint64_t Undef64 = Undef128[0] & Undef128[1];
1547 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1548 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1549 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1550 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1552 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1553 if (MinSplatBits < 64) {
1555 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1557 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1558 if (MinSplatBits < 32) {
1560 // If the top 16-bits are different than the lower 16-bits, ignoring
1561 // undefs, we have an i32 splat.
1562 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1563 if (MinSplatBits < 16) {
1564 // If the top 8-bits are different than the lower 8-bits, ignoring
1565 // undefs, we have an i16 splat.
1566 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1567 // Otherwise, we have an 8-bit splat.
1568 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1569 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1575 SplatUndef = Undef16;
1582 SplatUndef = Undef32;
1588 SplatBits = Bits128[0];
1589 SplatUndef = Undef128[0];
1595 return false; // Can't be a splat if two pieces don't match.
1598 // If this is a case we can't handle, return null and let the default
1599 // expansion code take care of it. If we CAN select this case, and if it
1600 // selects to a single instruction, return Op. Otherwise, if we can codegen
1601 // this case more efficiently than a constant pool load, lower it to the
1602 // sequence of ops that should be used.
//
// Only constant splats are handled: the operands are packed into bit words,
// checked for a splat at least as wide as the vector's element type, and then
// rebuilt per result type. Floating point vectors are built as integer
// vectors and BIT_CONVERTed; v2i64 splats whose 32-bit halves differ are
// assembled with a SHUFB of two v4i32 splats.
1603 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1604 MVT VT = Op.getValueType();
1605 // If this is a vector of constants or undefs, get the bits. A bit in
1606 // UndefBits is set if the corresponding element of the vector is an
1607 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1609 uint64_t VectorBits[2];
1610 uint64_t UndefBits[2];
1611 uint64_t SplatBits, SplatUndef;
1613 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1614 || !isConstantSplat(VectorBits, UndefBits,
1615 VT.getVectorElementType().getSizeInBits(),
1616 SplatBits, SplatUndef, SplatSize))
1617 return SDOperand(); // Not a constant vector, not a splat.
1619 switch (VT.getSimpleVT()) {
// v4f32 splat: build a v4i32 splat of the raw bits and bit-convert it.
1622 uint32_t Value32 = SplatBits;
1623 assert(SplatSize == 4
1624 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1625 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1626 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1627 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1628 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64 splat: build a v2i64 splat of the raw bits and bit-convert it.
1632 uint64_t f64val = SplatBits;
1633 assert(SplatSize == 8
1634 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1635 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1636 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1637 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1638 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1642 // 8-bit constants have to be expanded to 16-bits
// The byte is duplicated into both halves of a 16-bit value, splatted as
// v8i16, and bit-converted back to the requested type.
1643 unsigned short Value16 = SplatBits | (SplatBits << 8);
1645 for (int i = 0; i < 8; ++i)
1646 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1647 return DAG.getNode(ISD::BIT_CONVERT, VT,
1648 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// 16-bit elements: use the splat value directly, or duplicate a byte-sized
// splat into both halves of the 16-bit element.
1651 unsigned short Value16;
1653 Value16 = (unsigned short) (SplatBits & 0xffff);
1655 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1656 SDOperand T = DAG.getConstant(Value16, VT.getVectorElementType());
1658 for (int i = 0; i < 8; ++i) Ops[i] = T;
1659 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// 32-bit elements: splat the low 32 bits of the splat value four times.
1662 unsigned int Value = SplatBits;
1663 SDOperand T = DAG.getConstant(Value, VT.getVectorElementType());
1664 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// 64-bit elements: examine the two 32-bit halves of the splat value.
1667 uint64_t val = SplatBits;
1668 uint32_t upper = uint32_t(val >> 32);
1669 uint32_t lower = uint32_t(val);
1671 if (upper == lower) {
1672 // Magic constant that can be matched by IL, ILA, et. al.
1673 SDOperand Val = DAG.getTargetConstant(val, MVT::i64);
1674 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1678 SmallVector<SDOperand, 16> ShufBytes;
1680 bool upper_special, lower_special;
1682 // NOTE: This code creates common-case shuffle masks that can be easily
1683 // detected as common expressions. It is not attempting to create highly
1684 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1686 // Detect if the upper or lower half is a special shuffle mask pattern:
1687 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1688 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1690 // Create lower vector if not a special pattern
1691 if (!lower_special) {
1692 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1693 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1694 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1695 LO32C, LO32C, LO32C, LO32C));
1698 // Create upper vector if not a special pattern
1699 if (!upper_special) {
1700 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1701 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1702 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1703 HI32C, HI32C, HI32C, HI32C));
1706 // If either upper or lower are special, then the two input operands are
1707 // the same (basically, one of them is a "don't care")
1712 if (lower_special && upper_special) {
1713 // Unhappy situation... both upper and lower are special, so punt with
1714 // a target constant:
1715 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1716 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the shuffle control: one i32 control word per iteration of the outer
// loop, assembled from four control bytes by the inner loop.
1720 for (int i = 0; i < 4; ++i) {
1722 for (int j = 0; j < 4; ++j) {
1724 bool process_upper, process_lower;
// Even-numbered words take the upper half, odd-numbered words the lower.
1726 process_upper = (upper_special && (i & 1) == 0);
1727 process_lower = (lower_special && (i & 1) == 1);
1729 if (process_upper || process_lower) {
1730 if ((process_upper && upper == 0)
1731 || (process_lower && lower == 0))
1733 else if ((process_upper && upper == 0xffffffff)
1734 || (process_lower && lower == 0xffffffff))
1736 else if ((process_upper && upper == 0x80000000)
1737 || (process_lower && lower == 0x80000000))
1738 val |= (j == 0 ? 0xe0 : 0x80);
// Ordinary byte select: odd words index 16 bytes further on, which
// presumably addresses the second SHUFB operand.
1740 val |= i * 4 + j + ((i & 1) * 16);
1743 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1746 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1747 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1748 &ShufBytes[0], ShufBytes.size()));
1756 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1757 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1758 /// permutation vector, V3, is monotonically increasing with one "exception"
1759 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1760 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1761 /// In either case, the net result is going to eventually invoke SHUFB to
1762 /// permute/shuffle the bytes from V1 and V2.
1764 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1765 /// control word for byte/halfword/word insertion. This takes care of a single
1766 /// element move from V2 into V1.
1768 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1769 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1770 SDOperand V1 = Op.getOperand(0);
1771 SDOperand V2 = Op.getOperand(1);
1772 SDOperand PermMask = Op.getOperand(2);
// An undefined second input is replaced by the first.
1774 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1776 // If we have a single element being moved from V2 into V1, this can be handled
1777 // using the C*[DX] compute mask instructions, but the vector elements have
1778 // to be monotonically increasing with one exception element.
1779 MVT EltVT = V1.getValueType().getVectorElementType();
1780 unsigned EltsFromV2 = 0;
1782 unsigned V2EltIdx0 = 0;
1783 unsigned CurrElt = 0;
1784 bool monotonic = true;
1785 if (EltVT == MVT::i8)
1787 else if (EltVT == MVT::i16)
1789 else if (EltVT == MVT::i32)
1792 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask, stopping as soon as more than one element comes from V2 or
// the V1 elements stop being in order.
1794 for (unsigned i = 0, e = PermMask.getNumOperands();
1795 EltsFromV2 <= 1 && monotonic && i != e;
1798 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1801 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Mask indices at or above V2EltIdx0 select from V2.
1803 if (SrcElt >= V2EltIdx0) {
// NOTE(review): inside this branch SrcElt >= V2EltIdx0, so the difference
// V2EltIdx0 - SrcElt is never positive -- confirm the operand order.
1805 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1806 } else if (CurrElt != SrcElt) {
1813 if (EltsFromV2 == 1 && monotonic) {
1814 // Compute mask and shuffle
1815 MachineFunction &MF = DAG.getMachineFunction();
1816 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1817 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1818 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1819 // Initialize temporary register to 0
1820 SDOperand InitTempReg =
1821 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1822 // Copy register's contents as index in INSERT_MASK:
1823 SDOperand ShufMaskOp =
1824 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1825 DAG.getTargetConstant(V2Elt, MVT::i32),
1826 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1827 // Use shuffle mask in SHUFB synthetic instruction:
1828 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1830 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1831 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1833 SmallVector<SDOperand, 16> ResultMask;
1834 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1836 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1839 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand one element index into BytesPerElement consecutive byte indices.
1841 for (unsigned j = 0; j < BytesPerElement; ++j) {
1842 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1847 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1848 &ResultMask[0], ResultMask.size());
1849 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// LowerSCALAR_TO_VECTOR - Lower a SCALAR_TO_VECTOR node. A constant scalar is
// turned into a BUILD_VECTOR that repeats the constant in every element of
// the result type. A non-constant scalar is wrapped in an
// SPUISD::PROMOTE_SCALAR node.
1853 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1854 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1856 if (Op0.Val->getOpcode() == ISD::Constant) {
1857 // For a constant, build the appropriate constant vector, which will
1858 // eventually simplify to a vector register load.
1860 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1861 SmallVector<SDOperand, 16> ConstVecValues;
1865 // Create a constant vector:
// Map the result vector type to its element count and element type.
1866 switch (Op.getValueType().getSimpleVT()) {
1867 default: assert(0 && "Unexpected constant value type in "
1868 "LowerSCALAR_TO_VECTOR");
1869 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1870 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1871 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1872 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1873 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1874 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1877 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1878 for (size_t j = 0; j < n_copies; ++j)
1879 ConstVecValues.push_back(CValue);
1881 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1882 &ConstVecValues[0], ConstVecValues.size());
1884 // Otherwise, copy the value from one register to another:
1885 switch (Op0.getValueType().getSimpleVT()) {
1886 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1893 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
// Custom lowering for vector multiplication. Dispatches on the simple value
// type of Op and assembles the product from SPU-specific multiply nodes
// (MPYH, MPYU, MPY, MPYHH). An unrecognized type is reported on cerr.
// NOTE(review): the case labels are not present in this listing; the section
// boundaries below are inferred from the MVT used by the nodes being built.
1900 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1901 switch (Op.getValueType().getSimpleVT()) {
1903 cerr << "CellSPU: Unknown vector multiplication, got "
1904 << Op.getValueType().getMVTString()
// v4i32 nodes: result = (MPYU(rA, rB) + MPYH(rA, rB)) + MPYH(rB, rA).
1910 SDOperand rA = Op.getOperand(0);
1911 SDOperand rB = Op.getOperand(1);
1912 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1913 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1914 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1915 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1917 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1921 // Multiply two v8i16 vectors (pipeline friendly version):
1922 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1923 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1924 // c) Use SELB to select upper and lower halves from the intermediate results
1926 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1927 // dual-issue. This code does manage to do this, even if it's a little on
1930 MachineFunction &MF = DAG.getMachineFunction();
1931 MachineRegisterInfo &RegInfo = MF.getRegInfo();
// NOTE(review): Chain and rA are both read from operand 0, so the first
// multiplicand is also used as the chain of the CopyToReg below -- confirm
// this is intended.
1932 SDOperand Chain = Op.getOperand(0);
1933 SDOperand rA = Op.getOperand(0);
1934 SDOperand rB = Op.getOperand(1);
// Two virtual vector registers: one holds the select mask, the other the
// MPYHH product.
1935 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1936 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
// The SELECT_MASK built from the constant 0xcccc is copied into FSMBIreg.
1939 DAG.getCopyToReg(Chain, FSMBIreg,
1940 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1941 DAG.getConstant(0xcccc, MVT::i16)));
// MPYHH(rA, rB) is copied into HiProdReg, chained after the mask copy.
1944 DAG.getCopyToReg(FSMBOp, HiProdReg,
1945 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1947 SDOperand HHProd_v4i32 =
1948 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1949 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
// SELB picks between MPY(rA, rB) and a value shifted left by 16 (VEC_SHL),
// under control of the mask read back from FSMBIreg.
1951 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1952 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1953 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1954 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1956 DAG.getConstant(16, MVT::i16))),
1957 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1960 // This M00sE is N@stI! (apologies to Monty Python)
1962 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1963 // is to break it all apart, sign extend, and reassemble the various
1964 // intermediate products.
1966 SDOperand rA = Op.getOperand(0);
1967 SDOperand rB = Op.getOperand(1);
1968 SDOperand c8 = DAG.getConstant(8, MVT::i32);
1969 SDOperand c16 = DAG.getConstant(16, MVT::i32);
// MPY of both operands reinterpreted as v8i16.
1972 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1973 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1974 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
// Both operands arithmetically shifted right by 8 ...
1976 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1978 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
// ... multiplied, and the product shifted back left by 8.
1981 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1982 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1984 SDOperand FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1985 DAG.getConstant(0x2222, MVT::i16));
// Merge the two partial products with SELB under FSMBmask, viewed as v4i32.
1987 SDOperand LoProdParts =
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1989 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1990 LLProd, LHProd, FSMBmask));
1992 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
// AND with a v4i32 splat of 0xffff.
1995 DAG.getNode(ISD::AND, MVT::v4i32,
1997 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1998 LoProdMask, LoProdMask,
1999 LoProdMask, LoProdMask));
// Both operands viewed as v4i32 and arithmetically shifted right by 16.
2002 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2003 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2006 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2007 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
// The same MPY / shift-by-8 / SELB scheme is repeated on rAH and rBH.
2010 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2011 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2012 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2014 SDOperand HHProd_1 =
2015 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2016 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2017 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
2018 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2019 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2022 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2024 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
// HHProd is shifted left by 16 before the final OR.
2028 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
// The result is a v4i32 OR, reinterpreted as v16i8.
2030 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2031 DAG.getNode(ISD::OR, MVT::v4i32,
// Custom lowering for FDIV (A / B) using a reciprocal estimate plus one
// correction step. With
//   BRcpl = FPInterp(B, FPRecipEst(B))   (kept in VRegBR)
//   C     = A * BRcpl                    (kept in VRegC)
// the returned node computes  C + BRcpl * (A - B * C).
// Virtual registers come from R32FPRegClass when VT is f32 and from
// VECREGRegClass in the other visible assignment pair.
2039 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2040 MachineFunction &MF = DAG.getMachineFunction();
2041 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2043 SDOperand A = Op.getOperand(0);
2044 SDOperand B = Op.getOperand(1);
2045 MVT VT = Op.getValueType();
2047 unsigned VRegBR, VRegC;
2049 if (VT == MVT::f32) {
2050 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2051 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2053 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2054 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2056 // TODO: make sure we're feeding FPInterp the right arguments
2057 // Right now: fi B, frest(B)
2060 // (Floating Interpolate (FP Reciprocal Estimate B))
// The refined reciprocal is copied into VRegBR; the chain starts at the
// DAG entry node.
2062 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2063 DAG.getNode(SPUISD::FPInterp, VT, B,
2064 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2066 // Computes A * BRcpl and stores in a temporary register
2068 DAG.getCopyToReg(BRcpl, VRegC,
2069 DAG.getNode(ISD::FMUL, VT, A,
2070 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2071 // What's the Chain variable do? It's magic!
2072 // TODO: set Chain = Op(0).getEntryNode()
// Final value: C + BRcpl * (A - B * C), with every register read chained
// after the AxBRcpl copy.
2074 return DAG.getNode(ISD::FADD, VT,
2075 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2076 DAG.getNode(ISD::FMUL, VT,
2077 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2078 DAG.getNode(ISD::FSUB, VT, A,
2079 DAG.getNode(ISD::FMUL, VT, B,
2080 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
// Custom lowering for EXTRACT_VECTOR_ELT with a constant element index.
// Element 0 of an i32 or i64 result is extracted directly with
// SPUISD::EXTRACT_ELT0. Every other case first builds a 16-entry byte shuffle
// mask, applies SPUISD::SHUFB(N, N, mask) so the requested element lands in
// the preferred-slot byte range, and then applies EXTRACT_ELT0.
2083 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2084 MVT VT = Op.getValueType();
2085 SDOperand N = Op.getOperand(0);
2086 SDOperand Elt = Op.getOperand(1);
2087 SDOperand ShufMask[16];
2088 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
// Only constant indices are supported.
2090 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2092 int EltNo = (int) C->getValue();
// Range-check the index against the element count of a 128-bit vector.
2095 if (VT == MVT::i8 && EltNo >= 16)
2096 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2097 else if (VT == MVT::i16 && EltNo >= 8)
2098 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
// NOTE(review): the condition rejects EltNo >= 4 but the message says "> 4";
// it presumably should read "> 3".
2099 else if (VT == MVT::i32 && EltNo >= 4)
2100 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
// NOTE(review): the condition rejects EltNo >= 2 but the message says "> 2";
// it presumably should read "> 1".
2101 else if (VT == MVT::i64 && EltNo >= 2)
2102 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2104 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2105 // i32 and i64: Element 0 is the preferred slot
2106 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2109 // Need to generate shuffle mask and extract:
2110 int prefslot_begin = -1, prefslot_end = -1;
// Byte offset of the requested element within the vector.
2111 int elt_byte = EltNo * VT.getSizeInBits() / 8;
// Choose the preferred-slot byte range for VT.
// NOTE(review): the case labels are not present in this listing; the four
// ranges (3..3, 2..3, 0..3, 0..7) presumably belong to i8, i16, i32 and i64
// in that order -- confirm.
2113 switch (VT.getSimpleVT()) {
2115 assert(false && "Invalid value type!");
2117 prefslot_begin = prefslot_end = 3;
2121 prefslot_begin = 2; prefslot_end = 3;
2125 prefslot_begin = 0; prefslot_end = 3;
2129 prefslot_begin = 0; prefslot_end = 7;
2134 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2135 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2137 for (int i = 0; i < 16; ++i) {
2138 // zero fill upper part of preferred slot, don't care about the
2140 unsigned int mask_val;
// Entries up to prefslot_end are computed: positions inside the preferred
// slot select consecutive bytes of the element, starting at elt_byte.
2142 if (i <= prefslot_end) {
2144 ((i < prefslot_begin)
2146 : elt_byte + (i - prefslot_begin));
2148 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
// Entries past prefslot_end repeat the already computed prefix.
2150 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2153 SDOperand ShufMaskVec =
2154 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2156 sizeof(ShufMask) / sizeof(ShufMask[0]));
// Shuffle N with itself under the mask, then extract element 0.
2158 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2159 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2160 N, N, ShufMaskVec));
// Custom lowering for INSERT_VECTOR_ELT with a constant index. The scalar
// ValOp is turned into a vector with SCALAR_TO_VECTOR and merged through
// SPUISD::SHUFB, using a mask produced by SPUISD::INSERT_MASK from an ISD::ADD
// address expression that includes the constant index.
// NOTE(review): some operand lines of the SHUFB/ADD nodes are not present in
// this listing; presumably the ADD combines PtrBase with the index -- confirm.
2164 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2165 SDOperand VecOp = Op.getOperand(0);
2166 SDOperand ValOp = Op.getOperand(1);
2167 SDOperand IdxOp = Op.getOperand(2);
2168 MVT VT = Op.getValueType();
// NOTE(review): cast<> (unlike dyn_cast<>) is presumably already checked, so
// the assert that follows may never fire on its own -- confirm.
2170 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2171 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2173 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2174 // Use $2 because it's always 16-byte aligned and it's available:
2175 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2178 DAG.getNode(SPUISD::SHUFB, VT,
2179 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2181 DAG.getNode(SPUISD::INSERT_MASK, VT,
2182 DAG.getNode(ISD::ADD, PtrVT,
2184 DAG.getConstant(CN->getValue(),
// Custom lowering for i8 arithmetic. Each handled opcode is performed at i16:
// the operands are widened (or, for an operand that is already at least i16
// wide, truncated) to i16, the operation Opc is emitted at i16, and the
// result is truncated back to i8. Constant operands are re-created directly
// as i16 constants instead of being wrapped in an extend node.
// NOTE(review): the case labels are not present in this listing, so the
// opcode handled by each group below cannot be confirmed from here.
// NOTE(review): constants are rebuilt from getValue() with no explicit sign
// extension, even in the SIGN_EXTEND groups -- confirm this is correct for
// negative i8 constants.
2190 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2192 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2194 assert(Op.getValueType() == MVT::i8);
2197 assert(0 && "Unhandled i8 math operator");
2201 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
// Group 1: both operands are sign-extended to i16.
2203 SDOperand N1 = Op.getOperand(1);
2204 N0 = (N0.getOpcode() != ISD::Constant
2205 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2206 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2207 N1 = (N1.getOpcode() != ISD::Constant
2208 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2209 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2210 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2211 DAG.getNode(Opc, MVT::i16, N0, N1));
// Group 2: N0 is zero-extended; N1 is zero-extended when narrower than i16,
// otherwise truncated. The byte is then replicated into both halves of the
// i16 (N0 | (N0 << 8)) before Opc is applied.
2215 SDOperand N1 = Op.getOperand(1);
2217 N0 = (N0.getOpcode() != ISD::Constant
2218 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2219 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2220 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2221 N1 = (N1.getOpcode() != ISD::Constant
2222 ? DAG.getNode(N1Opc, MVT::i16, N1)
2223 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2224 SDOperand ExpandArg =
2225 DAG.getNode(ISD::OR, MVT::i16, N0,
2226 DAG.getNode(ISD::SHL, MVT::i16,
2227 N0, DAG.getConstant(8, MVT::i16)));
2228 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2229 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Group 3: N0 is zero-extended; N1 is zero-extended or truncated to i16.
2233 SDOperand N1 = Op.getOperand(1);
2235 N0 = (N0.getOpcode() != ISD::Constant
2236 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2237 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2238 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2239 N1 = (N1.getOpcode() != ISD::Constant
2240 ? DAG.getNode(N1Opc, MVT::i16, N1)
2241 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2242 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2243 DAG.getNode(Opc, MVT::i16, N0, N1));
// Group 4: N0 is sign-extended; N1 is sign-extended or truncated to i16.
2246 SDOperand N1 = Op.getOperand(1);
2248 N0 = (N0.getOpcode() != ISD::Constant
2249 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2250 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2251 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2252 N1 = (N1.getOpcode() != ISD::Constant
2253 ? DAG.getNode(N1Opc, MVT::i16, N1)
2254 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2255 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2256 DAG.getNode(Opc, MVT::i16, N0, N1));
// Group 5: same widening as group 4 (sign-extend N0; sign-extend or truncate
// N1), for a different opcode.
2259 SDOperand N1 = Op.getOperand(1);
2261 N0 = (N0.getOpcode() != ISD::Constant
2262 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2263 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2264 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2265 N1 = (N1.getOpcode() != ISD::Constant
2266 ? DAG.getNode(N1Opc, MVT::i16, N1)
2267 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2268 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2269 DAG.getNode(Opc, MVT::i16, N0, N1));
// Custom lowering for i64 operations. The visible sections cover
// zero/sign/any extension from i32, a carry-based path (CARRY_GENERATE +
// ADD_EXTENDED), a borrow-based path (BORROW_GENERATE + SUB_EXTENDED) and
// three shift paths. Most sections promote the scalar operands to a vector
// with SPUISD::PROMOTE_SCALAR, operate on the vector, and read the scalar
// result back with SPUISD::EXTRACT_ELT0.
// NOTE(review): apart from the extend cases, the case labels are not present
// in this listing; section boundaries are inferred from the nodes being built.
2277 static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2279 MVT VT = Op.getValueType();
// 128-bit vector type whose elements have type VT.
2280 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2282 SDOperand Op0 = Op.getOperand(0);
2285 case ISD::ZERO_EXTEND:
2286 case ISD::SIGN_EXTEND:
2287 case ISD::ANY_EXTEND: {
2288 MVT Op0VT = Op0.getValueType();
2289 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2291 assert(Op0VT == MVT::i32
2292 && "CellSPU: Zero/sign extending something other than i32");
2293 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
// Sign extension uses ROTBYTES_RIGHT_S; zero and any extension use
// ROTQUAD_RZ_BYTES. Both are given the constant 4 as their amount.
2295 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2296 ? SPUISD::ROTBYTES_RIGHT_S
2297 : SPUISD::ROTQUAD_RZ_BYTES);
2298 SDOperand PromoteScalar =
2299 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2301 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2302 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2303 DAG.getNode(NewOpc, Op0VecVT,
2305 DAG.getConstant(4, MVT::i32))));
2309 // Turn operands into vectors to satisfy type checking (shufb works on
2312 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2314 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2315 SmallVector<SDOperand, 16> ShufBytes;
2317 // Create the shuffle mask for "rotating" the carry up one register slot
2318 // once the carry is generated.
2319 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2320 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2321 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2322 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2324 SDOperand CarryGen =
2325 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2326 SDOperand ShiftedCarry =
2327 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2329 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2330 &ShufBytes[0], ShufBytes.size()));
// ADD_EXTENDED consumes both operands plus the shuffled carry.
2332 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2333 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2334 Op0, Op1, ShiftedCarry));
2338 // Turn operands into vectors to satisfy type checking (shufb works on
2341 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2343 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2344 SmallVector<SDOperand, 16> ShufBytes;
2346 // Create the shuffle mask for "rotating" the borrow up one register slot
2347 // once the borrow is generated.
// Same layout as the carry mask, but the filler words are 0xc0c0c0c0 instead
// of 0x80808080.
2348 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2349 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2350 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2351 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2353 SDOperand BorrowGen =
2354 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2355 SDOperand ShiftedBorrow =
2356 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2357 BorrowGen, BorrowGen,
2358 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2359 &ShufBytes[0], ShufBytes.size()));
// SUB_EXTENDED consumes both operands plus the shuffled borrow.
2361 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2362 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2363 Op0, Op1, ShiftedBorrow));
// Left-shift path: the promoted operand is combined with zero through SELB
// under a 0xff00 select mask, then shifted by whole bytes (SHLQUAD_L_BYTES)
// and by the remaining bits (SHLQUAD_L_BITS).
2367 SDOperand ShiftAmt = Op.getOperand(1);
2368 MVT ShiftAmtVT = ShiftAmt.getValueType();
2369 SDOperand Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2370 SDOperand MaskLower =
2371 DAG.getNode(SPUISD::SELB, VecVT,
2373 DAG.getConstant(0, VecVT),
2374 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2375 DAG.getConstant(0xff00ULL, MVT::i16)));
// Byte count: SRL by 3.
2376 SDOperand ShiftAmtBytes =
2377 DAG.getNode(ISD::SRL, ShiftAmtVT,
2379 DAG.getConstant(3, ShiftAmtVT));
// Residual bit count: AND with 7.
2380 SDOperand ShiftAmtBits =
2381 DAG.getNode(ISD::AND, ShiftAmtVT,
2383 DAG.getConstant(7, ShiftAmtVT));
2385 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2386 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2387 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2388 MaskLower, ShiftAmtBytes),
// ROTQUAD_RZ path: the same byte/bit split of the shift amount, applied to
// Op0 at the scalar type VT with ROTQUAD_RZ_BYTES followed by ROTQUAD_RZ_BITS.
2393 MVT VT = Op.getValueType();
2394 SDOperand ShiftAmt = Op.getOperand(1);
2395 MVT ShiftAmtVT = ShiftAmt.getValueType();
2396 SDOperand ShiftAmtBytes =
2397 DAG.getNode(ISD::SRL, ShiftAmtVT,
2399 DAG.getConstant(3, ShiftAmtVT));
2400 SDOperand ShiftAmtBits =
2401 DAG.getNode(ISD::AND, ShiftAmtVT,
2403 DAG.getConstant(7, ShiftAmtVT));
2405 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2406 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2407 Op0, ShiftAmtBytes),
2412 // Promote Op0 to vector
2414 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2415 SDOperand ShiftAmt = Op.getOperand(1);
2416 MVT ShiftVT = ShiftAmt.getValueType();
2418 // Negate variable shift amounts
2419 if (!isa<ConstantSDNode>(ShiftAmt)) {
2420 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2421 DAG.getConstant(0, ShiftVT), ShiftAmt);
// Element 0 (as i32) of the operand arithmetically shifted right by 31 ...
2424 SDOperand UpperHalfSign =
2425 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2426 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2427 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2428 Op0, DAG.getConstant(31, MVT::i32))));
// ... is expanded into a select mask.
2429 SDOperand UpperHalfSignMask =
2430 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2431 SDOperand UpperLowerMask =
2432 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2433 DAG.getConstant(0xff00, MVT::i16));
// SELB merges the sign mask with the operand under the 0xff00 mask.
2434 SDOperand UpperLowerSelect =
2435 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2436 UpperHalfSignMask, Op0, UpperLowerMask);
// NOTE(review): the local names look swapped relative to the opcodes --
// RotateLeftBytes is built with ROTBYTES_LEFT_BITS and RotateLeftBits with
// ROTBYTES_LEFT. Confirm which naming is intended.
2437 SDOperand RotateLeftBytes =
2438 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2439 UpperLowerSelect, ShiftAmt);
2440 SDOperand RotateLeftBits =
2441 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2442 RotateLeftBytes, ShiftAmt);
2444 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2452 //! Lower byte immediate operations for v16i8 vectors:
// Looks for a BUILD_VECTOR operand (optionally hidden behind a BIT_CONVERT)
// in either operand position of Op. When that vector is a constant splat, the
// node is re-emitted with the same opcode as (Arg, BUILD_VECTOR of target
// constants), where each target constant is the low byte of the splat value.
2454 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2457 MVT VT = Op.getValueType();
// First guess: operand 0 is the constant vector, operand 1 the argument.
2459 ConstVec = Op.getOperand(0);
2460 Arg = Op.getOperand(1);
2461 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
// Look through a BIT_CONVERT wrapping operand 0 ...
2462 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2463 ConstVec = ConstVec.getOperand(0);
// ... or swap the roles of the operands and look through a BIT_CONVERT on
// operand 1.
2465 ConstVec = Op.getOperand(1);
2466 Arg = Op.getOperand(0);
2467 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2468 ConstVec = ConstVec.getOperand(0);
2473 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2474 uint64_t VectorBits[2];
2475 uint64_t UndefBits[2];
2476 uint64_t SplatBits, SplatUndef;
// Proceed only when the vector bits can be collected and form a splat at the
// element size of VT.
2479 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2480 && isConstantSplat(VectorBits, UndefBits,
2481 VT.getVectorElementType().getSizeInBits(),
2482 SplatBits, SplatUndef, SplatSize)) {
2483 SDOperand tcVec[16];
// Only the low 8 bits of the splat value are kept.
2484 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2485 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2487 // Turn the BUILD_VECTOR into a set of target constants:
// NOTE(review): the loop body is not present in this listing; presumably it
// stores tc into every tcVec slot -- confirm.
2488 for (size_t i = 0; i < tcVecSize; ++i)
2491 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2492 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2499 //! Lower i32 multiplication
// Dispatches on VT. An unrecognized type is reported on cerr. The visible
// i32 path builds the product as
//   (MPYH(rA, rB) + MPYH(rB, rA)) + MPYU(rA, rB).
2500 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, MVT VT,
2502 switch (VT.getSimpleVT()) {
2504 cerr << "CellSPU: Unknown LowerMUL value type, got "
2505 << Op.getValueType().getMVTString()
2511 SDOperand rA = Op.getOperand(0);
2512 SDOperand rB = Op.getOperand(1);
2514 return DAG.getNode(ISD::ADD, MVT::i32,
2515 DAG.getNode(ISD::ADD, MVT::i32,
2516 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2517 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2518 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2525 //! Custom lowering for CTPOP (count population)
2527 Custom lowering code that counts the number of ones in the input
2528 operand. SPU has such an instruction, but it counts the number of
2529 ones per byte, which then have to be accumulated.
// Lowers CTPOP by promoting the scalar to a vector, applying SPUISD::CNTB,
// extracting element 0 and, for the wider types, folding the extracted value
// onto itself with shifts and adds before masking the result.
// NOTE(review): the case labels are not present in this listing; the three
// sections are identified by the MVT they build (i8, i16, i32).
2531 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2532 MVT VT = Op.getValueType();
// 128-bit vector type whose elements have type VT.
2533 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2535 switch (VT.getSimpleVT()) {
2537 assert(false && "Invalid value type!");
// i8 result: element 0 of the CNTB vector is returned directly.
2539 SDOperand N = Op.getOperand(0);
2540 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2542 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2543 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2545 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16 result: the extracted value is parked in a virtual register and then
// combined with SRL/ADD/AND nodes (shift constant 8, mask constant 0x0f).
2549 MachineFunction &MF = DAG.getMachineFunction();
2550 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2552 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2554 SDOperand N = Op.getOperand(0);
2555 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2556 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2557 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2559 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2560 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2562 // CNTB_result becomes the chain to which all of the virtual registers
2563 // CNTB_reg, SUM1_reg become associated:
2564 SDOperand CNTB_result =
2565 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2567 SDOperand CNTB_rescopy =
2568 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2570 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2572 return DAG.getNode(ISD::AND, MVT::i16,
2573 DAG.getNode(ISD::ADD, MVT::i16,
2574 DAG.getNode(ISD::SRL, MVT::i16,
// i32 result: two folding steps through virtual registers, then a mask:
//   Sum1 = (x >> 16) + x;  Sum2 = (Sum1 >> s) + Sum1;  result = Sum2 & 0xff
// NOTE(review): the shift operand of the second SRL is not present in this
// listing; presumably it is Shift2 (8) -- confirm.
2581 MachineFunction &MF = DAG.getMachineFunction();
2582 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2584 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2585 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2587 SDOperand N = Op.getOperand(0);
2588 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2589 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2590 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2591 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2593 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2594 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2596 // CNTB_result becomes the chain to which all of the virtual registers
2597 // CNTB_reg, SUM1_reg become associated:
2598 SDOperand CNTB_result =
2599 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2601 SDOperand CNTB_rescopy =
2602 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// First fold: the register value shifted right by Shift1 ...
2605 DAG.getNode(ISD::SRL, MVT::i32,
2606 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
// ... added to the unshifted register value.
2609 DAG.getNode(ISD::ADD, MVT::i32,
2610 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
// Sum1 is parked in SUM1_reg; note this copy is chained on CNTB_result, not
// on CNTB_rescopy.
2612 SDOperand Sum1_rescopy =
2613 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Second fold on SUM1_reg.
2616 DAG.getNode(ISD::SRL, MVT::i32,
2617 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2620 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2621 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
// Keep only the low byte of the accumulated sum.
2623 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2633 /// LowerOperation - Provide custom lowering hooks for some operations.
// Dispatches on the opcode of Op (and, for the arithmetic groups, on its
// value type VT) to the static Lower* helpers. An opcode with no handler is
// reported on cerr together with the opcode number.
// NOTE(review): several case labels are not present in this listing, so the
// opcode guarding some of the calls below cannot be confirmed from here.
2636 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2638 unsigned Opc = (unsigned) Op.getOpcode();
2639 MVT VT = Op.getValueType();
2643 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2644 cerr << "Op.getOpcode() = " << Opc << "\n";
2645 cerr << "*Op.Val:\n";
// Memory and address lowering; these helpers also receive the subtarget.
2652 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2654 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2655 case ISD::ConstantPool:
2656 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2657 case ISD::GlobalAddress:
2658 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2659 case ISD::JumpTable:
2660 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2662 return LowerConstant(Op, DAG);
2663 case ISD::ConstantFP:
2664 return LowerConstantFP(Op, DAG);
2666 return LowerBRCOND(Op, DAG);
2667 case ISD::FORMAL_ARGUMENTS:
2668 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2670 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2672 return LowerRET(Op, DAG, getTargetMachine());
2675 // i8, i64 math ops:
2676 case ISD::ZERO_EXTEND:
2677 case ISD::SIGN_EXTEND:
2678 case ISD::ANY_EXTEND:
// The i64 value type goes to LowerI64Math; the other visible branch goes to
// LowerI8Math.
2687 return LowerI8Math(Op, DAG, Opc);
2688 else if (VT == MVT::i64)
2689 return LowerI64Math(Op, DAG, Opc);
2693 // Vector-related lowering.
2694 case ISD::BUILD_VECTOR:
2695 return LowerBUILD_VECTOR(Op, DAG);
2696 case ISD::SCALAR_TO_VECTOR:
2697 return LowerSCALAR_TO_VECTOR(Op, DAG);
2698 case ISD::VECTOR_SHUFFLE:
2699 return LowerVECTOR_SHUFFLE(Op, DAG);
2700 case ISD::EXTRACT_VECTOR_ELT:
2701 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2702 case ISD::INSERT_VECTOR_ELT:
2703 return LowerINSERT_VECTOR_ELT(Op, DAG);
2705 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2709 return LowerByteImmed(Op, DAG);
2711 // Vector and i8 multiply:
// Three-way split: LowerVectorMUL, LowerI8Math for i8, LowerMUL otherwise.
2714 return LowerVectorMUL(Op, DAG);
2715 else if (VT == MVT::i8)
2716 return LowerI8Math(Op, DAG, Opc);
2718 return LowerMUL(Op, DAG, VT, Opc);
// Only f32 and v4f32 reach LowerFDIVf32; the f64 path is commented out and
// any other type hits the assert.
2721 if (VT == MVT::f32 || VT == MVT::v4f32)
2722 return LowerFDIVf32(Op, DAG);
2723 // else if (Op.getValueType() == MVT::f64)
2724 // return LowerFDIVf64(Op, DAG);
2726 assert(0 && "Calling FDIV on unsupported MVT");
2729 return LowerCTPOP(Op, DAG);
2735 //===----------------------------------------------------------------------===//
2736 // Target Optimization Hooks
2737 //===----------------------------------------------------------------------===//
// Target-specific DAG combines. Visible patterns:
//  - an add of a constant and an SPUISD::IndirectAddr whose second operand is
//    a constant: the two constants are folded into one IndirectAddr;
//  - sign/zero/any extend of an SPUISD::EXTRACT_ELT0 with the same value type;
//  - SPUISD::IndirectAddr of an SPUISD::AFormAddr with a zero offset, when
//    large memory is not in use;
//  - the listed SPU shift/rotate nodes with a constant amount of zero;
//  - SPUISD::PROMOTE_SCALAR applied to (an extend of) SPUISD::EXTRACT_ELT0.
// When a replacement has been stored in Result, the before/after nodes are
// dumped under DEBUG.
2740 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2743 TargetMachine &TM = getTargetMachine();
2745 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2746 SelectionDAG &DAG = DCI.DAG;
2747 SDOperand Op0 = N->getOperand(0); // everything has at least one operand
2748 SDOperand Result; // Initially, NULL result
2750 switch (N->getOpcode()) {
2753 SDOperand Op1 = N->getOperand(1);
// Case A: constant in operand 1, IndirectAddr in operand 0.
2755 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2756 SDOperand Op01 = Op0.getOperand(1);
2757 if (Op01.getOpcode() == ISD::Constant
2758 || Op01.getOpcode() == ISD::TargetConstant) {
2759 // (add (SPUindirect <arg>, <const>), <const>) ->
2760 // (SPUindirect <arg>, <const + const>)
2761 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2762 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2763 SDOperand combinedConst =
2764 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2765 Op0.getValueType());
2767 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2768 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2769 DEBUG(cerr << "With: (SPUindirect <arg>, "
2770 << CN0->getValue() + CN1->getValue() << ")\n");
2771 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2772 Op0.getOperand(0), combinedConst);
// Case B: constant in operand 0, IndirectAddr in operand 1.
2774 } else if (isa<ConstantSDNode>(Op0)
2775 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2776 SDOperand Op11 = Op1.getOperand(1);
2777 if (Op11.getOpcode() == ISD::Constant
2778 || Op11.getOpcode() == ISD::TargetConstant) {
2779 // (add <const>, (SPUindirect <arg>, <const>)) ->
2780 // (SPUindirect <arg>, <const + const>)
2781 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2782 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
// The folded constant takes its type from Op0 (the constant operand), while
// the new node below takes its type from Op1.
2783 SDOperand combinedConst =
2784 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2785 Op0.getValueType());
2787 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2788 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2789 DEBUG(cerr << "With: (SPUindirect <arg>, "
2790 << CN0->getValue() + CN1->getValue() << ")\n");
2792 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2793 Op1.getOperand(0), combinedConst);
2798 case ISD::SIGN_EXTEND:
2799 case ISD::ZERO_EXTEND:
2800 case ISD::ANY_EXTEND: {
2801 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2802 N->getValueType(0) == Op0.getValueType()) {
2803 // (any_extend (SPUextract_elt0 <arg>)) ->
2804 // (SPUextract_elt0 <arg>)
2805 // Types must match, however...
2806 DEBUG(cerr << "Replace: ");
2807 DEBUG(N->dump(&DAG));
2808 DEBUG(cerr << "\nWith: ");
2809 DEBUG(Op0.Val->dump(&DAG));
2810 DEBUG(cerr << "\n");
2816 case SPUISD::IndirectAddr: {
2817 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2818 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2819 if (CN->getValue() == 0) {
2820 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2821 // (SPUaform <addr>, 0)
2823 DEBUG(cerr << "Replace: ");
2824 DEBUG(N->dump(&DAG));
2825 DEBUG(cerr << "\nWith: ");
2826 DEBUG(Op0.Val->dump(&DAG));
2827 DEBUG(cerr << "\n");
2834 case SPUISD::SHLQUAD_L_BITS:
2835 case SPUISD::SHLQUAD_L_BYTES:
2836 case SPUISD::VEC_SHL:
2837 case SPUISD::VEC_SRL:
2838 case SPUISD::VEC_SRA:
2839 case SPUISD::ROTQUAD_RZ_BYTES:
2840 case SPUISD::ROTQUAD_RZ_BITS: {
2841 SDOperand Op1 = N->getOperand(1);
2843 if (isa<ConstantSDNode>(Op1)) {
2844 // Kill degenerate vector shifts:
2845 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2847 if (CN->getValue() == 0) {
2853 case SPUISD::PROMOTE_SCALAR: {
2854 switch (Op0.getOpcode()) {
2857 case ISD::ANY_EXTEND:
2858 case ISD::ZERO_EXTEND:
2859 case ISD::SIGN_EXTEND: {
2860 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2862 // but only if the SPUpromote_scalar and <arg> types match.
2863 SDOperand Op00 = Op0.getOperand(0);
2864 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2865 SDOperand Op000 = Op00.getOperand(0);
2866 if (Op000.getValueType() == N->getValueType(0)) {
2872 case SPUISD::EXTRACT_ELT0: {
2873 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2875 Result = Op0.getOperand(0);
2882 // Otherwise, return unchanged.
2885 DEBUG(cerr << "\nReplace.SPU: ");
2886 DEBUG(N->dump(&DAG));
2887 DEBUG(cerr << "\nWith: ");
2888 DEBUG(Result.Val->dump(&DAG));
2889 DEBUG(cerr << "\n");
2896 //===----------------------------------------------------------------------===//
2897 // Inline Assembly Support
2898 //===----------------------------------------------------------------------===//
2900 /// getConstraintType - Given a constraint letter, return the type of
2901 /// constraint it is for this target.
// Single-letter constraints are checked against a target-specific set that
// maps to C_RegisterClass; everything else is deferred to the TargetLowering
// base implementation.
// NOTE(review): the case labels listing the accepted letters are not present
// in this listing.
2902 SPUTargetLowering::ConstraintType
2903 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2904 if (ConstraintLetter.size() == 1) {
2905 switch (ConstraintLetter[0]) {
2912 return C_RegisterClass;
2915 return TargetLowering::getConstraintType(ConstraintLetter);
// Maps a single-letter inline-asm constraint (together with the value type
// VT) to a register class. Every visible result uses register number 0 paired
// with one of R64C, R32C, R32FP, R64FP or GPRC. Anything not handled here is
// deferred to the TargetLowering base implementation.
// NOTE(review): the case labels and some of the conditions selecting between
// these classes are not present in this listing.
2918 std::pair<unsigned, const TargetRegisterClass*>
2919 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2922 if (Constraint.size() == 1) {
2923 // GCC RS6000 Constraint Letters
2924 switch (Constraint[0]) {
2928 return std::make_pair(0U, SPU::R64CRegisterClass);
2929 return std::make_pair(0U, SPU::R32CRegisterClass);
2932 return std::make_pair(0U, SPU::R32FPRegisterClass);
2933 else if (VT == MVT::f64)
2934 return std::make_pair(0U, SPU::R64FPRegisterClass);
2937 return std::make_pair(0U, SPU::GPRCRegisterClass);
2941 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2944 //! Compute used/known bits for a SPU operand
// Switches on the opcode of Op. Two groups visibly update KnownZero and
// KnownOne: PROMOTE_SCALAR (from the value type of operand 0) and
// LDRESULT / EXTRACT_ELT0 / EXTRACT_ELT0_CHAINED (from the value type of Op
// itself). The remaining listed opcodes show no update in this listing.
2946 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2950 const SelectionDAG &DAG,
2951 unsigned Depth ) const {
2953 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2956 switch (Op.getOpcode()) {
2958 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2968 case SPUISD::PROMOTE_SCALAR: {
2969 SDOperand Op0 = Op.getOperand(0);
2970 MVT Op0VT = Op0.getValueType();
2971 unsigned Op0VTBits = Op0VT.getSizeInBits();
2972 uint64_t InMask = Op0VT.getIntegerVTBitMask();
// KnownZero receives ~InMask and KnownOne receives InMask, both as APInts of
// the operand's bit width.
2973 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2974 KnownOne |= APInt(Op0VTBits, InMask, false);
2978 case SPUISD::LDRESULT:
2979 case SPUISD::EXTRACT_ELT0:
2980 case SPUISD::EXTRACT_ELT0_CHAINED: {
2981 MVT OpVT = Op.getValueType();
2982 unsigned OpVTBits = OpVT.getSizeInBits();
2983 uint64_t InMask = OpVT.getIntegerVTBitMask();
// Same update as above, based on the value type of Op itself.
2984 KnownZero |= APInt(OpVTBits, ~InMask, false);
2985 KnownOne |= APInt(OpVTBits, InMask, false);
2990 case EXTRACT_I1_ZEXT:
2991 case EXTRACT_I1_SEXT:
2992 case EXTRACT_I8_ZEXT:
2993 case EXTRACT_I8_SEXT:
2998 case SPUISD::SHLQUAD_L_BITS:
2999 case SPUISD::SHLQUAD_L_BYTES:
3000 case SPUISD::VEC_SHL:
3001 case SPUISD::VEC_SRL:
3002 case SPUISD::VEC_SRA:
3003 case SPUISD::VEC_ROTL:
3004 case SPUISD::VEC_ROTR:
3005 case SPUISD::ROTQUAD_RZ_BYTES:
3006 case SPUISD::ROTQUAD_RZ_BITS:
3007 case SPUISD::ROTBYTES_RIGHT_S:
3008 case SPUISD::ROTBYTES_LEFT:
3009 case SPUISD::ROTBYTES_LEFT_CHAINED:
3010 case SPUISD::SELECT_MASK:
3012 case SPUISD::FPInterp:
3013 case SPUISD::FPRecipEst:
3014 case SPUISD::SEXT32TO64:
3019 // LowerAsmOperandForConstraint
// No SPU-specific handling: all four arguments are forwarded unchanged to
// TargetLowering::LowerAsmOperandForConstraint.
3021 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
3022 char ConstraintLetter,
3023 std::vector<SDOperand> &Ops,
3024 SelectionDAG &DAG) const {
3025 // Default, for the time being, to the base class handler
3026 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3029 /// isLegalAddressImmediate - Return true if the integer value can be used
3030 /// as the offset of the target addressing mode.
// Ty does not take part in the visible check.
3031 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
3032 // SPU's addresses are 256K:
// Both comparisons are strict, so the accepted range is
// -(2^18) + 1 .. 2^18 - 2 inclusive.
// NOTE(review): the bounds are asymmetric around zero and exclude 2^18 - 1;
// confirm whether `V < (1 << 18)` (or `<=` on the upper bound) was intended.
3033 return (V > -(1 << 18) && V < (1 << 18) - 1);
3036 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {