//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
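
  // The "preferred slot" is where the SPU expects a scalar of the given type
  // to live within a 16-byte register: byte 3 for i8 (and i1), byte 2 for
  // i16, and byte 0 for 32-bit and wider types, so the value ends up
  // left-justified in the first word of the quadword. For example, an i16
  // sitting at byte offset 6 of its 16-byte block has to be rotated left by
  // 6 - 2 = 4 bytes before it lands in its preferred slot (see LowerLOAD
  // below, which computes exactly this rotation amount).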
  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "getValueTypeMapEntry returns NULL for "
          << VT.getMVTString();
      llvm_report_error(Msg.str());
    }
#endif

    return retval;
  }
  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  static SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      MVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForMVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy =
      Op.getNode()->getValueType(0).getTypeForMVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, CallingConv::C, false, Callee, Args, DAG,
                      Op.getDebugLoc());

    return CallInfo.first;
  }
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Fold away setcc operations if possible.

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }
  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM, MVT::i8, Expand);
  setOperationAction(ISD::UREM, MVT::i8, Expand);
  setOperationAction(ISD::SDIV, MVT::i8, Expand);
  setOperationAction(ISD::UDIV, MVT::i8, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
  setOperationAction(ISD::SREM, MVT::i16, Expand);
  setOperationAction(ISD::UREM, MVT::i16, Expand);
  setOperationAction(ISD::SDIV, MVT::i16, Expand);
  setOperationAction(ISD::UDIV, MVT::i16, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIV, MVT::i64, Expand);
  setOperationAction(ISD::UDIV, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SREM, MVT::i128, Expand);
  setOperationAction(ISD::UREM, MVT::i128, Expand);
  setOperationAction(ISD::SDIV, MVT::i128, Expand);
  setOperationAction(ISD::UDIV, MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        the .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal and
  // handled during instruction selection:
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);
  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
  setOperationAction(ISD::MULHU, MVT::i8, Expand);
  setOperationAction(ISD::MULHS, MVT::i8, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU, MVT::i16, Expand);
  setOperationAction(ISD::MULHS, MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  setOperationAction(ISD::MULHS, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i64, Expand);
  setOperationAction(ISD::MULHS, MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8, Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);
  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);

  setOperationAction(ISD::CTTZ , MVT::i8, Expand);
  setOperationAction(ISD::CTTZ , MVT::i16, Expand);
  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);
  setOperationAction(ISD::CTTZ , MVT::i128, Expand);

  setOperationAction(ISD::CTLZ , MVT::i8, Promote);
  setOperationAction(ISD::CTLZ , MVT::i16, Promote);
  setOperationAction(ISD::CTLZ , MVT::i32, Legal);
  setOperationAction(ISD::CTLZ , MVT::i64, Expand);
  setOperationAction(ISD::CTLZ , MVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
  // f64 FDIV on SPU has to be expanded to a libcall:
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
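  // Note that these two calls override the actions registered for
  // FP_TO_SINT/SINT_TO_FP on MVT::i64 earlier in this constructor; the last
  // setOperationAction() for a given (opcode, VT) pair is the one that takes
  // effect.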
  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is likewise legal for the supported vector VT's.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND, VT, Legal);
    setOperationAction(ISD::OR, VT, Legal);
    setOperationAction(ISD::XOR, VT, Legal);
    setOperationAction(ISD::LOAD, VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE, VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);
  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}
const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;
}
//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
}
//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"
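
// The generated include above provides the RetCC_SPU calling-convention
// analysis function that LowerRET() below feeds to CCState::AnalyzeReturn().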
//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32 = vec2prefslot %3
%5  f64 = fp_extend %4
\endverbatim
*/
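// Concretely, the rotation amount below is (byte offset within the 16-byte
// block) minus (the type's preferred-slot byte). For example, an aligned-base
// load of an i32 at offset 8 becomes a v16i8 quadword load followed by a
// ROTBYTES_LEFT by 8, which moves bytes 8..11 into the preferred slot
// (bytes 0..3), after which VEC2PREFSLOT extracts the scalar.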
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();
  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }
    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
             "UNINDEXED\n";
      Msg << (unsigned) LN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
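// In outline: the aligned 16-byte block is loaded, a SHUFFLE_MASK control
// word is built from the insertion offset, the scalar being stored is turned
// into a vector with SCALAR_TO_VECTOR, SHUFB merges it into the loaded block
// at the right byte position, and the whole quadword is stored back.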
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();
  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }
    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      cerr << "\n";
    }
#endif
    SDValue insertEltOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());
#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
             "UNINDEXED\n";
      Msg << (unsigned) SN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}
//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();
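
  // In the small ("not large") memory model the constant pool address fits in
  // an absolute A-form address and is wrapped in a single SPUISD::AFormAddr
  // node; under the large-memory model it is split into SPUISD::Hi/SPUISD::Lo
  // halves joined by an IndirectAddr, mirroring LowerJumpTable() and
  // LowerGlobalAddress() below.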
  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}
//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}
static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    llvm_report_error("LowerGlobalAddress: Relocation model other than static"
                      " not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert(FP != 0 &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}
static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
  DebugLoc dl = Op.getDebugLoc();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
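
  // Incoming arguments are assigned to the SPU's argument registers until
  // those run out (cf. the "(79-3)+1" register count in the vararg spill code
  // below); anything left over is read from fixed stack slots of
  // StackSlotSize bytes each, starting at offset ArgOffset.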
  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        std::string msg;
        raw_string_ostream Msg(msg);
        Msg << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
            << ObjectVT.getMVTString();
        llvm_report_error(Msg.str());
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    Root = ArgVal.getOperand(0);
  }
  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||      // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;                 // Top 14 bits have to be sext of immediate.
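
  // For example, Addr = 0x1FFFC passes both checks (low two bits clear, value
  // fits in 18 signed bits) and yields the word immediate 0x7FFF below, while
  // Addr = 0x20000 fails the sign-extension check and is rejected.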
  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
  DebugLoc dl = TheCall->getDebugLoc();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }
  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }
  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);
  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: llvm_unreachable("Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }
  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
  return Res.getValue(Op.getResNo());
}
static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  DebugLoc dl = Op.getDebugLoc();
  CCState CCInfo(CC, isVarArg, TM, RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }
  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}
//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//
static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}
/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}
/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}
/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}
//! Lower a BUILD_VECTOR instruction creatively:
static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue();   // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();
  switch (VT.getSimpleVT()) {
  default: {
    std::string msg;
    raw_string_ostream Msg(msg);
    Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
        << VT.getMVTString();
    llvm_report_error(Msg.str());
    /*NOTREACHED*/
  }
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}
SDValue
SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
                     DebugLoc dl) {
  uint32_t upper = uint32_t(SplatVal >> 32);
  uint32_t lower = uint32_t(SplatVal);

  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et. al.
    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   Val, Val, Val, Val));
  } else {
    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
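
    // The loop further below encodes each byte of the SHUFB control word
    // directly: a control byte in 0x00..0x1f selects that byte from the two
    // input registers, while the special values 0x80, 0xc0 and 0xe0 make
    // shufb produce the constant bytes 0x00, 0xff and 0x80 respectively.
    // That is how the "special" 0 / 0xffffffff / 0x80000000 halves are
    // synthesized without materializing a second source operand.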
1676 // Both upper and lower are special, lower to a constant pool load:
1677 if (lower_special && upper_special) {
1678 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1679 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1680 SplatValCN, SplatValCN);
1685 SmallVector<SDValue, 16> ShufBytes;
1688 // Create lower vector if not a special pattern
1689 if (!lower_special) {
1690 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1691 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1692 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1693 LO32C, LO32C, LO32C, LO32C));
1696 // Create upper vector if not a special pattern
1697 if (!upper_special) {
1698 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1699 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1700 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1701 HI32C, HI32C, HI32C, HI32C));
1704 // If either upper or lower is special, then the two input operands are
1705 // the same (basically, one of them is a "don't care")
1711 for (int i = 0; i < 4; ++i) {
1713 for (int j = 0; j < 4; ++j) {
1715 bool process_upper, process_lower;
1717 process_upper = (upper_special && (i & 1) == 0);
1718 process_lower = (lower_special && (i & 1) == 1);
1720 if (process_upper || process_lower) {
1721 if ((process_upper && upper == 0)
1722 || (process_lower && lower == 0))
1724 else if ((process_upper && upper == 0xffffffff)
1725 || (process_lower && lower == 0xffffffff))
1727 else if ((process_upper && upper == 0x80000000)
1728 || (process_lower && lower == 0x80000000))
1729 val |= (j == 0 ? 0xe0 : 0x80);
1731 val |= i * 4 + j + ((i & 1) * 16);
1734 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1737 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1738 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1739 &ShufBytes[0], ShufBytes.size()));
1743 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1744 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1745 /// permutation vector, V3, is monotonically increasing with one "exception"
1746 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1747 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1748 /// In either case, the net result is going to eventually invoke SHUFB to
1749 /// permute/shuffle the bytes from V1 and V2.
1751 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1752 /// generate a control word for byte/halfword/word insertion. This takes care
1753 /// of a single element move from V2 into V1.
1755 /// SPUISD::SHUFB is eventually selected as the Cell's <i>shufb</i> instruction.
1756 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1757 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1758 SDValue V1 = Op.getOperand(0);
1759 SDValue V2 = Op.getOperand(1);
1760 DebugLoc dl = Op.getDebugLoc();
1762 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1764 // If we have a single element being moved from V2 into V1, this can be handled
1765 // using the C*[DX] compute mask instructions, but the vector elements have
1766 // to be monotonically increasing with one exception element.
1767 MVT VecVT = V1.getValueType();
1768 MVT EltVT = VecVT.getVectorElementType();
1769 unsigned EltsFromV2 = 0;
1771 unsigned V2EltIdx0 = 0;
1772 unsigned CurrElt = 0;
1773 unsigned MaxElts = VecVT.getVectorNumElements();
1774 unsigned PrevElt = 0;
1776 bool monotonic = true;
1779 if (EltVT == MVT::i8) {
1781 } else if (EltVT == MVT::i16) {
1783 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1785 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1788 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1790 for (unsigned i = 0; i != MaxElts; ++i) {
1791 if (SVN->getMaskElt(i) < 0)
1794 unsigned SrcElt = SVN->getMaskElt(i);
1797 if (SrcElt >= V2EltIdx0) {
1798 if (1 >= (++EltsFromV2)) {
1799 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1801 } else if (CurrElt != SrcElt) {
1809 if (PrevElt > 0 && SrcElt < MaxElts) {
1810 if ((PrevElt == SrcElt - 1)
1811 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1818 } else if (PrevElt == 0) {
1819 // First time through, need to keep track of previous element
1822 // This isn't a rotation; it takes elements from vector 2
1828 if (EltsFromV2 == 1 && monotonic) {
1829 // Compute mask and shuffle
1830 MachineFunction &MF = DAG.getMachineFunction();
1831 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1832 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1833 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1834 // Initialize temporary register to 0
1835 SDValue InitTempReg =
1836 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1837 // Copy register's contents as index in SHUFFLE_MASK:
1838 SDValue ShufMaskOp =
1839 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1840 DAG.getTargetConstant(V2Elt, MVT::i32),
1841 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1842 // Use shuffle mask in SHUFB synthetic instruction:
1843 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1845 } else if (rotate) {
1846 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1848 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1849 V1, DAG.getConstant(rotamt, MVT::i16));
1851 // Convert the SHUFFLE_VECTOR mask's input element units to byte units:
1853 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1855 SmallVector<SDValue, 16> ResultMask;
1856 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1857 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1859 for (unsigned j = 0; j < BytesPerElement; ++j)
1860 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1863 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1864 &ResultMask[0], ResultMask.size());
1865 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1869 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1870 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1871 DebugLoc dl = Op.getDebugLoc();
1873 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1874 // For a constant, build the appropriate constant vector, which will
1875 // eventually simplify to a vector register load.
1877 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1878 SmallVector<SDValue, 16> ConstVecValues;
1882 // Create a constant vector:
1883 switch (Op.getValueType().getSimpleVT()) {
1884 default: llvm_unreachable("Unexpected constant value type in "
1885 "LowerSCALAR_TO_VECTOR");
1886 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1887 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1888 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1889 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1890 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1891 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1894 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1895 for (size_t j = 0; j < n_copies; ++j)
1896 ConstVecValues.push_back(CValue);
1898 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1899 &ConstVecValues[0], ConstVecValues.size());
1901 // Otherwise, copy the value from one register to another:
1902 switch (Op0.getValueType().getSimpleVT()) {
1903 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1910 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1917 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1918 MVT VT = Op.getValueType();
1919 SDValue N = Op.getOperand(0);
1920 SDValue Elt = Op.getOperand(1);
1921 DebugLoc dl = Op.getDebugLoc();
1924 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1925 // Constant argument:
1926 int EltNo = (int) C->getZExtValue();
1929 if (VT == MVT::i8 && EltNo >= 16)
1930 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1931 else if (VT == MVT::i16 && EltNo >= 8)
1932 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1933 else if (VT == MVT::i32 && EltNo >= 4)
1934 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
1935 else if (VT == MVT::i64 && EltNo >= 2)
1936 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
1938 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1939 // i32 and i64: Element 0 is the preferred slot
1940 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1943 // Need to generate shuffle mask and extract:
1944 int prefslot_begin = -1, prefslot_end = -1;
1945 int elt_byte = EltNo * VT.getSizeInBits() / 8;
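// The "preferred slot" is the fixed subword of the 128-bit register that
// naturally holds a scalar of this type: byte 3 for i8, bytes 2-3 for i16,
// bytes 0-3 for i32/f32, and bytes 0-7 for i64/f64 (see the switch below).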
1947 switch (VT.getSimpleVT()) {
1949 assert(false && "Invalid value type!");
1951 prefslot_begin = prefslot_end = 3;
1955 prefslot_begin = 2; prefslot_end = 3;
1960 prefslot_begin = 0; prefslot_end = 3;
1965 prefslot_begin = 0; prefslot_end = 7;
1970 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1971 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1973 unsigned int ShufBytes[16];
1974 for (int i = 0; i < 16; ++i) {
1975 // zero fill upper part of preferred slot, don't care about the rest:
1977 unsigned int mask_val;
1978 if (i <= prefslot_end) {
1980 ((i < prefslot_begin)
1982 : elt_byte + (i - prefslot_begin));
1984 ShufBytes[i] = mask_val;
1986 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1989 SDValue ShufMask[4];
1990 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1991 unsigned bidx = i * 4;
1992 unsigned int bits = ((ShufBytes[bidx] << 24) |
1993 (ShufBytes[bidx+1] << 16) |
1994 (ShufBytes[bidx+2] << 8) |
1996 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1999 SDValue ShufMaskVec =
2000 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2001 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
2003 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2004 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2005 N, N, ShufMaskVec));
2007 // Variable index: Rotate the requested element into slot 0, then replicate
2008 // slot 0 across the vector
2009 MVT VecVT = N.getValueType();
2010 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2011 llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2015 // Make life easier by making sure the index is zero-extended to i32
2016 if (Elt.getValueType() != MVT::i32)
2017 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2019 // Scale the index to a bit/byte shift quantity
2021 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2022 unsigned scaleShift = scaleFactor.logBase2();
2025 if (scaleShift > 0) {
2026 // Scale the shift factor:
2027 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2028 DAG.getConstant(scaleShift, MVT::i32));
2031 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2033 // Replicate the bytes starting at byte 0 across the entire vector (for
2034 // consistency with the notion of a unified register set)
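// Each 32-bit "factor" below packs four shufb byte selectors; e.g. 0x00010203
// selects bytes 0..3 of the rotated vector, so broadcasting that word
// replicates the element now sitting at byte 0 across the whole register.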
2037 switch (VT.getSimpleVT()) {
2039 llvm_report_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector"
2043 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2044 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2045 factor, factor, factor, factor);
2049 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2050 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2051 factor, factor, factor, factor);
2056 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2057 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2058 factor, factor, factor, factor);
2063 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2064 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2065 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2066 loFactor, hiFactor, loFactor, hiFactor);
2071 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2072 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2073 vecShift, vecShift, replicate));
2079 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2080 SDValue VecOp = Op.getOperand(0);
2081 SDValue ValOp = Op.getOperand(1);
2082 SDValue IdxOp = Op.getOperand(2);
2083 DebugLoc dl = Op.getDebugLoc();
2084 MVT VT = Op.getValueType();
2086 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2087 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2089 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2090 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2091 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2092 DAG.getRegister(SPU::R1, PtrVT),
2093 DAG.getConstant(CN->getSExtValue(), PtrVT));
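// SHUFFLE_MASK with an address operand is eventually selected as one of the
// C*D insertion-control instructions; $sp is used only because it is 16-byte
// aligned, so the constant above supplies the offset within the quadword that
// the generated control word targets.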
2094 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2097 DAG.getNode(SPUISD::SHUFB, dl, VT,
2098 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2100 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2105 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2106 const TargetLowering &TLI)
2108 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2109 DebugLoc dl = Op.getDebugLoc();
2110 MVT ShiftVT = TLI.getShiftAmountTy();
2112 assert(Op.getValueType() == MVT::i8);
2115 llvm_unreachable("Unhandled i8 math operator");
2119 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2121 SDValue N1 = Op.getOperand(1);
2122 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2123 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2124 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2125 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2130 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2132 SDValue N1 = Op.getOperand(1);
2133 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2134 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2135 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2136 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2140 SDValue N1 = Op.getOperand(1);
2141 MVT N1VT = N1.getValueType();
2143 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2144 if (!N1VT.bitsEq(ShiftVT)) {
2145 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2148 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2151 // Replicate lower 8-bits into upper 8:
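// (with the byte duplicated, the 16-bit rotate below leaves the desired
// 8-bit rotate result in the low byte, which the truncate then extracts)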
2153 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2154 DAG.getNode(ISD::SHL, dl, MVT::i16,
2155 N0, DAG.getConstant(8, MVT::i32)));
2157 // Truncate back down to i8
2158 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2159 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2163 SDValue N1 = Op.getOperand(1);
2164 MVT N1VT = N1.getValueType();
2166 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2167 if (!N1VT.bitsEq(ShiftVT)) {
2168 unsigned N1Opc = ISD::ZERO_EXTEND;
2170 if (N1.getValueType().bitsGT(ShiftVT))
2171 N1Opc = ISD::TRUNCATE;
2173 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2176 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2177 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2180 SDValue N1 = Op.getOperand(1);
2181 MVT N1VT = N1.getValueType();
2183 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2184 if (!N1VT.bitsEq(ShiftVT)) {
2185 unsigned N1Opc = ISD::SIGN_EXTEND;
2187 if (N1VT.bitsGT(ShiftVT))
2188 N1Opc = ISD::TRUNCATE;
2189 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2192 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2193 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2196 SDValue N1 = Op.getOperand(1);
2198 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2199 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2200 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2201 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2209 //! Lower byte immediate operations for v16i8 vectors:
2211 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2214 MVT VT = Op.getValueType();
2215 DebugLoc dl = Op.getDebugLoc();
2217 ConstVec = Op.getOperand(0);
2218 Arg = Op.getOperand(1);
2219 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2220 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2221 ConstVec = ConstVec.getOperand(0);
2223 ConstVec = Op.getOperand(1);
2224 Arg = Op.getOperand(0);
2225 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2226 ConstVec = ConstVec.getOperand(0);
2231 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2232 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2233 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2235 APInt APSplatBits, APSplatUndef;
2236 unsigned SplatBitSize;
2238 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2240 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2241 HasAnyUndefs, minSplatBits)
2242 && minSplatBits <= SplatBitSize) {
2243 uint64_t SplatBits = APSplatBits.getZExtValue();
2244 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2246 SmallVector<SDValue, 16> tcVec;
2247 tcVec.assign(16, tc);
2248 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2249 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2253 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2254 // lowered. Return the operation, rather than a null SDValue.
2258 //! Custom lowering for CTPOP (count population)
2260 Custom lowering code that counts the number of ones in the input
2261 operand. SPU has such an instruction, but it counts the number of
2262 ones per byte, which then have to be accumulated.
2264 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2265 MVT VT = Op.getValueType();
2266 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2267 DebugLoc dl = Op.getDebugLoc();
2269 switch (VT.getSimpleVT()) {
2271 assert(false && "Invalid value type!");
2273 SDValue N = Op.getOperand(0);
2274 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2276 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2277 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2279 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2283 MachineFunction &MF = DAG.getMachineFunction();
2284 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2286 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2288 SDValue N = Op.getOperand(0);
2289 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2290 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2291 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2293 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2294 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2296 // CNTB_result becomes the chain to which the virtual register
2297 // CNTB_reg becomes associated:
2298 SDValue CNTB_result =
2299 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2301 SDValue CNTB_rescopy =
2302 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2304 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2306 return DAG.getNode(ISD::AND, dl, MVT::i16,
2307 DAG.getNode(ISD::ADD, dl, MVT::i16,
2308 DAG.getNode(ISD::SRL, dl, MVT::i16,
2315 MachineFunction &MF = DAG.getMachineFunction();
2316 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2318 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2319 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2321 SDValue N = Op.getOperand(0);
2322 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2323 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2324 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2325 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2327 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2328 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2330 // CNTB_result becomes the chain to which all of the virtual registers
2331 // CNTB_reg, SUM1_reg become associated:
2332 SDValue CNTB_result =
2333 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2335 SDValue CNTB_rescopy =
2336 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
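// Accumulate the four per-byte counts into the low byte: add in the value
// shifted right by 16, then by 8, and finally mask with 0xff to obtain the
// 32-bit population count.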
2339 DAG.getNode(ISD::SRL, dl, MVT::i32,
2340 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2344 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2345 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2347 SDValue Sum1_rescopy =
2348 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2351 DAG.getNode(ISD::SRL, dl, MVT::i32,
2352 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2355 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2356 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2358 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2368 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2370 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2371 All conversions to i64 are expanded to a libcall.
2373 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2374 SPUTargetLowering &TLI) {
2375 MVT OpVT = Op.getValueType();
2376 SDValue Op0 = Op.getOperand(0);
2377 MVT Op0VT = Op0.getValueType();
2379 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2380 || OpVT == MVT::i64) {
2381 // Convert f32 / f64 to i32 / i64 via libcall.
2383 (Op.getOpcode() == ISD::FP_TO_SINT)
2384 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2385 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2386 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2388 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2394 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2396 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2397 All conversions from i64 are expanded to a libcall.
2399 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2400 SPUTargetLowering &TLI) {
2401 MVT OpVT = Op.getValueType();
2402 SDValue Op0 = Op.getOperand(0);
2403 MVT Op0VT = Op0.getValueType();
2405 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2406 || Op0VT == MVT::i64) {
2407 // Convert i32, i64 to f64 via libcall:
2409 (Op.getOpcode() == ISD::SINT_TO_FP)
2410 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2411 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2412 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2414 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2420 //! Lower ISD::SETCC
2422 This handles MVT::f64 (double floating point) condition lowering
2424 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2425 const TargetLowering &TLI) {
2426 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2427 DebugLoc dl = Op.getDebugLoc();
2428 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2430 SDValue lhs = Op.getOperand(0);
2431 SDValue rhs = Op.getOperand(1);
2432 MVT lhsVT = lhs.getValueType();
2433 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2435 MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2436 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2437 MVT IntVT(MVT::i64);
2439 // Take advantage of the fact that (truncate (srl arg, 32)) is efficiently
2440 // selected to a NOP:
2441 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2443 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2444 DAG.getNode(ISD::SRL, dl, IntVT,
2445 i64lhs, DAG.getConstant(32, MVT::i32)));
2446 SDValue lhsHi32abs =
2447 DAG.getNode(ISD::AND, dl, MVT::i32,
2448 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2450 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2452 // SETO and SETUO only use the lhs operand:
2453 if (CC->get() == ISD::SETO) {
2454 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of SETUO:
2456 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2457 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2458 DAG.getSetCC(dl, ccResultVT,
2459 lhs, DAG.getConstantFP(0.0, lhsVT),
2461 DAG.getConstant(ccResultAllOnes, ccResultVT));
2462 } else if (CC->get() == ISD::SETUO) {
2463 // Evaluates to true if Op0 is [SQ]NaN
2464 return DAG.getNode(ISD::AND, dl, ccResultVT,
2465 DAG.getSetCC(dl, ccResultVT,
2467 DAG.getConstant(0x7ff00000, MVT::i32),
2469 DAG.getSetCC(dl, ccResultVT,
2471 DAG.getConstant(0, MVT::i32),
2475 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2477 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2478 DAG.getNode(ISD::SRL, dl, IntVT,
2479 i64rhs, DAG.getConstant(32, MVT::i32)));
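// IEEE-754 doubles order like sign-magnitude integers, so a plain integer
// comparison would be wrong for negative values; remap negatives into two's
// complement first so the i64 setcc below yields the correct ordering.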
2481 // If a value is negative, subtract from the sign magnitude constant:
2482 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2484 // Convert the sign-magnitude representation into 2's complement:
2485 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2486 lhsHi32, DAG.getConstant(31, MVT::i32));
2487 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2489 DAG.getNode(ISD::SELECT, dl, IntVT,
2490 lhsSelectMask, lhsSignMag2TC, i64lhs);
2492 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2493 rhsHi32, DAG.getConstant(31, MVT::i32));
2494 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2496 DAG.getNode(ISD::SELECT, dl, IntVT,
2497 rhsSelectMask, rhsSignMag2TC, i64rhs);
2501 switch (CC->get()) {
2504 compareOp = ISD::SETEQ; break;
2507 compareOp = ISD::SETGT; break;
2510 compareOp = ISD::SETGE; break;
2513 compareOp = ISD::SETLT; break;
2516 compareOp = ISD::SETLE; break;
2519 compareOp = ISD::SETNE; break;
2521 llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2525 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2526 (ISD::CondCode) compareOp);
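// ISD::CondCode encodes the unordered predicates with bit 3 set; for the
// ordered predicates, additionally require that neither operand is a NaN: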
2528 if ((CC->get() & 0x8) == 0) {
2529 // Ordered comparison:
2530 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2531 lhs, DAG.getConstantFP(0.0, MVT::f64),
2533 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2534 rhs, DAG.getConstantFP(0.0, MVT::f64),
2536 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2538 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2544 //! Lower ISD::SELECT_CC
2546 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the SELB instruction.
2549 \note Need to revisit this in the future: if the code path through the true
2550 and false value computations is longer than the latency of a branch (6
2551 cycles), then it would be more advantageous to branch and insert a new basic
2552 block and branch on the condition. However, this code does not make that
2553 assumption, given the simplistic uses so far.
2556 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2557 const TargetLowering &TLI) {
2558 MVT VT = Op.getValueType();
2559 SDValue lhs = Op.getOperand(0);
2560 SDValue rhs = Op.getOperand(1);
2561 SDValue trueval = Op.getOperand(2);
2562 SDValue falseval = Op.getOperand(3);
2563 SDValue condition = Op.getOperand(4);
2564 DebugLoc dl = Op.getDebugLoc();
2566 // NOTE: SELB's arguments: $rA, $rB, $mask
2568 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2569 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2570 // condition was true and 0s where the condition was false. Hence, the
2571 // arguments to SELB get reversed.
2573 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2574 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2575 // with another "cannot select select_cc" assert:
2577 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2578 TLI.getSetCCResultType(Op.getValueType()),
2579 lhs, rhs, condition);
2580 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2583 //! Custom lower ISD::TRUNCATE
2584 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2586 // Type to truncate to
2587 MVT VT = Op.getValueType();
2588 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2589 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2590 DebugLoc dl = Op.getDebugLoc();
2592 // Type to truncate from
2593 SDValue Op0 = Op.getOperand(0);
2594 MVT Op0VT = Op0.getValueType();
2596 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2597 // Create shuffle mask, least significant doubleword of quadword
2598 unsigned maskHigh = 0x08090a0b;
2599 unsigned maskLow = 0x0c0d0e0f;
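// On the big-endian SPU, bytes 8..15 of the quadword hold the least
// significant 64 bits; repeating them in both halves leaves the truncated
// value in the preferred slot for the VEC2PREFSLOT below.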
2600 // Use a shuffle to perform the truncation
2601 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2602 DAG.getConstant(maskHigh, MVT::i32),
2603 DAG.getConstant(maskLow, MVT::i32),
2604 DAG.getConstant(maskHigh, MVT::i32),
2605 DAG.getConstant(maskLow, MVT::i32));
2607 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2608 Op0, Op0, shufMask);
2610 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2613 return SDValue(); // Leave the truncate unmolested
2616 //! Custom (target-specific) lowering entry point
2618 This is where LLVM's DAG selection process calls to do target-specific lowering of nodes.
2622 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2624 unsigned Opc = (unsigned) Op.getOpcode();
2625 MVT VT = Op.getValueType();
2630 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2631 cerr << "Op.getOpcode() = " << Opc << "\n";
2632 cerr << "*Op.getNode():\n";
2633 Op.getNode()->dump();
2635 llvm_unreachable(0);
2641 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2643 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2644 case ISD::ConstantPool:
2645 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2646 case ISD::GlobalAddress:
2647 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2648 case ISD::JumpTable:
2649 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2650 case ISD::ConstantFP:
2651 return LowerConstantFP(Op, DAG);
2652 case ISD::FORMAL_ARGUMENTS:
2653 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2655 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2657 return LowerRET(Op, DAG, getTargetMachine());
2659 // i8, i64 math ops:
2668 return LowerI8Math(Op, DAG, Opc, *this);
2672 case ISD::FP_TO_SINT:
2673 case ISD::FP_TO_UINT:
2674 return LowerFP_TO_INT(Op, DAG, *this);
2676 case ISD::SINT_TO_FP:
2677 case ISD::UINT_TO_FP:
2678 return LowerINT_TO_FP(Op, DAG, *this);
2680 // Vector-related lowering.
2681 case ISD::BUILD_VECTOR:
2682 return LowerBUILD_VECTOR(Op, DAG);
2683 case ISD::SCALAR_TO_VECTOR:
2684 return LowerSCALAR_TO_VECTOR(Op, DAG);
2685 case ISD::VECTOR_SHUFFLE:
2686 return LowerVECTOR_SHUFFLE(Op, DAG);
2687 case ISD::EXTRACT_VECTOR_ELT:
2688 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2689 case ISD::INSERT_VECTOR_ELT:
2690 return LowerINSERT_VECTOR_ELT(Op, DAG);
2692 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2696 return LowerByteImmed(Op, DAG);
2698 // Vector and i8 multiply:
2701 return LowerI8Math(Op, DAG, Opc, *this);
2704 return LowerCTPOP(Op, DAG);
2706 case ISD::SELECT_CC:
2707 return LowerSELECT_CC(Op, DAG, *this);
2710 return LowerSETCC(Op, DAG, *this);
2713 return LowerTRUNCATE(Op, DAG);
2719 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2720 SmallVectorImpl<SDValue>&Results,
2724 unsigned Opc = (unsigned) N->getOpcode();
2725 MVT OpVT = N->getValueType(0);
2729 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2730 cerr << "Op.getOpcode() = " << Opc << "\n";
2731 cerr << "*Op.getNode():\n";
2739 /* Otherwise, return unchanged */
2742 //===----------------------------------------------------------------------===//
2743 // Target Optimization Hooks
2744 //===----------------------------------------------------------------------===//
2747 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2750 TargetMachine &TM = getTargetMachine();
2752 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2753 SelectionDAG &DAG = DCI.DAG;
2754 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2755 MVT NodeVT = N->getValueType(0); // The node's value type
2756 MVT Op0VT = Op0.getValueType(); // The first operand's result
2757 SDValue Result; // Initially, empty result
2758 DebugLoc dl = N->getDebugLoc();
2760 switch (N->getOpcode()) {
2763 SDValue Op1 = N->getOperand(1);
2765 if (Op0.getOpcode() == SPUISD::IndirectAddr
2766 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2767 // Normalize the operands to reduce repeated code
2768 SDValue IndirectArg = Op0, AddArg = Op1;
2770 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2775 if (isa<ConstantSDNode>(AddArg)) {
2776 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2777 SDValue IndOp1 = IndirectArg.getOperand(1);
2779 if (CN0->isNullValue()) {
2780 // (add (SPUindirect <arg>, <arg>), 0) ->
2781 // (SPUindirect <arg>, <arg>)
2783 #if !defined(NDEBUG)
2784 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2786 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2787 << "With: (SPUindirect <arg>, <arg>)\n";
2792 } else if (isa<ConstantSDNode>(IndOp1)) {
2793 // (add (SPUindirect <arg>, <const>), <const>) ->
2794 // (SPUindirect <arg>, <const + const>)
2795 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2796 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2797 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2799 #if !defined(NDEBUG)
2800 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2802 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2803 << "), " << CN0->getSExtValue() << ")\n"
2804 << "With: (SPUindirect <arg>, "
2805 << combinedConst << ")\n";
2809 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2810 IndirectArg, combinedValue);
2816 case ISD::SIGN_EXTEND:
2817 case ISD::ZERO_EXTEND:
2818 case ISD::ANY_EXTEND: {
2819 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2820 // (any_extend (SPUextract_elt0 <arg>)) ->
2821 // (SPUextract_elt0 <arg>)
2822 // Types must match, however...
2823 #if !defined(NDEBUG)
2824 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2825 cerr << "\nReplace: ";
2828 Op0.getNode()->dump(&DAG);
2837 case SPUISD::IndirectAddr: {
2838 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2839 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2840 if (CN != 0 && CN->getZExtValue() == 0) {
2841 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2842 // (SPUaform <addr>, 0)
2844 DEBUG(cerr << "Replace: ");
2845 DEBUG(N->dump(&DAG));
2846 DEBUG(cerr << "\nWith: ");
2847 DEBUG(Op0.getNode()->dump(&DAG));
2848 DEBUG(cerr << "\n");
2852 } else if (Op0.getOpcode() == ISD::ADD) {
2853 SDValue Op1 = N->getOperand(1);
2854 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2855 // (SPUindirect (add <arg>, <arg>), 0) ->
2856 // (SPUindirect <arg>, <arg>)
2857 if (CN1->isNullValue()) {
2859 #if !defined(NDEBUG)
2860 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2862 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2863 << "With: (SPUindirect <arg>, <arg>)\n";
2867 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2868 Op0.getOperand(0), Op0.getOperand(1));
2874 case SPUISD::SHLQUAD_L_BITS:
2875 case SPUISD::SHLQUAD_L_BYTES:
2876 case SPUISD::VEC_SHL:
2877 case SPUISD::VEC_SRL:
2878 case SPUISD::VEC_SRA:
2879 case SPUISD::ROTBYTES_LEFT: {
2880 SDValue Op1 = N->getOperand(1);
2882 // Kill degenerate vector shifts:
2883 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2884 if (CN->isNullValue()) {
2890 case SPUISD::PREFSLOT2VEC: {
2891 switch (Op0.getOpcode()) {
2894 case ISD::ANY_EXTEND:
2895 case ISD::ZERO_EXTEND:
2896 case ISD::SIGN_EXTEND: {
2897 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2899 // but only if the SPUprefslot2vec and <arg> types match.
2900 SDValue Op00 = Op0.getOperand(0);
2901 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2902 SDValue Op000 = Op00.getOperand(0);
2903 if (Op000.getValueType() == NodeVT) {
2909 case SPUISD::VEC2PREFSLOT: {
2910 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2912 Result = Op0.getOperand(0);
2920 // Otherwise, return unchanged.
2922 if (Result.getNode()) {
2923 DEBUG(cerr << "\nReplace.SPU: ");
2924 DEBUG(N->dump(&DAG));
2925 DEBUG(cerr << "\nWith: ");
2926 DEBUG(Result.getNode()->dump(&DAG));
2927 DEBUG(cerr << "\n");
2934 //===----------------------------------------------------------------------===//
2935 // Inline Assembly Support
2936 //===----------------------------------------------------------------------===//
2938 /// getConstraintType - Given a constraint letter, return the type of
2939 /// constraint it is for this target.
2940 SPUTargetLowering::ConstraintType
2941 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2942 if (ConstraintLetter.size() == 1) {
2943 switch (ConstraintLetter[0]) {
2950 return C_RegisterClass;
2953 return TargetLowering::getConstraintType(ConstraintLetter);
2956 std::pair<unsigned, const TargetRegisterClass*>
2957 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2960 if (Constraint.size() == 1) {
2961 // GCC RS6000 Constraint Letters
2962 switch (Constraint[0]) {
2966 return std::make_pair(0U, SPU::R64CRegisterClass);
2967 return std::make_pair(0U, SPU::R32CRegisterClass);
2970 return std::make_pair(0U, SPU::R32FPRegisterClass);
2971 else if (VT == MVT::f64)
2972 return std::make_pair(0U, SPU::R64FPRegisterClass);
2975 return std::make_pair(0U, SPU::GPRCRegisterClass);
2979 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2982 //! Compute used/known bits for a SPU operand
2984 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2988 const SelectionDAG &DAG,
2989 unsigned Depth ) const {
2991 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
2993 switch (Op.getOpcode()) {
2995 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3001 case SPUISD::PREFSLOT2VEC:
3002 case SPUISD::LDRESULT:
3003 case SPUISD::VEC2PREFSLOT:
3004 case SPUISD::SHLQUAD_L_BITS:
3005 case SPUISD::SHLQUAD_L_BYTES:
3006 case SPUISD::VEC_SHL:
3007 case SPUISD::VEC_SRL:
3008 case SPUISD::VEC_SRA:
3009 case SPUISD::VEC_ROTL:
3010 case SPUISD::VEC_ROTR:
3011 case SPUISD::ROTBYTES_LEFT:
3012 case SPUISD::SELECT_MASK:
3019 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3020 unsigned Depth) const {
3021 switch (Op.getOpcode()) {
3026 MVT VT = Op.getValueType();
3028 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3031 return VT.getSizeInBits();
3036 // LowerAsmOperandForConstraint
3038 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3039 char ConstraintLetter,
3041 std::vector<SDValue> &Ops,
3042 SelectionDAG &DAG) const {
3043 // Default, for the time being, to the base class handler
3044 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3048 /// isLegalAddressImmediate - Return true if the integer value can be used
3049 /// as the offset of the target addressing mode.
3050 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3051 const Type *Ty) const {
3052 // SPU addresses are limited to the 256K local store:
3053 return (V > -(1 << 18) && V < (1 << 18) - 1);
3056 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3061 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3062 // The SPU target isn't yet aware of offsets.