//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#include <map>

using namespace llvm;
// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! EVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    EVT valtype;
    int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "getValueTypeMapEntry returns NULL for "
          << VT.getEVTString();
      llvm_report_error(Msg.str());
    }
#endif

    return retval;
  }
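
  // Worked example of the preferred-slot mapping (illustrative numbers, not
  // an exhaustive spec): the SPU loads only aligned 16-byte quadwords, and a
  // scalar is "in register" once it sits in its preferred slot. An i16 at
  // offset 6 of a quadword occupies bytes 6-7; rotating the quadword left by
  // (6 & 0xf) - prefslot_byte(i16) = 6 - 2 = 4 bytes moves it into bytes 2-3,
  // the i16 preferred slot. LowerLOAD below derives its rotation amounts from
  // exactly this table.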
  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */

  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy =
      Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, TLI.getLibcallCallingConv(LC), false,
                      /*isReturnValueUsed=*/true,
                      Callee, Args, DAG, Op.getDebugLoc(),
                      DAG.GetOrdering(InChain.getNode()));

    return CallInfo.first;
  }
}
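
// Usage sketch for the helper above (hypothetical call site; the libcall
// enum is only an example, and the Hi out-parameter is never written by this
// implementation, mirroring the SelectionDAGLegalize helper it was copied
// from):
//
//   SDValue Dummy;
//   SDValue Converted = ExpandLibCall(RTLIB::FPTOSINT_F64_I64, Op, DAG,
//                                     /*isSigned=*/true, Dummy, *this);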
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }
  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // instruction when available)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  // the .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);
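
  // The custom i8 hooks work by widening to 16 bits and narrowing the result;
  // roughly (an illustrative sketch, not the literal DAG emitted):
  //
  //   (srl i8:x, c)  =>  (truncate (srl (zero_extend i16:x), c))
  //   (sra i8:x, c)  =>  (truncate (sra (sign_extend i16:x), c))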
  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal:
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP. It does have CTLZ support for i32;
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);
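
  // The CTPOP custom lowering can lean on the SPU CNTB instruction, which
  // counts the set bits in each byte of a quadword; the per-byte counts are
  // then summed to the requested width. This is a sketch of the intent; the
  // exact DAG comes from the CTPOP lowering routine, which is outside this
  // excerpt.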
  setOperationAction(ISD::CTTZ , MVT::i8,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i16,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i32,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i128, Expand);

  setOperationAction(ISD::CTLZ , MVT::i8,   Promote);
  setOperationAction(ISD::CTLZ , MVT::i16,  Promote);
  setOperationAction(ISD::CTLZ , MVT::i32,  Legal);
  setOperationAction(ISD::CTLZ , MVT::i64,  Expand);
  setOperationAction(ISD::CTLZ , MVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // f64 FDIV on SPU requires libcall expansion:
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64,   Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
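
  // After expansion, (build_pair i64 lo, hi) becomes, schematically:
  //
  //   (or (shl (anyext hi), 32), (zext lo))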
  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is also legal for all supported vector VT's:
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}
const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
           VT.getSimpleVT().SimpleTy :
           MVT::i32);
}
//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32     = vec2prefslot %3
%5  f64     = fp_extend %4
\endverbatim
*/
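// Concrete instance of the sequence above (illustrative offsets): an aligned
// i32 load from (A-form base + 8) computes
// rotamt = (8 & 0xf) - prefslot_byte(i32) = 8, loads the containing quadword
// as v16i8, rotates it left 8 bytes so the word lands in bytes 0-3 (the i32
// preferred slot), and then extracts the scalar with VEC2PREFSLOT.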
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
      Msg << (unsigned) LN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
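// Sketch of the emitted sequence (schematic, not the literal DAG): the store
// becomes a load/merge/store of the containing quadword, where SHUFFLE_MASK
// (selected to one of the cbd/chd/cwd/cdd "generate controls for insertion"
// instructions) supplies the merge pattern:
//
//   %1 v16i8,ch = load <16-byte block>
//   %2 v16i8    = shuffle_mask <insertion offset>
//   %3 v16i8    = shufb (scalar_to_vector %value), %1, %2
//   ch          = store %3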
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 VT, (128 / VT.getSizeInBits())),
        stVecVT = EVT::getVectorVT(*DAG.getContext(),
                                   StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      errs() << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      errs() << "\n";
    }
#endif

    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      errs() << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      errs() << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
      Msg << (unsigned) SN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}
//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}
static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    llvm_report_error("LowerGlobalAddress: Relocation model other than static "
                      "not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}
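
// Net effect: the f64 constant can be materialized without touching memory.
// Its bit pattern is splatted as a v2i64 (which LowerBUILD_VECTOR below can
// often synthesize with immediate-forming instructions or a SHUFB), and the
// preferred slot is then read back as a scalar f64; a constant-pool load
// only appears if the splat itself cannot be synthesized cheaply.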
SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default: {
        std::string msg;
        raw_string_ostream Msg(msg);
        Msg << "LowerFormalArguments Unhandled argument type: "
            << ObjectVT.getEVTString();
        llvm_report_error(Msg.str());
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);

    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
                                                 true, false);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
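
// Example (illustrative values): Addr = 0x0001fffc is accepted; it is word
// aligned and survives (Addr << 14 >> 14), i.e. it fits the 18-bit signed
// immediate, and it is returned as the word offset 0x0001fffc >> 2.
// Addr = 0x0001fffd fails the alignment test and Addr = 0x00020000 fails the
// sign-extension test, so such callees stay in a register instead.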
SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) {

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = Outs[i].Val;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // If the call has results, copy the values out of the ret val registers.
  switch (Ins[0].VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      InVals.push_back(Chain.getValue(0));
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  }

  return Chain;
}
SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               DebugLoc dl, SelectionDAG &DAG) {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Outs[i].Val, Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}
//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}
/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff      /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
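
// Illustration: a v16i8 splat of 0x23 arrives here as the i16 element value
// 0x2323; the high/low byte comparison recovers the single byte 0x23, which
// fits an 8-bit immediate. The two-byte pattern 0x12 0x34 (i16 0x1234) fails
// the comparison and is rejected. Note the comparison is done in *signed*
// arithmetic, so splat bytes with the sign bit set (e.g. 0xAB) are also
// rejected by this particular test.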
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
          && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}
//! Lower a BUILD_VECTOR instruction creatively:
static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue();   // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();

  switch (VT.getSimpleVT().SimpleTy) {
  default: {
    std::string msg;
    raw_string_ostream Msg(msg);
    Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
        << VT.getEVTString();
    llvm_report_error(Msg.str());
    /*NOTREACHED*/
  }
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}

/*!
 Lower a v2i64 constant splat to something the SPU can actually execute.
 */
SDValue
SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
                     DebugLoc dl) {
  uint32_t upper = uint32_t(SplatVal >> 32);
  uint32_t lower = uint32_t(SplatVal);

  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et. al.
    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   Val, Val, Val, Val));
  } else {
    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

    // Both upper and lower are special, lower to a constant pool load:
    if (lower_special && upper_special) {
      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
                         SplatValCN, SplatValCN);
    }
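
    // Mask-byte encodings used below, per shufb's special control values:
    //   0x80 -> emit 0x00 into the result byte
    //   0xc0 -> emit 0xff into the result byte
    //   0xe0 -> emit 0x80 (used here only for byte 0 of a 0x80000000 word)
    // Anything else selects that byte index from the concatenated inputs.
    // E.g. (illustrative) SplatVal = 0x00000000deadbeefULL: the lower word is
    // a real operand, while every upper-word byte is synthesized from 0x80
    // control bytes, so only one 32-bit splat has to be materialized.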
1675 SmallVector<SDValue, 16> ShufBytes;
1678 // Create lower vector if not a special pattern
1679 if (!lower_special) {
1680 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1681 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1682 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1683 LO32C, LO32C, LO32C, LO32C));
1686 // Create upper vector if not a special pattern
1687 if (!upper_special) {
1688 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1689 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1690 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1691 HI32C, HI32C, HI32C, HI32C));
1694 // If either upper or lower are special, then the two input operands are
1695 // the same (basically, one of them is a "don't care")
  for (int i = 0; i < 4; ++i) {
    unsigned val = 0;
    for (int j = 0; j < 4; ++j) {
      bool process_upper, process_lower;
      val <<= 8;
      process_upper = (upper_special && (i & 1) == 0);
      process_lower = (lower_special && (i & 1) == 1);

      if (process_upper || process_lower) {
        if ((process_upper && upper == 0)
            || (process_lower && lower == 0))
          val |= 0x80;
        else if ((process_upper && upper == 0xffffffff)
                 || (process_lower && lower == 0xffffffff))
          val |= 0xc0;
        else if ((process_upper && upper == 0x80000000)
                 || (process_lower && lower == 0x80000000))
          val |= (j == 0 ? 0xe0 : 0x80);
      } else
        val |= i * 4 + j + ((i & 1) * 16);
    }

    ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
  }
1727 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1728 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                 &ShufBytes[0], ShufBytes.size()));
}
1733 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1734 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1735 /// permutation vector, V3, is monotonically increasing with one "exception"
1736 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1737 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1738 /// In either case, the net result is going to eventually invoke SHUFB to
1739 /// permute/shuffle the bytes from V1 and V2.
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
/// generate a control word for byte/halfword/word insertion. This takes care
/// of a single element move from V2 into V1.
///
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
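///
/// For example, the v4i32 mask (0, 1, 6, 3) is monotonic with a single
/// "exception" element: position 2 takes element 6 (element 2 of V2), while
/// positions 0, 1 and 3 take V1 elements in order, so it maps onto a
/// C*D-generated insertion mask plus one SHUFB. A mask such as (0, 2, 1, 3)
/// takes neither fast path and is lowered to SHUFB with a full 16-byte
/// constant permutation vector.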
1746 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1747 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1748 SDValue V1 = Op.getOperand(0);
1749 SDValue V2 = Op.getOperand(1);
1750 DebugLoc dl = Op.getDebugLoc();
1752 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
  // If we have a single element being moved from V2 into V1, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  EVT VecVT = V1.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;
  unsigned V0Elt = 0;

  bool monotonic = true;
  bool rotate = true;

  if (EltVT == MVT::i8) {
    V2EltIdx0 = 16;
  } else if (EltVT == MVT::i16) {
    V2EltIdx0 = 8;
  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
    V2EltIdx0 = 4;
  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
    V2EltIdx0 = 2;
  } else
    llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
  for (unsigned i = 0; i != MaxElts; ++i) {
    if (SVN->getMaskElt(i) < 0)
      continue;

    unsigned SrcElt = SVN->getMaskElt(i);

    if (monotonic) {
      if (SrcElt >= V2EltIdx0) {
        if (++EltsFromV2 <= 1)
          V2Elt = (V2EltIdx0 - SrcElt) << 2;
      } else if (CurrElt != SrcElt) {
        monotonic = false;
      }

      if (monotonic)
        ++CurrElt;
    }

    if (rotate) {
      if (PrevElt > 0 && SrcElt < MaxElts) {
        if ((PrevElt == SrcElt - 1)
            || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
          PrevElt = SrcElt;
          if (SrcElt == MaxElts - 1)
            V0Elt = i;
        } else {
          rotate = false;
        }
      } else if (PrevElt == 0) {
        // First time through, need to keep track of previous element
        PrevElt = SrcElt;
      } else {
        // This isn't a rotation; it takes elements from vector 2
        rotate = false;
      }
    }
  }
1818 if (EltsFromV2 == 1 && monotonic) {
1819 // Compute mask and shuffle
1820 MachineFunction &MF = DAG.getMachineFunction();
1821 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1822 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1823 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1824 // Initialize temporary register to 0
1825 SDValue InitTempReg =
1826 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1827 // Copy register's contents as index in SHUFFLE_MASK:
1828 SDValue ShufMaskOp =
1829 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1830 DAG.getTargetConstant(V2Elt, MVT::i32),
1831 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1832 // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
                       ShufMaskOp);
  } else if (rotate) {
    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;

    return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
                       V1, DAG.getConstant(rotamt, MVT::i16));
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the
    // actual bytes.
    unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1845 SmallVector<SDValue, 16> ResultMask;
1846 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1847 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1849 for (unsigned j = 0; j < BytesPerElement; ++j)
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, MVT::i8));
    }
1853 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1854 &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
  }
}
1859 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1860 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1861 DebugLoc dl = Op.getDebugLoc();
1863 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1864 // For a constant, build the appropriate constant vector, which will
1865 // eventually simplify to a vector register load.
1867 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;
    EVT VT;
    size_t n_copies;

    // Create a constant vector:
1873 switch (Op.getValueType().getSimpleVT().SimpleTy) {
1874 default: llvm_unreachable("Unexpected constant value type in "
1875 "LowerSCALAR_TO_VECTOR");
1876 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1877 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1878 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1879 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1880 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1881 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1885 for (size_t j = 0; j < n_copies; ++j)
1886 ConstVecValues.push_back(CValue);
1888 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
    }
  }

  return SDValue();
}
1907 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1908 EVT VT = Op.getValueType();
1909 SDValue N = Op.getOperand(0);
1910 SDValue Elt = Op.getOperand(1);
1911 DebugLoc dl = Op.getDebugLoc();
  SDValue retval;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1915 // Constant argument:
1916 int EltNo = (int) C->getZExtValue();
1919 if (VT == MVT::i8 && EltNo >= 16)
1920 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1921 else if (VT == MVT::i16 && EltNo >= 8)
1922 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
    else if (VT == MVT::i64 && EltNo >= 2)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
      // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
    }
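    // For reference: a scalar of type T lives in T's "preferred slot" of the
    // 128-bit register: byte 3 for i8, bytes 2-3 for i16, bytes 0-3 for
    // i32/f32, and bytes 0-7 for i64/f64. prefslot_begin/prefslot_end below
    // encode exactly these byte ranges.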
1933 // Need to generate shuffle mask and extract:
1934 int prefslot_begin = -1, prefslot_end = -1;
1935 int elt_byte = EltNo * VT.getSizeInBits() / 8;
    switch (VT.getSimpleVT().SimpleTy) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8:
      prefslot_begin = prefslot_end = 3;
      break;
    case MVT::i16:
      prefslot_begin = 2; prefslot_end = 3;
      break;
    case MVT::i32:
    case MVT::f32:
      prefslot_begin = 0; prefslot_end = 3;
      break;
    case MVT::i64:
    case MVT::f64:
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }
1960 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1961 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1963 unsigned int ShufBytes[16] = {
1964 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    for (int i = 0; i < 16; ++i) {
      // zero fill upper part of preferred slot, don't care about the
      // insert position:
      unsigned int mask_val;
      if (i <= prefslot_end) {
        mask_val =
          ((i < prefslot_begin)
           ? 0x80
           : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }
1981 SDValue ShufMask[4];
1982 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1983 unsigned bidx = i * 4;
      unsigned int bits = ((ShufBytes[bidx] << 24) |
                           (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
    }
1991 SDValue ShufMaskVec =
1992 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1993 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1995 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1996 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
                                      N, N, ShufMaskVec));
  } else {
    // Variable index: Rotate the requested element into slot 0, then replicate
    // slot 0 across the vector
2001 EVT VecVT = N.getValueType();
    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
      llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
                        " vector type!");
    }
2007 // Make life easier by making sure the index is zero-extended to i32
2008 if (Elt.getValueType() != MVT::i32)
2009 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
    // Scale the index to a bit/byte shift quantity
    APInt scaleFactor =
      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
      // Scale the shift factor:
      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
                        DAG.getConstant(scaleShift, MVT::i32));
    }

    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
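    // Worked example: for v8i16, scaleFactor = 16/8 = 2 bytes per element, so
    // scaleShift = 1. A variable index of 5 becomes a 5 << 1 = 10-byte shift,
    // and SHLQUAD_L_BYTES moves element 5 (bytes 10-11 of the quadword) down
    // to byte 0, where the preferred-slot extraction below can find it.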
    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set)
    SDValue replicate;

    switch (VT.getSimpleVT().SimpleTy) {
    default:
      llvm_report_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector"
                        " type");
    case MVT::i8: {
      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i16: {
      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              loFactor, hiFactor, loFactor, hiFactor);
      break;
    }
    }
    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                     vecShift, vecShift, replicate));
  }

  return retval;
}
2071 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2072 SDValue VecOp = Op.getOperand(0);
2073 SDValue ValOp = Op.getOperand(1);
2074 SDValue IdxOp = Op.getOperand(2);
2075 DebugLoc dl = Op.getDebugLoc();
2076 EVT VT = Op.getValueType();
2078 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2079 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2081 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2082 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2083 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2084 DAG.getRegister(SPU::R1, PtrVT),
2085 DAG.getConstant(CN->getSExtValue(), PtrVT));
2086 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
  SDValue result =
    DAG.getNode(SPUISD::SHUFB, dl, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
                VecOp,
                DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));

  return result;
}
2097 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2098 const TargetLowering &TLI)
{
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
  DebugLoc dl = Op.getDebugLoc();
  EVT ShiftVT = TLI.getShiftAmountTy();

  assert(Op.getValueType() == MVT::i8);

  switch (Opc) {
  default:
    llvm_unreachable("Unhandled i8 math operator");
  case ISD::ADD: {
    // 8-bit addition: Promote the arguments up to 16 bits and truncate:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }

  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16 bits and truncate:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
                       ? ISD::ZERO_EXTEND
                       : ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    // Replicate lower 8-bits into upper 8:
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, dl, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));

    // Truncate back down to i8
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::ZERO_EXTEND;

      if (N1.getValueType().bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;

      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::SIGN_EXTEND;

      if (N1VT.bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  }

  return SDValue();
}
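// Worked example for the ROTL case above: rotating i8 0xAB left by 3 first
// builds the i16 value 0xABAB (low byte replicated into the high byte); a
// 16-bit rotate left by 3 then yields 0x5D5D, and the final truncate returns
// 0x5D, which is exactly 0xAB rotated left by 3 within 8 bits.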
2201 //! Lower byte immediate operations for v16i8 vectors:
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  SDValue ConstVec;
  SDValue Arg;
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }
2223 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2224 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2225 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
    APInt APSplatBits, APSplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();

    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, minSplatBits)
        && minSplatBits <= SplatBitSize) {
2235 uint64_t SplatBits = APSplatBits.getZExtValue();
2236 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2238 SmallVector<SDValue, 16> tcVec;
2239 tcVec.assign(16, tc);
2240 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
                                     &tcVec[0], tcVec.size()));
    }
  }

  // These operations (AND, OR, XOR) are legal; they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
  return Op;
}
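// For example, (or v16i8 x, splat(0x0f)) reaches this function with a
// BUILD_VECTOR splat of 0x0f; rebuilding it from target constants lets
// instruction selection match the immediate form ORBI $rT, $rA, 0x0f instead
// of materializing the constant vector in a register.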
2250 //! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
 */
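// Worked example for the i32 path below: for the input 0x01030700, CNTB
// produces the per-byte ones counts 0x01020300. Adding the value shifted
// right by 16 gives 0x01020402, adding that shifted right by 8 gives
// 0x01030606, and the final AND with 0xff extracts 6 = 1+2+3+0, the total
// population count.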
2256 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2257 EVT VT = Op.getValueType();
2258 EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2259 VT, (128 / VT.getSizeInBits()));
2260 DebugLoc dl = Op.getDebugLoc();
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8: {
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
2276 MachineFunction &MF = DAG.getMachineFunction();
2277 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2279 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2281 SDValue N = Op.getOperand(0);
2282 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2283 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2284 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2286 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2287 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2289 // CNTB_result becomes the chain to which all of the virtual registers
2290 // CNTB_reg, SUM1_reg become associated:
2291 SDValue CNTB_result =
2292 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2294 SDValue CNTB_rescopy =
2295 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2297 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
    return DAG.getNode(ISD::AND, dl, MVT::i16,
                       DAG.getNode(ISD::ADD, dl, MVT::i16,
                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
2308 MachineFunction &MF = DAG.getMachineFunction();
2309 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2311 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2312 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2314 SDValue N = Op.getOperand(0);
2315 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2316 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2317 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2318 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2320 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2321 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2323 // CNTB_result becomes the chain to which all of the virtual registers
2324 // CNTB_reg, SUM1_reg become associated:
2325 SDValue CNTB_result =
2326 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Comp1 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
                  Shift1);

    SDValue Sum1 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);

    SDValue Comp2 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
                  Shift2);
    SDValue Sum2 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
  }
  }

  return SDValue();
}
//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
/*!
  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
  All conversions to i64 are expanded to a libcall.
 */
2366 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2367 SPUTargetLowering &TLI) {
2368 EVT OpVT = Op.getValueType();
2369 SDValue Op0 = Op.getOperand(0);
2370 EVT Op0VT = Op0.getValueType();
2372 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2373 || OpVT == MVT::i64) {
    // Convert f32 / f64 to i32 / i64 via libcall.
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::FP_TO_SINT)
       ? RTLIB::getFPTOSINT(Op0VT, OpVT)
       : RTLIB::getFPTOUINT(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}
//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
/*!
  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
  All conversions from i64 are expanded to a libcall.
 */
2392 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2393 SPUTargetLowering &TLI) {
2394 EVT OpVT = Op.getValueType();
2395 SDValue Op0 = Op.getOperand(0);
2396 EVT Op0VT = Op0.getValueType();
2398 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2399 || Op0VT == MVT::i64) {
    // Convert i32, i64 to f64 via libcall:
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::SINT_TO_FP)
       ? RTLIB::getSINTTOFP(Op0VT, OpVT)
       : RTLIB::getUINTTOFP(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}
//! Lower ISD::SETCC
/*!
  This handles MVT::f64 (double floating point) condition lowering.
 */
2417 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2418 const TargetLowering &TLI) {
2419 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2420 DebugLoc dl = Op.getDebugLoc();
2421 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2423 SDValue lhs = Op.getOperand(0);
2424 SDValue rhs = Op.getOperand(1);
2425 EVT lhsVT = lhs.getValueType();
  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2428 EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2429 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2430 EVT IntVT(MVT::i64);
2432 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2433 // selected to a NOP:
  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
  SDValue lhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                DAG.getNode(ISD::SRL, dl, IntVT,
                            i64lhs, DAG.getConstant(32, MVT::i32)));
  SDValue lhsHi32abs =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
  SDValue lhsLo32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2445 // SETO and SETUO only use the lhs operand:
2446 if (CC->get() == ISD::SETO) {
    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
    // SETUO:
2449 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2450 return DAG.getNode(ISD::XOR, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhs, DAG.getConstantFP(0.0, lhsVT),
                                    ISD::SETUO),
                       DAG.getConstant(ccResultAllOnes, ccResultVT));
2455 } else if (CC->get() == ISD::SETUO) {
2456 // Evaluates to true if Op0 is [SQ]NaN
    return DAG.getNode(ISD::AND, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsHi32abs,
                                    DAG.getConstant(0x7ff00000, MVT::i32),
                                    ISD::SETGE),
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsLo32,
                                    DAG.getConstant(0, MVT::i32),
                                    ISD::SETGT));
  }
  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
  SDValue rhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                DAG.getNode(ISD::SRL, dl, IntVT,
                            i64rhs, DAG.getConstant(32, MVT::i32)));
2474 // If a value is negative, subtract from the sign magnitude constant:
2475 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2477 // Convert the sign-magnitude representation into 2's complement:
2478 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2479 lhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
  SDValue lhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
                lhsSelectMask, lhsSignMag2TC, i64lhs);

  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      rhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
  SDValue rhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
                rhsSelectMask, rhsSignMag2TC, i64rhs);
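  // Worked example: -1.0 has bit pattern 0xBFF0000000000000, which the SUB
  // above rewrites to 0x8000000000000000 - 0xBFF0000000000000 =
  // 0xC010000000000000, while -2.0 (0xC000000000000000) maps to
  // 0xC000000000000000. In the rewritten form the signed i64 ordering agrees
  // with the floating-point ordering (-2.0 < -1.0); non-negative values pass
  // through unchanged.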
  unsigned compareOp = 0;
  switch (CC->get()) {
  case ISD::SETOEQ:
  case ISD::SETUEQ:
    compareOp = ISD::SETEQ; break;
  case ISD::SETOGT:
  case ISD::SETUGT:
    compareOp = ISD::SETGT; break;
  case ISD::SETOGE:
  case ISD::SETUGE:
    compareOp = ISD::SETGE; break;
  case ISD::SETOLT:
  case ISD::SETULT:
    compareOp = ISD::SETLT; break;
  case ISD::SETOLE:
  case ISD::SETULE:
    compareOp = ISD::SETLE; break;
  case ISD::SETONE:
  case ISD::SETUNE:
    compareOp = ISD::SETNE; break;
  default:
    llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
  }
  SDValue result =
    DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
                 (ISD::CondCode) compareOp);

  if ((CC->get() & 0x8) == 0) {
2522 // Ordered comparison:
    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
  }

  return result;
}
//! Lower ISD::SELECT_CC
/*!
  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
  SELB instruction.

  \note Need to revisit this in the future: if the code path through the true
  and false value computations is longer than the latency of a branch (6
  cycles), then it would be more advantageous to branch and insert a new basic
  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
 */
2549 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2550 const TargetLowering &TLI) {
2551 EVT VT = Op.getValueType();
2552 SDValue lhs = Op.getOperand(0);
2553 SDValue rhs = Op.getOperand(1);
2554 SDValue trueval = Op.getOperand(2);
2555 SDValue falseval = Op.getOperand(3);
2556 SDValue condition = Op.getOperand(4);
2557 DebugLoc dl = Op.getDebugLoc();
2559 // NOTE: SELB's arguments: $rA, $rB, $mask
2561 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2562 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2563 // condition was true and 0s where the condition was false. Hence, the
2564 // arguments to SELB get reversed.
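  //
  // Bitwise, SELB computes: result = ($rA & ~$mask) | ($rB & $mask).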
2566 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2567 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2568 // with another "cannot select select_cc" assert:
2570 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2571 TLI.getSetCCResultType(Op.getValueType()),
2572 lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
}
2576 //! Custom lower ISD::TRUNCATE
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
{
2579 // Type to truncate to
2580 EVT VT = Op.getValueType();
2581 MVT simpleVT = VT.getSimpleVT();
2582 EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2583 VT, (128 / VT.getSizeInBits()));
2584 DebugLoc dl = Op.getDebugLoc();
2586 // Type to truncate from
2587 SDValue Op0 = Op.getOperand(0);
2588 EVT Op0VT = Op0.getValueType();
2590 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2591 // Create shuffle mask, least significant doubleword of quadword
2592 unsigned maskHigh = 0x08090a0b;
2593 unsigned maskLow = 0x0c0d0e0f;
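    // Shuffle control bytes 0x08..0x0f select bytes 8-15 of the input, i.e.
    // the least-significant doubleword of the big-endian quadword; repeating
    // the pair leaves that doubleword in the preferred slot for the
    // VEC2PREFSLOT below.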
2594 // Use a shuffle to perform the truncation
2595 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2596 DAG.getConstant(maskHigh, MVT::i32),
2597 DAG.getConstant(maskLow, MVT::i32),
2598 DAG.getConstant(maskHigh, MVT::i32),
2599 DAG.getConstant(maskLow, MVT::i32));
2601 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2602 Op0, Op0, shufMask);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
  }

  return SDValue();             // Leave the truncate unmolested
}
/*!
 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2612 * algorithm is to duplicate the sign bit using rotmai to generate at
2613 * least one byte full of sign bits. Then propagate the "sign-byte" into
2614 * the leftmost words and the i64/i32 into the rightmost words using shufb.
2616 * @param Op The sext operand
2617 * @param DAG The current DAG
2618 * @return The SDValue with the entire instruction sequence
 */
static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
{
2622 DebugLoc dl = Op.getDebugLoc();
2624 // Type to extend to
2625 MVT OpVT = Op.getValueType().getSimpleVT();
2626 EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2627 OpVT, (128 / OpVT.getSizeInBits()));
2629 // Type to extend from
2630 SDValue Op0 = Op.getOperand(0);
2631 MVT Op0VT = Op0.getValueType().getSimpleVT();
2633 // The type to extend to needs to be a i128 and
2634 // the type to extend from needs to be i64 or i32.
2635 assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2636 "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2638 // Create shuffle mask
2639 unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2640 unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
2641 unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2642 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2643 DAG.getConstant(mask1, MVT::i32),
2644 DAG.getConstant(mask1, MVT::i32),
2645 DAG.getConstant(mask2, MVT::i32),
2646 DAG.getConstant(mask3, MVT::i32));
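  // Control byte 0x10 selects byte 0 of the second shufb operand (sraVal),
  // which holds only sign bits after the arithmetic shift below, so mask1
  // replicates the sign byte. Control bytes 0x00-0x07 copy the input value
  // out of the first operand's preferred slot, so for an i64 input the i128
  // result is laid out as [8 sign bytes | 8 value bytes].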
2648 // Word wise arithmetic right shift to generate at least one byte
2649 // that contains sign bits.
2650 MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
  SDValue sraVal = DAG.getNode(ISD::SRA,
                               dl,
                               mvt,
                               DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
                               DAG.getConstant(31, MVT::i32));
2657 // Shuffle bytes - Copy the sign bits into the upper 64 bits
2658 // and the input value into the lower 64 bits.
2659 SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2660 DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
}
2665 //! Custom (target-specific) lowering entry point
/*!
  This is where LLVM's DAG selection process calls to do target-specific
  lowering of nodes.
 */
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
  unsigned Opc = (unsigned) Op.getOpcode();
  EVT VT = Op.getValueType();

  switch (Opc) {
  default: {
    errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    errs() << "Op.getOpcode() = " << Opc << "\n";
    errs() << "*Op.getNode():\n";
    Op.getNode()->dump();
    llvm_unreachable(0);
  }
  case ISD::LOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
2692 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2693 case ISD::ConstantPool:
2694 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2695 case ISD::GlobalAddress:
2696 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2697 case ISD::JumpTable:
2698 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2699 case ISD::ConstantFP:
2700 return LowerConstantFP(Op, DAG);
  // i8, i64 math ops:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA: {
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;
  }
2715 case ISD::FP_TO_SINT:
2716 case ISD::FP_TO_UINT:
2717 return LowerFP_TO_INT(Op, DAG, *this);
2719 case ISD::SINT_TO_FP:
2720 case ISD::UINT_TO_FP:
2721 return LowerINT_TO_FP(Op, DAG, *this);
2723 // Vector-related lowering.
2724 case ISD::BUILD_VECTOR:
2725 return LowerBUILD_VECTOR(Op, DAG);
2726 case ISD::SCALAR_TO_VECTOR:
2727 return LowerSCALAR_TO_VECTOR(Op, DAG);
2728 case ISD::VECTOR_SHUFFLE:
2729 return LowerVECTOR_SHUFFLE(Op, DAG);
2730 case ISD::EXTRACT_VECTOR_ELT:
2731 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2732 case ISD::INSERT_VECTOR_ELT:
2733 return LowerINSERT_VECTOR_ELT(Op, DAG);
  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);
  // Vector and i8 multiply:
  case ISD::MUL:
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
2749 case ISD::SELECT_CC:
2750 return LowerSELECT_CC(Op, DAG, *this);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG, *this);
  case ISD::TRUNCATE:
    return LowerTRUNCATE(Op, DAG);
2758 case ISD::SIGN_EXTEND:
    return LowerSIGN_EXTEND(Op, DAG);
  }

  return SDValue();
}
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG)
{
  unsigned Opc = (unsigned) N->getOpcode();
  EVT OpVT = N->getValueType(0);

  switch (Opc) {
  default: {
    errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    errs() << "Op.getOpcode() = " << Opc << "\n";
    errs() << "*Op.getNode():\n";
    N->dump();
    abort();
    /*NOTREACHED*/
  }
  }

  /* Otherwise, return unchanged */
}
2788 //===----------------------------------------------------------------------===//
2789 // Target Optimization Hooks
2790 //===----------------------------------------------------------------------===//
SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
  TargetMachine &TM = getTargetMachine();
2798 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2799 SelectionDAG &DAG = DCI.DAG;
2800 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2801 EVT NodeVT = N->getValueType(0); // The node's value type
2802 EVT Op0VT = Op0.getValueType(); // The first operand's result
2803 SDValue Result; // Initially, empty result
2804 DebugLoc dl = N->getDebugLoc();
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
2809 SDValue Op1 = N->getOperand(1);
2811 if (Op0.getOpcode() == SPUISD::IndirectAddr
2812 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2813 // Normalize the operands to reduce repeated code
2814 SDValue IndirectArg = Op0, AddArg = Op1;
      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }
2821 if (isa<ConstantSDNode>(AddArg)) {
2822 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2823 SDValue IndOp1 = IndirectArg.getOperand(1);
2825 if (CN0->isNullValue()) {
2826 // (add (SPUindirect <arg>, <arg>), 0) ->
2827 // (SPUindirect <arg>, <arg>)
2829 #if !defined(NDEBUG)
        if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
          errs() << "\nReplace: (add (SPUindirect <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
        }
#endif

        return IndirectArg;
      } else if (isa<ConstantSDNode>(IndOp1)) {
2839 // (add (SPUindirect <arg>, <const>), <const>) ->
2840 // (SPUindirect <arg>, <const + const>)
2841 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2842 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2843 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2845 #if !defined(NDEBUG)
        if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
          errs() << "\nReplace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                 << "), " << CN0->getSExtValue() << ")\n"
                 << "With:    (SPUindirect <arg>, "
                 << combinedConst << ")\n";
        }
#endif

        return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                           IndirectArg, combinedValue);
      }
    }
    break;
  }
2862 case ISD::SIGN_EXTEND:
2863 case ISD::ZERO_EXTEND:
2864 case ISD::ANY_EXTEND: {
2865 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2866 // (any_extend (SPUextract_elt0 <arg>)) ->
2867 // (SPUextract_elt0 <arg>)
2868 // Types must match, however...
2869 #if !defined(NDEBUG)
2870 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        errs() << "\nReplace: ";
        N->dump(&DAG);
        errs() << "\nWith:    ";
        Op0.getNode()->dump(&DAG);
        errs() << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
2883 case SPUISD::IndirectAddr: {
2884 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2885 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2886 if (CN != 0 && CN->getZExtValue() == 0) {
2887 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2888 // (SPUaform <addr>, 0)
2890 DEBUG(errs() << "Replace: ");
2891 DEBUG(N->dump(&DAG));
2892 DEBUG(errs() << "\nWith: ");
2893 DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(errs() << "\n");

        return Op0;
      }
2898 } else if (Op0.getOpcode() == ISD::ADD) {
2899 SDValue Op1 = N->getOperand(1);
2900 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2901 // (SPUindirect (add <arg>, <arg>), 0) ->
2902 // (SPUindirect <arg>, <arg>)
2903 if (CN1->isNullValue()) {
2905 #if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\nReplace: (SPUindirect (add <arg>, <arg>), 0)\n"
                   << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
2920 case SPUISD::SHLQUAD_L_BITS:
2921 case SPUISD::SHLQUAD_L_BYTES:
2922 case SPUISD::ROTBYTES_LEFT: {
2923 SDValue Op1 = N->getOperand(1);
2925 // Kill degenerate vector shifts:
2926 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
2933 case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
2937 case ISD::ANY_EXTEND:
2938 case ISD::ZERO_EXTEND:
2939 case ISD::SIGN_EXTEND: {
2940 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2942 // but only if the SPUprefslot2vec and <arg> types match.
2943 SDValue Op00 = Op0.getOperand(0);
2944 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2945 SDValue Op000 = Op00.getOperand(0);
          if (Op000.getValueType() == NodeVT) {
            Result = Op000;
          }
        }
        break;
      }
2952 case SPUISD::VEC2PREFSLOT: {
2953 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
        Result = Op0.getOperand(0);
        break;
      }
      }
      break;
    }
  }
2963 // Otherwise, return unchanged.
2965 if (Result.getNode()) {
2966 DEBUG(errs() << "\nReplace.SPU: ");
2967 DEBUG(N->dump(&DAG));
2968 DEBUG(errs() << "\nWith: ");
2969 DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(errs() << "\n");
  }

  return Result;
}
2977 //===----------------------------------------------------------------------===//
2978 // Inline Assembly Support
2979 //===----------------------------------------------------------------------===//
2981 /// getConstraintType - Given a constraint letter, return the type of
2982 /// constraint it is for this target.
2983 SPUTargetLowering::ConstraintType
2984 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2985 if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'd':
    case 'v':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}
2999 std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const
{
3003 if (Constraint.size() == 1) {
3004 // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
3025 //! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
3034 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;

  case SPUISD::CALL:
  case SPUISD::SHUFB:
  case SPUISD::SHUFFLE_MASK:
  case SPUISD::CNTB:
  case SPUISD::PREFSLOT2VEC:
3045 case SPUISD::LDRESULT:
3046 case SPUISD::VEC2PREFSLOT:
3047 case SPUISD::SHLQUAD_L_BITS:
3048 case SPUISD::SHLQUAD_L_BYTES:
3049 case SPUISD::VEC_ROTL:
3050 case SPUISD::VEC_ROTR:
3051 case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
    break;
  }
}

unsigned
3059 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3060 unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    return 1;

  case SPUISD::SELECT_MASK: {
    EVT VT = Op.getValueType();

    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
      return 1;
    }
    return VT.getSizeInBits();
  }
  }
}
3076 // LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                bool hasMemory,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
                                               Ops, DAG);
}
3088 /// isLegalAddressImmediate - Return true if the integer value can be used
3089 /// as the offset of the target addressing mode.
3090 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3091 const Type *Ty) const {
  // SPU's local store is 256K, so legal immediate offsets fit in 18 bits:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}

bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}