//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! EVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    EVT valtype;
    int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
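
  // Example: a scalar i16 occupies bytes 2-3 of its 16-byte quadword
  // (prefslot_byte == 2), an i8 occupies byte 3, and 32-bit and wider
  // scalars start at byte 0; LowerLOAD below rotates quadwords by these
  // offsets to move a datum into its preferred slot.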

  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "getValueTypeMapEntry returns NULL for "
          << VT.getEVTString();
      llvm_report_error(Msg.str());
    }
#endif

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy =
      Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, TLI.getLibcallCallingConv(LC), false,
                      /*isReturnValueUsed=*/true,
                      Callee, Args, DAG, Op.getDebugLoc(),
                      DAG.GetOrdering(InChain.getNode()));

    return CallInfo.first;
  }
}
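
// Illustrative use of ExpandLibCall (not a call site in this file): lowering
// a 64-bit FP division would invoke
//   SDValue Dummy;
//   SDValue Res = ExpandLibCall(RTLIB::DIV_F64, Op, DAG, false, Dummy, TLI);
// which emits a call to "__fast_divdf3", the name registered for
// RTLIB::DIV_F64 in the SPUTargetLowering constructor below.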

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM, MVT::i8, Expand);
  setOperationAction(ISD::UREM, MVT::i8, Expand);
  setOperationAction(ISD::SDIV, MVT::i8, Expand);
  setOperationAction(ISD::UDIV, MVT::i8, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
  setOperationAction(ISD::SREM, MVT::i16, Expand);
  setOperationAction(ISD::UREM, MVT::i16, Expand);
  setOperationAction(ISD::SDIV, MVT::i16, Expand);
  setOperationAction(ISD::UDIV, MVT::i16, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIV, MVT::i64, Expand);
  setOperationAction(ISD::UDIV, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SREM, MVT::i128, Expand);
  setOperationAction(ISD::UREM, MVT::i128, Expand);
  setOperationAction(ISD::SDIV, MVT::i128, Expand);
  setOperationAction(ISD::UDIV, MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
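
  // With the Expand actions above, the legalizer turns integer division
  // into compiler-runtime calls (e.g. an i32 SDIV becomes a call to
  // __divsi3), and the DIVREM forms decompose into separate divide and
  // remainder operations.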

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32 at some point):
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        the .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal:
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
  setOperationAction(ISD::MULHU, MVT::i8, Expand);
  setOperationAction(ISD::MULHS, MVT::i8, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU, MVT::i16, Expand);
  setOperationAction(ISD::MULHS, MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  setOperationAction(ISD::MULHS, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i64, Expand);
  setOperationAction(ISD::MULHS, MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8, Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);

  setOperationAction(ISD::CTTZ, MVT::i8, Expand);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ, MVT::i128, Expand);

  setOperationAction(ISD::CTLZ, MVT::i8, Promote);
  setOperationAction(ISD::CTLZ, MVT::i16, Promote);
  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ, MVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is also legal for all supported vector VT's.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND, VT, Legal);
    setOperationAction(ISD::OR, VT, Legal);
    setOperationAction(ISD::XOR, VT, Legal);
    setOperationAction(ISD::LOAD, VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE, VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
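
  // ZeroOrNegativeOneBooleanContent means a "true" comparison result is
  // all ones: an i32 SETCC that succeeds yields 0xFFFFFFFF, which is
  // exactly the per-bit mask form that SELB consumes.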

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
           VT.getSimpleVT().SimpleTy :
           MVT::i32);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);
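
        // For example, an i32 load (prefslot_byte == 0) at constant offset
        // 0x12 yields rotamt = (0x12 & 0xf) - 0 = 2: rotating the quadword
        // left by two bytes brings the datum into the preferred slot.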

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
             "UNINDEXED\n";
      Msg << (unsigned) LN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
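// Sketch of the emitted sequence (illustrative, for an i32 store into an
// aligned 16-byte block):
//   %1 v16i8,ch = load  <16-byte block containing the store address>
//   %2          = shuffle_mask(insert offset)     ; SPUISD::SHUFFLE_MASK
//   %3 v16i8    = shufb(scalar_to_vector(value), %1, %2)
//   ch          = store %3                        ; write the block back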
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 VT, (128 / VT.getSizeInBits())),
        stVecVT = EVT::getVectorVT(*DAG.getContext(),
                                   StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      errs() << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      errs() << "\n";
    }
#endif

    SDValue insertEltOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      errs() << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      errs() << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
             "UNINDEXED\n";
      Msg << (unsigned) SN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}
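
// In small-memory mode the entire address fits into the 18-bit immediate
// field of an a-form instruction, so a single AFormAddr node suffices;
// large-memory mode splits the address into Hi/Lo halves combined through
// IndirectAddr, analogous to the hi/lo address pairs of other RISC targets.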

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    llvm_report_error("LowerGlobalAddress: Relocation model other than static"
                      " not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}
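
// For example, the f64 constant 1.0 has the bit pattern 0x3FF0000000000000;
// it is splatted into a v2i64, bitcast to v2f64, and the preferred slot is
// extracted, so the constant materializes without a constant-pool load.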

SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default: {
        std::string msg;
        raw_string_ostream Msg(msg);
        Msg << "LowerFormalArguments Unhandled argument type: "
            << ObjectVT.getEVTString();
        llvm_report_error(Msg.str());
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
                                                 true, false);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||   // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;   // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
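
// For example, a callee at absolute address 0x100 passes both checks (word
// aligned and representable as a sign-extended 18-bit immediate), so
// isLSAAddress returns the constant 0x40, the word index that the branch
// encodes.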

SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) {

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = Outs[i].Val;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();

  // Insert a call sequence start
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls; otherwise, external symbols become BRASL calls. This
      // assumes that declared/defined symbols are in the same compilation
      // unit and can be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // If the call has results, copy the values out of the ret val registers.
  switch (Ins[0].VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      InVals.push_back(Chain.getValue(0));
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  }

  return Chain;
}

SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               DebugLoc dl, SelectionDAG &DAG) {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Outs[i].Val, Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
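
// For example, a v4i32 splat of 0x1ffff matches here, since it fits in 18
// unsigned bits and can be materialized with a single ILA instruction; a
// splat of 0x40000 does not, and falls through to the matchers below.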

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                  /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}

//! Lower a BUILD_VECTOR instruction creatively:
static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue();   // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();

  switch (VT.getSimpleVT().SimpleTy) {
  default: {
    std::string msg;
    raw_string_ostream Msg(msg);
    Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
        << VT.getEVTString();
    llvm_report_error(Msg.str());
    /*NOTREACHED*/
  }
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}

//! Lower a v2i64 constant splat
SDValue
SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
                     DebugLoc dl) {
  uint32_t upper = uint32_t(SplatVal >> 32);
  uint32_t lower = uint32_t(SplatVal);

  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et. al.
    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   Val, Val, Val, Val));
  } else {
    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

    // Both upper and lower are special, lower to a constant pool load:
    if (lower_special && upper_special) {
      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
                         SplatValCN, SplatValCN);
    }
1674 SmallVector<SDValue, 16> ShufBytes;
1677 // Create lower vector if not a special pattern
1678 if (!lower_special) {
1679 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1680 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1681 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1682 LO32C, LO32C, LO32C, LO32C));
1685 // Create upper vector if not a special pattern
1686 if (!upper_special) {
1687 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1688 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1689 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1690 HI32C, HI32C, HI32C, HI32C));
1693 // If either upper or lower are special, then the two input operands are
1694 // the same (basically, one of them is a "don't care")
1700 for (int i = 0; i < 4; ++i) {
1702 for (int j = 0; j < 4; ++j) {
1704 bool process_upper, process_lower;
1705 val <<= 8;
1706 process_upper = (upper_special && (i & 1) == 0);
1707 process_lower = (lower_special && (i & 1) == 1);
1709 if (process_upper || process_lower) {
1710 if ((process_upper && upper == 0)
1711 || (process_lower && lower == 0))
1712 val |= 0x80;
1713 else if ((process_upper && upper == 0xffffffff)
1714 || (process_lower && lower == 0xffffffff))
1715 val |= 0xc0;
1716 else if ((process_upper && upper == 0x80000000)
1717 || (process_lower && lower == 0x80000000))
1718 val |= (j == 0 ? 0xe0 : 0x80);
1719 } else
1720 val |= i * 4 + j + ((i & 1) * 16);
1721 }
1723 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1724 }
1726 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1727 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1728 &ShufBytes[0], ShufBytes.size()));
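//! Illustration: how shufb interprets the special mask bytes used above
/*!
  A minimal host-side sketch of shufb's per-byte selection rule, assuming
  the Cell SPU ISA semantics that the 0x80/0xc0/0xe0 constants above rely
  on. The helper and its names are illustrative, not part of this target:

  \code
  #include <stdint.h>

  // 'ab' is the 32-byte concatenation of the two input registers $rA:$rB.
  static uint8_t shufbSelectByte(uint8_t control, const uint8_t ab[32]) {
    if ((control & 0xc0) == 0x80) return 0x00; // 10xxxxxx -> zero byte
    if ((control & 0xe0) == 0xc0) return 0xff; // 110xxxxx -> all-ones byte
    if ((control & 0xe0) == 0xe0) return 0x80; // 111xxxxx -> 0x80
    return ab[control & 0x1f];                 // otherwise: select a byte
  }
  \endcode
*/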
1732 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1733 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1734 /// permutation vector, V3, is monotonically increasing with one "exception"
1735 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1736 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1737 /// In either case, the net result is going to eventually invoke SHUFB to
1738 /// permute/shuffle the bytes from V1 and V2.
1740 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1741 /// generate the control word for byte/halfword/word insertion. This takes care
1742 /// of a single element move from V2 into V1.
1744 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
1745 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1746 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1747 SDValue V1 = Op.getOperand(0);
1748 SDValue V2 = Op.getOperand(1);
1749 DebugLoc dl = Op.getDebugLoc();
1751 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1753 // If we have a single element being moved from V1 to V2, this can be handled
1754 // using the C*[DX] compute mask instructions, but the vector elements have
1755 // to be monotonically increasing with one exception element.
1756 EVT VecVT = V1.getValueType();
1757 EVT EltVT = VecVT.getVectorElementType();
1758 unsigned EltsFromV2 = 0;
1759 unsigned V2Elt = 0;
1760 unsigned V2EltIdx0 = 0;
1761 unsigned CurrElt = 0;
1762 unsigned MaxElts = VecVT.getVectorNumElements();
1763 unsigned PrevElt = 0;
1764 unsigned V0Elt = 0;
1765 bool monotonic = true;
1766 bool rotate = true;
1768 if (EltVT == MVT::i8) {
1769 V2EltIdx0 = 16;
1770 } else if (EltVT == MVT::i16) {
1771 V2EltIdx0 = 8;
1772 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1773 V2EltIdx0 = 4;
1774 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1775 V2EltIdx0 = 2;
1776 } else
1777 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1779 for (unsigned i = 0; i != MaxElts; ++i) {
1780 if (SVN->getMaskElt(i) < 0)
1781 continue;
1783 unsigned SrcElt = SVN->getMaskElt(i);
1785 if (monotonic) {
1786 if (SrcElt >= V2EltIdx0) {
1787 if (++EltsFromV2 == 1) {
1788 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1789 }
1790 } else if (CurrElt != SrcElt) {
1791 monotonic = false;
1792 }
1793 ++CurrElt;
1794 }
1796 if (rotate) {
1798 if (PrevElt > 0 && SrcElt < MaxElts) {
1799 if ((PrevElt == SrcElt - 1)
1800 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1801 PrevElt = SrcElt;
1802 if (SrcElt == 0)
1803 V0Elt = i;
1804 } else {
1805 rotate = false;
1806 }
1807 } else if (PrevElt == 0) {
1808 // First time through; keep track of the previous element
1809 PrevElt = SrcElt;
1810 } else {
1811 // This isn't a rotation; it takes elements from vector 2
1812 rotate = false;
1813 }
1814 }
1815 }
1817 if (EltsFromV2 == 1 && monotonic) {
1818 // Compute mask and shuffle
1819 MachineFunction &MF = DAG.getMachineFunction();
1820 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1821 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1822 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1823 // Initialize temporary register to 0
1824 SDValue InitTempReg =
1825 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1826 // Copy register's contents as index in SHUFFLE_MASK:
1827 SDValue ShufMaskOp =
1828 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1829 DAG.getTargetConstant(V2Elt, MVT::i32),
1830 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1831 // Use shuffle mask in SHUFB synthetic instruction:
1832 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1833 ShufMaskOp);
1834 } else if (rotate) {
1835 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1837 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1838 V1, DAG.getConstant(rotamt, MVT::i16));
1840 // Convert the SHUFFLE_VECTOR mask's input element units to the
1841 // actual bytes.
1842 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1844 SmallVector<SDValue, 16> ResultMask;
1845 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1846 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1848 for (unsigned j = 0; j < BytesPerElement; ++j)
1849 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1852 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1853 &ResultMask[0], ResultMask.size());
1854 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1855 }
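//! Illustration: the rotation case recognized above
/*!
  For example, a <4 x i32> shuffle with mask <1, 2, 3, 0> scans as a pure
  rotation: V0Elt ends up 3, so rotamt = (4 - 3) * 32/8 = 4 bytes, and the
  node becomes (SPUrotbytes_left V1, 4), i.e. a single rotqbyi. A host-side
  sketch of the same byte rotation (illustrative only):

  \code
  #include <stdint.h>
  #include <string.h>

  static void rotateQuadLeft(uint8_t v[16], unsigned rotamt) {
    uint8_t tmp[16];
    for (unsigned i = 0; i < 16; ++i)
      tmp[i] = v[(i + rotamt) & 15];   // byte i takes byte (i + rotamt) mod 16
    memcpy(v, tmp, 16);
  }
  \endcode
*/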
1858 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1859 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1860 DebugLoc dl = Op.getDebugLoc();
1862 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1863 // For a constant, build the appropriate constant vector, which will
1864 // eventually simplify to a vector register load.
1866 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1867 SmallVector<SDValue, 16> ConstVecValues;
1868 EVT VT;
1869 size_t n_copies;
1871 // Create a constant vector:
1872 switch (Op.getValueType().getSimpleVT().SimpleTy) {
1873 default: llvm_unreachable("Unexpected constant value type in "
1874 "LowerSCALAR_TO_VECTOR");
1875 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1876 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1877 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1878 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1879 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1880 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1883 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1884 for (size_t j = 0; j < n_copies; ++j)
1885 ConstVecValues.push_back(CValue);
1887 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1888 &ConstVecValues[0], ConstVecValues.size());
1890 // Otherwise, copy the value from one register to another:
1891 switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1892 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1899 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1906 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1907 EVT VT = Op.getValueType();
1908 SDValue N = Op.getOperand(0);
1909 SDValue Elt = Op.getOperand(1);
1910 DebugLoc dl = Op.getDebugLoc();
1911 SDValue retval;
1913 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1914 // Constant argument:
1915 int EltNo = (int) C->getZExtValue();
1918 if (VT == MVT::i8 && EltNo >= 16)
1919 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1920 else if (VT == MVT::i16 && EltNo >= 8)
1921 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1922 else if (VT == MVT::i32 && EltNo >= 4)
1923 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
1924 else if (VT == MVT::i64 && EltNo >= 2)
1925 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
1927 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1928 // i32 and i64: Element 0 is the preferred slot
1929 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1932 // Need to generate shuffle mask and extract:
1933 int prefslot_begin = -1, prefslot_end = -1;
1934 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1936 switch (VT.getSimpleVT().SimpleTy) {
1937 default:
1938 assert(false && "Invalid value type!");
1939 case MVT::i8: {
1940 prefslot_begin = prefslot_end = 3;
1941 break;
1942 }
1943 case MVT::i16: {
1944 prefslot_begin = 2; prefslot_end = 3;
1945 break;
1946 }
1947 case MVT::i32:
1948 case MVT::f32: {
1949 prefslot_begin = 0; prefslot_end = 3;
1950 break;
1951 }
1952 case MVT::i64:
1953 case MVT::f64: {
1954 prefslot_begin = 0; prefslot_end = 7;
1955 break;
1956 }
1957 }
1959 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1960 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1962 unsigned int ShufBytes[16] = {
1963 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1965 for (int i = 0; i < 16; ++i) {
1966 // zero fill upper part of preferred slot, don't care about the
1967 // rest (it is undefined anyway)
1968 unsigned int mask_val;
1969 if (i <= prefslot_end) {
1970 mask_val =
1971 ((i < prefslot_begin)
1972 ? 0x80
1973 : elt_byte + (i - prefslot_begin));
1975 ShufBytes[i] = mask_val;
1976 } else
1977 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1980 SDValue ShufMask[4];
1981 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1982 unsigned bidx = i * 4;
1983 unsigned int bits = ((ShufBytes[bidx] << 24) |
1984 (ShufBytes[bidx+1] << 16) |
1985 (ShufBytes[bidx+2] << 8) |
1986 ShufBytes[bidx+3]);
1987 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1990 SDValue ShufMaskVec =
1991 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1992 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1994 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1995 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
1996 N, N, ShufMaskVec));
1998 // Variable index: Rotate the requested element into slot 0, then replicate
1999 // slot 0 across the vector
2000 EVT VecVT = N.getValueType();
2001 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2002 llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2006 // Make life easier by making sure the index is zero-extended to i32
2007 if (Elt.getValueType() != MVT::i32)
2008 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2010 // Scale the index to a bit/byte shift quantity
2011 APInt scaleFactor =
2012 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2013 unsigned scaleShift = scaleFactor.logBase2();
2014 SDValue vecShift;
2016 if (scaleShift > 0) {
2017 // Scale the shift factor:
2018 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2019 DAG.getConstant(scaleShift, MVT::i32));
2020 }
2022 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2024 // Replicate the bytes starting at byte 0 across the entire vector (for
2025 // consistency with the notion of a unified register set)
2026 SDValue replicate;
2028 switch (VT.getSimpleVT().SimpleTy) {
2029 default:
2030 llvm_report_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector"
2031 " type");
2032 /*NOTREACHED*/
2033 case MVT::i8: {
2034 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2035 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2036 factor, factor, factor, factor);
2037 break;
2038 }
2039 case MVT::i16: {
2040 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2041 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2042 factor, factor, factor, factor);
2043 break;
2044 }
2045 case MVT::i32:
2046 case MVT::f32: {
2047 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2048 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2049 factor, factor, factor, factor);
2050 break;
2051 }
2052 case MVT::i64:
2053 case MVT::f64: {
2054 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2055 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2056 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2057 loFactor, hiFactor, loFactor, hiFactor);
2058 break;
2059 }
2060 }
2062 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2063 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2064 vecShift, vecShift, replicate));
2065 }
2067 return retval;
2068 }
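//! Illustration: scaling a variable extraction index to a byte shift
/*!
  For the variable-index path above: with <8 x i16>, scaleFactor is
  16/8 = 2, so scaleShift is 1 and the element index becomes a byte offset
  with a single shift. A sketch of the arithmetic (the helper is
  illustrative only):

  \code
  #include <assert.h>

  static unsigned elementIndexToByteOffset(unsigned eltIdx,
                                           unsigned numElts /* 16,8,4,2 */) {
    unsigned bytesPerElt = 16 / numElts;       // a quadword is 16 bytes
    assert((bytesPerElt & (bytesPerElt - 1)) == 0 && "power of two");
    unsigned scaleShift = 0;
    while ((1u << scaleShift) < bytesPerElt)   // logBase2, as above
      ++scaleShift;
    return eltIdx << scaleShift;               // byte shift for SHLQUAD
  }
  \endcode
*/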
2070 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2071 SDValue VecOp = Op.getOperand(0);
2072 SDValue ValOp = Op.getOperand(1);
2073 SDValue IdxOp = Op.getOperand(2);
2074 DebugLoc dl = Op.getDebugLoc();
2075 EVT VT = Op.getValueType();
2077 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(IdxOp);
2078 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2080 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2081 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2082 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2083 DAG.getRegister(SPU::R1, PtrVT),
2084 DAG.getConstant(CN->getSExtValue(), PtrVT));
2085 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2087 SDValue result =
2088 DAG.getNode(SPUISD::SHUFB, dl, VT,
2089 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2090 VecOp,
2091 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2093 return result;
2094 }
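//! Illustration: the control word that CWD-style insertion relies on
/*!
  SHUFFLE_MASK is selected to one of the C*D instructions; for a word
  insertion at byte offset 'ofs', the control is an identity pattern over
  the target vector with the value's preferred-slot bytes spliced in. A
  host-side sketch assuming the usual C*D semantics (names illustrative):

  \code
  #include <stdint.h>

  static void makeWordInsertControl(unsigned ofs, uint8_t control[16]) {
    for (unsigned i = 0; i < 16; ++i)
      control[i] = 0x10 + i;        // pass through $rB (the target vector)
    for (unsigned i = 0; i < 4; ++i)
      control[(ofs & 12) + i] = i;  // take $rA's preferred-slot word
  }
  \endcode
*/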
2096 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2097 const TargetLowering &TLI)
2099 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2100 DebugLoc dl = Op.getDebugLoc();
2101 EVT ShiftVT = TLI.getShiftAmountTy();
2103 assert(Op.getValueType() == MVT::i8);
2104 switch (Opc) {
2105 default:
2106 llvm_unreachable("Unhandled i8 math operator");
2107 /*NOTREACHED*/
2108 break;
2109 case ISD::ADD: {
2110 // 8-bit addition: Promote the arguments up to 16 bits and truncate
2111 // down afterward:
2112 SDValue N1 = Op.getOperand(1);
2113 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2114 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2115 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2116 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2117 }
2120 case ISD::SUB: {
2121 // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
2122 // down afterward:
2123 SDValue N1 = Op.getOperand(1);
2124 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2125 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2126 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2127 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2128 }
2129 case ISD::ROTR:
2130 case ISD::ROTL: {
2131 SDValue N1 = Op.getOperand(1);
2132 EVT N1VT = N1.getValueType();
2134 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2135 if (!N1VT.bitsEq(ShiftVT)) {
2136 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2137 ? ISD::ZERO_EXTEND
2138 : ISD::TRUNCATE;
2139 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2142 // Replicate lower 8 bits into upper 8:
2143 SDValue ExpandArg =
2144 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2145 DAG.getNode(ISD::SHL, dl, MVT::i16,
2146 N0, DAG.getConstant(8, MVT::i32)));
2148 // Truncate back down to i8
2149 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2150 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2151 }
2152 case ISD::SRL:
2153 case ISD::SHL: {
2154 SDValue N1 = Op.getOperand(1);
2155 EVT N1VT = N1.getValueType();
2157 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2158 if (!N1VT.bitsEq(ShiftVT)) {
2159 unsigned N1Opc = ISD::ZERO_EXTEND;
2161 if (N1.getValueType().bitsGT(ShiftVT))
2162 N1Opc = ISD::TRUNCATE;
2164 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2167 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2168 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2169 }
2170 case ISD::SRA: {
2171 SDValue N1 = Op.getOperand(1);
2172 EVT N1VT = N1.getValueType();
2174 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2175 if (!N1VT.bitsEq(ShiftVT)) {
2176 unsigned N1Opc = ISD::SIGN_EXTEND;
2178 if (N1VT.bitsGT(ShiftVT))
2179 N1Opc = ISD::TRUNCATE;
2180 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2183 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2184 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2185 }
2186 case ISD::MUL: {
2187 SDValue N1 = Op.getOperand(1);
2189 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2190 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2191 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2192 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2193 }
2194 }
2196 return SDValue();
2197 }
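//! Illustration: the promote/operate/truncate pattern used for i8 math
/*!
  Every case above is the DAG form of the same scalar idiom: widen both i8
  operands to i16, do the 16-bit operation the SPU actually has, and
  truncate back. In C terms (illustrative only):

  \code
  #include <stdint.h>

  static int8_t addViaI16(int8_t a, int8_t b) {
    return (int8_t)((int16_t)a + (int16_t)b);   // sext, add, truncate
  }

  static uint8_t shlViaI16(uint8_t a, unsigned amt) {
    return (uint8_t)((uint16_t)a << amt);       // zext, shift, truncate
  }
  \endcode
*/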
2200 //! Lower byte immediate operations for v16i8 vectors:
2201 static SDValue
2202 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2203 SDValue ConstVec;
2204 SDValue Arg;
2205 EVT VT = Op.getValueType();
2206 DebugLoc dl = Op.getDebugLoc();
2208 ConstVec = Op.getOperand(0);
2209 Arg = Op.getOperand(1);
2210 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2211 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2212 ConstVec = ConstVec.getOperand(0);
2213 } else {
2214 ConstVec = Op.getOperand(1);
2215 Arg = Op.getOperand(0);
2216 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2217 ConstVec = ConstVec.getOperand(0);
2218 }
2219 }
2220 }
2222 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2223 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2224 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2226 APInt APSplatBits, APSplatUndef;
2227 unsigned SplatBitSize;
2228 bool HasAnyUndefs;
2229 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2231 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2232 HasAnyUndefs, minSplatBits)
2233 && minSplatBits <= SplatBitSize) {
2234 uint64_t SplatBits = APSplatBits.getZExtValue();
2235 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2237 SmallVector<SDValue, 16> tcVec;
2238 tcVec.assign(16, tc);
2239 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2240 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2241 }
2242 }
2244 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2245 // lowered. Return the operation rather than a null SDValue.
2246 return Op;
2247 }
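//! Illustration: what LowerByteImmed rebuilds
/*!
  For example, an AND of a v16i8 value with a splatted byte comes back as
  the same AND, but with the immediate rebuilt from target constants so the
  byte-immediate patterns (ANDBI, ORBI, XORBI) can match during selection.
  A sketch, assuming X, dl and DAG in scope (names illustrative):

  \code
  SmallVector<SDValue, 16> Bytes;
  Bytes.assign(16, DAG.getTargetConstant(0x0f, MVT::i8));
  SDValue Imm = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                            &Bytes[0], Bytes.size());
  SDValue And = DAG.getNode(ISD::AND, dl, MVT::v16i8, X, Imm);
  \endcode
*/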
2249 //! Custom lowering for CTPOP (count population)
2251 Custom lowering code that counts the number of ones in the input
2252 operand. SPU has such an instruction, but it counts the number of
2253 ones per byte, which then have to be accumulated.
2255 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2256 EVT VT = Op.getValueType();
2257 EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2258 VT, (128 / VT.getSizeInBits()));
2259 DebugLoc dl = Op.getDebugLoc();
2261 switch (VT.getSimpleVT().SimpleTy) {
2262 default:
2263 assert(false && "Invalid value type!");
2264 case MVT::i8: {
2265 SDValue N = Op.getOperand(0);
2266 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2268 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2269 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2271 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2272 }
2274 case MVT::i16: {
2275 MachineFunction &MF = DAG.getMachineFunction();
2276 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2278 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2280 SDValue N = Op.getOperand(0);
2281 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2282 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2283 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2285 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2286 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2288 // CNTB_result becomes the chain to which all of the virtual registers
2289 // CNTB_reg, SUM1_reg become associated:
2290 SDValue CNTB_result =
2291 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2293 SDValue CNTB_rescopy =
2294 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2296 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2298 return DAG.getNode(ISD::AND, dl, MVT::i16,
2299 DAG.getNode(ISD::ADD, dl, MVT::i16,
2300 DAG.getNode(ISD::SRL, dl, MVT::i16,
2301 Tmp1, Shift1),
2302 Tmp1),
2303 Mask0);
2304 }
2306 case MVT::i32: {
2307 MachineFunction &MF = DAG.getMachineFunction();
2308 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2310 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2311 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2313 SDValue N = Op.getOperand(0);
2314 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2315 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2316 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2317 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2319 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2320 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2322 // CNTB_result becomes the chain to which all of the virtual registers
2323 // CNTB_reg, SUM1_reg become associated:
2324 SDValue CNTB_result =
2325 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2327 SDValue CNTB_rescopy =
2328 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2330 SDValue Comp1 =
2331 DAG.getNode(ISD::SRL, dl, MVT::i32,
2332 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2333 Shift1);
2335 SDValue Sum1 =
2336 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2337 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2339 SDValue Sum1_rescopy =
2340 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2342 SDValue Comp2 =
2343 DAG.getNode(ISD::SRL, dl, MVT::i32,
2344 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2345 Shift2);
2346 SDValue Sum2 =
2347 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2348 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2350 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2351 }
2352 }
2354 return SDValue();
2355 }
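//! Illustration: the byte-count accumulation done above for i32
/*!
  CNTB produces a per-byte population count; the i32 case then folds the
  four byte counts together with two shift/add steps and masks the sum. A
  host-side sketch of the same arithmetic (illustrative; the builtin is a
  GCC/Clang extension used here only to model CNTB):

  \code
  #include <stdint.h>

  static uint32_t ctpop32ViaBytes(uint32_t x) {
    uint32_t cntb = 0;                     // per-byte counts, like CNTB
    for (unsigned i = 0; i < 32; i += 8)
      cntb |= (uint32_t)__builtin_popcount((x >> i) & 0xff) << i;
    uint32_t sum1 = cntb + (cntb >> 16);   // fold upper halfword in
    uint32_t sum2 = sum1 + (sum1 >> 8);    // fold upper byte in
    return sum2 & 0xff;
  }
  \endcode
*/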
2360 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2362 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2363 All conversions to i64 are expanded to a libcall.
2365 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2366 SPUTargetLowering &TLI) {
2367 EVT OpVT = Op.getValueType();
2368 SDValue Op0 = Op.getOperand(0);
2369 EVT Op0VT = Op0.getValueType();
2371 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2372 || OpVT == MVT::i64) {
2373 // Convert f32 / f64 to i32 / i64 via libcall.
2374 RTLIB::Libcall LC =
2375 (Op.getOpcode() == ISD::FP_TO_SINT)
2376 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2377 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2378 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2379 SDValue Dummy;
2380 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2381 }
2383 return Op;
2384 }
2386 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2388 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2389 All conversions from i64 are expanded to a libcall.
2391 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2392 SPUTargetLowering &TLI) {
2393 EVT OpVT = Op.getValueType();
2394 SDValue Op0 = Op.getOperand(0);
2395 EVT Op0VT = Op0.getValueType();
2397 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2398 || Op0VT == MVT::i64) {
2399 // Convert i32, i64 to f64 via libcall:
2400 RTLIB::Libcall LC =
2401 (Op.getOpcode() == ISD::SINT_TO_FP)
2402 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2403 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2404 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2405 SDValue Dummy;
2406 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2407 }
2409 return Op;
2410 }
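//! Illustration: which conversions become libcalls
/*!
  Only f32<->i32 is handled natively; everything else goes through
  ExpandLibCall. For example, (sint_to_fp:f64 (i32 X)) becomes a call to
  the RTLIB entry RTLIB::getSINTTOFP(MVT::i32, MVT::f64) - conventionally
  the libgcc symbol __floatsidf, assumed here. In C terms (illustrative):

  \code
  extern "C" double __floatsidf(int);  // usual libgcc name, assumed

  static double sintToDouble(int x) {
    return __floatsidf(x);             // what the expanded DAG amounts to
  }
  \endcode
*/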
2412 //! Lower ISD::SETCC
2414 This handles MVT::f64 (double floating point) condition lowering
2416 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2417 const TargetLowering &TLI) {
2418 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2419 DebugLoc dl = Op.getDebugLoc();
2420 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2422 SDValue lhs = Op.getOperand(0);
2423 SDValue rhs = Op.getOperand(1);
2424 EVT lhsVT = lhs.getValueType();
2425 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2427 EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2428 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2429 EVT IntVT(MVT::i64);
2431 // Take advantage of the fact that (truncate (srl arg, 32)) is efficiently
2432 // selected to a NOP:
2433 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2434 SDValue lhsHi32 =
2435 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2436 DAG.getNode(ISD::SRL, dl, IntVT,
2437 i64lhs, DAG.getConstant(32, MVT::i32)));
2438 SDValue lhsHi32abs =
2439 DAG.getNode(ISD::AND, dl, MVT::i32,
2440 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2441 SDValue lhsLo32 =
2442 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2444 // SETO and SETUO only use the lhs operand:
2445 if (CC->get() == ISD::SETO) {
2446 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2447 // SETUO:
2448 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2449 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2450 DAG.getSetCC(dl, ccResultVT,
2451 lhs, DAG.getConstantFP(0.0, lhsVT),
2452 ISD::SETUO),
2453 DAG.getConstant(ccResultAllOnes, ccResultVT));
2454 } else if (CC->get() == ISD::SETUO) {
2455 // Evaluates to true if Op0 is [SQ]NaN
2456 return DAG.getNode(ISD::AND, dl, ccResultVT,
2457 DAG.getSetCC(dl, ccResultVT,
2458 lhsHi32abs,
2459 DAG.getConstant(0x7ff00000, MVT::i32),
2460 ISD::SETGE),
2461 DAG.getSetCC(dl, ccResultVT,
2462 lhsLo32,
2463 DAG.getConstant(0, MVT::i32),
2464 ISD::SETGT));
2465 }
2467 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2468 SDValue rhsHi32 =
2469 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2470 DAG.getNode(ISD::SRL, dl, IntVT,
2471 i64rhs, DAG.getConstant(32, MVT::i32)));
2473 // If a value is negative, subtract from the sign magnitude constant:
2474 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2476 // Convert the sign-magnitude representation into 2's complement:
2477 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2478 lhsHi32, DAG.getConstant(31, MVT::i32));
2479 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2480 SDValue lhsSelect =
2481 DAG.getNode(ISD::SELECT, dl, IntVT,
2482 lhsSelectMask, lhsSignMag2TC, i64lhs);
2484 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2485 rhsHi32, DAG.getConstant(31, MVT::i32));
2486 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2487 SDValue rhsSelect =
2488 DAG.getNode(ISD::SELECT, dl, IntVT,
2489 rhsSelectMask, rhsSignMag2TC, i64rhs);
2491 unsigned compareOp = 0;
2493 switch (CC->get()) {
2494 case ISD::SETOEQ:
2495 case ISD::SETUEQ:
2496 compareOp = ISD::SETEQ; break;
2497 case ISD::SETOGT:
2498 case ISD::SETUGT:
2499 compareOp = ISD::SETGT; break;
2500 case ISD::SETOGE:
2501 case ISD::SETUGE:
2502 compareOp = ISD::SETGE; break;
2503 case ISD::SETOLT:
2504 case ISD::SETULT:
2505 compareOp = ISD::SETLT; break;
2506 case ISD::SETOLE:
2507 case ISD::SETULE:
2508 compareOp = ISD::SETLE; break;
2509 case ISD::SETONE:
2510 case ISD::SETUNE:
2511 compareOp = ISD::SETNE; break;
2512 default:
2513 llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2514 }
2516 SDValue result =
2517 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2518 (ISD::CondCode) compareOp);
2520 if ((CC->get() & 0x8) == 0) {
2521 // Ordered comparison:
2522 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2523 lhs, DAG.getConstantFP(0.0, MVT::f64),
2524 ISD::SETO);
2525 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2526 rhs, DAG.getConstantFP(0.0, MVT::f64),
2527 ISD::SETO);
2528 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2530 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2531 }
2533 return result;
2534 }
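//! Illustration: the sign-magnitude to two's complement trick
/*!
  IEEE-754 doubles compare correctly as signed integers once negative
  values are mirrored, which is exactly what the signMag2TC subtraction
  above does. A host-side sketch (illustrative only):

  \code
  #include <stdint.h>

  static int64_t orderKeyForF64Bits(uint64_t bits) {
    const uint64_t signBit = 0x8000000000000000ULL;
    // Negative: subtract from the sign-magnitude constant; positive: as-is.
    return (bits & signBit) ? (int64_t)(signBit - bits) : (int64_t)bits;
  }
  // For non-NaN doubles a and b: a < b iff
  // orderKeyForF64Bits(bitsOf(a)) < orderKeyForF64Bits(bitsOf(b)).
  \endcode
*/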
2536 //! Lower ISD::SELECT_CC
2538 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2539 SELB instruction.
2541 \note Need to revisit this in the future: if the code path through the true
2542 and false value computations is longer than the latency of a branch (6
2543 cycles), then it would be more advantageous to branch and insert a new basic
2544 block and branch on the condition. However, this code does not make that
2545 assumption, given the simplistic uses so far.
2548 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2549 const TargetLowering &TLI) {
2550 EVT VT = Op.getValueType();
2551 SDValue lhs = Op.getOperand(0);
2552 SDValue rhs = Op.getOperand(1);
2553 SDValue trueval = Op.getOperand(2);
2554 SDValue falseval = Op.getOperand(3);
2555 SDValue condition = Op.getOperand(4);
2556 DebugLoc dl = Op.getDebugLoc();
2558 // NOTE: SELB's arguments: $rA, $rB, $mask
2560 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2561 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2562 // condition was true and 0s where the condition was false. Hence, the
2563 // arguments to SELB get reversed.
2565 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2566 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2567 // with another "cannot select select_cc" assert:
2569 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2570 TLI.getSetCCResultType(Op.getValueType()),
2571 lhs, rhs, condition);
2572 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2573 }
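//! Illustration: SELB's bitwise select
/*!
  SELB computes (mask & $rB) | (~mask & $rA); the setcc mask is all ones
  where the condition held, which is why trueval is passed in the $rB
  position above. Host-side sketch (illustrative only):

  \code
  #include <stdint.h>

  static uint32_t selb(uint32_t rA, uint32_t rB, uint32_t mask) {
    return (rB & mask) | (rA & ~mask);
  }
  \endcode
*/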
2575 //! Custom lower ISD::TRUNCATE
2576 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2577 {
2578 // Type to truncate to
2579 EVT VT = Op.getValueType();
2580 MVT simpleVT = VT.getSimpleVT();
2581 EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2582 VT, (128 / VT.getSizeInBits()));
2583 DebugLoc dl = Op.getDebugLoc();
2585 // Type to truncate from
2586 SDValue Op0 = Op.getOperand(0);
2587 EVT Op0VT = Op0.getValueType();
2589 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2590 // Create shuffle mask, least significant doubleword of quadword
2591 unsigned maskHigh = 0x08090a0b;
2592 unsigned maskLow = 0x0c0d0e0f;
2593 // Use a shuffle to perform the truncation
2594 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2595 DAG.getConstant(maskHigh, MVT::i32),
2596 DAG.getConstant(maskLow, MVT::i32),
2597 DAG.getConstant(maskHigh, MVT::i32),
2598 DAG.getConstant(maskLow, MVT::i32));
2600 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2601 Op0, Op0, shufMask);
2603 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2606 return SDValue(); // Leave the truncate unmolested
2607 }
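//! Illustration: the i128 -> i64 truncation mask
/*!
  With big-endian byte numbering, bytes 8..15 of the quadword hold the
  least-significant doubleword of an i128, so the shuffle mask built above
  is just those byte indices, repeated to fill the control register:

  \code
  static const unsigned char TruncI128Mask[16] = {
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,  // -> preferred slot
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f   // (repeated filler)
  };
  \endcode
*/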
2610 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2611 * algorithm is to duplicate the sign bit using rotmai to generate at
2612 * least one byte full of sign bits. Then propagate the "sign-byte" into
2613 * the leftmost words and the i64/i32 into the rightmost words using shufb.
2615 * @param Op The sext operand
2616 * @param DAG The current DAG
2617 * @return The SDValue with the entire instruction sequence
2619 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2620 {
2621 DebugLoc dl = Op.getDebugLoc();
2623 // Type to extend to
2624 MVT OpVT = Op.getValueType().getSimpleVT();
2625 EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2626 OpVT, (128 / OpVT.getSizeInBits()));
2628 // Type to extend from
2629 SDValue Op0 = Op.getOperand(0);
2630 MVT Op0VT = Op0.getValueType().getSimpleVT();
2632 // The type to extend to needs to be a i128 and
2633 // the type to extend from needs to be i64 or i32.
2634 assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2635 "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2637 // Create shuffle mask
2638 unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2639 unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
2640 unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2641 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2642 DAG.getConstant(mask1, MVT::i32),
2643 DAG.getConstant(mask1, MVT::i32),
2644 DAG.getConstant(mask2, MVT::i32),
2645 DAG.getConstant(mask3, MVT::i32));
2647 // Word wise arithmetic right shift to generate at least one byte
2648 // that contains sign bits.
2649 MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2650 SDValue sraVal = DAG.getNode(ISD::SRA,
2651 dl,
2652 mvt,
2653 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2654 DAG.getConstant(31, MVT::i32));
2656 // Shuffle bytes - Copy the sign bits into the upper 64 bits
2657 // and the input value into the lower 64 bits.
2658 SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2659 DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2661 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2662 }
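//! Illustration: the sign-extension shuffle masks
/*!
  Control byte 0x10 selects byte 0 of the second shufb operand, i.e. the
  sign-filled word produced by the SRA, while 0x00..0x07 select the data
  bytes of the extended input. Spelled out per 32-bit mask word:

  \code
  // i64 -> i128: eight sign bytes, then the eight data bytes:
  static const unsigned SExt64Mask[4] = { 0x10101010, 0x10101010,
                                          0x00010203, 0x04050607 };
  // i32 -> i128: twelve sign bytes, then the four data bytes:
  static const unsigned SExt32Mask[4] = { 0x10101010, 0x10101010,
                                          0x10101010, 0x00010203 };
  \endcode
*/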
2664 //! Custom (target-specific) lowering entry point
2665 /*!
2666 This is where LLVM's DAG selection process calls to do target-specific
2667 lowering of nodes.
2668 */
2669 SDValue
2670 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2671 {
2672 unsigned Opc = (unsigned) Op.getOpcode();
2673 EVT VT = Op.getValueType();
2675 switch (Opc) {
2676 default: {
2678 errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2679 errs() << "Op.getOpcode() = " << Opc << "\n";
2680 errs() << "*Op.getNode():\n";
2681 Op.getNode()->dump();
2683 llvm_unreachable(0);
2684 }
2685 case ISD::LOAD:
2689 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2690 case ISD::STORE:
2691 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2692 case ISD::ConstantPool:
2693 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2694 case ISD::GlobalAddress:
2695 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2696 case ISD::JumpTable:
2697 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2698 case ISD::ConstantFP:
2699 return LowerConstantFP(Op, DAG);
2701 // i8, i64 math ops:
2702 case ISD::ADD:
2703 case ISD::SUB:
2704 case ISD::ROTR:
2705 case ISD::ROTL:
2706 case ISD::SRL:
2707 case ISD::SHL:
2708 case ISD::SRA: {
2709 if (VT == MVT::i8)
2710 return LowerI8Math(Op, DAG, Opc, *this);
2711 break;
2712 }
2714 case ISD::FP_TO_SINT:
2715 case ISD::FP_TO_UINT:
2716 return LowerFP_TO_INT(Op, DAG, *this);
2718 case ISD::SINT_TO_FP:
2719 case ISD::UINT_TO_FP:
2720 return LowerINT_TO_FP(Op, DAG, *this);
2722 // Vector-related lowering.
2723 case ISD::BUILD_VECTOR:
2724 return LowerBUILD_VECTOR(Op, DAG);
2725 case ISD::SCALAR_TO_VECTOR:
2726 return LowerSCALAR_TO_VECTOR(Op, DAG);
2727 case ISD::VECTOR_SHUFFLE:
2728 return LowerVECTOR_SHUFFLE(Op, DAG);
2729 case ISD::EXTRACT_VECTOR_ELT:
2730 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2731 case ISD::INSERT_VECTOR_ELT:
2732 return LowerINSERT_VECTOR_ELT(Op, DAG);
2734 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2735 case ISD::AND:
2736 case ISD::OR:
2737 case ISD::XOR:
2738 return LowerByteImmed(Op, DAG);
2740 // Vector and i8 multiply:
2741 case ISD::MUL:
2742 if (VT == MVT::i8)
2743 return LowerI8Math(Op, DAG, Opc, *this);
2744 break;
2745 case ISD::CTPOP:
2746 return LowerCTPOP(Op, DAG);
2748 case ISD::SELECT_CC:
2749 return LowerSELECT_CC(Op, DAG, *this);
2751 case ISD::SETCC:
2752 return LowerSETCC(Op, DAG, *this);
2754 case ISD::TRUNCATE:
2755 return LowerTRUNCATE(Op, DAG);
2757 case ISD::SIGN_EXTEND:
2758 return LowerSIGN_EXTEND(Op, DAG);
2759 }
2761 return SDValue();
2762 }
2764 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2765 SmallVectorImpl<SDValue> &Results,
2766 SelectionDAG &DAG) {
2768 #if 0
2769 unsigned Opc = (unsigned) N->getOpcode();
2770 EVT OpVT = N->getValueType(0);
2772 switch (Opc) {
2773 default: {
2774 errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2775 errs() << "Op.getOpcode() = " << Opc << "\n";
2776 errs() << "*Op.getNode():\n";
2784 /* Otherwise, return unchanged */
2787 //===----------------------------------------------------------------------===//
2788 // Target Optimization Hooks
2789 //===----------------------------------------------------------------------===//
2791 SDValue
2792 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2793 {
2794 #if 0
2795 TargetMachine &TM = getTargetMachine();
2796 #endif
2797 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2798 SelectionDAG &DAG = DCI.DAG;
2799 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2800 EVT NodeVT = N->getValueType(0); // The node's value type
2801 EVT Op0VT = Op0.getValueType(); // The first operand's result
2802 SDValue Result; // Initially, empty result
2803 DebugLoc dl = N->getDebugLoc();
2805 switch (N->getOpcode()) {
2806 default: break;
2807 case ISD::ADD: {
2808 SDValue Op1 = N->getOperand(1);
2810 if (Op0.getOpcode() == SPUISD::IndirectAddr
2811 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2812 // Normalize the operands to reduce repeated code
2813 SDValue IndirectArg = Op0, AddArg = Op1;
2815 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2816 IndirectArg = Op1;
2817 AddArg = Op0;
2818 }
2820 if (isa<ConstantSDNode>(AddArg)) {
2821 ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
2822 SDValue IndOp1 = IndirectArg.getOperand(1);
2824 if (CN0->isNullValue()) {
2825 // (add (SPUindirect <arg>, <arg>), 0) ->
2826 // (SPUindirect <arg>, <arg>)
2828 #if !defined(NDEBUG)
2829 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2830 errs() << "\n"
2831 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2832 << "With:    (SPUindirect <arg>, <arg>)\n";
2833 }
2834 #endif
2836 return IndirectArg;
2837 } else if (isa<ConstantSDNode>(IndOp1)) {
2838 // (add (SPUindirect <arg>, <const>), <const>) ->
2839 // (SPUindirect <arg>, <const + const>)
2840 ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
2841 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2842 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2844 #if !defined(NDEBUG)
2845 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2846 errs() << "\n"
2847 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2848 << "), " << CN0->getSExtValue() << ")\n"
2849 << "With: (SPUindirect <arg>, "
2850 << combinedConst << ")\n";
2851 }
2852 #endif
2854 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2855 IndirectArg, combinedValue);
2856 }
2857 }
2858 }
2859 break;
2860 }
2861 case ISD::SIGN_EXTEND:
2862 case ISD::ZERO_EXTEND:
2863 case ISD::ANY_EXTEND: {
2864 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2865 // (any_extend (SPUextract_elt0 <arg>)) ->
2866 // (SPUextract_elt0 <arg>)
2867 // Types must match, however...
2868 #if !defined(NDEBUG)
2869 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2870 errs() << "\nReplace: ";
2872 errs() << "\nWith: ";
2873 Op0.getNode()->dump(&DAG);
2882 case SPUISD::IndirectAddr: {
2883 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2884 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2885 if (CN != 0 && CN->getZExtValue() == 0) {
2886 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2887 // (SPUaform <addr>, 0)
2889 DEBUG(errs() << "Replace: ");
2890 DEBUG(N->dump(&DAG));
2891 DEBUG(errs() << "\nWith: ");
2892 DEBUG(Op0.getNode()->dump(&DAG));
2893 DEBUG(errs() << "\n");
2897 } else if (Op0.getOpcode() == ISD::ADD) {
2898 SDValue Op1 = N->getOperand(1);
2899 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2900 // (SPUindirect (add <arg>, <arg>), 0) ->
2901 // (SPUindirect <arg>, <arg>)
2902 if (CN1->isNullValue()) {
2904 #if !defined(NDEBUG)
2905 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2906 errs() << "\n"
2907 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2908 << "With:    (SPUindirect <arg>, <arg>)\n";
2909 }
2910 #endif
2912 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2913 Op0.getOperand(0), Op0.getOperand(1));
2919 case SPUISD::SHLQUAD_L_BITS:
2920 case SPUISD::SHLQUAD_L_BYTES:
2921 case SPUISD::ROTBYTES_LEFT: {
2922 SDValue Op1 = N->getOperand(1);
2924 // Kill degenerate vector shifts:
2925 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2926 if (CN->isNullValue()) {
2927 Result = Op0;
2928 }
2929 }
2930 break;
2931 }
2932 case SPUISD::PREFSLOT2VEC: {
2933 switch (Op0.getOpcode()) {
2934 default:
2935 break;
2936 case ISD::ANY_EXTEND:
2937 case ISD::ZERO_EXTEND:
2938 case ISD::SIGN_EXTEND: {
2939 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2941 // but only if the SPUprefslot2vec and <arg> types match.
2942 SDValue Op00 = Op0.getOperand(0);
2943 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2944 SDValue Op000 = Op00.getOperand(0);
2945 if (Op000.getValueType() == NodeVT) {
2946 Result = Op000;
2947 }
2948 }
2949 break;
2950 }
2951 case SPUISD::VEC2PREFSLOT: {
2952 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2953 // <arg>
2954 Result = Op0.getOperand(0);
2955 break;
2956 }
2957 }
2958 break;
2959 }
2960 }
2962 // Otherwise, return unchanged.
2964 if (Result.getNode()) {
2965 DEBUG(errs() << "\nReplace.SPU: ");
2966 DEBUG(N->dump(&DAG));
2967 DEBUG(errs() << "\nWith: ");
2968 DEBUG(Result.getNode()->dump(&DAG));
2969 DEBUG(errs() << "\n");
2976 //===----------------------------------------------------------------------===//
2977 // Inline Assembly Support
2978 //===----------------------------------------------------------------------===//
2980 /// getConstraintType - Given a constraint letter, return the type of
2981 /// constraint it is for this target.
2982 SPUTargetLowering::ConstraintType
2983 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2984 if (ConstraintLetter.size() == 1) {
2985 switch (ConstraintLetter[0]) {
2986 default: break;
2987 case 'b':
2988 case 'r':
2989 case 'f':
2990 case 'v':
2991 case 'y':
2992 return C_RegisterClass;
2993 }
2994 }
2995 return TargetLowering::getConstraintType(ConstraintLetter);
2998 std::pair<unsigned, const TargetRegisterClass*>
2999 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3000 EVT VT) const
3001 {
3002 if (Constraint.size() == 1) {
3003 // GCC RS6000 Constraint Letters
3004 switch (Constraint[0]) {
3005 case 'b':
3006 case 'r':
3007 if (VT == MVT::i64)
3008 return std::make_pair(0U, SPU::R64CRegisterClass);
3009 return std::make_pair(0U, SPU::R32CRegisterClass);
3010 case 'f':
3011 if (VT == MVT::f32)
3012 return std::make_pair(0U, SPU::R32FPRegisterClass);
3013 else if (VT == MVT::f64)
3014 return std::make_pair(0U, SPU::R64FPRegisterClass);
3015 break;
3016 case 'v':
3017 return std::make_pair(0U, SPU::GPRCRegisterClass);
3018 }
3019 }
3021 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3024 //! Compute used/known bits for a SPU operand
3025 void
3026 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3027 const APInt &Mask,
3028 APInt &KnownZero,
3029 APInt &KnownOne,
3030 const SelectionDAG &DAG,
3031 unsigned Depth ) const {
3032 #if 0
3033 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3035 switch (Op.getOpcode()) {
3036 default:
3037 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3038 break;
3043 case SPUISD::PREFSLOT2VEC:
3044 case SPUISD::LDRESULT:
3045 case SPUISD::VEC2PREFSLOT:
3046 case SPUISD::SHLQUAD_L_BITS:
3047 case SPUISD::SHLQUAD_L_BYTES:
3048 case SPUISD::VEC_ROTL:
3049 case SPUISD::VEC_ROTR:
3050 case SPUISD::ROTBYTES_LEFT:
3051 case SPUISD::SELECT_MASK:
3052 break;
3053 }
3054 #endif
3055 }
3057 unsigned
3058 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3059 unsigned Depth) const {
3060 switch (Op.getOpcode()) {
3061 default:
3062 return 1;
3064 case ISD::SETCC: {
3065 EVT VT = Op.getValueType();
3067 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3068 VT = MVT::i32;
3069 }
3070 return VT.getSizeInBits();
3071 }
3072 }
3073 }
3075 // LowerAsmOperandForConstraint
3076 void
3077 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3078 char ConstraintLetter,
3079 bool hasMemory,
3080 std::vector<SDValue> &Ops,
3081 SelectionDAG &DAG) const {
3082 // Default, for the time being, to the base class handler
3083 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3084 Ops, DAG);
3085 }
3087 /// isLegalAddressImmediate - Return true if the integer value can be used
3088 /// as the offset of the target addressing mode.
3089 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3090 const Type *Ty) const {
3091 // SPU's addresses span a 256K range, i.e., a signed 18-bit immediate:
3092 return (V > -(1 << 18) && V < (1 << 18) - 1);
3095 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3096 return false;
3097 }
3099 bool
3100 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3101 // The SPU target isn't yet aware of offsets.
3102 return false;
3103 }