//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameLowering.h"
#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
  // Used in getTargetNodeName() below
  std::map<unsigned, const char *> node_names;

  // Byte offset of the preferred slot (counted from the MSB)
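  // On SPU every scalar lives in a 128-bit register; the "preferred slot" is
  // the fixed byte position inside that register where scalar operations
  // expect the value (the tail end of the first 32-bit word for sub-word
  // types). The custom loads/stores below rotate data into and out of this
  // slot.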
  int prefslotOffset(EVT VT) {
    int retval = 0;
    if (VT==MVT::i1) retval=3;
    if (VT==MVT::i8) retval=3;
    if (VT==MVT::i16) retval=2;

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  static SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    Type *RetTy =
      Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, TLI.getLibcallCallingConv(LC), false,
                      /*isReturnValueUsed=*/true,
                      Callee, Args, DAG, Op.getDebugLoc());

    return CallInfo.first;
  }
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
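  // The f64 case is handled by LowerConstantFP() below: the constant's bit
  // pattern is splatted into a v2i64 BUILD_VECTOR and the preferred slot is
  // read back out, avoiding a constant-pool load.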
  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }
  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM, MVT::i8, Expand);
  setOperationAction(ISD::UREM, MVT::i8, Expand);
  setOperationAction(ISD::SDIV, MVT::i8, Expand);
  setOperationAction(ISD::UDIV, MVT::i8, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
  setOperationAction(ISD::SREM, MVT::i16, Expand);
  setOperationAction(ISD::UREM, MVT::i16, Expand);
  setOperationAction(ISD::SDIV, MVT::i16, Expand);
  setOperationAction(ISD::UDIV, MVT::i16, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIV, MVT::i64, Expand);
  setOperationAction(ISD::UDIV, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SREM, MVT::i128, Expand);
  setOperationAction(ISD::UREM, MVT::i128, Expand);
  setOperationAction(ISD::SDIV, MVT::i128, Expand);
  setOperationAction(ISD::UDIV, MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: a hardware fsqrt sequence
  // could be used here instead).
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
  setOperationAction(ISD::MULHU, MVT::i8, Expand);
  setOperationAction(ISD::MULHS, MVT::i8, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU, MVT::i16, Expand);
  setOperationAction(ISD::MULHS, MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  setOperationAction(ISD::MULHS, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i64, Expand);
  setOperationAction(ISD::MULHS, MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8, Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP. It does have CTLZ support for i32;
  // CTPOP has to be custom lowered.
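  // (The custom CTPOP lowering is built on the SPU CNTB instruction, which
  // counts the one bits in each byte; see SPUISD::CNTB below.)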
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);

  setOperationAction(ISD::CTTZ , MVT::i8, Expand);
  setOperationAction(ISD::CTTZ , MVT::i16, Expand);
  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);
  setOperationAction(ISD::CTTZ , MVT::i128, Expand);

  setOperationAction(ISD::CTLZ , MVT::i8, Promote);
  setOperationAction(ISD::CTLZ , MVT::i16, Promote);
  setOperationAction(ISD::CTLZ , MVT::i32, Legal);
  setOperationAction(ISD::CTLZ , MVT::i64, Expand);
  setOperationAction(ISD::CTLZ , MVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
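  // (This maps directly onto the SPU SELB "select bits" instruction, exposed
  // as SPUISD::SELB below.)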
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BITCAST, MVT::i32, Legal);
  setOperationAction(ISD::BITCAST, MVT::f32, Legal);
  setOperationAction(ISD::BITCAST, MVT::i64, Legal);
  setOperationAction(ISD::BITCAST, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
  }
  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is legal for the supported vector types as well.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND, VT, Legal);
    setOperationAction(ISD::OR, VT, Legal);
    setOperationAction(ISD::XOR, VT, Legal);
    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE, VT, Custom);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    if (isTypeLegal(VT)) {
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::ConstantPool, VT, Custom);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?

  setStackPointerRegisterToSaveRestore(SPU::R1);
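  // (R1 is the stack pointer register in the SPU ABI.)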
  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  setMinFunctionAlignment(3);
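  // (The argument is log2 of the alignment, i.e. functions are aligned to
  // 8 bytes.)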
  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(Sched::RegPressure);
}
const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
    node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}
//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  MVT::SimpleValueType retval;

  switch(VT.getSimpleVT().SimpleTy){
  case MVT::i1:
  case MVT::i8:
    retval = MVT::i8; break;
  case MVT::i16:
    retval = MVT::i16; break;
  case MVT::i32:
  default:
    retval = MVT::i32;
  }
  return retval;
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"
//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32, ch = bitconvert %2
%4  f32 = vec2perfslot %3
%5  f64 = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  int pso = prefslotOffset(InVT);
  DebugLoc dl = Op.getDebugLoc();
  EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
                                                      (128 / InVT.getSizeInBits()));

  assert( LN->getAddressingMode() == ISD::UNINDEXED
          && "we should get only UNINDEXED addresses");
  // clean aligned loads can be selected as-is
  if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
    return SDValue();

  // Get pointerinfos to the memory chunk(s) that contain the data to load
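  // (The access may straddle a 16-byte boundary, so we describe both the
  // containing quadword, lowMemPtr, and the following one, highMemPtr.)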
  uint64_t mpi_offset = LN->getPointerInfo().Offset;
  mpi_offset -= mpi_offset%16;
  MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
  MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);

  SDValue result;
  SDValue basePtr = LN->getBasePtr();
  SDValue rotate;

  if ((alignment%16) == 0) {
    ConstantSDNode *CN;

    // Special cases for a known aligned load to simplify the base pointer
    // and the rotation amount:
    if (basePtr.getOpcode() == ISD::ADD
        && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
      // Known offset into basePtr
      int64_t offset = CN->getSExtValue();
      int64_t rotamt = int64_t((offset & 0xf) - pso);

      rotate = DAG.getConstant(rotamt, MVT::i16);

      // Simplify the base pointer for this case:
      basePtr = basePtr.getOperand(0);
      if ((offset & ~0xf) > 0) {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant((offset & ~0xf), PtrVT));
      }
    } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
               || (basePtr.getOpcode() == SPUISD::IndirectAddr
                   && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                   && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
      // Plain aligned a-form address: rotate into preferred slot
      // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
      int64_t rotamt = -pso;
      rotate = DAG.getConstant(rotamt, MVT::i16);
    } else {
      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      int64_t rotamt = -pso;
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(rotamt, PtrVT));
    }
  } else {
    // Unaligned load: must be more pessimistic about addressing modes:
    if (basePtr.getOpcode() == ISD::ADD) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
      SDValue Flag;

      SDValue Op0 = basePtr.getOperand(0);
      SDValue Op1 = basePtr.getOperand(1);

      if (isa<ConstantSDNode>(Op1)) {
        // Convert the (add <ptr>, <const>) to an indirect address contained
        // in a register. Note that this is done because we need to avoid
        // creating a 0(reg) d-form address due to the SPU's block loads.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
      } else {
        // Convert the (add <arg1>, <arg2>) to an indirect address, which
        // will likely be lowered as a reg(reg) x-form address.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
      }
    } else {
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }

    // Offset the rotate amount by the basePtr and the preferred slot
    // byte offset
    rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                         basePtr,
                         DAG.getConstant(-pso, PtrVT));
  }

  // Do the load as a i128 to allow possible shifting
  SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
                            lowMemPtr,
                            LN->isVolatile(), LN->isNonTemporal(), 16);

  // When the size is not greater than alignment we get all data with just
  // one load
  if (alignment >= InVT.getSizeInBits()/8) {
    the_chain = low.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
                         low.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BITCAST, dl, vecVT, result));
  } else {
    // When alignment is less than the size, we might need (known only at
    // run-time) two loads
    // TODO: if the memory address is composed only from constants, we have
    // extra knowledge, and might avoid the second load
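    // The scheme below loads the containing quadword and the following one,
    // shifts each by the byte offset within the quadword, and ORs the two
    // halves back together before extracting the preferred slot.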
    // storage position offset from lower 16 byte aligned memory chunk
    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
                                 basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
    // get a registerfull of ones. (this implementation is a workaround: LLVM
    // cannot handle 128 bit signed int constants)
    SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
    ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);

    SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
                               DAG.getNode(ISD::ADD, dl, PtrVT,
                                           basePtr,
                                           DAG.getConstant(16, PtrVT)),
                               highMemPtr,
                               LN->isVolatile(), LN->isNonTemporal(), 16);

    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
                            high.getValue(1));

    // Shift the (possible) high part right to compensate for the misalignment.
    // if there is no highpart (i.e. value is i64 and offset is 4), this
    // will zero out the high value.
    high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
                       DAG.getNode(ISD::SUB, dl, MVT::i32,
                                   DAG.getConstant( 16, MVT::i32),
                                   offset));

    // Shift the low similarly
    // TODO: add SPUISD::SHL_BYTES
    low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );

    // Merge the two parts
    result = DAG.getNode(ISD::BITCAST, dl, vecVT,
                         DAG.getNode(ISD::OR, dl, MVT::i128, low, high));

    if (!InVT.isVector()) {
      result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
    }
  }

  // Handle extending loads by extending the scalar result:
  if (ExtType == ISD::SEXTLOAD) {
    result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
  } else if (ExtType == ISD::ZEXTLOAD) {
    result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
  } else if (ExtType == ISD::EXTLOAD) {
    unsigned NewOpc = ISD::ANY_EXTEND;

    if (OutVT.isFloatingPoint())
      NewOpc = ISD::FP_EXTEND;

    result = DAG.getNode(NewOpc, dl, OutVT, result);
  }

  SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
  SDValue retops[2] = {
    result,
    the_chain
  };

  result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                       retops, sizeof(retops) / sizeof(retops[0]));
  return result;
}
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
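// For stores that are narrower than a quadword or that are misaligned, this
// becomes a read-modify-write of one or two 16-byte blocks, as implemented
// below.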
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
                                                      (128 / StVT.getSizeInBits()));
  // Get pointerinfos to the memory chunk(s) that contain the data to store
  uint64_t mpi_offset = SN->getPointerInfo().Offset;
  mpi_offset -= mpi_offset%16;
  MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
  MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);

  assert( SN->getAddressingMode() == ISD::UNINDEXED
          && "we should get only UNINDEXED addresses");
  // clean aligned stores can be selected as-is
  if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
    return SDValue();
  SDValue alignLoadVec;
  SDValue basePtr = SN->getBasePtr();
  SDValue the_chain = SN->getChain();
  SDValue insertEltOffs;

  if ((alignment%16) == 0) {
    ConstantSDNode *CN;

    // Special cases for a known aligned load to simplify the base pointer
    // and insertion byte:
    if (basePtr.getOpcode() == ISD::ADD
        && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
      // Known offset into basePtr
      int64_t offset = CN->getSExtValue();

      // Simplify the base pointer for this case:
      basePtr = basePtr.getOperand(0);
      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant((offset & 0xf), PtrVT));

      if ((offset & ~0xf) > 0) {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant((offset & ~0xf), PtrVT));
      }
    } else {
      // Otherwise, assume it's at byte 0 of basePtr
      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }
  } else {
    // Unaligned store: must be more pessimistic about addressing modes:
    if (basePtr.getOpcode() == ISD::ADD) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
      SDValue Flag;

      SDValue Op0 = basePtr.getOperand(0);
      SDValue Op1 = basePtr.getOperand(1);

      if (isa<ConstantSDNode>(Op1)) {
        // Convert the (add <ptr>, <const>) to an indirect address contained
        // in a register. Note that this is done because we need to avoid
        // creating a 0(reg) d-form address due to the SPU's block loads.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
      } else {
        // Convert the (add <arg1>, <arg2>) to an indirect address, which
        // will likely be lowered as a reg(reg) x-form address.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
      }
    } else {
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }

    // Insertion point is solely determined by basePtr's contents
    insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                basePtr,
                                DAG.getConstant(0, PtrVT));
  }
  // Load the lower part of the memory to which to store.
  SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
                            lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16);

  SDValue result;

  // if we don't need to store over the 16 byte boundary, one store suffices
  if (alignment >= StVT.getSizeInBits()/8) {
    the_chain = low.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(low);
    SDValue theValue = SN->getValue();

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      errs() << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      errs() << "\n";
    }
#endif

    SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
                                      insertEltOffs);
    SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
                                      theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, low,
                         DAG.getNode(ISD::BITCAST, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          lowMemPtr,
                          LN->isVolatile(), LN->isNonTemporal(),
                          16);
  } else {
    // do the store when it might cross the 16 byte memory access boundary.

    // TODO issue a warning if SN->isVolatile()== true? This is likely not
    // what the user wanted.

    // address offset from nearest lower 16-byte aligned address
    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
                                 basePtr,
                                 DAG.getConstant(0xf, MVT::i32));
    // 16 - offset
    SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                       DAG.getConstant( 16, MVT::i32),
                                       offset);
    // 16 - sizeof(Value)
    SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                  DAG.getConstant( 16, MVT::i32),
                                  DAG.getConstant( VT.getSizeInBits()/8,
                                                   MVT::i32));
    // get a registerfull of ones
    SDValue ones = DAG.getConstant(-1, MVT::v4i32);
    ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);

    // Create the 128 bit masks that have ones where the data to store is
    // located.
    SDValue lowmask, himask;
    // If the value to store doesn't fill up an entire 128 bits, zero
    // out the trailing bits of the mask so that only the value we want to
    // store is masked in.
    // this is e.g. in the case of store i32, align 2
    if (!StVT.isVector()) {
      Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
      lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
      lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
                            surplus);
      Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
      Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
    } else {
      lowmask = ones;
      Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
    }

    // this will zero, if there are no data that goes to the high quad
    himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
                         offset_compl);
    lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
                          offset);

    // Load in the old data and zero out the parts that will be overwritten with
    // the new data to store.
    SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
                             DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
                                         DAG.getConstant( 16, PtrVT)),
                             highMemPtr,
                             SN->isVolatile(), SN->isNonTemporal(), 16);
    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
                            hi.getValue(1));

    low = DAG.getNode(ISD::AND, dl, MVT::i128,
                      DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
                      DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
    hi = DAG.getNode(ISD::AND, dl, MVT::i128,
                     DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
                     DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));

    // Shift the Value to store into place. rlow contains the parts that go to
    // the lower memory chunk, rhi has the parts that go to the upper one.
    SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
    rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
    SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
                              offset_compl);

    // Merge the old data and the new data and store the results
    // Need to convert vectors here to integer as 'OR'ing floats assert
    rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
                       DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
                       DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
    rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
                      DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
                      DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
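    // The two merged quadwords are written back with two aligned 16-byte
    // stores, chained together with a TokenFactor below.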
    low = DAG.getStore(the_chain, dl, rlow, basePtr,
                       lowMemPtr,
                       SN->isVolatile(), SN->isNonTemporal(), 16);
    hi = DAG.getStore(the_chain, dl, rhi,
                      DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
                                  DAG.getConstant( 16, PtrVT)),
                      highMemPtr,
                      SN->isVolatile(), SN->isNonTemporal(), 16);
    result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
                         hi.getValue(0));
  }

  return result;
}
//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}
static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
                                          PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    report_fatal_error("LowerGlobalAddress: Relocation model other than static"
                       " not supported.");
  }

  return SDValue();
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}
SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();

  unsigned ArgOffset = SPUFrameLowering::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;
    CCValAssign &VA = ArgLocs[ArgNo];

    if (VA.isRegLoc()) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default:
        report_fatal_error("LowerFormalArguments Unhandled argument type: " +
                           Twine(ObjectVT.getEVTString()));
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(VA.getLocReg(), VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // FIXME: we should be able to query the argument registers from
    // tablegen generated code.
    static const unsigned ArgRegs[] = {
      SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
      SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
      SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
      SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
      SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
      SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
      SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
      SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
      SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
      SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
      SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
    };
    // size of ArgRegs array
    unsigned NumArgRegs = 77;
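    // (77 = R3..R79, the SPU argument-passing registers listed above.)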
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      FuncInfo->setVarArgsFrameIndex(
        MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
      SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
      SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
                                   false, false, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  // CellSPU target does not yet support tail call optimization.
  isTailCall = false;

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);

  const unsigned NumArgRegs = ArgLocs.size();

  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
    SDValue Arg = OutVals[ArgRegIdx];
    CCValAssign &VA = ArgLocs[ArgRegIdx];

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
    case MVT::f32:
    case MVT::f64:
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();

  // Insert a call sequence start
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // Now handle the return value(s)
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);

  // If the call has results, copy the values out of the ret val registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                                     InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);
    InVals.push_back(Val);
  }

  return Chain;
}
SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               DebugLoc dl, SelectionDAG &DAG) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             OutVals[i], Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}
//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}
1516 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1517 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1519 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1521 if (ConstantSDNode *CN = getVecImm(N)) {
1522 uint64_t Value = CN->getZExtValue();
1523 if (ValueType == MVT::i64) {
1524 uint64_t UValue = CN->getZExtValue();
1525 uint32_t upper = uint32_t(UValue >> 32);
1526 uint32_t lower = uint32_t(UValue);
1529 Value = Value >> 32;
1531 if (Value <= 0x3ffff)
1532 return DAG.getTargetConstant(Value, ValueType);
1538 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1539 /// and the value fits into a signed 16-bit constant, and if so, return the
1541 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1543 if (ConstantSDNode *CN = getVecImm(N)) {
1544 int64_t Value = CN->getSExtValue();
1545 if (ValueType == MVT::i64) {
1546 uint64_t UValue = CN->getZExtValue();
1547 uint32_t upper = uint32_t(UValue >> 32);
1548 uint32_t lower = uint32_t(UValue);
1551 Value = Value >> 32;
1553 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1554 return DAG.getTargetConstant(Value, ValueType);
1561 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1562 /// and the value fits into a signed 10-bit constant, and if so, return the
1564 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1566 if (ConstantSDNode *CN = getVecImm(N)) {
1567 int64_t Value = CN->getSExtValue();
1568 if (ValueType == MVT::i64) {
1569 uint64_t UValue = CN->getZExtValue();
1570 uint32_t upper = uint32_t(UValue >> 32);
1571 uint32_t lower = uint32_t(UValue);
1574 Value = Value >> 32;
1576 if (isInt<10>(Value))
1577 return DAG.getTargetConstant(Value, ValueType);
1583 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1584 /// and the value fits into a signed 8-bit constant, and if so, return the
1587 /// @note: The incoming vector is v16i8 because that's the only way we can load
1588 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1590 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1592 if (ConstantSDNode *CN = getVecImm(N)) {
1593 int Value = (int) CN->getZExtValue();
1594 if (ValueType == MVT::i16
1595 && Value <= 0xffff /* truncated from uint64_t */
1596 && ((short) Value >> 8) == ((short) Value & 0xff))
1597 return DAG.getTargetConstant(Value & 0xff, ValueType);
1598 else if (ValueType == MVT::i8
1599 && (Value & 0xff) == Value)
1600 return DAG.getTargetConstant(Value, ValueType);
1606 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1607 /// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
1609 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1611 if (ConstantSDNode *CN = getVecImm(N)) {
1612 uint64_t Value = CN->getZExtValue();
1613 if ((ValueType == MVT::i32
1614 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1615 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1616 return DAG.getTargetConstant(Value >> 16, ValueType);
1622 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1623 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1624 if (ConstantSDNode *CN = getVecImm(N)) {
1625 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1631 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1632 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1633 if (ConstantSDNode *CN = getVecImm(N)) {
1634 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1640 //! Lower a BUILD_VECTOR instruction creatively:
1642 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1643 EVT VT = Op.getValueType();
1644 EVT EltVT = VT.getVectorElementType();
1645 DebugLoc dl = Op.getDebugLoc();
1646 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1647 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1648 unsigned minSplatBits = EltVT.getSizeInBits();
1650 if (minSplatBits < 16)
minSplatBits = 16;
1653 APInt APSplatBits, APSplatUndef;
1654 unsigned SplatBitSize;
bool HasAnyUndefs;
1657 if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1658 HasAnyUndefs, minSplatBits)
1659 || minSplatBits < SplatBitSize)
1660 return SDValue(); // Wasn't a constant vector or splat exceeded min
1662 uint64_t SplatBits = APSplatBits.getZExtValue();
1664 switch (VT.getSimpleVT().SimpleTy) {
1666 report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
1667 Twine(VT.getEVTString()));
1670 uint32_t Value32 = uint32_t(SplatBits);
1671 assert(SplatBitSize == 32
1672 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1673 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1674 SDValue T = DAG.getConstant(Value32, MVT::i32);
1675 return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
1676 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1680 uint64_t f64val = uint64_t(SplatBits);
1681 assert(SplatBitSize == 64
1682 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1683 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1684 SDValue T = DAG.getConstant(f64val, MVT::i64);
1685 return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
1686 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1690 // 8-bit constants have to be expanded to 16-bits
1691 unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1692 SmallVector<SDValue, 8> Ops;
1694 Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1695 return DAG.getNode(ISD::BITCAST, dl, VT,
1696 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1699 unsigned short Value16 = SplatBits;
1700 SDValue T = DAG.getConstant(Value16, EltVT);
1701 SmallVector<SDValue, 8> Ops;
Ops.assign(8, T);
1704 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1707 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1708 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1711 return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1721 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1723 uint32_t upper = uint32_t(SplatVal >> 32);
1724 uint32_t lower = uint32_t(SplatVal);
1726 if (upper == lower) {
1727 // Magic constant that can be matched by IL, ILA, et al.
1728 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1729 return DAG.getNode(ISD::BITCAST, dl, OpVT,
1730 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1731 Val, Val, Val, Val));
1733 bool upper_special, lower_special;
1735 // NOTE: This code creates common-case shuffle masks that can be easily
1736 // detected as common expressions. It is not attempting to create highly
1737 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1739 // Detect if the upper or lower half is a special shuffle mask pattern:
1740 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1741 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
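// These three word values are "special" because a SHUFB control byte in the
// ranges 0b10xxxxxx, 0b110xxxxx and 0b111xxxxx produces the constant bytes
// 0x00, 0xff and 0x80 regardless of the source operands, so a word of zeros,
// ones, or a lone sign bit can be synthesized by the shuffle mask itself.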
1743 // Both upper and lower are special, lower to a constant pool load:
1744 if (lower_special && upper_special) {
1745 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1746 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1747 SplatValCN, SplatValCN);
1752 SmallVector<SDValue, 16> ShufBytes;
SDValue LO32;
SDValue HI32;
1755 // Create lower vector if not a special pattern
1756 if (!lower_special) {
1757 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1758 LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
1759 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1760 LO32C, LO32C, LO32C, LO32C));
1763 // Create upper vector if not a special pattern
1764 if (!upper_special) {
1765 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1766 HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
1767 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1768 HI32C, HI32C, HI32C, HI32C));
1771 // If either upper or lower are special, then the two input operands are
1772 // the same (basically, one of them is a "don't care")
if (lower_special)
LO32 = HI32;
if (upper_special)
HI32 = LO32;
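// Example: splatting 0x0000000012345678 gives upper == 0 (special) and
// lower == 0x12345678, so only the LO32 vector is materialized (HI32 reuses it)
// and the mask built below emits 0x80 (zero) bytes for the even words and byte
// indices into LO32 for the odd words of each doubleword.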
1778 for (int i = 0; i < 4; ++i) {
1780 for (int j = 0; j < 4; ++j) {
1782 bool process_upper, process_lower;
1784 process_upper = (upper_special && (i & 1) == 0);
1785 process_lower = (lower_special && (i & 1) == 1);
1787 if (process_upper || process_lower) {
1788 if ((process_upper && upper == 0)
1789 || (process_lower && lower == 0))
val |= 0x80;
1791 else if ((process_upper && upper == 0xffffffff)
1792 || (process_lower && lower == 0xffffffff))
val |= 0xc0;
1794 else if ((process_upper && upper == 0x80000000)
1795 || (process_lower && lower == 0x80000000))
1796 val |= (j == 0 ? 0xe0 : 0x80);
else
1798 val |= i * 4 + j + ((i & 1) * 16);
1801 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1804 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1805 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1806 &ShufBytes[0], ShufBytes.size()));
1810 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1811 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1812 /// permutation vector, V3, is monotonically increasing with one "exception"
1813 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1814 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1815 /// In either case, the net result is going to eventually invoke SHUFB to
1816 /// permute/shuffle the bytes from V1 and V2.
1818 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1819 /// generate a control word for byte/halfword/word insertion. This takes care of
1820 /// a single element move from V2 into V1.
1822 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
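/// For example, on v4i32 the mask <0, 1, 6, 3> is monotonic with a single element
/// (index 6, i.e. element 2 of V2) moved into the matching slot of V1, so it maps
/// to the SHUFFLE_MASK + SHUFB path; a mask such as <1, 2, 3, 0> is recognized as
/// a pure rotation and is lowered to SPUISD::ROTBYTES_LEFT instead.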
1823 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1824 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1825 SDValue V1 = Op.getOperand(0);
1826 SDValue V2 = Op.getOperand(1);
1827 DebugLoc dl = Op.getDebugLoc();
1829 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1831 // If we have a single element being moved from V1 to V2, this can be handled
1832 // using the C*[DX] compute mask instructions, but the vector elements have
1833 // to be monotonically increasing with one exception element, and the source
1834 // slot of the element to move must be the same as the destination.
1835 EVT VecVT = V1.getValueType();
1836 EVT EltVT = VecVT.getVectorElementType();
1837 unsigned EltsFromV2 = 0;
1838 unsigned V2EltOffset = 0;
1839 unsigned V2EltIdx0 = 0;
1840 unsigned CurrElt = 0;
1841 unsigned MaxElts = VecVT.getVectorNumElements();
1842 unsigned PrevElt = 0;
1843 bool monotonic = true;
1846 EVT maskVT; // which of the c?d instructions to use
1848 if (EltVT == MVT::i8) {
V2EltIdx0 = 16;
1850 maskVT = MVT::v16i8;
1851 } else if (EltVT == MVT::i16) {
V2EltIdx0 = 8;
1853 maskVT = MVT::v8i16;
1854 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
V2EltIdx0 = 4;
1856 maskVT = MVT::v4i32;
1857 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
V2EltIdx0 = 2;
1859 maskVT = MVT::v2i64;
} else
1861 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1863 for (unsigned i = 0; i != MaxElts; ++i) {
1864 if (SVN->getMaskElt(i) < 0)
1867 unsigned SrcElt = SVN->getMaskElt(i);
1870 if (SrcElt >= V2EltIdx0) {
1871 // TODO: optimize for the monotonic case when several consecutive
1872 // elements are taken from V2. Do we ever get such a case?
1873 if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
1874 V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
++EltsFromV2;
++CurrElt;
1878 } else if (CurrElt != SrcElt) {
1886 if (PrevElt > 0 && SrcElt < MaxElts) {
1887 if ((PrevElt == SrcElt - 1)
1888 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1893 } else if (i == 0 || (PrevElt==0 && SrcElt==1)) {
1894 // First time or after a "wrap around"
1898 // This isn't a rotation, takes elements from vector 2
1904 if (EltsFromV2 == 1 && monotonic) {
1905 // Compute mask and shuffle
1906 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1908 // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
1909 // R1 ($sp) is used here only as it is guaranteed to have last bits zero
1910 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
1911 DAG.getRegister(SPU::R1, PtrVT),
1912 DAG.getConstant(V2EltOffset, MVT::i32));
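// $sp is always 16-byte aligned, so the sum above carries V2EltOffset in its
// low four bits; SHUFFLE_MASK (selected to one of the cbd/chd/cwd/cdd forms)
// only inspects those low address bits when it builds the insertion control word.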
1913 SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
maskVT, Pointer);

1916 // Use shuffle mask in SHUFB synthetic instruction:
1917 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
ShufMaskOp);
1919 } else if (rotate) {
1922 rotamt *= EltVT.getSizeInBits()/8;
1923 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1924 V1, DAG.getConstant(rotamt, MVT::i16));
1926 // Convert the SHUFFLE_VECTOR mask's input element units to the
// actual bytes.
1928 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1930 SmallVector<SDValue, 16> ResultMask;
1931 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1932 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1934 for (unsigned j = 0; j < BytesPerElement; ++j)
1935 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1937 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1938 &ResultMask[0], ResultMask.size());
1939 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1943 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1944 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1945 DebugLoc dl = Op.getDebugLoc();
1947 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1948 // For a constant, build the appropriate constant vector, which will
1949 // eventually simplify to a vector register load.
1951 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1952 SmallVector<SDValue, 16> ConstVecValues;
EVT VT;
size_t n_copies;
1956 // Create a constant vector:
1957 switch (Op.getValueType().getSimpleVT().SimpleTy) {
1958 default: llvm_unreachable("Unexpected constant value type in "
1959 "LowerSCALAR_TO_VECTOR");
1960 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1961 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1962 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1963 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1964 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1965 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1968 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1969 for (size_t j = 0; j < n_copies; ++j)
1970 ConstVecValues.push_back(CValue);
1972 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1973 &ConstVecValues[0], ConstVecValues.size());
1975 // Otherwise, copy the value from one register to another:
1976 switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1977 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f32:
case MVT::f64:
1984 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
}
}
1991 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1992 EVT VT = Op.getValueType();
1993 SDValue N = Op.getOperand(0);
1994 SDValue Elt = Op.getOperand(1);
1995 DebugLoc dl = Op.getDebugLoc();
1998 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1999 // Constant argument:
2000 int EltNo = (int) C->getZExtValue();
2003 if (VT == MVT::i8 && EltNo >= 16)
2004 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2005 else if (VT == MVT::i16 && EltNo >= 8)
2006 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2007 else if (VT == MVT::i32 && EltNo >= 4)
2008 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2009 else if (VT == MVT::i64 && EltNo >= 2)
2010 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2012 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2013 // i32 and i64: Element 0 is the preferred slot
2014 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
2017 // Need to generate shuffle mask and extract:
2018 int prefslot_begin = -1, prefslot_end = -1;
2019 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2021 switch (VT.getSimpleVT().SimpleTy) {
2023 assert(false && "Invalid value type!");
2025 prefslot_begin = prefslot_end = 3;
2029 prefslot_begin = 2; prefslot_end = 3;
2034 prefslot_begin = 0; prefslot_end = 3;
2039 prefslot_begin = 0; prefslot_end = 7;
2044 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2045 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2047 unsigned int ShufBytes[16] = {
2048 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2050 for (int i = 0; i < 16; ++i) {
2051 // zero fill upper part of preferred slot, don't care about the
// remaining bytes
2053 unsigned int mask_val;
2054 if (i <= prefslot_end) {
mask_val =
2056 ((i < prefslot_begin)
? 0x80
2058 : elt_byte + (i - prefslot_begin));

2060 ShufBytes[i] = mask_val;
} else
2062 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2065 SDValue ShufMask[4];
2066 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2067 unsigned bidx = i * 4;
2068 unsigned int bits = ((ShufBytes[bidx] << 24) |
2069 (ShufBytes[bidx+1] << 16) |
2070 (ShufBytes[bidx+2] << 8) |
ShufBytes[bidx+3]);
2072 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2075 SDValue ShufMaskVec =
2076 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2077 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
2079 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2080 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2081 N, N, ShufMaskVec));
2083 // Variable index: Rotate the requested element into slot 0, then replicate
2084 // slot 0 across the vector
2085 EVT VecVT = N.getValueType();
2086 if (!VecVT.isSimple() || !VecVT.isVector()) {
2087 report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
" vector type!");
}
2091 // Make life easier by making sure the index is zero-extended to i32
2092 if (Elt.getValueType() != MVT::i32)
2093 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2095 // Scale the index to a bit/byte shift quantity
APInt scaleFactor =
2097 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2098 unsigned scaleShift = scaleFactor.logBase2();
2101 if (scaleShift > 0) {
2102 // Scale the shift factor:
2103 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2104 DAG.getConstant(scaleShift, MVT::i32));
2107 vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
2109 // Replicate the bytes starting at byte 0 across the entire vector (for
2110 // consistency with the notion of a unified register set)
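// Each byte of the "replicate" constants below is a byte index into the shifted
// vector: 0x00010203 copies bytes 0..3 into a word, while the i8 and i16 cases
// repeat byte 0 (0x00000000) or bytes 0..1 (0x00010001), smearing the element
// that now sits at byte 0 across the whole register.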
2113 switch (VT.getSimpleVT().SimpleTy) {
2115 report_fatal_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector"
2119 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2120 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2121 factor, factor, factor, factor);
2125 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2126 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2127 factor, factor, factor, factor);
2132 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2133 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2134 factor, factor, factor, factor);
2139 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2140 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2141 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2142 loFactor, hiFactor, loFactor, hiFactor);
2147 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2148 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2149 vecShift, vecShift, replicate));
2155 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2156 SDValue VecOp = Op.getOperand(0);
2157 SDValue ValOp = Op.getOperand(1);
2158 SDValue IdxOp = Op.getOperand(2);
2159 DebugLoc dl = Op.getDebugLoc();
2160 EVT VT = Op.getValueType();
2161 EVT eltVT = ValOp.getValueType();
2163 // use 0 when the lane to insert to is 'undef'
2165 if (IdxOp.getOpcode() != ISD::UNDEF) {
2166 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2167 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2168 Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
2171 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2172 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2173 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2174 DAG.getRegister(SPU::R1, PtrVT),
2175 DAG.getConstant(Offset, PtrVT));
2176 // widen the mask when dealing with half vectors
2177 EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
2178 128/ VT.getVectorElementType().getSizeInBits());
2179 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
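// Only the low bits of Pointer matter here: $sp is 16-byte aligned, so they are
// exactly Offset, and SHUFFLE_MASK turns them into the insertion control word.
// The mask is built with a full 128-bit vector type so that sub-quadword vector
// types still get a complete 16-byte shuffle mask.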
2182 DAG.getNode(SPUISD::SHUFB, dl, VT,
2183 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2185 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
2190 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2191 const TargetLowering &TLI)
2193 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2194 DebugLoc dl = Op.getDebugLoc();
2195 EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
2197 assert(Op.getValueType() == MVT::i8);
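// SPU has no native i8 add/sub/shift/multiply forms, so each i8 operation below
// is widened to i16 (sign- or zero-extending as the operator requires), carried
// out in i16, and truncated back to i8.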
2200 llvm_unreachable("Unhandled i8 math operator");
2204 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2206 SDValue N1 = Op.getOperand(1);
2207 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2208 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2209 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2210 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2215 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2217 SDValue N1 = Op.getOperand(1);
2218 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2219 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2220 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2221 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2225 SDValue N1 = Op.getOperand(1);
2226 EVT N1VT = N1.getValueType();
2228 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2229 if (!N1VT.bitsEq(ShiftVT)) {
2230 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2233 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2236 // Replicate lower 8-bits into upper 8:
2238 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2239 DAG.getNode(ISD::SHL, dl, MVT::i16,
2240 N0, DAG.getConstant(8, MVT::i32)));
2242 // Truncate back down to i8
2243 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2244 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2248 SDValue N1 = Op.getOperand(1);
2249 EVT N1VT = N1.getValueType();
2251 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2252 if (!N1VT.bitsEq(ShiftVT)) {
2253 unsigned N1Opc = ISD::ZERO_EXTEND;
2255 if (N1.getValueType().bitsGT(ShiftVT))
2256 N1Opc = ISD::TRUNCATE;
2258 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2261 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2262 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2265 SDValue N1 = Op.getOperand(1);
2266 EVT N1VT = N1.getValueType();
2268 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2269 if (!N1VT.bitsEq(ShiftVT)) {
2270 unsigned N1Opc = ISD::SIGN_EXTEND;
2272 if (N1VT.bitsGT(ShiftVT))
2273 N1Opc = ISD::TRUNCATE;
2274 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2277 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2278 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2281 SDValue N1 = Op.getOperand(1);
2283 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2284 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2285 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2286 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2294 //! Lower byte immediate operations for v16i8 vectors:
2296 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2299 EVT VT = Op.getValueType();
2300 DebugLoc dl = Op.getDebugLoc();
2302 ConstVec = Op.getOperand(0);
2303 Arg = Op.getOperand(1);
2304 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2305 if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
2306 ConstVec = ConstVec.getOperand(0);
2308 ConstVec = Op.getOperand(1);
2309 Arg = Op.getOperand(0);
2310 if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
2311 ConstVec = ConstVec.getOperand(0);
2316 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2317 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2318 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2320 APInt APSplatBits, APSplatUndef;
2321 unsigned SplatBitSize;
bool HasAnyUndefs;
2323 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2325 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2326 HasAnyUndefs, minSplatBits)
2327 && minSplatBits <= SplatBitSize) {
2328 uint64_t SplatBits = APSplatBits.getZExtValue();
2329 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2331 SmallVector<SDValue, 16> tcVec;
2332 tcVec.assign(16, tc);
2333 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2334 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2338 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2339 // lowered. Return the operation, rather than a null SDValue.
2343 //! Custom lowering for CTPOP (count population)
2345 Custom lowering code that counts the number of ones in the input
2346 operand. SPU has such an instruction, but it counts the number of
2347 ones per byte, which then have to be accumulated.
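For i32, for example, CNTB leaves four per-byte counts packed in one word; the
lowering below folds them with two shift-and-add steps (shift by 16, then by 8)
and masks the result with 0xff, leaving the total population count (0..32) in
the low byte.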
2349 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2350 EVT VT = Op.getValueType();
2351 EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2352 VT, (128 / VT.getSizeInBits()));
2353 DebugLoc dl = Op.getDebugLoc();
2355 switch (VT.getSimpleVT().SimpleTy) {
2357 assert(false && "Invalid value type!");
2359 SDValue N = Op.getOperand(0);
2360 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2362 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2363 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2365 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2369 MachineFunction &MF = DAG.getMachineFunction();
2370 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2372 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2374 SDValue N = Op.getOperand(0);
2375 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2376 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2377 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2379 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2380 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2382 // CNTB_result becomes the chain to which all of the virtual registers
2383 // CNTB_reg, SUM1_reg become associated:
2384 SDValue CNTB_result =
2385 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2387 SDValue CNTB_rescopy =
2388 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2390 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2392 return DAG.getNode(ISD::AND, dl, MVT::i16,
2393 DAG.getNode(ISD::ADD, dl, MVT::i16,
2394 DAG.getNode(ISD::SRL, dl, MVT::i16,
2401 MachineFunction &MF = DAG.getMachineFunction();
2402 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2404 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2405 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2407 SDValue N = Op.getOperand(0);
2408 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2409 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2410 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2411 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2413 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2414 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2416 // CNTB_result becomes the chain to which all of the virtual registers
2417 // CNTB_reg, SUM1_reg become associated:
2418 SDValue CNTB_result =
2419 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2421 SDValue CNTB_rescopy =
2422 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2425 DAG.getNode(ISD::SRL, dl, MVT::i32,
2426 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2430 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2431 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2433 SDValue Sum1_rescopy =
2434 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2437 DAG.getNode(ISD::SRL, dl, MVT::i32,
2438 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2441 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2442 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2444 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2454 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2456 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2457 All conversions to i64 are expanded to a libcall.
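For instance, an f64 -> i32 FP_TO_SINT typically ends up as a call to the runtime
routine __fixdfsi, with the symbol name and calling convention taken from the
RTLIB tables.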
2459 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2460 const SPUTargetLowering &TLI) {
2461 EVT OpVT = Op.getValueType();
2462 SDValue Op0 = Op.getOperand(0);
2463 EVT Op0VT = Op0.getValueType();
2465 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2466 || OpVT == MVT::i64) {
2467 // Convert f32 / f64 to i32 / i64 via libcall.
2469 (Op.getOpcode() == ISD::FP_TO_SINT)
2470 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2471 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2472 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2474 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2480 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2482 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2483 All conversions from i64 are expanded to a libcall.
2485 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2486 const SPUTargetLowering &TLI) {
2487 EVT OpVT = Op.getValueType();
2488 SDValue Op0 = Op.getOperand(0);
2489 EVT Op0VT = Op0.getValueType();
2491 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2492 || Op0VT == MVT::i64) {
2493 // Convert i32, i64 to f64 via libcall:
2495 (Op.getOpcode() == ISD::SINT_TO_FP)
2496 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2497 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2498 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2500 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2506 //! Lower ISD::SETCC
2508 This handles MVT::f64 (double floating point) condition lowering
2510 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2511 const TargetLowering &TLI) {
2512 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2513 DebugLoc dl = Op.getDebugLoc();
2514 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2516 SDValue lhs = Op.getOperand(0);
2517 SDValue rhs = Op.getOperand(1);
2518 EVT lhsVT = lhs.getValueType();
2519 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2521 EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2522 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2523 EVT IntVT(MVT::i64);
2525 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2526 // selected to a NOP:
2527 SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
2529 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2530 DAG.getNode(ISD::SRL, dl, IntVT,
2531 i64lhs, DAG.getConstant(32, MVT::i32)));
2532 SDValue lhsHi32abs =
2533 DAG.getNode(ISD::AND, dl, MVT::i32,
2534 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2536 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2538 // SETO and SETUO only use the lhs operand:
2539 if (CC->get() == ISD::SETO) {
2540 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2542 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2543 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2544 DAG.getSetCC(dl, ccResultVT,
2545 lhs, DAG.getConstantFP(0.0, lhsVT),
2547 DAG.getConstant(ccResultAllOnes, ccResultVT));
2548 } else if (CC->get() == ISD::SETUO) {
2549 // Evaluates to true if Op0 is [SQ]NaN
2550 return DAG.getNode(ISD::AND, dl, ccResultVT,
2551 DAG.getSetCC(dl, ccResultVT,
2553 DAG.getConstant(0x7ff00000, MVT::i32),
2555 DAG.getSetCC(dl, ccResultVT,
2557 DAG.getConstant(0, MVT::i32),
2561 SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
2563 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2564 DAG.getNode(ISD::SRL, dl, IntVT,
2565 i64rhs, DAG.getConstant(32, MVT::i32)));
2567 // If a value is negative, subtract from the sign magnitude constant:
2568 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2570 // Convert the sign-magnitude representation into 2's complement:
2571 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2572 lhsHi32, DAG.getConstant(31, MVT::i32));
2573 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2575 DAG.getNode(ISD::SELECT, dl, IntVT,
2576 lhsSelectMask, lhsSignMag2TC, i64lhs);
2578 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2579 rhsHi32, DAG.getConstant(31, MVT::i32));
2580 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2582 DAG.getNode(ISD::SELECT, dl, IntVT,
2583 rhsSelectMask, rhsSignMag2TC, i64rhs);
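// IEEE doubles are sign-magnitude numbers: for two values of the same sign their
// bit patterns order the same way as their magnitudes.  Subtracting a negative
// operand's bits from 0x8000000000000000 converts it to an ordinary two's
// complement integer, so the i64 setcc below implements the f64 comparison.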
2587 switch (CC->get()) {
2590 compareOp = ISD::SETEQ; break;
2593 compareOp = ISD::SETGT; break;
2596 compareOp = ISD::SETGE; break;
2599 compareOp = ISD::SETLT; break;
2602 compareOp = ISD::SETLE; break;
2605 compareOp = ISD::SETNE; break;
2607 report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
2611 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2612 (ISD::CondCode) compareOp);
2614 if ((CC->get() & 0x8) == 0) {
2615 // Ordered comparison:
2616 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2617 lhs, DAG.getConstantFP(0.0, MVT::f64),
2619 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2620 rhs, DAG.getConstantFP(0.0, MVT::f64),
2622 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2624 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2630 //! Lower ISD::SELECT_CC
2632 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
SELB instruction.
2635 \note Need to revisit this in the future: if the code path through the true
2636 and false value computations is longer than the latency of a branch (6
2637 cycles), then it would be more advantageous to branch and insert a new basic
2638 block and branch on the condition. However, this code does not make that
2639 assumption, given the simplistic uses so far.
2642 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2643 const TargetLowering &TLI) {
2644 EVT VT = Op.getValueType();
2645 SDValue lhs = Op.getOperand(0);
2646 SDValue rhs = Op.getOperand(1);
2647 SDValue trueval = Op.getOperand(2);
2648 SDValue falseval = Op.getOperand(3);
2649 SDValue condition = Op.getOperand(4);
2650 DebugLoc dl = Op.getDebugLoc();
2652 // NOTE: SELB's arguments: $rA, $rB, $mask
2654 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2655 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2656 // condition was true and 0s where the condition was false. Hence, the
2657 // arguments to SELB get reversed.
2659 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2660 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2661 // with another "cannot select select_cc" assert:
2663 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2664 TLI.getSetCCResultType(Op.getValueType()),
2665 lhs, rhs, condition);
2666 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2669 //! Custom lower ISD::TRUNCATE
2670 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2672 // Type to truncate to
2673 EVT VT = Op.getValueType();
2674 MVT simpleVT = VT.getSimpleVT();
2675 EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2676 VT, (128 / VT.getSizeInBits()));
2677 DebugLoc dl = Op.getDebugLoc();
2679 // Type to truncate from
2680 SDValue Op0 = Op.getOperand(0);
2681 EVT Op0VT = Op0.getValueType();
2683 if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
2684 // Create shuffle mask, least significant doubleword of quadword
2685 unsigned maskHigh = 0x08090a0b;
2686 unsigned maskLow = 0x0c0d0e0f;
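// Each hex byte of these mask words is a byte index into the source quadword,
// so the (0x08090a0b, 0x0c0d0e0f) pair selects bytes 8..15 (the least
// significant doubleword of the i128) and replicates it into both halves of
// the result.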
2687 // Use a shuffle to perform the truncation
2688 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2689 DAG.getConstant(maskHigh, MVT::i32),
2690 DAG.getConstant(maskLow, MVT::i32),
2691 DAG.getConstant(maskHigh, MVT::i32),
2692 DAG.getConstant(maskLow, MVT::i32));
2694 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2695 Op0, Op0, shufMask);
2697 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2700 return SDValue(); // Leave the truncate unmolested
2704 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2705 * algorithm is to duplicate the sign bit using rotmai to generate at
2706 * least one byte full of sign bits. Then propagate the "sign-byte" into
2707 * the leftmost words and the i64/i32 into the rightmost words using shufb.
2709 * @param Op The sext operand
2710 * @param DAG The current DAG
2711 * @return The SDValue with the entire instruction sequence
2713 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2715 DebugLoc dl = Op.getDebugLoc();
2717 // Type to extend to
2718 MVT OpVT = Op.getValueType().getSimpleVT();
2720 // Type to extend from
2721 SDValue Op0 = Op.getOperand(0);
2722 MVT Op0VT = Op0.getValueType().getSimpleVT();
2724 // extend i8 & i16 via i32
2725 if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
2726 Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
2730 // The type to extend to needs to be a i128 and
2731 // the type to extend from needs to be i64 or i32.
2732 assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2733 "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2736 // Create shuffle mask
2737 unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2738 unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
2739 unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
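// Shuffle-mask bytes 0x00..0x0f select from the first SHUFB operand (the input
// value) and 0x10..0x1f from the second (the sign-bit vector), so 0x10101010
// fills a word with copies of the leading sign byte while mask2/mask3 place the
// original i64/i32 in the low end of the i128.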
2740 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2741 DAG.getConstant(mask1, MVT::i32),
2742 DAG.getConstant(mask1, MVT::i32),
2743 DAG.getConstant(mask2, MVT::i32),
2744 DAG.getConstant(mask3, MVT::i32));
2746 // Word wise arithmetic right shift to generate at least one byte
2747 // that contains sign bits.
2748 MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2749 SDValue sraVal = DAG.getNode(ISD::SRA,
2752 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2753 DAG.getConstant(31, MVT::i32));
2755 // reinterpret as an i128 (SHUFB requires it). This gets lowered away.
2756 SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2758 DAG.getTargetConstant(
2759 SPU::GPRCRegClass.getID(),
2761 // Shuffle bytes - Copy the sign bits into the upper 64 bits
2762 // and the input value into the lower 64 bits.
2763 SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2764 extended, sraVal, shufMask);
2765 return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
2768 //! Custom (target-specific) lowering entry point
2770 This is where LLVM's DAG selection process calls to do target-specific
2774 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2776 unsigned Opc = (unsigned) Op.getOpcode();
2777 EVT VT = Op.getValueType();
2782 errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2783 errs() << "Op.getOpcode() = " << Opc << "\n";
2784 errs() << "*Op.getNode():\n";
2785 Op.getNode()->dump();
2787 llvm_unreachable(0);
2793 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2795 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2796 case ISD::ConstantPool:
2797 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2798 case ISD::GlobalAddress:
2799 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2800 case ISD::JumpTable:
2801 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2802 case ISD::ConstantFP:
2803 return LowerConstantFP(Op, DAG);
2805 // i8, i64 math ops:
2814 return LowerI8Math(Op, DAG, Opc, *this);
2818 case ISD::FP_TO_SINT:
2819 case ISD::FP_TO_UINT:
2820 return LowerFP_TO_INT(Op, DAG, *this);
2822 case ISD::SINT_TO_FP:
2823 case ISD::UINT_TO_FP:
2824 return LowerINT_TO_FP(Op, DAG, *this);
2826 // Vector-related lowering.
2827 case ISD::BUILD_VECTOR:
2828 return LowerBUILD_VECTOR(Op, DAG);
2829 case ISD::SCALAR_TO_VECTOR:
2830 return LowerSCALAR_TO_VECTOR(Op, DAG);
2831 case ISD::VECTOR_SHUFFLE:
2832 return LowerVECTOR_SHUFFLE(Op, DAG);
2833 case ISD::EXTRACT_VECTOR_ELT:
2834 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2835 case ISD::INSERT_VECTOR_ELT:
2836 return LowerINSERT_VECTOR_ELT(Op, DAG);
2838 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2842 return LowerByteImmed(Op, DAG);
2844 // Vector and i8 multiply:
2847 return LowerI8Math(Op, DAG, Opc, *this);
2850 return LowerCTPOP(Op, DAG);
2852 case ISD::SELECT_CC:
2853 return LowerSELECT_CC(Op, DAG, *this);
2856 return LowerSETCC(Op, DAG, *this);
2859 return LowerTRUNCATE(Op, DAG);
2861 case ISD::SIGN_EXTEND:
2862 return LowerSIGN_EXTEND(Op, DAG);
2868 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2869 SmallVectorImpl<SDValue>&Results,
2870 SelectionDAG &DAG) const
2873 unsigned Opc = (unsigned) N->getOpcode();
2874 EVT OpVT = N->getValueType(0);
2878 errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2879 errs() << "Op.getOpcode() = " << Opc << "\n";
2880 errs() << "*Op.getNode():\n";
2888 /* Otherwise, return unchanged */
2891 //===----------------------------------------------------------------------===//
2892 // Target Optimization Hooks
2893 //===----------------------------------------------------------------------===//
2896 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2899 TargetMachine &TM = getTargetMachine();
2901 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2902 SelectionDAG &DAG = DCI.DAG;
2903 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2904 EVT NodeVT = N->getValueType(0); // The node's value type
2905 EVT Op0VT = Op0.getValueType(); // The first operand's result
2906 SDValue Result; // Initially, empty result
2907 DebugLoc dl = N->getDebugLoc();
2909 switch (N->getOpcode()) {
2912 SDValue Op1 = N->getOperand(1);
2914 if (Op0.getOpcode() == SPUISD::IndirectAddr
2915 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2916 // Normalize the operands to reduce repeated code
2917 SDValue IndirectArg = Op0, AddArg = Op1;
2919 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2924 if (isa<ConstantSDNode>(AddArg)) {
2925 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2926 SDValue IndOp1 = IndirectArg.getOperand(1);
2928 if (CN0->isNullValue()) {
2929 // (add (SPUindirect <arg>, <arg>), 0) ->
2930 // (SPUindirect <arg>, <arg>)
2932 #if !defined(NDEBUG)
2933 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2935 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2936 << "With: (SPUindirect <arg>, <arg>)\n";
2941 } else if (isa<ConstantSDNode>(IndOp1)) {
2942 // (add (SPUindirect <arg>, <const>), <const>) ->
2943 // (SPUindirect <arg>, <const + const>)
2944 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2945 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2946 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2948 #if !defined(NDEBUG)
2949 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2951 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2952 << "), " << CN0->getSExtValue() << ")\n"
2953 << "With: (SPUindirect <arg>, "
2954 << combinedConst << ")\n";
2958 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2959 IndirectArg, combinedValue);
2965 case ISD::SIGN_EXTEND:
2966 case ISD::ZERO_EXTEND:
2967 case ISD::ANY_EXTEND: {
2968 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2969 // (any_extend (SPUextract_elt0 <arg>)) ->
2970 // (SPUextract_elt0 <arg>)
2971 // Types must match, however...
2972 #if !defined(NDEBUG)
2973 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2974 errs() << "\nReplace: ";
2976 errs() << "\nWith: ";
2977 Op0.getNode()->dump(&DAG);
2986 case SPUISD::IndirectAddr: {
2987 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2988 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2989 if (CN != 0 && CN->isNullValue()) {
2990 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2991 // (SPUaform <addr>, 0)
2993 DEBUG(errs() << "Replace: ");
2994 DEBUG(N->dump(&DAG));
2995 DEBUG(errs() << "\nWith: ");
2996 DEBUG(Op0.getNode()->dump(&DAG));
2997 DEBUG(errs() << "\n");
3001 } else if (Op0.getOpcode() == ISD::ADD) {
3002 SDValue Op1 = N->getOperand(1);
3003 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
3004 // (SPUindirect (add <arg>, <arg>), 0) ->
3005 // (SPUindirect <arg>, <arg>)
3006 if (CN1->isNullValue()) {
3008 #if !defined(NDEBUG)
3009 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
3011 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
3012 << "With: (SPUindirect <arg>, <arg>)\n";
3016 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
3017 Op0.getOperand(0), Op0.getOperand(1));
3023 case SPUISD::SHL_BITS:
3024 case SPUISD::SHL_BYTES:
3025 case SPUISD::ROTBYTES_LEFT: {
3026 SDValue Op1 = N->getOperand(1);
3028 // Kill degenerate vector shifts:
3029 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
3030 if (CN->isNullValue()) {
3036 case SPUISD::PREFSLOT2VEC: {
3037 switch (Op0.getOpcode()) {
3040 case ISD::ANY_EXTEND:
3041 case ISD::ZERO_EXTEND:
3042 case ISD::SIGN_EXTEND: {
3043 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
3045 // but only if the SPUprefslot2vec and <arg> types match.
3046 SDValue Op00 = Op0.getOperand(0);
3047 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
3048 SDValue Op000 = Op00.getOperand(0);
3049 if (Op000.getValueType() == NodeVT) {
3055 case SPUISD::VEC2PREFSLOT: {
3056 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
3058 Result = Op0.getOperand(0);
3066 // Otherwise, return unchanged.
3068 if (Result.getNode()) {
3069 DEBUG(errs() << "\nReplace.SPU: ");
3070 DEBUG(N->dump(&DAG));
3071 DEBUG(errs() << "\nWith: ");
3072 DEBUG(Result.getNode()->dump(&DAG));
3073 DEBUG(errs() << "\n");
3080 //===----------------------------------------------------------------------===//
3081 // Inline Assembly Support
3082 //===----------------------------------------------------------------------===//
3084 /// getConstraintType - Given a constraint letter, return the type of
3085 /// constraint it is for this target.
3086 SPUTargetLowering::ConstraintType
3087 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3088 if (ConstraintLetter.size() == 1) {
3089 switch (ConstraintLetter[0]) {
3096 return C_RegisterClass;
3099 return TargetLowering::getConstraintType(ConstraintLetter);
3102 /// Examine constraint type and operand type and determine a weight value.
3103 /// This object must already have been set up with the operand type
3104 /// and the current alternative constraint selected.
3105 TargetLowering::ConstraintWeight
3106 SPUTargetLowering::getSingleConstraintMatchWeight(
3107 AsmOperandInfo &info, const char *constraint) const {
3108 ConstraintWeight weight = CW_Invalid;
3109 Value *CallOperandVal = info.CallOperandVal;
3110 // If we don't have a value, we can't do a match,
3111 // but allow it at the lowest weight.
3112 if (CallOperandVal == NULL)
3114 // Look at the constraint type.
3115 switch (*constraint) {
3117 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
3119 //FIXME: Seems like the supported constraint letters were just copied
3120 // from PPC, as the following doesn't correspond to the GCC docs.
3121 // I'm leaving it as is until someone adds the corresponding lowering support.
3128 weight = CW_Register;
3134 std::pair<unsigned, const TargetRegisterClass*>
3135 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3138 if (Constraint.size() == 1) {
3139 // GCC RS6000 Constraint Letters
3140 switch (Constraint[0]) {
3144 return std::make_pair(0U, SPU::R64CRegisterClass);
3145 return std::make_pair(0U, SPU::R32CRegisterClass);
3148 return std::make_pair(0U, SPU::R32FPRegisterClass);
3149 else if (VT == MVT::f64)
3150 return std::make_pair(0U, SPU::R64FPRegisterClass);
3153 return std::make_pair(0U, SPU::GPRCRegisterClass);
3157 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3160 //! Compute used/known bits for a SPU operand
3162 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3166 const SelectionDAG &DAG,
3167 unsigned Depth ) const {
3169 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3171 switch (Op.getOpcode()) {
3173 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3179 case SPUISD::PREFSLOT2VEC:
3180 case SPUISD::LDRESULT:
3181 case SPUISD::VEC2PREFSLOT:
3182 case SPUISD::SHLQUAD_L_BITS:
3183 case SPUISD::SHLQUAD_L_BYTES:
3184 case SPUISD::VEC_ROTL:
3185 case SPUISD::VEC_ROTR:
3186 case SPUISD::ROTBYTES_LEFT:
3187 case SPUISD::SELECT_MASK:
3194 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3195 unsigned Depth) const {
3196 switch (Op.getOpcode()) {
3201 EVT VT = Op.getValueType();
3203 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3206 return VT.getSizeInBits();
3211 // LowerAsmOperandForConstraint
3213 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3214 std::string &Constraint,
3215 std::vector<SDValue> &Ops,
3216 SelectionDAG &DAG) const {
3217 // Default, for the time being, to the base class handler
3218 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
3221 /// isLegalAddressImmediate - Return true if the integer value can be used
3222 /// as the offset of the target addressing mode.
3223 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3225 // SPU's addresses are 256K:
3226 return (V > -(1 << 18) && V < (1 << 18) - 1);
3229 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3234 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3235 // The SPU target isn't yet aware of offsets.
3239 // can we compare to Imm without writing it into a register?
3240 bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3241 // ceqi, cgti, etc. all take an s10 operand
3242 return isInt<10>(Imm);
3246 SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
3249 // A-form: 18-bit absolute address.
3250 if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
return true;
3253 // D-form: reg + 14-bit offset
3254 if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
return true;
3258 if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0)