//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  // Byte offset of the preferred slot (counted from the MSB)
  int prefslotOffset(EVT VT) {
    int retval = 0;
    if (VT == MVT::i1) retval = 3;
    if (VT == MVT::i8) retval = 3;
    if (VT == MVT::i16) retval = 2;

    return retval;
  }
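
  // Illustration: a scalar i16 lives in the "preferred slot" at bytes 2..3 of
  // its 16-byte register, so its offset from the MSB is 2; i8 and i1 values
  // sit at byte 3, and i32/f32 and wider scalars start at byte 0.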

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  static SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy =
      Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, TLI.getLibcallCallingConv(LC), false,
                      /*isReturnValueUsed=*/true,
                      Callee, Args, DAG, Op.getDebugLoc());

    return CallInfo.first;
  }
}
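
// Illustrative use of ExpandLibCall (a sketch; the real call sites live in the
// i64/f64 lowering paths elsewhere in this backend): an unsupported node can
// be rewritten as
//   SDValue Dummy;
//   SDValue Res = ExpandLibCall(RTLIB::FPTOSINT_F64_I64, Op, DAG,
//                               /*isSigned=*/true, Dummy, TLI);
// which marshals Op's operands into an argument list and emits a call to the
// named runtime routine.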

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32 at some point):
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  // hardware.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal:
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP. It does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);

  setOperationAction(ISD::CTTZ , MVT::i8,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i16,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i32,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i128, Expand);

  setOperationAction(ISD::CTLZ , MVT::i8,   Promote);
  setOperationAction(ISD::CTLZ , MVT::i16,  Promote);
  setOperationAction(ISD::CTLZ , MVT::i32,  Legal);
  setOperationAction(ISD::CTLZ , MVT::i64,  Expand);
  setOperationAction(ISD::CTLZ , MVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,  MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND,  MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
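
  // Roughly, the expansion rewrites (i64 build_pair lo, hi) as
  // (or (shl (anyext hi), 32), (zext lo)), using the i64 shift/or support
  // declared legal above.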

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is legal for the supported vector types as well.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Custom);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
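
  // i.e. a "true" SETCC result is the all-ones pattern (-1), which is what
  // the SPU compare instructions produce and what SELB consumes as a mask.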

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(Sched::RegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
    node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
          VT.getSimpleVT().SimpleTy :
          MVT::i32);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32 = vec2prefslot %3
%5  f64 = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  int pso = prefslotOffset(InVT);
  DebugLoc dl = Op.getDebugLoc();
  EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
                                                (128 / InVT.getSizeInBits()));

  assert( LN->getAddressingMode() == ISD::UNINDEXED
          && "we should get only UNINDEXED addresses");
  // Clean aligned loads can be selected as-is.
  if (InVT.getSizeInBits() == 128 && alignment == 16)
    return SDValue();

  // Get pointerinfos to the memory chunk(s) that contain the data to load
  uint64_t mpi_offset = LN->getPointerInfo().Offset;
  mpi_offset -= mpi_offset%16;
  MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
  MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);

  SDValue result;
  SDValue basePtr = LN->getBasePtr();
  SDValue rotate;

  if (alignment == 16) {
    ConstantSDNode *CN;

    // Special cases for a known aligned load to simplify the base pointer
    // and the rotation amount:
    if (basePtr.getOpcode() == ISD::ADD
        && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
      // Known offset into basePtr
      int64_t offset = CN->getSExtValue();
      int64_t rotamt = int64_t((offset & 0xf) - pso);

      if (rotamt < 0)
        rotamt += 16;

      rotate = DAG.getConstant(rotamt, MVT::i16);

      // Simplify the base pointer for this case:
      basePtr = basePtr.getOperand(0);
      if ((offset & ~0xf) > 0) {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant((offset & ~0xf), PtrVT));
      }
    } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
               || (basePtr.getOpcode() == SPUISD::IndirectAddr
                   && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                   && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
      // Plain aligned a-form address: rotate into preferred slot
      // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
      int64_t rotamt = -pso;
      if (rotamt < 0)
        rotamt += 16;
      rotate = DAG.getConstant(rotamt, MVT::i16);
    } else {
      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      int64_t rotamt = -pso;
      if (rotamt < 0)
        rotamt += 16;
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(rotamt, PtrVT));
    }
  } else {
    // Unaligned load: must be more pessimistic about addressing modes:
    if (basePtr.getOpcode() == ISD::ADD) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
      SDValue Flag;

      SDValue Op0 = basePtr.getOperand(0);
      SDValue Op1 = basePtr.getOperand(1);

      if (isa<ConstantSDNode>(Op1)) {
        // Convert the (add <ptr>, <const>) to an indirect address contained
        // in a register. Note that this is done because we need to avoid
        // creating a 0(reg) d-form address due to the SPU's block loads.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
      } else {
        // Convert the (add <arg1>, <arg2>) to an indirect address, which
        // will likely be lowered as a reg(reg) x-form address.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
      }
    } else {
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }

    // Offset the rotate amount by the basePtr and the preferred slot
    // byte offset
    rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                         basePtr,
                         DAG.getConstant(-pso, PtrVT));
  }

  // Do the load as a i128 to allow possible shifting
  SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
                            lowMemPtr,
                            LN->isVolatile(), LN->isNonTemporal(), 16);

  // When the size is not greater than alignment we get all data with just
  // one load
  if (alignment >= InVT.getSizeInBits()/8) {
    // Update the chain
    the_chain = low.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
                         low.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
  }
  // When alignment is less than the size, we might need (known only at
  // run-time) two loads
  // TODO: if the memory address is composed only from constants, we have
  // extra knowledge, and might avoid the second load
  else {
    // storage position offset from lower 16 byte aligned memory chunk
    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
                                 basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
    // 16 - offset
    SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                       DAG.getConstant( 16, MVT::i32),
                                       offset );
    // get a register full of ones (this implementation is a workaround: LLVM
    // cannot handle 128 bit signed int constants)
    SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
    ones = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, ones);

    SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
                               DAG.getNode(ISD::ADD, dl, PtrVT,
                                           basePtr,
                                           DAG.getConstant(16, PtrVT)),
                               highMemPtr,
                               LN->isVolatile(), LN->isNonTemporal(), 16);

    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
                            high.getValue(1));

    // Shift the (possible) high part right to compensate the misalignment.
    // If there is no high part (i.e. value is i64 and offset is 4), this
    // will zero out the high value.
    high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
                       DAG.getNode(ISD::SUB, dl, MVT::i32,
                                   DAG.getConstant( 16, MVT::i32),
                                   offset));

    // Shift the low similarly
    // TODO: add SPUISD::SHL_BYTES
    low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );

    // Merge the two parts
    result = DAG.getNode(ISD::BIT_CONVERT, dl, vecVT,
                         DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
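
    // Worked example (illustrative): for an i64 load with basePtr%16 == 12,
    // offset == 12, so SHL_BYTES moves low-quad bytes 12..15 down to 0..3 and
    // SRL_BYTES by 16-12 == 4 moves high-quad bytes 0..3 up to 4..7; OR-ing
    // the two leaves the eight loaded bytes left-aligned in the quadword.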

    if (!InVT.isVector()) {
      result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
    }
  }

  // Handle extending loads by extending the scalar result:
  if (ExtType == ISD::SEXTLOAD) {
    result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
  } else if (ExtType == ISD::ZEXTLOAD) {
    result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
  } else if (ExtType == ISD::EXTLOAD) {
    unsigned NewOpc = ISD::ANY_EXTEND;

    if (OutVT.isFloatingPoint())
      NewOpc = ISD::FP_EXTEND;

    result = DAG.getNode(NewOpc, dl, OutVT, result);
  }

  SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
  SDValue retops[2] = {
    result,
    the_chain
  };

  result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                       retops, sizeof(retops) / sizeof(retops[0]));
  return result;
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();
  SDValue result;
  EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
                                                (128 / StVT.getSizeInBits()));
  // Get pointerinfos to the memory chunk(s) that contain the data to load
  uint64_t mpi_offset = SN->getPointerInfo().Offset;
  mpi_offset -= mpi_offset%16;
  MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
  MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);

  assert( SN->getAddressingMode() == ISD::UNINDEXED
          && "we should get only UNINDEXED addresses");
  // Clean aligned stores can be selected as-is.
  if (StVT.getSizeInBits() == 128 && alignment == 16)
    return SDValue();

  SDValue alignLoadVec;
  SDValue basePtr = SN->getBasePtr();
  SDValue the_chain = SN->getChain();
  SDValue insertEltOffs;

  if (alignment == 16) {
    ConstantSDNode *CN;
    // Special cases for a known aligned load to simplify the base pointer
    // and insertion byte:
    if (basePtr.getOpcode() == ISD::ADD
        && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
      // Known offset into basePtr
      int64_t offset = CN->getSExtValue();

      // Simplify the base pointer for this case:
      basePtr = basePtr.getOperand(0);
      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant((offset & 0xf), PtrVT));

      if ((offset & ~0xf) > 0) {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant((offset & ~0xf), PtrVT));
      }
    } else {
      // Otherwise, assume it's at byte 0 of basePtr
      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }
  } else {
    // Unaligned store: must be more pessimistic about addressing modes:
    if (basePtr.getOpcode() == ISD::ADD) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
      SDValue Flag;

      SDValue Op0 = basePtr.getOperand(0);
      SDValue Op1 = basePtr.getOperand(1);

      if (isa<ConstantSDNode>(Op1)) {
        // Convert the (add <ptr>, <const>) to an indirect address contained
        // in a register. Note that this is done because we need to avoid
        // creating a 0(reg) d-form address due to the SPU's block loads.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
      } else {
        // Convert the (add <arg1>, <arg2>) to an indirect address, which
        // will likely be lowered as a reg(reg) x-form address.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
      }
    } else {
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }

    // Insertion point is solely determined by basePtr's contents
    insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                basePtr,
                                DAG.getConstant(0, PtrVT));
  }

  // Load the lower part of the memory to which to store.
  SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
                            lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16);

  // If we don't need to store over the 16 byte boundary, one store suffices
  if (alignment >= StVT.getSizeInBits()/8) {
    // Update the chain
    the_chain = low.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(low);
    SDValue theValue = SN->getValue();

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      errs() << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      errs() << "\n";
    }
#endif

    SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
                                      insertEltOffs);
    SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
                                      theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, low,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          lowMemPtr,
                          LN->isVolatile(), LN->isNonTemporal(),
                          16);

  }
  // Do the store when it might cross the 16 byte memory access boundary.
  else {
    // TODO issue a warning if SN->isVolatile()== true? This is likely not
    // what the user wanted.

    // address offset from nearest lower 16-byte aligned address
    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
                                 basePtr,
                                 DAG.getConstant(0xf, MVT::i32));
    // 16 - offset
    SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                       DAG.getConstant( 16, MVT::i32),
                                       offset);
    // number of bytes that spill over into the high quadword
    SDValue hi_shift = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                   DAG.getConstant( VT.getSizeInBits()/8,
                                                    MVT::i32),
                                   offset_compl);
    // 16 - sizeof(Value)
    SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                  DAG.getConstant( 16, MVT::i32),
                                  DAG.getConstant( VT.getSizeInBits()/8,
                                                   MVT::i32));
    // get a register full of ones
    SDValue ones = DAG.getConstant(-1, MVT::v4i32);
    ones = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, ones);

    // Create the 128 bit masks that have ones where the data to store is
    // located.
    SDValue lowmask, himask;
    // If the value to store doesn't fill an entire 128 bits, zero out the
    // last bits of the mask so that only the value we want to store is
    // masked in. This is e.g. the case for a store i32, align 2.
    if (!VT.isVector()) {
      Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
      lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
      lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
                            surplus);
      Value = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, Value);
      Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
    } else {
      lowmask = ones;
      Value = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, Value);
    }
    // This will be zero if no data goes to the high quadword.
    himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
                         offset_compl);
    lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
                          offset);
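
    // Illustration: for a store i32 with basePtr%16 == 14, surplus == 12 and
    // lowmask initially covers bytes 0..3; after SRL_BYTES by offset == 14 it
    // covers low-quad bytes 14..15, while himask (SHL_BYTES by offset_compl
    // == 2) covers high-quad bytes 0..1; together they select the four
    // stored bytes.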

    // Load in the old data and zero out the parts that will be overwritten
    // with the new data to store.
    SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
                             DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
                                         DAG.getConstant( 16, PtrVT)),
                             highMemPtr,
                             SN->isVolatile(), SN->isNonTemporal(), 16);
    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
                            hi.getValue(1));

    low = DAG.getNode(ISD::AND, dl, MVT::i128,
                      DAG.getNode( ISD::BIT_CONVERT, dl, MVT::i128, low),
                      DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
    hi = DAG.getNode(ISD::AND, dl, MVT::i128,
                     DAG.getNode( ISD::BIT_CONVERT, dl, MVT::i128, hi),
                     DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));

    // Shift the Value to store into place. rlow contains the parts that go to
    // the lower memory chunk, rhi has the parts that go to the upper one.
    SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
    rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
    SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
                              offset_compl);

    // Merge the old data and the new data and store the results
    // Need to convert vectors here to integer as 'OR'ing floats assert
    rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, low),
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, rlow));
    rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, hi),
                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, rhi));

    low = DAG.getStore(the_chain, dl, rlow, basePtr,
                       lowMemPtr,
                       SN->isVolatile(), SN->isNonTemporal(), 16);
    hi = DAG.getStore(the_chain, dl, rhi,
                      DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
                                  DAG.getConstant( 16, PtrVT)),
                      highMemPtr,
                      SN->isVolatile(), SN->isNonTemporal(), 16);
    result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
                         hi.getValue(0));
  }

  return result;
}

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}
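
// Note: in "small memory" mode every address fits the 18-bit immediate field
// of an a-form address, so a single AFormAddr node suffices; "large memory"
// mode materializes the address from SPUISD::Hi/Lo halves (roughly an
// ilhu/iohl pair) combined through an IndirectAddr node. The same pattern
// repeats for jump tables and global addresses below.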

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
                                          PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    report_fatal_error("LowerGlobalAddress: Relocation model other than static"
                       " not supported.");
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}
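
// For example, the f64 constant 1.0 (bit pattern 0x3FF0000000000000) becomes
// a v2i64 splat of that pattern, bitcast to v2f64, with the scalar result
// read back out of the preferred slot.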

SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    CCValAssign &VA = ArgLocs[ArgNo];
    SDValue ArgVal;

    if (VA.isRegLoc()) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default:
        report_fatal_error("LowerFormalArguments Unhandled argument type: " +
                           Twine(ObjectVT.getEVTString()));
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(VA.getLocReg(), VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // FIXME: we should be able to query the argument registers from
    // tablegen generated code.
    static const unsigned ArgRegs[] = {
      SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
      SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
      SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
      SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
      SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
      SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
      SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
      SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
      SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
      SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
      SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
    };
    // size of ArgRegs array
    unsigned NumArgRegs = 77;

    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      FuncInfo->setVarArgsFrameIndex(
        MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
      SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
      SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
                                   false, false, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
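
// For example, Addr == 0x1230 encodes as the immediate 0x48C (0x1230 >> 2):
// the low two bits must be zero and the address must be a sign-extended
// 18-bit value, leaving a 16-bit word-aligned immediate for the instruction.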

SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  // CellSPU target does not yet support tail call optimization.
  isTailCall = false;

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);

  const unsigned NumArgRegs = ArgLocs.size();

  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
    SDValue Arg = OutVals[ArgRegIdx];
    CCValAssign &VA = ArgLocs[ArgRegIdx];

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
    case MVT::f32:
    case MVT::f64:
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();

  // Insert a call sequence start
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // Now handle the return value(s)
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
                    RVLocs, *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);

  // If the call has results, copy the values out of the ret val registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                                     InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);
    InVals.push_back(Val);
  }

  return Chain;
}

SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               DebugLoc dl, SelectionDAG &DAG) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             OutVals[i], Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
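
// For i64 the splat must have identical upper and lower 32-bit halves, e.g.
// a v2i64 splat of 0x0001000200010002 passes (both halves are 0x00010002) and
// is then range-checked as the 32-bit value 0x00010002. The same halves
// check is used by the i16/i10 variants below.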

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isInt<10>(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
1582 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1583 /// and the value fits into a signed 8-bit constant, and if so, return the
1586 /// @note: The incoming vector is v16i8 because that's the only way we can load
1587 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1589 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1591 if (ConstantSDNode *CN = getVecImm(N)) {
1592 int Value = (int) CN->getZExtValue();
1593 if (ValueType == MVT::i16
1594 && Value <= 0xffff /* truncated from uint64_t */
1595 && ((short) Value >> 8) == ((short) Value & 0xff))
1596 return DAG.getTargetConstant(Value & 0xff, ValueType);
1597 else if (ValueType == MVT::i8
1598 && (Value & 0xff) == Value)
1599 return DAG.getTargetConstant(Value, ValueType);
1600 }
1602 return SDValue();
1603 }
1605 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1606 /// and the value fits into a signed 16-bit constant, and if so, return the
1607 /// constant.
1608 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1609 EVT ValueType) {
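// ILHU places a 16-bit immediate into the upper halfword of each word
// element, so a splat such as 0x12340000 can be matched as (ILHU 0x1234).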
1610 if (ConstantSDNode *CN = getVecImm(N)) {
1611 uint64_t Value = CN->getZExtValue();
1612 if ((ValueType == MVT::i32
1613 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1614 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1615 return DAG.getTargetConstant(Value >> 16, ValueType);
1616 }
1618 return SDValue();
1619 }
1621 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1622 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1623 if (ConstantSDNode *CN = getVecImm(N)) {
1624 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1625 }
1627 return SDValue();
1628 }
1630 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1631 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1632 if (ConstantSDNode *CN = getVecImm(N)) {
1633 return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
1634 }
1636 return SDValue();
1637 }
1639 //! Lower a BUILD_VECTOR instruction creatively:
1640 static SDValue
1641 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1642 EVT VT = Op.getValueType();
1643 EVT EltVT = VT.getVectorElementType();
1644 DebugLoc dl = Op.getDebugLoc();
1645 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1646 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1647 unsigned minSplatBits = EltVT.getSizeInBits();
1649 if (minSplatBits < 16)
1650 minSplatBits = 16;
1652 APInt APSplatBits, APSplatUndef;
1653 unsigned SplatBitSize;
1654 bool HasAnyUndefs;
1656 if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1657 HasAnyUndefs, minSplatBits)
1658 || minSplatBits < SplatBitSize)
1659 return SDValue(); // Wasn't a constant vector or splat exceeded min
1661 uint64_t SplatBits = APSplatBits.getZExtValue();
1663 switch (VT.getSimpleVT().SimpleTy) {
1664 default:
1665 report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
1666 Twine(VT.getEVTString()));
1668 case MVT::v4f32: {
1669 uint32_t Value32 = uint32_t(SplatBits);
1670 assert(SplatBitSize == 32
1671 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1672 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1673 SDValue T = DAG.getConstant(Value32, MVT::i32);
1674 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1675 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1677 }
1678 case MVT::v2f64: {
1679 uint64_t f64val = uint64_t(SplatBits);
1680 assert(SplatBitSize == 64
1681 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1682 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1683 SDValue T = DAG.getConstant(f64val, MVT::i64);
1684 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1685 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1687 }
1688 case MVT::v16i8: {
1689 // 8-bit constants have to be expanded to 16-bits
1690 unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1691 SmallVector<SDValue, 8> Ops;
1693 Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1694 return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1695 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1696 }
1697 case MVT::v8i16: {
1698 unsigned short Value16 = SplatBits;
1699 SDValue T = DAG.getConstant(Value16, EltVT);
1700 SmallVector<SDValue, 8> Ops;
1702 Ops.assign(8, T);
1703 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1704 }
1705 case MVT::v4i32: {
1706 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1707 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1708 }
1709 case MVT::v2i64: {
1710 return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1711 }
1712 }
1714 return SDValue();
1715 }
1718 //! Lower a splatted v2i64 constant that has no direct immediate form:
1719 SDValue
1720 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1721 DebugLoc dl) {
1722 uint32_t upper = uint32_t(SplatVal >> 32);
1723 uint32_t lower = uint32_t(SplatVal);
1725 if (upper == lower) {
1726 // Magic constant that can be matched by IL, ILA, et al.
1727 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1728 return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1729 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1730 Val, Val, Val, Val));
1731 } else {
1732 bool upper_special, lower_special;
1734 // NOTE: This code creates common-case shuffle masks that can be easily
1735 // detected as common expressions. It is not attempting to create highly
1736 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1738 // Detect if the upper or lower half is a special shuffle mask pattern:
1739 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1740 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
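// (0x00000000, 0xffffffff and 0x80000000 are exactly the word patterns a
// shufb control byte can synthesize without reading either input register.)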
1742 // Both upper and lower are special, lower to a constant pool load:
1743 if (lower_special && upper_special) {
1744 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1745 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1746 SplatValCN, SplatValCN);
1747 }
1749 SDValue LO32;
1750 SDValue HI32;
1751 SmallVector<SDValue, 16> ShufBytes;
1754 // Create lower vector if not a special pattern
1755 if (!lower_special) {
1756 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1757 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1758 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1759 LO32C, LO32C, LO32C, LO32C));
1760 }
1762 // Create upper vector if not a special pattern
1763 if (!upper_special) {
1764 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1765 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1766 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1767 HI32C, HI32C, HI32C, HI32C));
1768 }
1770 // If either upper or lower are special, then the two input operands are
1771 // the same (basically, one of them is a "don't care")
1772 if (lower_special)
1773 LO32 = HI32;
1774 if (upper_special)
1775 HI32 = LO32;
1777 for (int i = 0; i < 4; ++i) {
1778 uint64_t val = 0;
1779 for (int j = 0; j < 4; ++j) {
1781 bool process_upper, process_lower;
1782 val <<= 8;
1783 process_upper = (upper_special && (i & 1) == 0);
1784 process_lower = (lower_special && (i & 1) == 1);
1786 if (process_upper || process_lower) {
1787 if ((process_upper && upper == 0)
1788 || (process_lower && lower == 0))
1789 val |= 0x80;
1790 else if ((process_upper && upper == 0xffffffff)
1791 || (process_lower && lower == 0xffffffff))
1792 val |= 0xc0;
1793 else if ((process_upper && upper == 0x80000000)
1794 || (process_lower && lower == 0x80000000))
1795 val |= (j == 0 ? 0xe0 : 0x80);
1796 } else
1797 val |= i * 4 + j + ((i & 1) * 16);
1798 }
1800 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1801 }
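// (In a shufb control word, bytes 0x80..0xbf produce 0x00, 0xc0..0xdf
// produce 0xff, and 0xe0..0xff produce 0x80, which is what the special
// values chosen above rely on.)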
1803 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1804 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1805 &ShufBytes[0], ShufBytes.size()));
1806 }
1807 }
1809 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1810 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1811 /// permutation vector, V3, is monotonically increasing with one "exception"
1812 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1813 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1814 /// In either case, the net result is going to eventually invoke SHUFB to
1815 /// permute/shuffle the bytes from V1 and V2.
1817 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1818 /// generate a control word for byte/halfword/word insertion. This takes care
1819 /// of a single-element move from V2 into V1.
1821 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
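///
/// For example, the v4i32 mask (0, 5, 2, 3) is a single-element move from
/// V2 into slot 1 and is handled via SHUFFLE_MASK, while (1, 2, 3, 0) is a
/// pure element rotation and is handled via ROTBYTES_LEFT.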
1822 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1823 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1824 SDValue V1 = Op.getOperand(0);
1825 SDValue V2 = Op.getOperand(1);
1826 DebugLoc dl = Op.getDebugLoc();
1828 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1830 // If we have a single element being moved from V1 to V2, this can be handled
1831 // using the C*[DX] compute mask instructions, but the vector elements have
1832 // to be monotonically increasing with one exception element, and the source
1833 // slot of the element to move must be the same as the destination.
1834 EVT VecVT = V1.getValueType();
1835 EVT EltVT = VecVT.getVectorElementType();
1836 unsigned EltsFromV2 = 0;
1837 unsigned V2EltOffset = 0;
1838 unsigned V2EltIdx0 = 0;
1839 unsigned CurrElt = 0;
1840 unsigned MaxElts = VecVT.getVectorNumElements();
1841 unsigned PrevElt = 0;
1842 bool monotonic = true;
1843 bool rotate = true;
1844 int rotamt = 0;
1845 EVT maskVT; // which of the c?d instructions to use
1847 if (EltVT == MVT::i8) {
1848 V2EltIdx0 = 16;
1849 maskVT = MVT::v16i8;
1850 } else if (EltVT == MVT::i16) {
1851 V2EltIdx0 = 8;
1852 maskVT = MVT::v8i16;
1853 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1854 V2EltIdx0 = 4;
1855 maskVT = MVT::v4i32;
1856 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1857 V2EltIdx0 = 2;
1858 maskVT = MVT::v2i64;
1859 } else
1860 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1862 for (unsigned i = 0; i != MaxElts; ++i) {
1863 if (SVN->getMaskElt(i) < 0)
1864 continue;
1866 unsigned SrcElt = SVN->getMaskElt(i);
1868 if (monotonic) {
1869 if (SrcElt >= V2EltIdx0) {
1870 // TODO: optimize for the monotonic case when several consecutive
1871 // elements are taken from V2. Do we ever get such a case?
1872 if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
1873 V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
1874 else
1875 monotonic = false;
1876 ++EltsFromV2;
1877 } else if (CurrElt != SrcElt) {
1878 monotonic = false;
1879 }
1881 ++CurrElt;
1882 }
1884 if (rotate) {
1885 if (PrevElt > 0 && SrcElt < MaxElts) {
1886 if ((PrevElt == SrcElt - 1)
1887 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1888 PrevElt = SrcElt;
1889 } else {
1890 rotate = false;
1891 }
1892 } else if (i == 0 || (PrevElt==0 && SrcElt==1)) {
1893 // First time or after a "wrap around"
1894 rotamt = SrcElt - i;
1895 PrevElt = SrcElt;
1896 } else {
1897 // This isn't a rotation; it takes elements from vector 2
1898 rotate = false;
1899 }
1900 }
1901 }
1903 if (EltsFromV2 == 1 && monotonic) {
1904 // Compute mask and shuffle
1905 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1907 // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
1908 // R1 ($sp) is used here only because it is guaranteed to have its low bits zero
1909 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
1910 DAG.getRegister(SPU::R1, PtrVT),
1911 DAG.getConstant(V2EltOffset, MVT::i32));
1912 SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
1913 maskVT, Pointer);
1915 // Use shuffle mask in SHUFB synthetic instruction:
1916 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1917 ShufMaskOp);
1918 } else if (rotate) {
1919 if (rotamt < 0)
1920 rotamt += MaxElts;
1921 rotamt *= EltVT.getSizeInBits()/8;
1922 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1923 V1, DAG.getConstant(rotamt, MVT::i16));
1924 } else {
1925 // Convert the SHUFFLE_VECTOR mask's input element units to the
1926 // actual bytes.
1927 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1929 SmallVector<SDValue, 16> ResultMask;
1930 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1931 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1933 for (unsigned j = 0; j < BytesPerElement; ++j)
1934 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1935 }
1936 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1937 &ResultMask[0], ResultMask.size());
1938 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1939 }
1940 }
1942 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1943 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1944 DebugLoc dl = Op.getDebugLoc();
1946 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1947 // For a constant, build the appropriate constant vector, which will
1948 // eventually simplify to a vector register load.
1950 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1951 SmallVector<SDValue, 16> ConstVecValues;
1952 EVT VT;
1953 size_t n_copies;
1955 // Create a constant vector:
1956 switch (Op.getValueType().getSimpleVT().SimpleTy) {
1957 default: llvm_unreachable("Unexpected constant value type in "
1958 "LowerSCALAR_TO_VECTOR");
1959 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1960 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1961 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1962 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1963 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1964 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1967 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1968 for (size_t j = 0; j < n_copies; ++j)
1969 ConstVecValues.push_back(CValue);
1971 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1972 &ConstVecValues[0], ConstVecValues.size());
1973 } else {
1974 // Otherwise, copy the value from one register to another:
1975 switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1976 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1977 case MVT::i8:
1978 case MVT::i16:
1979 case MVT::i32:
1980 case MVT::i64:
1981 case MVT::f32:
1982 case MVT::f64:
1983 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1984 }
1985 }
1986 }
1990 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1991 EVT VT = Op.getValueType();
1992 SDValue N = Op.getOperand(0);
1993 SDValue Elt = Op.getOperand(1);
1994 DebugLoc dl = Op.getDebugLoc();
1995 SDValue retval;
1997 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1998 // Constant argument:
1999 int EltNo = (int) C->getZExtValue();
2002 if (VT == MVT::i8 && EltNo >= 16)
2003 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2004 else if (VT == MVT::i16 && EltNo >= 8)
2005 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2006 else if (VT == MVT::i32 && EltNo >= 4)
2007 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2008 else if (VT == MVT::i64 && EltNo >= 2)
2009 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2011 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2012 // i32 and i64: Element 0 is the preferred slot
2013 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
2014 }
2016 // Need to generate shuffle mask and extract:
2017 int prefslot_begin = -1, prefslot_end = -1;
2018 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2020 switch (VT.getSimpleVT().SimpleTy) {
2021 default:
2022 assert(false && "Invalid value type!");
2023 case MVT::i8: {
2024 prefslot_begin = prefslot_end = 3;
2025 break;
2026 }
2027 case MVT::i16: {
2028 prefslot_begin = 2; prefslot_end = 3;
2029 break;
2030 }
2031 case MVT::i32:
2032 case MVT::f32: {
2033 prefslot_begin = 0; prefslot_end = 3;
2034 break;
2035 }
2036 case MVT::i64:
2037 case MVT::f64: {
2038 prefslot_begin = 0; prefslot_end = 7;
2039 break;
2040 }
2041 }
2043 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2044 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2046 unsigned int ShufBytes[16] = {
2047 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2049 for (int i = 0; i < 16; ++i) {
2050 // zero fill upper part of preferred slot, don't care about the
2051 // rest (undefined)
2052 unsigned int mask_val;
2053 if (i <= prefslot_end) {
2054 mask_val =
2055 ((i < prefslot_begin)
2056 ? 0x80
2057 : elt_byte + (i - prefslot_begin));
2059 ShufBytes[i] = mask_val;
2060 } else
2061 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2062 }
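// Example: extracting i32 element 2 gives elt_byte == 8, so bytes 8..11
// of N land in the preferred slot (bytes 0..3) and the pattern repeats
// across the rest of the control word.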
2064 SDValue ShufMask[4];
2065 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2066 unsigned bidx = i * 4;
2067 unsigned int bits = ((ShufBytes[bidx] << 24) |
2068 (ShufBytes[bidx+1] << 16) |
2069 (ShufBytes[bidx+2] << 8) |
2070 ShufBytes[bidx+3]);
2071 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2072 }
2074 SDValue ShufMaskVec =
2075 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2076 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
2078 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2079 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2080 N, N, ShufMaskVec));
2081 } else {
2082 // Variable index: Rotate the requested element into slot 0, then replicate
2083 // slot 0 across the vector
2084 EVT VecVT = N.getValueType();
2085 if (!VecVT.isSimple() || !VecVT.isVector()) {
2086 report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2087 " vector type!");
2088 }
2090 // Make life easier by making sure the index is zero-extended to i32
2091 if (Elt.getValueType() != MVT::i32)
2092 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2094 // Scale the index to a bit/byte shift quantity
2095 APInt scaleFactor =
2096 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2097 unsigned scaleShift = scaleFactor.logBase2();
2098 SDValue vecShift;
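// For v4i32, scaleFactor is 16/4 == 4, so scaleShift == 2 and the element
// index is converted to a byte offset by shifting left twice.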
2100 if (scaleShift > 0) {
2101 // Scale the shift factor:
2102 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2103 DAG.getConstant(scaleShift, MVT::i32));
2104 }
2106 vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
2108 // Replicate the bytes starting at byte 0 across the entire vector (for
2109 // consistency with the notion of a unified register set)
2110 SDValue replicate;
2112 switch (VT.getSimpleVT().SimpleTy) {
2113 default:
2114 report_fatal_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector"
2115 " type");
2117 case MVT::i8: {
2118 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2119 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2120 factor, factor, factor, factor);
2121 break;
2122 }
2123 case MVT::i16: {
2124 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2125 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2126 factor, factor, factor, factor);
2127 break;
2128 }
2129 case MVT::i32:
2130 case MVT::f32: {
2131 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2132 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2133 factor, factor, factor, factor);
2134 break;
2135 }
2136 case MVT::i64:
2137 case MVT::f64: {
2138 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2139 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2140 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2141 loFactor, hiFactor, loFactor, hiFactor);
2142 break;
2143 }
2144 }
2146 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2147 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2148 vecShift, vecShift, replicate));
2149 }
2151 return retval;
2152 }
2154 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2155 SDValue VecOp = Op.getOperand(0);
2156 SDValue ValOp = Op.getOperand(1);
2157 SDValue IdxOp = Op.getOperand(2);
2158 DebugLoc dl = Op.getDebugLoc();
2159 EVT VT = Op.getValueType();
2160 EVT eltVT = ValOp.getValueType();
2162 // use 0 when the lane to insert to is 'undef'
2163 int64_t Offset = 0;
2164 if (IdxOp.getOpcode() != ISD::UNDEF) {
2165 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2166 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2167 Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
2168 }
2170 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2171 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2172 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2173 DAG.getRegister(SPU::R1, PtrVT),
2174 DAG.getConstant(Offset, PtrVT));
2175 // widen the mask when dealing with half vectors
2176 EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
2177 128/ VT.getVectorElementType().getSizeInBits());
2178 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
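// SHUFFLE_MASK selects down to a C*D instruction, producing an insertion
// control word: e.g. lane 1 of a v4i32 gives Offset == 4, and the mask
// routes the scalar's bytes into bytes 4..7 while keeping VecOp elsewhere.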
2180 SDValue result =
2181 DAG.getNode(SPUISD::SHUFB, dl, VT,
2182 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2183 VecOp,
2184 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2186 return result;
2187 }
2189 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2190 const TargetLowering &TLI)
2191 {
2192 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2193 DebugLoc dl = Op.getDebugLoc();
2194 EVT ShiftVT = TLI.getShiftAmountTy();
2196 assert(Op.getValueType() == MVT::i8);
2197 switch (Opc) {
2198 default:
2199 llvm_unreachable("Unhandled i8 math operator");
2201 case ISD::ADD: {
2203 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2205 SDValue N1 = Op.getOperand(1);
2206 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2207 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2208 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2209 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2210 }
2212 case ISD::SUB: {
2214 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2216 SDValue N1 = Op.getOperand(1);
2217 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2218 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2219 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2220 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2221 }
2222 case ISD::ROTR:
2223 case ISD::ROTL: {
2224 SDValue N1 = Op.getOperand(1);
2225 EVT N1VT = N1.getValueType();
2227 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2228 if (!N1VT.bitsEq(ShiftVT)) {
2229 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2230 ? ISD::ZERO_EXTEND
2231 : ISD::TRUNCATE;
2232 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2233 }
2235 // Replicate lower 8-bits into upper 8:
2236 SDValue ExpandArg =
2237 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2238 DAG.getNode(ISD::SHL, dl, MVT::i16,
2239 N0, DAG.getConstant(8, MVT::i32)));
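// Example: rotl i8 0xA5 by 1 expands N0 to 0xA5A5; a 16-bit rotate gives
// 0x4B4B and the truncate below returns 0x4B, the correct 8-bit rotation.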
2241 // Truncate back down to i8
2242 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2243 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2244 }
2245 case ISD::SRL:
2246 case ISD::SHL: {
2247 SDValue N1 = Op.getOperand(1);
2248 EVT N1VT = N1.getValueType();
2250 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2251 if (!N1VT.bitsEq(ShiftVT)) {
2252 unsigned N1Opc = ISD::ZERO_EXTEND;
2254 if (N1.getValueType().bitsGT(ShiftVT))
2255 N1Opc = ISD::TRUNCATE;
2257 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2258 }
2260 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2261 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2262 }
2263 case ISD::SRA: {
2264 SDValue N1 = Op.getOperand(1);
2265 EVT N1VT = N1.getValueType();
2267 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2268 if (!N1VT.bitsEq(ShiftVT)) {
2269 unsigned N1Opc = ISD::SIGN_EXTEND;
2271 if (N1VT.bitsGT(ShiftVT))
2272 N1Opc = ISD::TRUNCATE;
2273 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2274 }
2276 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2277 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2278 }
2279 case ISD::MUL: {
2280 SDValue N1 = Op.getOperand(1);
2282 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2283 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2284 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2285 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2286 }
2287 }
2289 return SDValue();
2290 }
2293 //! Lower byte immediate operations for v16i8 vectors:
2294 static SDValue
2295 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2296 SDValue ConstVec;
2297 SDValue Arg;
2298 EVT VT = Op.getValueType();
2299 DebugLoc dl = Op.getDebugLoc();
2301 ConstVec = Op.getOperand(0);
2302 Arg = Op.getOperand(1);
2303 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2304 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2305 ConstVec = ConstVec.getOperand(0);
2306 } else {
2307 ConstVec = Op.getOperand(1);
2308 Arg = Op.getOperand(0);
2309 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2310 ConstVec = ConstVec.getOperand(0);
2311 }
2312 }
2313 }
2315 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2316 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2317 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2319 APInt APSplatBits, APSplatUndef;
2320 unsigned SplatBitSize;
2321 bool HasAnyUndefs;
2322 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2324 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2325 HasAnyUndefs, minSplatBits)
2326 && minSplatBits <= SplatBitSize) {
2327 uint64_t SplatBits = APSplatBits.getZExtValue();
2328 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2330 SmallVector<SDValue, 16> tcVec;
2331 tcVec.assign(16, tc);
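// Rebuilding the splat as 16 target-constant bytes lets instruction
// selection match, e.g., (or x, (splat 0x0f)) directly as ORBI x, 0x0f.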
2332 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2333 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2334 }
2335 }
2337 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2338 // lowered. Return the operation, rather than a null SDValue.
2339 return Op;
2340 }
2342 //! Custom lowering for CTPOP (count population)
2343 /*!
2344 Custom lowering code that counts the number of ones in the input
2345 operand. SPU has such an instruction, but it counts the number of
2346 ones per byte, which then have to be accumulated.
2347 */
2348 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2349 EVT VT = Op.getValueType();
2350 EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2351 VT, (128 / VT.getSizeInBits()));
2352 DebugLoc dl = Op.getDebugLoc();
2354 switch (VT.getSimpleVT().SimpleTy) {
2355 default:
2356 assert(false && "Invalid value type!");
2357 case MVT::i8: {
2358 SDValue N = Op.getOperand(0);
2359 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2361 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2362 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2364 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2365 }
2367 case MVT::i16: {
2368 MachineFunction &MF = DAG.getMachineFunction();
2369 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2371 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2373 SDValue N = Op.getOperand(0);
2374 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2375 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2376 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2378 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2379 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2381 // CNTB_result becomes the chain to which all of the virtual registers
2382 // CNTB_reg, SUM1_reg become associated:
2383 SDValue CNTB_result =
2384 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2386 SDValue CNTB_rescopy =
2387 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2389 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2391 return DAG.getNode(ISD::AND, dl, MVT::i16,
2392 DAG.getNode(ISD::ADD, dl, MVT::i16,
2393 DAG.getNode(ISD::SRL, dl, MVT::i16,
2394 Tmp1, Shift1),
2395 Tmp1),
2396 Mask0);
2397 }
2399 case MVT::i32: {
2400 MachineFunction &MF = DAG.getMachineFunction();
2401 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2403 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2404 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2406 SDValue N = Op.getOperand(0);
2407 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2408 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2409 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2410 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2412 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2413 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2415 // CNTB_result becomes the chain to which all of the virtual registers
2416 // CNTB_reg, SUM1_reg become associated:
2417 SDValue CNTB_result =
2418 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2420 SDValue CNTB_rescopy =
2421 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2423 SDValue Comp1 =
2424 DAG.getNode(ISD::SRL, dl, MVT::i32,
2425 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2426 Shift1);
2428 SDValue Sum1 =
2429 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2430 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2432 SDValue Sum1_rescopy =
2433 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2435 SDValue Comp2 =
2436 DAG.getNode(ISD::SRL, dl, MVT::i32,
2437 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2438 Shift2);
2439 SDValue Sum2 =
2440 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2441 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
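// Worked example: an input of 0xFFFFFFFF gives per-byte counts 0x08080808;
// Sum1 becomes 0x08081010, Sum2 becomes 0x08101820, and the AND below
// extracts 0x20 == 32, the full population count.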
2443 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2444 }
2446 case MVT::i64:
2447 break;
2448 }
2450 return SDValue();
2451 }
2453 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2454 /*!
2455 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2456 All conversions to i64 are expanded to a libcall.
2457 */
2458 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2459 const SPUTargetLowering &TLI) {
2460 EVT OpVT = Op.getValueType();
2461 SDValue Op0 = Op.getOperand(0);
2462 EVT Op0VT = Op0.getValueType();
2464 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2465 || OpVT == MVT::i64) {
2466 // Convert f32 / f64 to i32 / i64 via libcall.
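// (e.g. FP_TO_SINT f64 -> i32 becomes a call to __fixdfsi; f32 -> i32 is
// returned unchanged at the bottom of this function)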
2467 RTLIB::Libcall LC =
2468 (Op.getOpcode() == ISD::FP_TO_SINT)
2469 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2470 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2471 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2472 SDValue Dummy;
2473 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2474 }
2476 return Op;
2477 }
2479 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2480 /*!
2481 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2482 All conversions from i64 are expanded to a libcall.
2483 */
2484 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2485 const SPUTargetLowering &TLI) {
2486 EVT OpVT = Op.getValueType();
2487 SDValue Op0 = Op.getOperand(0);
2488 EVT Op0VT = Op0.getValueType();
2490 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2491 || Op0VT == MVT::i64) {
2492 // Convert i32, i64 to f64 via libcall:
2493 RTLIB::Libcall LC =
2494 (Op.getOpcode() == ISD::SINT_TO_FP)
2495 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2496 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2497 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2498 SDValue Dummy;
2499 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2500 }
2502 return Op;
2503 }
2505 //! Lower ISD::SETCC
2506 /*!
2507 This handles MVT::f64 (double floating point) condition lowering.
2508 */
2509 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2510 const TargetLowering &TLI) {
2511 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2512 DebugLoc dl = Op.getDebugLoc();
2513 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2515 SDValue lhs = Op.getOperand(0);
2516 SDValue rhs = Op.getOperand(1);
2517 EVT lhsVT = lhs.getValueType();
2518 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2520 EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2521 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2522 EVT IntVT(MVT::i64);
2524 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2525 // selected to a NOP:
2526 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2527 SDValue lhsHi32 =
2528 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2529 DAG.getNode(ISD::SRL, dl, IntVT,
2530 i64lhs, DAG.getConstant(32, MVT::i32)));
2531 SDValue lhsHi32abs =
2532 DAG.getNode(ISD::AND, dl, MVT::i32,
2533 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2534 SDValue lhsLo32 =
2535 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2537 // SETO and SETUO only use the lhs operand:
2538 if (CC->get() == ISD::SETO) {
2539 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2540 // SETUO
2541 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2542 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2543 DAG.getSetCC(dl, ccResultVT,
2544 lhs, DAG.getConstantFP(0.0, lhsVT),
2545 ISD::SETUO),
2546 DAG.getConstant(ccResultAllOnes, ccResultVT));
2547 } else if (CC->get() == ISD::SETUO) {
2548 // Evaluates to true if Op0 is [SQ]NaN
2549 return DAG.getNode(ISD::AND, dl, ccResultVT,
2550 DAG.getSetCC(dl, ccResultVT,
2551 lhsHi32abs,
2552 DAG.getConstant(0x7ff00000, MVT::i32),
2553 ISD::SETGE),
2554 DAG.getSetCC(dl, ccResultVT,
2555 lhsLo32,
2556 DAG.getConstant(0, MVT::i32),
2557 ISD::SETGE));
2558 }
2560 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2561 SDValue rhsHi32 =
2562 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2563 DAG.getNode(ISD::SRL, dl, IntVT,
2564 i64rhs, DAG.getConstant(32, MVT::i32)));
2566 // If a value is negative, subtract from the sign magnitude constant:
2567 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2569 // Convert the sign-magnitude representation into 2's complement:
2570 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2571 lhsHi32, DAG.getConstant(31, MVT::i32));
2572 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2573 SDValue lhsSelect =
2574 DAG.getNode(ISD::SELECT, dl, IntVT,
2575 lhsSelectMask, lhsSignMag2TC, i64lhs);
2577 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2578 rhsHi32, DAG.getConstant(31, MVT::i32));
2579 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2580 SDValue rhsSelect =
2581 DAG.getNode(ISD::SELECT, dl, IntVT,
2582 rhsSelectMask, rhsSignMag2TC, i64rhs);
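// After these selects, both operands are plain two's complement i64 values
// whose ordering matches the f64 ordering: e.g. -1.0 (0xBFF0000000000000)
// maps to 0xC010000000000000, which compares below every positive pattern.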
2584 unsigned compareOp = 0;
2586 switch (CC->get()) {
2587 case ISD::SETOEQ:
2588 case ISD::SETUEQ:
2589 compareOp = ISD::SETEQ; break;
2590 case ISD::SETOGT:
2591 case ISD::SETUGT:
2592 compareOp = ISD::SETGT; break;
2593 case ISD::SETOGE:
2594 case ISD::SETUGE:
2595 compareOp = ISD::SETGE; break;
2596 case ISD::SETOLT:
2597 case ISD::SETULT:
2598 compareOp = ISD::SETLT; break;
2599 case ISD::SETOLE:
2600 case ISD::SETULE:
2601 compareOp = ISD::SETLE; break;
2602 case ISD::SETONE:
2603 case ISD::SETUNE:
2604 compareOp = ISD::SETNE; break;
2605 default:
2606 report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
2607 }
2609 SDValue result =
2610 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2611 (ISD::CondCode) compareOp);
2613 if ((CC->get() & 0x8) == 0) {
2614 // Ordered comparison:
2615 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2616 lhs, DAG.getConstantFP(0.0, MVT::f64),
2617 ISD::SETO);
2618 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2619 rhs, DAG.getConstantFP(0.0, MVT::f64),
2620 ISD::SETO);
2621 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2623 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2624 }
2626 return result;
2627 }
2629 //! Lower ISD::SELECT_CC
2630 /*!
2631 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2632 SELB instruction.
2634 \note Need to revisit this in the future: if the code path through the true
2635 and false value computations is longer than the latency of a branch (6
2636 cycles), then it would be more advantageous to branch and insert a new basic
2637 block and branch on the condition. However, this code does not make that
2638 assumption, given the simplistic uses so far.
2639 */
2641 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2642 const TargetLowering &TLI) {
2643 EVT VT = Op.getValueType();
2644 SDValue lhs = Op.getOperand(0);
2645 SDValue rhs = Op.getOperand(1);
2646 SDValue trueval = Op.getOperand(2);
2647 SDValue falseval = Op.getOperand(3);
2648 SDValue condition = Op.getOperand(4);
2649 DebugLoc dl = Op.getDebugLoc();
2651 // NOTE: SELB's arguments: $rA, $rB, $mask
2653 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2654 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2655 // condition was true and 0s where the condition was false. Hence, the
2656 // arguments to SELB get reversed.
2658 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2659 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2660 // with another "cannot select select_cc" assert:
2662 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2663 TLI.getSetCCResultType(Op.getValueType()),
2664 lhs, rhs, condition);
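// Example: (select_cc lhs, rhs, t, f, setgt) becomes
// (SELB f, t, (setcc lhs, rhs, setgt)); SELB takes bits from its second
// operand where the mask is 1, so t is chosen exactly where the compare
// holds.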
2665 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2666 }
2668 //! Custom lower ISD::TRUNCATE
2669 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2670 {
2671 // Type to truncate to
2672 EVT VT = Op.getValueType();
2673 MVT simpleVT = VT.getSimpleVT();
2674 EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2675 VT, (128 / VT.getSizeInBits()));
2676 DebugLoc dl = Op.getDebugLoc();
2678 // Type to truncate from
2679 SDValue Op0 = Op.getOperand(0);
2680 EVT Op0VT = Op0.getValueType();
2682 if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
2683 // Create shuffle mask, least significant doubleword of quadword
2684 unsigned maskHigh = 0x08090a0b;
2685 unsigned maskLow = 0x0c0d0e0f;
2686 // Use a shuffle to perform the truncation
2687 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2688 DAG.getConstant(maskHigh, MVT::i32),
2689 DAG.getConstant(maskLow, MVT::i32),
2690 DAG.getConstant(maskHigh, MVT::i32),
2691 DAG.getConstant(maskLow, MVT::i32));
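// Bytes 8..15 are the least significant doubleword of the big-endian
// quadword, so this mask extracts exactly the low 64 bits.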
2693 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2694 Op0, Op0, shufMask);
2696 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2697 }
2699 return SDValue(); // Leave the truncate unmolested
2700 }
2702 /*!
2703 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2704 * algorithm is to duplicate the sign bit using rotmai to generate at
2705 * least one byte full of sign bits. Then propagate the "sign-byte" into
2706 * the leftmost words and the i64/i32 into the rightmost words using shufb.
2708 * @param Op The sext operand
2709 * @param DAG The current DAG
2710 * @return The SDValue with the entire instruction sequence
2711 */
2712 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2713 {
2714 DebugLoc dl = Op.getDebugLoc();
2716 // Type to extend to
2717 MVT OpVT = Op.getValueType().getSimpleVT();
2719 // Type to extend from
2720 SDValue Op0 = Op.getOperand(0);
2721 MVT Op0VT = Op0.getValueType().getSimpleVT();
2723 // The type to extend to needs to be an i128 and
2724 // the type to extend from needs to be i64 or i32.
2725 assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2726 "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2728 // Create shuffle mask
2729 unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2730 unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
2731 unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2732 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2733 DAG.getConstant(mask1, MVT::i32),
2734 DAG.getConstant(mask1, MVT::i32),
2735 DAG.getConstant(mask2, MVT::i32),
2736 DAG.getConstant(mask3, MVT::i32));
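// In the SHUFB below, control bytes 0x10..0x1f select from sraVal (all
// sign bits) and 0x00..0x07 select the original value, so the sign bytes
// fill the upper part of the i128 and the input lands in the low part.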
2738 // Word wise arithmetic right shift to generate at least one byte
2739 // that contains sign bits.
2740 MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2741 SDValue sraVal = DAG.getNode(ISD::SRA,
2742 dl,
2743 mvt,
2744 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2745 DAG.getConstant(31, MVT::i32));
2747 // reinterpret as a i128 (SHUFB requires it). This gets lowered away.
2748 SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2749 dl, Op0VT, Op0,
2750 DAG.getTargetConstant(
2751 SPU::GPRCRegClass.getID(),
2752 MVT::i32)), 0);
2753 // Shuffle bytes - Copy the sign bits into the upper 64 bits
2754 // and the input value into the lower 64 bits.
2755 SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2756 extended, sraVal, shufMask);
2757 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2758 }
2760 //! Custom (target-specific) lowering entry point
2761 /*!
2762 This is where LLVM's DAG selection process calls to do target-specific
2763 lowering of nodes.
2764 */
2765 SDValue
2766 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2767 {
2768 unsigned Opc = (unsigned) Op.getOpcode();
2769 EVT VT = Op.getValueType();
2771 switch (Opc) {
2772 default: {
2773 #ifndef NDEBUG
2774 errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2775 errs() << "Op.getOpcode() = " << Opc << "\n";
2776 errs() << "*Op.getNode():\n";
2777 Op.getNode()->dump();
2778 #endif
2779 llvm_unreachable(0);
2780 }
2781 case ISD::LOAD:
2782 case ISD::EXTLOAD:
2783 case ISD::SEXTLOAD:
2784 case ISD::ZEXTLOAD:
2785 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2786 case ISD::STORE:
2787 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2788 case ISD::ConstantPool:
2789 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2790 case ISD::GlobalAddress:
2791 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2792 case ISD::JumpTable:
2793 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2794 case ISD::ConstantFP:
2795 return LowerConstantFP(Op, DAG);
2797 // i8, i64 math ops:
2798 case ISD::ADD:
2799 case ISD::SUB:
2800 case ISD::ROTR:
2801 case ISD::ROTL:
2802 case ISD::SRL:
2803 case ISD::SHL:
2804 case ISD::SRA: {
2805 if (VT == MVT::i8)
2806 return LowerI8Math(Op, DAG, Opc, *this);
2807 break;
2808 }
2810 case ISD::FP_TO_SINT:
2811 case ISD::FP_TO_UINT:
2812 return LowerFP_TO_INT(Op, DAG, *this);
2814 case ISD::SINT_TO_FP:
2815 case ISD::UINT_TO_FP:
2816 return LowerINT_TO_FP(Op, DAG, *this);
2818 // Vector-related lowering.
2819 case ISD::BUILD_VECTOR:
2820 return LowerBUILD_VECTOR(Op, DAG);
2821 case ISD::SCALAR_TO_VECTOR:
2822 return LowerSCALAR_TO_VECTOR(Op, DAG);
2823 case ISD::VECTOR_SHUFFLE:
2824 return LowerVECTOR_SHUFFLE(Op, DAG);
2825 case ISD::EXTRACT_VECTOR_ELT:
2826 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2827 case ISD::INSERT_VECTOR_ELT:
2828 return LowerINSERT_VECTOR_ELT(Op, DAG);
2830 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2831 case ISD::AND:
2832 case ISD::OR:
2833 case ISD::XOR:
2834 return LowerByteImmed(Op, DAG);
2836 // Vector and i8 multiply:
2837 case ISD::MUL:
2838 if (VT == MVT::i8)
2839 return LowerI8Math(Op, DAG, Opc, *this);
2840 break;
2841 case ISD::CTPOP:
2842 return LowerCTPOP(Op, DAG);
2844 case ISD::SELECT_CC:
2845 return LowerSELECT_CC(Op, DAG, *this);
2847 case ISD::SETCC:
2848 return LowerSETCC(Op, DAG, *this);
2850 case ISD::TRUNCATE:
2851 return LowerTRUNCATE(Op, DAG);
2853 case ISD::SIGN_EXTEND:
2854 return LowerSIGN_EXTEND(Op, DAG);
2855 }
2857 return SDValue();
2858 }
2860 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2861 SmallVectorImpl<SDValue>&Results,
2862 SelectionDAG &DAG) const
2863 {
2864 #if 0
2865 unsigned Opc = (unsigned) N->getOpcode();
2866 EVT OpVT = N->getValueType(0);
2868 switch (Opc) {
2869 default: {
2870 errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2871 errs() << "Op.getOpcode() = " << Opc << "\n";
2872 errs() << "*Op.getNode():\n";
2873 N->dump();
2874 abort();
2875 }
2876 }
2877 #endif
2880 /* Otherwise, return unchanged */
2881 }
2883 //===----------------------------------------------------------------------===//
2884 // Target Optimization Hooks
2885 //===----------------------------------------------------------------------===//
2887 SDValue
2888 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2889 {
2890 #if 0
2891 TargetMachine &TM = getTargetMachine();
2892 #endif
2893 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2894 SelectionDAG &DAG = DCI.DAG;
2895 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2896 EVT NodeVT = N->getValueType(0); // The node's value type
2897 EVT Op0VT = Op0.getValueType(); // The first operand's result
2898 SDValue Result; // Initially, empty result
2899 DebugLoc dl = N->getDebugLoc();
2901 switch (N->getOpcode()) {
2902 default: break;
2903 case ISD::ADD: {
2904 SDValue Op1 = N->getOperand(1);
2906 if (Op0.getOpcode() == SPUISD::IndirectAddr
2907 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2908 // Normalize the operands to reduce repeated code
2909 SDValue IndirectArg = Op0, AddArg = Op1;
2911 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2912 IndirectArg = Op1;
2913 AddArg = Op0;
2914 }
2916 if (isa<ConstantSDNode>(AddArg)) {
2917 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2918 SDValue IndOp1 = IndirectArg.getOperand(1);
2920 if (CN0->isNullValue()) {
2921 // (add (SPUindirect <arg>, <arg>), 0) ->
2922 // (SPUindirect <arg>, <arg>)
2924 #if !defined(NDEBUG)
2925 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2926 errs() << "\n"
2927 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2928 << "With:    (SPUindirect <arg>, <arg>)\n";
2929 }
2930 #endif
2932 return IndirectArg;
2933 } else if (isa<ConstantSDNode>(IndOp1)) {
2934 // (add (SPUindirect <arg>, <const>), <const>) ->
2935 // (SPUindirect <arg>, <const + const>)
2936 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2937 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2938 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2940 #if !defined(NDEBUG)
2941 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2942 errs() << "\n"
2943 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2944 << "), " << CN0->getSExtValue() << ")\n"
2945 << "With: (SPUindirect <arg>, "
2946 << combinedConst << ")\n";
2947 }
2948 #endif
2950 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2951 IndirectArg, combinedValue);
2952 }
2953 }
2954 }
2955 break;
2956 }
2957 case ISD::SIGN_EXTEND:
2958 case ISD::ZERO_EXTEND:
2959 case ISD::ANY_EXTEND: {
2960 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2961 // (any_extend (SPUextract_elt0 <arg>)) ->
2962 // (SPUextract_elt0 <arg>)
2963 // Types must match, however...
2964 #if !defined(NDEBUG)
2965 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2966 errs() << "\nReplace: ";
2967 N->dump(&DAG);
2968 errs() << "\nWith:    ";
2969 Op0.getNode()->dump(&DAG);
2970 errs() << "\n";
2971 }
2972 #endif
2974 return Op0;
2975 }
2976 break;
2977 }
2978 case SPUISD::IndirectAddr: {
2979 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2980 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2981 if (CN != 0 && CN->isNullValue()) {
2982 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2983 // (SPUaform <addr>, 0)
2985 DEBUG(errs() << "Replace: ");
2986 DEBUG(N->dump(&DAG));
2987 DEBUG(errs() << "\nWith: ");
2988 DEBUG(Op0.getNode()->dump(&DAG));
2989 DEBUG(errs() << "\n");
2991 return Op0;
2992 }
2993 } else if (Op0.getOpcode() == ISD::ADD) {
2994 SDValue Op1 = N->getOperand(1);
2995 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2996 // (SPUindirect (add <arg>, <arg>), 0) ->
2997 // (SPUindirect <arg>, <arg>)
2998 if (CN1->isNullValue()) {
3000 #if !defined(NDEBUG)
3001 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
3003 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
3004 << "With:    (SPUindirect <arg>, <arg>)\n";
3005 }
3006 #endif
3008 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
3009 Op0.getOperand(0), Op0.getOperand(1));
3010 }
3011 }
3012 break;
3013 }
3015 case SPUISD::SHL_BITS:
3016 case SPUISD::SHL_BYTES:
3017 case SPUISD::ROTBYTES_LEFT: {
3018 SDValue Op1 = N->getOperand(1);
3020 // Kill degenerate vector shifts:
3021 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
3022 if (CN->isNullValue()) {
3023 Result = Op0;
3024 }
3025 }
3026 break;
3027 }
3028 case SPUISD::PREFSLOT2VEC: {
3029 switch (Op0.getOpcode()) {
3030 default:
3031 break;
3032 case ISD::ANY_EXTEND:
3033 case ISD::ZERO_EXTEND:
3034 case ISD::SIGN_EXTEND: {
3035 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
3036 // <arg>
3037 // but only if the SPUprefslot2vec and <arg> types match.
3038 SDValue Op00 = Op0.getOperand(0);
3039 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
3040 SDValue Op000 = Op00.getOperand(0);
3041 if (Op000.getValueType() == NodeVT) {
3042 Result = Op000;
3043 }
3044 }
3045 break;
3046 }
3047 case SPUISD::VEC2PREFSLOT: {
3048 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
3049 // <arg>
3050 Result = Op0.getOperand(0);
3051 break;
3052 }
3053 }
3054 break;
3055 }
3056 }
3058 // Otherwise, return unchanged.
3060 if (Result.getNode()) {
3061 DEBUG(errs() << "\nReplace.SPU: ");
3062 DEBUG(N->dump(&DAG));
3063 DEBUG(errs() << "\nWith: ");
3064 DEBUG(Result.getNode()->dump(&DAG));
3065 DEBUG(errs() << "\n");
3066 }
3068 return Result;
3069 }
3072 //===----------------------------------------------------------------------===//
3073 // Inline Assembly Support
3074 //===----------------------------------------------------------------------===//
3076 /// getConstraintType - Given a constraint letter, return the type of
3077 /// constraint it is for this target.
3078 SPUTargetLowering::ConstraintType
3079 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3080 if (ConstraintLetter.size() == 1) {
3081 switch (ConstraintLetter[0]) {
3082 default: break;
3083 case 'b':
3084 case 'c':
3085 case 'd':
3086 case 'f':
3087 case 'v':
3088 case 'y':
3089 return C_RegisterClass;
3090 }
3091 }
3092 return TargetLowering::getConstraintType(ConstraintLetter);
3093 }
3094 /// Examine constraint type and operand type and determine a weight value.
3095 /// This object must already have been set up with the operand type
3096 /// and the current alternative constraint selected.
3097 TargetLowering::ConstraintWeight
3098 SPUTargetLowering::getSingleConstraintMatchWeight(
3099 AsmOperandInfo &info, const char *constraint) const {
3100 ConstraintWeight weight = CW_Invalid;
3101 Value *CallOperandVal = info.CallOperandVal;
3102 // If we don't have a value, we can't do a match,
3103 // but allow it at the lowest weight.
3104 if (CallOperandVal == NULL)
3105 return CW_Default;
3106 // Look at the constraint type.
3107 switch (*constraint) {
3108 default:
3109 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
3110 break;
3111 //FIXME: Seems like the supported constraint letters were just copied
3112 // from PPC, as the following doesn't correspond to the GCC docs.
3113 // I'm leaving it so until someone adds the corresponding lowering support.
3114 case 'b':
3115 case 'c':
3116 case 'd':
3117 case 'f':
3118 case 'v':
3119 case 'y':
3120 weight = CW_Register;
3121 break;
3122 }
3123 return weight;
3124 }
3126 std::pair<unsigned, const TargetRegisterClass*>
3127 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3128 EVT VT) const
3129 {
3130 if (Constraint.size() == 1) {
3131 // GCC RS6000 Constraint Letters
3132 switch (Constraint[0]) {
3133 case 'b': // R1-R31
3134 case 'r': // R0-R31
3135 if (VT == MVT::i64)
3136 return std::make_pair(0U, SPU::R64CRegisterClass);
3137 return std::make_pair(0U, SPU::R32CRegisterClass);
3138 case 'f':
3139 if (VT == MVT::f32)
3140 return std::make_pair(0U, SPU::R32FPRegisterClass);
3141 else if (VT == MVT::f64)
3142 return std::make_pair(0U, SPU::R64FPRegisterClass);
3143 break;
3144 case 'v':
3145 return std::make_pair(0U, SPU::GPRCRegisterClass);
3146 }
3147 }
3149 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3150 }
3152 //! Compute used/known bits for a SPU operand
3153 void
3154 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3155 const APInt &Mask,
3156 APInt &KnownZero,
3157 APInt &KnownOne,
3158 const SelectionDAG &DAG,
3159 unsigned Depth ) const {
3160 #if 0
3161 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3163 switch (Op.getOpcode()) {
3164 default:
3165 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3166 break;
3167 case SPUISD::CALL:
3168 case SPUISD::SHUFB:
3169 case SPUISD::SHUFFLE_MASK:
3170 case SPUISD::CNTB:
3171 case SPUISD::PREFSLOT2VEC:
3172 case SPUISD::LDRESULT:
3173 case SPUISD::VEC2PREFSLOT:
3174 case SPUISD::SHLQUAD_L_BITS:
3175 case SPUISD::SHLQUAD_L_BYTES:
3176 case SPUISD::VEC_ROTL:
3177 case SPUISD::VEC_ROTR:
3178 case SPUISD::ROTBYTES_LEFT:
3179 case SPUISD::SELECT_MASK:
3180 case SPUISD::SELB:
3181 break;
3182 }
3183 #endif
3184 }
3185 unsigned
3186 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3187 unsigned Depth) const {
3188 switch (Op.getOpcode()) {
3189 default:
3190 return 1;
3192 case ISD::SETCC: {
3193 EVT VT = Op.getValueType();
3195 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3196 VT = MVT::i32;
3197 }
3198 return VT.getSizeInBits();
3199 }
3200 }
3201 }
3203 // LowerAsmOperandForConstraint
3204 void
3205 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3206 char ConstraintLetter,
3207 std::vector<SDValue> &Ops,
3208 SelectionDAG &DAG) const {
3209 // Default, for the time being, to the base class handler
3210 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3211 }
3213 /// isLegalAddressImmediate - Return true if the integer value can be used
3214 /// as the offset of the target addressing mode.
3215 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3216 const Type *Ty) const {
3217 // SPU's addresses are 256K:
3218 return (V > -(1 << 18) && V < (1 << 18) - 1);
3219 }
3221 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3222 return false;
3223 }
3225 bool
3226 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3227 // The SPU target isn't yet aware of offsets.
3228 return false;
3229 }
3231 // can we compare to Imm without writing it into a register?
3232 bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3233 //ceqi, cgti, etc. all take s10 operand
3234 return isInt<10>(Imm);
3235 }
3237 bool
3238 SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
3239 const Type * ) const {
3241 // A-form: 18bit absolute address.
3242 if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
3243 return true;
3245 // D-form: reg + 14bit offset
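// (e.g. an offset of 4096 is encodable, while 65536 exceeds the 14-bit
// range and must be lowered through an X-form reg+reg address)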
3246 if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
3247 return true;
3249 // X-form: reg+reg
3250 if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
3251 return true;
3253 return false;
3254 }