1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by a team from the Computer Systems Research
6 // Department at The Aerospace Corporation and is distributed under the
7 // University of Illinois Open Source License. See LICENSE.TXT for details.
9 //===----------------------------------------------------------------------===//
11 // This file implements the SPUTargetLowering class.
13 //===----------------------------------------------------------------------===//
15 #include "SPURegisterNames.h"
16 #include "SPUISelLowering.h"
17 #include "SPUTargetMachine.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/SSARegMap.h"
26 #include "llvm/Constants.h"
27 #include "llvm/Function.h"
28 #include "llvm/Intrinsics.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Target/TargetOptions.h"
37 // Used in getTargetNodeName() below
// Lazily-populated table mapping SPUISD opcodes to printable names.
// NOTE: file-scope mutable state; filled on first getTargetNodeName() call.
39 std::map<unsigned, const char *> node_names;
41 //! MVT::ValueType mapping to useful data for Cell SPU
// Each entry pairs a value type with the byte offset of its "preferred slot"
// within a 16-byte SPU register; LowerLOAD uses prefslot_byte to compute the
// rotation needed to land a loaded element in that slot.
42 struct valtype_map_s {
43 const MVT::ValueType valtype;
44 const int prefslot_byte;
47 const valtype_map_s valtype_map[] = {
// Number of entries in valtype_map, derived from the array itself.
58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Look up the valtype_map entry for VT via a linear scan of the small table.
// Returns 0 when VT has no entry; the cerr diagnostic below reports that case.
60 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
61 const valtype_map_s *retval = 0;
63 for (size_t i = 0; i < n_valtype_map; ++i) {
64 if (valtype_map[i].valtype == VT) {
65 retval = valtype_map + i;
72 cerr << "getValueTypeMapEntry returns NULL for "
73 << MVT::getValueTypeString(VT)
82 //! Predicate that returns true if operand is a memory target
84 \arg Op Operand to test
85 \return true if the operand is a memory target (i.e., global
86 address, external symbol, constant pool) or an existing D-Form
89 bool isMemoryOperand(const SDOperand &Op)
91 const unsigned Opc = Op.getOpcode();
// Accept every address-producing node kind, in both its generic and its
// Target* (post-selection) form, plus the SPU-specific D-Form address node.
92 return (Opc == ISD::GlobalAddress
93 || Opc == ISD::GlobalTLSAddress
94 || Opc == ISD::FrameIndex
95 || Opc == ISD::JumpTable
96 || Opc == ISD::ConstantPool
97 || Opc == ISD::ExternalSymbol
98 || Opc == ISD::TargetGlobalAddress
99 || Opc == ISD::TargetGlobalTLSAddress
100 || Opc == ISD::TargetFrameIndex
101 || Opc == ISD::TargetJumpTable
102 || Opc == ISD::TargetConstantPool
103 || Opc == ISD::TargetExternalSymbol
104 || Opc == SPUISD::DFormAddr);
// Constructor: registers the SPU register classes and declares, for every
// (opcode, type) pair the target cares about, whether the operation is
// Legal, must be Promoted, Expanded, or Custom-lowered by this class.
108 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
109 : TargetLowering(TM),
112 // Fold away setcc operations if possible.
115 // Use _setjmp/_longjmp instead of setjmp/longjmp.
116 setUseUnderscoreSetJmp(true);
117 setUseUnderscoreLongJmp(true);
119 // Set up the SPU's register classes:
120 // NOTE: i8 register class is not registered because we cannot determine when
121 // we need to zero or sign extend for custom-lowered loads and stores.
122 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
123 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
124 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
125 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
126 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
127 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
129 // SPU has no sign or zero extended loads for i1, i8, i16:
130 setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
131 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
132 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
133 setStoreXAction(MVT::i1, Custom);
135 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
136 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
137 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
138 setStoreXAction(MVT::i8, Custom);
140 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
141 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
142 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
144 // SPU constant load actions are custom lowered:
145 setOperationAction(ISD::Constant, MVT::i64, Custom);
146 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
147 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
149 // SPU's loads and stores have to be custom lowered:
150 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
152 setOperationAction(ISD::LOAD, sctype, Custom);
153 setOperationAction(ISD::STORE, sctype, Custom);
156 // SPU supports BRCOND, although DAGCombine will convert BRCONDs
157 // into BR_CCs. BR_CC instructions are custom selected in
159 setOperationAction(ISD::BRCOND, MVT::Other, Legal);
161 // Expand the jumptable branches
162 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
163 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
164 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
166 // SPU has no intrinsics for these particular operations:
167 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
168 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
169 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
171 // SPU has no SREM/UREM instructions
172 setOperationAction(ISD::SREM, MVT::i32, Expand);
173 setOperationAction(ISD::UREM, MVT::i32, Expand);
174 setOperationAction(ISD::SREM, MVT::i64, Expand);
175 setOperationAction(ISD::UREM, MVT::i64, Expand);
177 // We don't support sin/cos/sqrt/fmod
178 setOperationAction(ISD::FSIN , MVT::f64, Expand);
179 setOperationAction(ISD::FCOS , MVT::f64, Expand);
180 setOperationAction(ISD::FREM , MVT::f64, Expand);
181 setOperationAction(ISD::FSIN , MVT::f32, Expand);
182 setOperationAction(ISD::FCOS , MVT::f32, Expand);
183 setOperationAction(ISD::FREM , MVT::f32, Expand);
185 // If we're enabling GP optimizations, use hardware square root
186 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
187 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
189 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
190 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
192 // SPU can do rotate right and left, so legalize it... but customize for i8
193 // because instructions don't exist.
194 setOperationAction(ISD::ROTR, MVT::i32, Legal);
195 setOperationAction(ISD::ROTR, MVT::i16, Legal);
196 setOperationAction(ISD::ROTR, MVT::i8, Custom);
197 setOperationAction(ISD::ROTL, MVT::i32, Legal);
198 setOperationAction(ISD::ROTL, MVT::i16, Legal);
199 setOperationAction(ISD::ROTL, MVT::i8, Custom);
200 // SPU has no native version of shift left/right for i8
201 setOperationAction(ISD::SHL, MVT::i8, Custom);
202 setOperationAction(ISD::SRL, MVT::i8, Custom);
203 setOperationAction(ISD::SRA, MVT::i8, Custom);
205 // Custom lower i32 multiplications
206 setOperationAction(ISD::MUL, MVT::i32, Custom);
208 // Need to custom handle (some) common i8 math ops
209 setOperationAction(ISD::SUB, MVT::i8, Custom);
210 setOperationAction(ISD::MUL, MVT::i8, Custom);
212 // SPU does not have BSWAP. It does have i32 support CTLZ.
213 // CTPOP has to be custom lowered.
214 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
215 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
217 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
218 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
219 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
220 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
222 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
223 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
225 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
227 // SPU does not have select or setcc
228 setOperationAction(ISD::SELECT, MVT::i1, Expand);
229 setOperationAction(ISD::SELECT, MVT::i8, Expand);
230 setOperationAction(ISD::SELECT, MVT::i16, Expand);
231 setOperationAction(ISD::SELECT, MVT::i32, Expand);
232 setOperationAction(ISD::SELECT, MVT::i64, Expand);
233 setOperationAction(ISD::SELECT, MVT::f32, Expand);
234 setOperationAction(ISD::SELECT, MVT::f64, Expand);
236 setOperationAction(ISD::SETCC, MVT::i1, Expand);
237 setOperationAction(ISD::SETCC, MVT::i8, Expand);
238 setOperationAction(ISD::SETCC, MVT::i16, Expand);
239 setOperationAction(ISD::SETCC, MVT::i32, Expand);
240 setOperationAction(ISD::SETCC, MVT::i64, Expand);
241 setOperationAction(ISD::SETCC, MVT::f32, Expand);
242 setOperationAction(ISD::SETCC, MVT::f64, Expand);
244 // SPU has a legal FP -> signed INT instruction
245 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
246 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
247 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
248 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
250 // FDIV on SPU requires custom lowering
251 setOperationAction(ISD::FDIV, MVT::f32, Custom);
252 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
254 // SPU has [U|S]INT_TO_FP
255 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
256 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
257 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
258 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
259 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
260 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
261 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
262 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
264 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
265 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
266 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
267 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
269 // We cannot sextinreg(i1). Expand to shifts.
270 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
272 // Support label based line numbers.
273 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
274 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
276 // We want to legalize GlobalAddress and ConstantPool nodes into the
277 // appropriate instructions to materialize the address.
278 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
279 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
280 setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
281 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
282 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
283 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
284 setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
285 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
287 // RET must be custom lowered, to meet ABI requirements
288 setOperationAction(ISD::RET, MVT::Other, Custom);
290 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
291 setOperationAction(ISD::VASTART , MVT::Other, Custom);
293 // Use the default implementation.
294 setOperationAction(ISD::VAARG , MVT::Other, Expand);
295 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
296 setOperationAction(ISD::VAEND , MVT::Other, Expand);
297 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
298 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
299 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
300 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
302 // Cell SPU has instructions for converting between i64 and fp.
// NOTE(review): these two repeat the FP_TO_SINT/SINT_TO_FP i64 settings made
// above (original lines 246/261) with the same Custom action -- redundant.
303 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
304 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
306 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
// NOTE(review): FP_TO_UINT/i32 was set Legal above (original line 247);
// this later call presumably overrides it to Promote -- confirm intent.
307 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
309 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
310 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
312 // First set operation action for all vector types to expand. Then we
313 // will selectively turn on ones that can be effectively codegen'd.
314 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
315 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
316 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
317 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
318 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
319 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
// Blanket per-vector-type action setup; specific types are overridden after
// the loop.
321 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
322 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
323 // add/sub are legal for all supported vector VT's.
324 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
325 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
326 // mul has to be custom lowered.
327 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
329 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
330 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
331 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
332 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
333 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
334 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
336 // These operations need to be expanded:
337 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
338 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
339 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
340 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
341 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
343 // Custom lower build_vector, constant pool spills, insert and
344 // extract vector elements:
345 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
346 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
347 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
348 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
349 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
350 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
// v16i8 overrides: byte-wide mul/and/or/xor need custom handling.
353 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
354 setOperationAction(ISD::AND, MVT::v16i8, Custom);
355 setOperationAction(ISD::OR, MVT::v16i8, Custom);
356 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
357 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
359 setSetCCResultType(MVT::i32);
360 setShiftAmountType(MVT::i32);
361 setSetCCResultContents(ZeroOrOneSetCCResult);
// R1 is the SPU stack pointer.
363 setStackPointerRegisterToSaveRestore(SPU::R1);
365 // We have target-specific dag combine patterns for the following nodes:
366 // e.g., setTargetDAGCombine(ISD::SUB);
368 computeRegisterProperties();
// Return the printable name for an SPUISD target node opcode, or 0 when the
// opcode is unknown. The name table is built lazily on the first call.
372 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
374 if (node_names.empty()) {
375 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
376 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
377 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
378 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
379 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
380 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
381 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
382 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
383 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
384 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
385 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
386 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
387 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
388 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
389 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
390 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
391 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
392 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
393 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
394 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
395 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
396 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
397 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
398 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
399 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
400 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
401 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
402 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
403 "SPUISD::ROTBYTES_RIGHT_Z";
404 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
405 "SPUISD::ROTBYTES_RIGHT_S";
406 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
407 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
408 "SPUISD::ROTBYTES_LEFT_CHAINED";
409 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
410 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
411 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
412 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
413 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
414 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Lookup after (possible) initialization; unknown opcodes yield 0.
417 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
419 return ((i != node_names.end()) ? i->second : 0);
422 //===----------------------------------------------------------------------===//
423 // Calling convention code:
424 //===----------------------------------------------------------------------===//
426 #include "SPUGenCallingConv.inc"
428 //===----------------------------------------------------------------------===//
429 // LowerOperation implementation
430 //===----------------------------------------------------------------------===//
432 /// Custom lower loads for CellSPU
434 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
435 within a 16-byte block, we have to rotate to extract the requested element.
438 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
439 LoadSDNode *LN = cast<LoadSDNode>(Op);
440 SDOperand basep = LN->getBasePtr();
441 SDOperand the_chain = LN->getChain();
442 MVT::ValueType VT = LN->getLoadedVT();
443 MVT::ValueType OpVT = Op.Val->getValueType(0);
444 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
445 ISD::LoadExtType ExtType = LN->getExtensionType();
446 unsigned alignment = LN->getAlignment();
447 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
450 // For an extending load of an i1 variable, just call it i8 (or whatever we
451 // were passed) and make it zero-extended:
454 ExtType = ISD::ZEXTLOAD;
457 switch (LN->getAddressingMode()) {
458 case ISD::UNINDEXED: {
460 SDOperand rot_op, rotamt;
465 // The vector type we really want to be when we load the 16-byte chunk
466 MVT::ValueType vecVT, opVecVT;
469 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
473 opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
// Peel a constant offset off of (base + const) addresses so the rotation
// amount and the 16-byte-aligned base can be computed separately.
475 if (basep.getOpcode() == ISD::ADD) {
476 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
479 && "LowerLOAD: ISD::ADD operand 1 is not constant");
481 c_offset = (int) CN->getValue();
482 c_rotamt = (int) (c_offset & 0xf);
484 // Adjust the rotation amount to ensure that the final result ends up in
485 // the preferred slot:
486 c_rotamt -= vtm->prefslot_byte;
487 ptrp = basep.getOperand(0);
490 c_rotamt = -vtm->prefslot_byte;
494 if (alignment == 16) {
495 // 16-byte aligned load into preferred slot, no rotation
497 if (isMemoryOperand(ptrp))
501 // Return modified D-Form address for pointer:
502 ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
503 ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
505 return DAG.getLoad(VT, LN->getChain(), ptrp,
506 LN->getSrcValue(), LN->getSrcValueOffset(),
507 LN->isVolatile(), 16);
509 return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
510 LN->getSrcValueOffset(), OpVT,
511 LN->isVolatile(), 16);
517 // Realign the base pointer, with a D-Form address
518 if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
519 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
520 ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
// Load the whole 16-byte chunk as v16i8, then rotate the wanted element
// into the preferred slot (chained rotate keeps the memory ordering).
525 rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
526 LN->getSrcValue(), LN->getSrcValueOffset(),
527 LN->isVolatile(), 16);
528 the_chain = rot_op.getValue(1);
529 rotamt = DAG.getConstant(c_rotamt, MVT::i16);
531 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
536 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
537 the_chain = result.getValue(1);
539 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
544 scalarvts = DAG.getVTList(VT, MVT::Other);
546 scalarvts = DAG.getVTList(OpVT, MVT::Other);
549 result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
553 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
554 the_chain = result.getValue(1);
556 // Handle the sign and zero-extending loads for i1 and i8:
559 if (ExtType == ISD::SEXTLOAD) {
560 NewOpC = (OpVT == MVT::i1
561 ? SPUISD::EXTRACT_I1_SEXT
562 : SPUISD::EXTRACT_I8_SEXT);
563 } else if (ExtType == ISD::ZEXTLOAD) {
564 NewOpC = (OpVT == MVT::i1
565 ? SPUISD::EXTRACT_I1_ZEXT
566 : SPUISD::EXTRACT_I8_ZEXT);
569 result = DAG.getNode(NewOpC, OpVT, result);
// Package value+chain into the SPU-specific LDRESULT node.
572 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
573 SDOperand retops[2] = { result, the_chain };
575 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
580 // Misaligned 16-byte load:
581 if (basep.getOpcode() == ISD::LOAD) {
582 LN = cast<LoadSDNode>(basep);
583 if (LN->getAlignment() == 16) {
584 // We can verify that we're really loading from a 16-byte aligned
585 // chunk. Encapsulate basep as a D-Form address and return a new
587 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
588 DAG.getConstant(0, PtrVT));
590 return DAG.getLoad(VT, LN->getChain(), basep,
591 LN->getSrcValue(), LN->getSrcValueOffset(),
592 LN->isVolatile(), 16);
594 return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
595 LN->getSrcValue(), LN->getSrcValueOffset(),
596 OpVT, LN->isVolatile(), 16);
600 // Catch all other cases where we can't guarantee that we have a
601 // 16-byte aligned entity, which means resorting to an X-form
// X-form address: materialize lo/hi halves of the address and add them.
604 SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
605 SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
606 SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);
608 ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
610 SDOperand alignLoad =
611 DAG.getLoad(opVecVT, LN->getChain(), ptrp,
612 LN->getSrcValue(), LN->getSrcValueOffset(),
613 LN->isVolatile(), 16);
// Shuffle the loaded chunk against an insert mask, then pull element 0.
615 SDOperand insertEltOp =
616 DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
618 result = DAG.getNode(SPUISD::SHUFB, opVecVT,
621 DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
623 result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
625 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
626 SDOperand retops[2] = { result, the_chain };
628 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
// Indexed addressing modes are not supported; report and fall through.
637 case ISD::LAST_INDEXED_MODE:
638 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
640 cerr << (unsigned) LN->getAddressingMode() << "\n";
648 /// Custom lower stores for CellSPU
650 All CellSPU stores are aligned to 16-byte boundaries, so for elements
651 within a 16-byte block, we have to generate a shuffle to insert the
652 requested element into its place, then store the resulting block.
655 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
656 StoreSDNode *SN = cast<StoreSDNode>(Op);
657 SDOperand Value = SN->getValue();
658 MVT::ValueType VT = Value.getValueType();
659 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
660 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
661 SDOperand the_chain = SN->getChain();
662 //unsigned alignment = SN->getAlignment();
663 //const valtype_map_s *vtm = getValueTypeMapEntry(VT);
665 switch (SN->getAddressingMode()) {
666 case ISD::UNINDEXED: {
667 SDOperand basep = SN->getBasePtr();
// Peel a constant offset off of (base + const) addresses, mirroring the
// address decomposition done in LowerLOAD.
671 if (basep.getOpcode() == ISD::ADD) {
672 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
674 && "LowerSTORE: ISD::ADD operand 1 is not constant");
675 offset = unsigned(CN->getValue());
676 ptrOp = basep.getOperand(0);
677 DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
685 // The vector type we really want to load from the 16-byte chunk, except
686 // in the case of MVT::i1, which has to be v16i8.
687 unsigned vecVT, stVecVT;
690 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
692 stVecVT = MVT::v16i8;
693 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
695 // Realign the pointer as a D-Form address (ptrOp is the pointer,
696 // to force a register load with the address; basep is the actual
697 // dform addr offs($reg).
698 ptrOp = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
699 DAG.getConstant(0, PtrVT));
700 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
701 ptrOp, DAG.getConstant((offset & ~0xf), PtrVT));
703 // Create the 16-byte aligned vector load
704 SDOperand alignLoad =
705 DAG.getLoad(vecVT, the_chain, basep,
706 SN->getSrcValue(), SN->getSrcValueOffset(),
707 SN->isVolatile(), 16);
708 the_chain = alignLoad.getValue(1);
710 LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
711 SDOperand theValue = SN->getValue();
// Strip AssertZext/AssertSext wrappers so the raw value gets inserted.
715 && (theValue.getOpcode() == ISD::AssertZext
716 || theValue.getOpcode() == ISD::AssertSext)) {
717 // Drill down and get the value for zero- and sign-extended
719 theValue = theValue.getOperand(0);
// Build the insertion mask for the element's byte position, shuffle the
// scalar into the loaded chunk, then store the whole chunk back.
722 SDOperand insertEltOp =
723 DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
724 DAG.getNode(SPUISD::DFormAddr, PtrVT,
726 DAG.getConstant((offset & 0xf), PtrVT)));
728 result = DAG.getNode(SPUISD::SHUFB, vecVT,
729 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
731 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
733 result = DAG.getStore(the_chain, result, basep,
734 LN->getSrcValue(), LN->getSrcValueOffset(),
735 LN->isVolatile(), LN->getAlignment());
// Indexed addressing modes are not supported; report and fall through.
// FIX: diagnostic previously said "LowerLOAD ... LoadSDNode" (copy-paste
// from the load path); this is the store path, operating on a StoreSDNode.
744 case ISD::LAST_INDEXED_MODE:
745 cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
747 cerr << (unsigned) SN->getAddressingMode() << "\n";
755 /// Generate the address of a constant pool entry.
// Static relocation only: small-memory model returns the target constant-pool
// node directly; large-memory model materializes the address as Hi+Lo halves.
757 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
758 MVT::ValueType PtrVT = Op.getValueType();
759 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
760 Constant *C = CP->getConstVal();
761 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
762 const TargetMachine &TM = DAG.getTarget();
763 SDOperand Zero = DAG.getConstant(0, PtrVT);
765 if (TM.getRelocationModel() == Reloc::Static) {
766 if (!ST->usingLargeMem()) {
767 // Just return the SDOperand with the constant pool address in it.
770 // Generate hi/lo address pair
771 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
772 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
774 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
// Non-static relocation models are rejected (assert message below).
779 "LowerConstantPool: Relocation model other than static not supported.");
// Generate the address of a jump table entry. Same scheme as
// LowerConstantPool: direct target node for small memory, Hi+Lo pair for
// large memory; only the static relocation model is supported.
784 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
785 MVT::ValueType PtrVT = Op.getValueType();
786 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
787 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
788 SDOperand Zero = DAG.getConstant(0, PtrVT);
789 const TargetMachine &TM = DAG.getTarget();
791 if (TM.getRelocationModel() == Reloc::Static) {
792 if (!ST->usingLargeMem()) {
793 // Just return the SDOperand with the jump table address in it.
796 // Generate hi/lo address pair
797 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
798 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
800 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
805 "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global variable. Mirrors LowerConstantPool /
// LowerJumpTable: local-store address for small memory, Hi+Lo pair for large
// memory; non-static relocation models print the error below.
810 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
811 MVT::ValueType PtrVT = Op.getValueType();
812 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
813 GlobalValue *GV = GSDN->getGlobal();
814 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
815 SDOperand Zero = DAG.getConstant(0, PtrVT);
816 const TargetMachine &TM = DAG.getTarget();
818 if (TM.getRelocationModel() == Reloc::Static) {
819 if (!ST->usingLargeMem()) {
820 // Generate a local store address
823 // Generate hi/lo address pair
824 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
825 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
827 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
830 cerr << "LowerGlobalAddress: Relocation model other than static not "
839 //! Custom lower i64 integer constants
841 This code inserts all of the necessary juggling that needs to occur to load
842 a 64-bit constant into a register.
845 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
846 unsigned VT = Op.getValueType();
847 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
849 if (VT == MVT::i64) {
// Splat the constant into both lanes of a v2i64, then extract element 0.
850 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
851 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
852 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// Any other constant type is unexpected here; report it.
855 cerr << "LowerConstant: unhandled constant type "
856 << MVT::getValueTypeString(VT)
865 //! Custom lower single precision floating point constants
867 "float" immediates can be lowered as if they were unsigned 32-bit integers.
868 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
872 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
873 unsigned VT = Op.getValueType();
874 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
877 "LowerConstantFP: Node is not ConstantFPSDNode");
879 const APFloat &apf = FP->getValueAPF();
881 if (VT == MVT::f32) {
// f32: emit directly as an SFPConstant pseudo with the target FP constant.
882 return DAG.getNode(SPUISD::SFPConstant, VT,
883 DAG.getTargetConstantFP(apf.convertToFloat(), VT));
884 } else if (VT == MVT::f64) {
// f64: reuse the i64 constant path on the raw bit pattern, then
// bit-convert the result back to f64.
885 uint64_t dbits = DoubleToBits(apf.convertToDouble());
886 return DAG.getNode(ISD::BIT_CONVERT, VT,
887 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
894 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
896 MachineFunction &MF = DAG.getMachineFunction();
897 MachineFrameInfo *MFI = MF.getFrameInfo();
898 SSARegMap *RegMap = MF.getSSARegMap();
899 SmallVector<SDOperand, 8> ArgValues;
900 SDOperand Root = Op.getOperand(0);
901 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
903 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
904 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
906 unsigned ArgOffset = SPUFrameInfo::minStackSize();
907 unsigned ArgRegIdx = 0;
908 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
910 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
912 // Add DAG nodes to load the arguments or copy them out of registers.
913 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
915 bool needsLoad = false;
916 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
917 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
921 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
922 << MVT::getValueTypeString(ObjectVT)
927 if (!isVarArg && ArgRegIdx < NumArgRegs) {
928 unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
929 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
930 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
937 if (!isVarArg && ArgRegIdx < NumArgRegs) {
938 unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
939 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
940 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
947 if (!isVarArg && ArgRegIdx < NumArgRegs) {
948 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
949 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
950 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
957 if (!isVarArg && ArgRegIdx < NumArgRegs) {
958 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
959 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
960 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
967 if (!isVarArg && ArgRegIdx < NumArgRegs) {
968 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
969 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
970 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
977 if (!isVarArg && ArgRegIdx < NumArgRegs) {
978 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
979 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
980 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
991 if (!isVarArg && ArgRegIdx < NumArgRegs) {
992 unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
993 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
994 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1002 // We need to load the argument to a virtual register if we determined above
1003 // that we ran out of physical registers of the appropriate type
1005 // If the argument is actually used, emit a load from the right stack
1007 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1008 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1009 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1010 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1012 // Don't emit a dead load.
1013 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1016 ArgOffset += StackSlotSize;
1019 ArgValues.push_back(ArgVal);
1022 // If the function takes variable number of arguments, make a frame index for
1023 // the start of the first vararg value... for expansion of llvm.va_start.
1025 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1027 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1028 // If this function is vararg, store any remaining integer argument regs to
1029 // their spots on the stack so that they may be loaded by deferencing the
1030 // result of va_next.
1031 SmallVector<SDOperand, 8> MemOps;
1032 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1033 unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
1034 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1035 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1036 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1037 MemOps.push_back(Store);
1038 // Increment the address by four for the next argument to store
1039 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1040 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1042 if (!MemOps.empty())
1043 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1046 ArgValues.push_back(Root);
1048 // Return the new list of results.
1049 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1050 Op.Val->value_end());
1051 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1054 /// isLSAAddress - Return the immediate to use if the specified
1055 /// value is representable as a LSA address.
// An SPU Local Store Address immediate must be word aligned (the low 2 bits
// are implicitly zero), and the top 14 bits must be the sign extension of
// the remaining bits.  On success, returns the constant node holding the
// address with the two implicit zero bits dropped; otherwise returns 0.
1056 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1057 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
// NOTE(review): the null check on C (dyn_cast can fail) is not visible in
// this extract -- confirm `if (!C) return 0;` precedes the use below.
1060 int Addr = C->getValue();
1061 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1062 (Addr << 14 >> 14) != Addr)
1063 return 0; // Top 14 bits have to be sext of immediate.
// Encode the immediate by dropping the two implicitly-zero low bits.
1065 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
// LowerCALL - Lower an ISD::CALL node for the SPU target: marshal the call
// arguments into the SPU argument registers (spilling the overflow to stack
// slots just above the [LR]/[SP] linkage area), wrap the call in
// CALLSEQ_START/CALLSEQ_END, emit an SPUISD::CALL node, and copy any return
// value(s) back out of R3 (and R4 for a two-register result).
// NOTE(review): several `case` labels and closing braces of this routine
// are not visible in this extract; comments below annotate only what is
// shown -- confirm against the full source.
1070 LowerCALL(SDOperand Op, SelectionDAG &DAG) {
1071 SDOperand Chain = Op.getOperand(0);
1073 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1074 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1076 SDOperand Callee = Op.getOperand(4);
// Operands 5.. come in (value, flag) pairs, hence the divide by 2.
1077 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1078 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1079 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1080 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1082 // Handy pointer type
1083 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1085 // Accumulate how many bytes are to be pushed on the stack, including the
1086 // linkage area, and parameter passing area. According to the SPU ABI,
1087 // we minimally need space for [LR] and [SP]
1088 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1090 // Set up a copy of the stack pointer for use loading and storing any
1091 // arguments that may not fit in the registers available for argument
1093 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1095 // Figure out which arguments are going to go in registers, and which in
1097 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1098 unsigned ArgRegIdx = 0;
1100 // Keep track of registers passing arguments
1101 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1102 // And the arguments passed on the stack
1103 SmallVector<SDOperand, 8> MemOpChains;
1105 for (unsigned i = 0; i != NumOps; ++i) {
1106 SDOperand Arg = Op.getOperand(5+2*i);
1108 // PtrOff will be used to store the current argument to the stack if a
1109 // register cannot be found for it.
1110 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1111 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1113 switch (Arg.getValueType()) {
1114 default: assert(0 && "Unexpected ValueType for argument!");
// Each visible arm below: prefer the next free argument register,
// otherwise store the argument to its stack slot and bump the offset.
1118 if (ArgRegIdx != NumArgRegs) {
1119 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1121 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1122 ArgOffset += StackSlotSize;
1127 if (ArgRegIdx != NumArgRegs) {
1128 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1130 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1131 ArgOffset += StackSlotSize;
1138 if (ArgRegIdx != NumArgRegs) {
1139 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1141 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1142 ArgOffset += StackSlotSize;
1148 // Update number of stack bytes actually used, insert a call sequence start
1149 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1150 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1152 if (!MemOpChains.empty()) {
1153 // Adjust the stack pointer for the stack arguments.
1154 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1155 &MemOpChains[0], MemOpChains.size());
1158 // Build a sequence of copy-to-reg nodes chained together with token chain
1159 // and flag operands which copy the outgoing args into the appropriate regs.
1161 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1162 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1164 InFlag = Chain.getValue(1);
1167 std::vector<MVT::ValueType> NodeTys;
1168 NodeTys.push_back(MVT::Other); // Returns a chain
1169 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1171 SmallVector<SDOperand, 8> Ops;
1172 unsigned CallOpc = SPUISD::CALL;
1174 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1175 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1176 // node so that legalize doesn't hack it.
1177 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1178 GlobalValue *GV = G->getGlobal();
1179 unsigned CalleeVT = Callee.getValueType();
1181 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1182 // style calls, otherwise, external symbols are BRASL calls.
1184 // This may be an unsafe assumption for JIT and really large compilation
1186 if (GV->isDeclaration()) {
1187 Callee = DAG.getGlobalAddress(GV, CalleeVT);
// Defined-in-this-module callee: use a PC-relative address (BRSL form).
1189 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
1190 DAG.getTargetGlobalAddress(GV, CalleeVT),
1191 DAG.getConstant(0, PtrVT));
1193 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1194 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1195 else if (SDNode *Dest = isLSAAddress(Callee, DAG))
1196 // If this is an absolute destination address that appears to be a legal
1197 // local store address, use the munged value.
1198 Callee = SDOperand(Dest, 0);
1200 Ops.push_back(Chain);
1201 Ops.push_back(Callee);
1203 // Add argument registers to the end of the list so that they are known live
1205 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1206 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1207 RegsToPass[i].second.getValueType()));
1210 Ops.push_back(InFlag);
1211 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1212 InFlag = Chain.getValue(1);
// Room for at most two result values plus the output chain.
1214 SDOperand ResultVals[3];
1215 unsigned NumResults = 0;
1218 // If the call has results, copy the values out of the ret val registers.
1219 switch (Op.Val->getValueType(0)) {
1220 default: assert(0 && "Unexpected ret value!");
1221 case MVT::Other: break;
// Two-part i32 result: high part in R4, low part in R3.
1223 if (Op.Val->getValueType(1) == MVT::i32) {
1224 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1225 ResultVals[0] = Chain.getValue(0);
1226 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1227 Chain.getValue(2)).getValue(1);
1228 ResultVals[1] = Chain.getValue(0);
1230 NodeTys.push_back(MVT::i32);
1232 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1233 ResultVals[0] = Chain.getValue(0);
1236 NodeTys.push_back(MVT::i32);
1239 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1240 ResultVals[0] = Chain.getValue(0);
1242 NodeTys.push_back(MVT::i64);
1246 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1247 InFlag).getValue(1);
1248 ResultVals[0] = Chain.getValue(0);
1250 NodeTys.push_back(Op.Val->getValueType(0));
1257 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1258 InFlag).getValue(1);
1259 ResultVals[0] = Chain.getValue(0);
1261 NodeTys.push_back(Op.Val->getValueType(0));
1265 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1266 DAG.getConstant(NumStackBytes, PtrVT));
1267 NodeTys.push_back(MVT::Other);
1269 // If the function returns void, just return the chain.
1270 if (NumResults == 0)
1273 // Otherwise, merge everything together with a MERGE_VALUES node.
1274 ResultVals[NumResults++] = Chain;
1275 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1276 ResultVals, NumResults);
1277 return Res.getValue(Op.ResNo);
// LowerRET - Lower an ISD::RET node: run the return values through the
// RetCC_SPU calling-convention analysis, copy each value into its assigned
// physical register (marking those registers live-out on first use), and
// terminate with an SPUISD::RET_FLAG node glued to the last copy.
1281 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1282 SmallVector<CCValAssign, 16> RVLocs;
1283 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1284 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1285 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1286 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1288 // If this is the first return lowered for this function, add the regs to the
1289 // liveout set for the function.
1290 if (DAG.getMachineFunction().liveout_empty()) {
1291 for (unsigned i = 0; i != RVLocs.size(); ++i)
1292 DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
1295 SDOperand Chain = Op.getOperand(0);
// NOTE(review): the declaration of `Flag` is not visible in this extract;
// it is presumably a default-constructed SDOperand -- confirm.
1298 // Copy the result values into the output registers.
1299 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1300 CCValAssign &VA = RVLocs[i];
1301 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands come in (value, flag) pairs; i*2+1 selects the value.
1302 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1303 Flag = Chain.getValue(1);
1307 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1309 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1313 //===----------------------------------------------------------------------===//
1314 // Vector related lowering:
1315 //===----------------------------------------------------------------------===//
// getVecImm - If build_vector N holds exactly one distinct non-undef
// operand (the same value repeated across all non-undef lanes) and that
// operand is a ConstantSDNode, return it; otherwise return 0.  An
// all-undef vector also returns 0 (caller uses an implicit def instead).
1317 static ConstantSDNode *
1318 getVecImm(SDNode *N) {
1319 SDOperand OpVal(0, 0);
1321 // Check to see if this buildvec has a single non-undef value in its elements.
1322 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1323 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
// First non-undef operand seen becomes the candidate splat value.
1325 OpVal = N->getOperand(i);
1326 else if (OpVal != N->getOperand(i))
// Mismatching lane: not a uniform vector (early-out elided in extract).
1330 if (OpVal.Val != 0) {
1331 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1336 return 0; // All UNDEF: use implicit def.; not Constant node
1339 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1340 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// constant as an i18-range SDOperand in the requested ValueType; otherwise
// the not-a-splat/too-big fallthrough (elided in this extract) yields an
// empty SDOperand.
1342 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1343 MVT::ValueType ValueType) {
1344 if (ConstantSDNode *CN = getVecImm(N)) {
1345 uint64_t Value = CN->getValue();
// 0x3ffff == (1 << 18) - 1: largest unsigned 18-bit immediate.
1346 if (Value <= 0x3ffff)
1347 return DAG.getConstant(Value, ValueType);
1353 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1354 /// and the value fits into a signed 16-bit constant, and if so, return the
// constant in the requested ValueType.  Each width below checks that the
// value survives a 16-bit sign-extension round trip.
1356 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1357 MVT::ValueType ValueType) {
1358 if (ConstantSDNode *CN = getVecImm(N)) {
1359 if (ValueType == MVT::i32) {
1360 int Value = (int) CN->getValue();
// Sign-extend the low 16 bits back to 32 and compare.
1361 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1363 if (Value == SExtValue)
1364 return DAG.getConstant(Value, ValueType);
1365 } else if (ValueType == MVT::i16) {
1366 short Value = (short) CN->getValue();
1367 int SExtValue = ((int) Value << 16) >> 16;
1369 if (Value == (short) SExtValue)
1370 return DAG.getConstant(Value, ValueType);
1371 } else if (ValueType == MVT::i64) {
1372 int64_t Value = CN->getValue();
// Sign-extend the low 16 bits to 64 and compare.
1373 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1375 if (Value == SExtValue)
1376 return DAG.getConstant(Value, ValueType);
1383 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1384 /// and the value fits into a signed 10-bit constant, and if so, return the
// constant in the requested ValueType (i32 or i16); otherwise the
// fallthrough (elided in this extract) yields an empty SDOperand.
1386 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1387 MVT::ValueType ValueType) {
1388 if (ConstantSDNode *CN = getVecImm(N)) {
1389 int Value = (int) CN->getValue();
// isS10Constant: value representable as a signed 10-bit immediate.
1390 if ((ValueType == MVT::i32 && isS10Constant(Value))
1391 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1392 return DAG.getConstant(Value, ValueType);
1398 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1399 /// and the value fits into a signed 8-bit constant, and if so, return the
// constant in the requested ValueType.
1402 /// @note: The incoming vector is v16i8 because that's the only way we can load
1403 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
// same; when they match, a single byte value is returned.
1405 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1406 MVT::ValueType ValueType) {
1407 if (ConstantSDNode *CN = getVecImm(N)) {
1408 int Value = (int) CN->getValue();
// i16 case: upper byte must duplicate the lower byte of the halfword.
1409 if (ValueType == MVT::i16
1410 && Value <= 0xffff /* truncated from uint64_t */
1411 && ((short) Value >> 8) == ((short) Value & 0xff))
1412 return DAG.getConstant(Value & 0xff, ValueType);
1413 else if (ValueType == MVT::i8
1414 && (Value & 0xff) == Value)
1415 return DAG.getConstant(Value, ValueType);
1421 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1422 /// and the value fits into a signed 16-bit constant, and if so, return the
// upper halfword (Value >> 16) suitable as the ILHU (immediate load
// halfword upper) operand; only values with zero low 16 bits qualify.
1424 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1425 MVT::ValueType ValueType) {
1426 if (ConstantSDNode *CN = getVecImm(N)) {
1427 uint64_t Value = CN->getValue();
// Accept only values whose significant bits all sit in bits 16..31.
1428 if ((ValueType == MVT::i32
1429 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1430 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1431 return DAG.getConstant(Value >> 16, ValueType);
1437 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the uniform element of a constant v4i32 build_vector as an
// i32 constant, or (via the elided fallthrough) an empty SDOperand.
1438 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1439 if (ConstantSDNode *CN = getVecImm(N)) {
1440 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1446 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1447 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1448 if (ConstantSDNode *CN = getVecImm(N)) {
1449 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1455 // If this is a vector of constants or undefs, get the bits. A bit in
1456 // UndefBits is set if the corresponding element of the vector is an
1457 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1458 // zero. Return true if this is not an array of constants, false if it is.
// The 128-bit vector is packed into two uint64_t words; elements are
// stored big-endian-style within each word (see SlotNo below).
1460 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1461 uint64_t UndefBits[2]) {
1462 // Start with zero'd results.
1463 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1465 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1466 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1467 SDOperand OpVal = BV->getOperand(i);
1469 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1470 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1472 uint64_t EltBits = 0;
1473 if (OpVal.getOpcode() == ISD::UNDEF) {
// Mark every bit of this element's slot as undef.
1474 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1475 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1477 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1478 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1479 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants are captured as their raw IEEE bit patterns.
1480 const APFloat &apf = CN->getValueAPF();
1481 EltBits = (CN->getValueType(0) == MVT::f32
1482 ? FloatToBits(apf.convertToFloat())
1483 : DoubleToBits(apf.convertToDouble()));
1485 // Nonconstant element.
1489 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1492 //printf("%llx %llx %llx %llx\n",
1493 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1497 /// If this is a splat (repetition) of a value across the whole vector, return
1498 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1499 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1500 /// SplatSize = 1 byte.
// Works by folding the 128-bit value in half repeatedly (64 -> 32 -> 16
// bits), at each step checking that the two halves agree wherever neither
// half is undef.  Undef bits never prevent a match.
// NOTE(review): the MinSplatBits and SplatSize parameter declarations are
// not visible in this extract -- confirm against the full signature.
1501 static bool isConstantSplat(const uint64_t Bits128[2],
1502 const uint64_t Undef128[2],
1504 uint64_t &SplatBits, uint64_t &SplatUndef,
1506 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1507 // the same as the lower 64-bits, ignoring undefs.
1508 uint64_t Bits64 = Bits128[0] | Bits128[1];
1509 uint64_t Undef64 = Undef128[0] & Undef128[1];
1510 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1511 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1512 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1513 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1515 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1516 if (MinSplatBits < 64) {
1518 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1520 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1521 if (MinSplatBits < 32) {
1523 // If the top 16-bits are different than the lower 16-bits, ignoring
1524 // undefs, we have an i32 splat.
1525 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1526 if (MinSplatBits < 16) {
1527 // If the top 8-bits are different than the lower 8-bits, ignoring
1528 // undefs, we have an i16 splat.
1529 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1530 // Otherwise, we have an 8-bit splat.
1531 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1532 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1538 SplatUndef = Undef16;
1545 SplatUndef = Undef32;
// 64-bit splat: the two 128-bit halves matched but nothing smaller did.
1551 SplatBits = Bits128[0];
1552 SplatUndef = Undef128[0];
1558 return false; // Can't be a splat if two pieces don't match.
1561 // If this is a case we can't handle, return null and let the default
1562 // expansion code take care of it. If we CAN select this case, and if it
1563 // selects to a single instruction, return Op. Otherwise, if we can codegen
1564 // this case more efficiently than a constant pool load, lower it to the
1565 // sequence of ops that should be used.
// NOTE(review): the switch statement and its `case MVT::v...:` labels are
// not visible in this extract; each arm below is annotated with the
// element type it apparently handles -- confirm against the full source.
1566 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1567 MVT::ValueType VT = Op.getValueType();
1568 // If this is a vector of constants or undefs, get the bits. A bit in
1569 // UndefBits is set if the corresponding element of the vector is an
1570 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1572 uint64_t VectorBits[2];
1573 uint64_t UndefBits[2];
1574 uint64_t SplatBits, SplatUndef;
1576 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1577 || !isConstantSplat(VectorBits, UndefBits,
1578 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1579 SplatBits, SplatUndef, SplatSize))
1580 return SDOperand(); // Not a constant vector, not a splat.
// -- v4f32 arm: splat the 32-bit pattern as integers, then bitcast.
1585 uint32_t Value32 = SplatBits;
1586 assert(SplatSize == 4
1587 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1588 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1589 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1590 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1591 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// -- v2f64 arm: same trick with a 64-bit integer pattern.
1595 uint64_t f64val = SplatBits;
1596 assert(SplatSize == 8
1597 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1598 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1599 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1600 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1601 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// -- v16i8 arm:
1605 // 8-bit constants have to be expanded to 16-bits
1606 unsigned short Value16 = SplatBits | (SplatBits << 8);
1608 for (int i = 0; i < 8; ++i)
1609 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1610 return DAG.getNode(ISD::BIT_CONVERT, VT,
1611 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// -- v8i16 arm: SplatSize may be 1 or 2, hence the two encodings.
1614 unsigned short Value16;
1616 Value16 = (unsigned short) (SplatBits & 0xffff);
1618 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1619 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1621 for (int i = 0; i < 8; ++i) Ops[i] = T;
1622 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// -- v4i32 arm:
1625 unsigned int Value = SplatBits;
1626 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1627 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// -- v2i64 arm: synthesize the 64-bit splat from its 32-bit halves.
1630 uint64_t val = SplatBits;
1631 uint32_t upper = uint32_t(val >> 32);
1632 uint32_t lower = uint32_t(val);
1637 SmallVector<SDOperand, 16> ShufBytes;
1639 bool upper_special, lower_special;
1641 // NOTE: This code creates common-case shuffle masks that can be easily
1642 // detected as common expressions. It is not attempting to create highly
1643 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1645 // Detect if the upper or lower half is a special shuffle mask pattern:
1646 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1647 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1649 // Create lower vector if not a special pattern
1650 if (!lower_special) {
1651 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1652 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1653 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1654 LO32C, LO32C, LO32C, LO32C));
1657 // Create upper vector if not a special pattern
1658 if (!upper_special) {
1659 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1660 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1661 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1662 HI32C, HI32C, HI32C, HI32C));
1665 // If either upper or lower are special, then the two input operands are
1666 // the same (basically, one of them is a "don't care")
1671 if (lower_special && upper_special) {
1672 // Unhappy situation... both upper and lower are special, so punt with
1673 // a target constant:
1674 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1675 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte SHUFB control mask, word by word, byte by byte.
1679 for (int i = 0; i < 4; ++i) {
1680 for (int j = 0; j < 4; ++j) {
1682 bool process_upper, process_lower;
// Even words take the upper half's pattern, odd words the lower's.
1685 process_upper = (upper_special && (i & 1) == 0);
1686 process_lower = (lower_special && (i & 1) == 1);
1688 if (process_upper || process_lower) {
// Special-pattern bytes: 0x80 = zero, 0xc0 = 0xff, 0xe0 = 0x80 (shufb
// special codes); otherwise select the byte from the source operands.
1689 if ((process_upper && upper == 0)
1690 || (process_lower && lower == 0))
1692 else if ((process_upper && upper == 0xffffffff)
1693 || (process_lower && lower == 0xffffffff))
1695 else if ((process_upper && upper == 0x80000000)
1696 || (process_lower && lower == 0x80000000))
1697 val = (j == 0 ? 0xe0 : 0x80);
1699 val = i * 4 + j + ((i & 1) * 16);
1701 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1705 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1706 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1707 &ShufBytes[0], ShufBytes.size()));
1709 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1710 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1711 return DAG.getNode(ISD::BIT_CONVERT, VT,
1712 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1713 Zero, Zero, Zero, Zero));
1721 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1722 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1723 /// permutation vector, V3, is monotonically increasing with one "exception"
1724 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1725 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1726 /// In either case, the net result is going to eventually invoke SHUFB to
1727 /// permute/shuffle the bytes from V1 and V2.
1729 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1730 /// control word for byte/halfword/word insertion. This takes care of a single
1731 /// element move from V2 into V1.
1733 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1734 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1735 SDOperand V1 = Op.getOperand(0);
1736 SDOperand V2 = Op.getOperand(1);
1737 SDOperand PermMask = Op.getOperand(2);
1739 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1741 // If we have a single element being moved from V1 to V2, this can be handled
1742 // using the C*[DX] compute mask instructions, but the vector elements have
1743 // to be monotonically increasing with one exception element.
1744 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1745 unsigned EltsFromV2 = 0;
1747 unsigned V2EltIdx0 = 0;
1748 unsigned CurrElt = 0;
1749 bool monotonic = true;
// V2EltIdx0 = index of V2's first element in the concatenated (V1,V2)
// numbering; the assignments per element width are elided in this extract.
1750 if (EltVT == MVT::i8)
1752 else if (EltVT == MVT::i16)
1754 else if (EltVT == MVT::i32)
1757 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the permutation mask; stop early once it cannot qualify.
1759 for (unsigned i = 0, e = PermMask.getNumOperands();
1760 EltsFromV2 <= 1 && monotonic && i != e;
1763 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1766 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1768 if (SrcElt >= V2EltIdx0) {
// Element sourced from V2; remember its byte offset for INSERT_MASK.
1770 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1771 } else if (CurrElt != SrcElt) {
// V1 element out of sequence: mask is not monotonic (flag elided).
1778 if (EltsFromV2 == 1 && monotonic) {
1779 // Compute mask and shuffle
1780 MachineFunction &MF = DAG.getMachineFunction();
1781 SSARegMap *RegMap = MF.getSSARegMap();
1782 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
1783 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1784 // Initialize temporary register to 0
1785 SDOperand InitTempReg =
1786 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1787 // Copy register's contents as index in INSERT_MASK:
1788 SDOperand ShufMaskOp =
1789 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1790 DAG.getTargetConstant(V2Elt, MVT::i32),
1791 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1792 // Use shuffle mask in SHUFB synthetic instruction:
1793 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1795 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1796 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1798 SmallVector<SDOperand, 16> ResultMask;
1799 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1801 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1804 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices.
1806 for (unsigned j = 0; j != BytesPerElement; ++j) {
1807 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1812 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1813 &ResultMask[0], ResultMask.size());
1814 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// LowerSCALAR_TO_VECTOR - Lower ISD::SCALAR_TO_VECTOR: a constant scalar
// becomes an explicit constant BUILD_VECTOR (which later folds into a
// vector-register load); any other scalar is broadcast with the
// SPUISD::PROMOTE_SCALAR synthetic node.
1818 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1819 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1821 if (Op0.Val->getOpcode() == ISD::Constant) {
1822 // For a constant, build the appropriate constant vector, which will
1823 // eventually simplify to a vector register load.
1825 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1826 SmallVector<SDOperand, 16> ConstVecValues;
1830 // Create a constant vector:
// n_copies = lane count, VT = element type for the result vector.
1831 switch (Op.getValueType()) {
1832 default: assert(0 && "Unexpected constant value type in "
1833 "LowerSCALAR_TO_VECTOR");
1834 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1835 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1836 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1837 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1838 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1839 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1842 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1843 for (size_t j = 0; j < n_copies; ++j)
1844 ConstVecValues.push_back(CValue);
1846 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1847 &ConstVecValues[0], ConstVecValues.size());
1849 // Otherwise, copy the value from one register to another:
1850 switch (Op0.getValueType()) {
1851 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
// (Scalar-type case labels elided in this extract.)
1858 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1865 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1866 switch (Op.getValueType()) {
1868 SDOperand rA = Op.getOperand(0);
1869 SDOperand rB = Op.getOperand(1);
1870 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1871 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1872 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1873 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1875 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1879 // Multiply two v8i16 vectors (pipeline friendly version):
1880 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1881 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1882 // c) Use SELB to select upper and lower halves from the intermediate results
1884 // NOTE: We really want to move the FSMBI to earlier to actually get the
1885 // dual-issue. This code does manage to do this, even if it's a little on
1888 MachineFunction &MF = DAG.getMachineFunction();
1889 SSARegMap *RegMap = MF.getSSARegMap();
1890 SDOperand Chain = Op.getOperand(0);
1891 SDOperand rA = Op.getOperand(0);
1892 SDOperand rB = Op.getOperand(1);
1893 unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1894 unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1897 DAG.getCopyToReg(Chain, FSMBIreg,
1898 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1899 DAG.getConstant(0xcccc, MVT::i32)));
1902 DAG.getCopyToReg(FSMBOp, HiProdReg,
1903 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1905 SDOperand HHProd_v4i32 =
1906 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1907 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1909 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1910 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1911 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1912 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1914 DAG.getConstant(16, MVT::i16))),
1915 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1918 // This M00sE is N@stI! (apologies to Monty Python)
1920 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1921 // is to break it all apart, sign extend, and reassemble the various
1922 // intermediate products.
1924 MachineFunction &MF = DAG.getMachineFunction();
1925 SSARegMap *RegMap = MF.getSSARegMap();
1926 SDOperand Chain = Op.getOperand(0);
1927 SDOperand rA = Op.getOperand(0);
1928 SDOperand rB = Op.getOperand(1);
1929 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1930 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1932 unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1933 unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1934 unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1937 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1938 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1939 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1941 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1943 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1946 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1947 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1949 SDOperand FSMBdef_2222 =
1950 DAG.getCopyToReg(Chain, FSMBreg_2222,
1951 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1952 DAG.getConstant(0x2222, MVT::i32)));
1954 SDOperand FSMBuse_2222 =
1955 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1957 SDOperand LoProd_1 =
1958 DAG.getCopyToReg(Chain, LoProd_reg,
1959 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1962 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1965 DAG.getNode(ISD::AND, MVT::v4i32,
1966 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1967 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1968 LoProdMask, LoProdMask,
1969 LoProdMask, LoProdMask));
1972 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1973 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1976 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1977 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1980 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1981 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1982 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1984 SDOperand HHProd_1 =
1985 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1986 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1987 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1989 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1992 DAG.getCopyToReg(Chain, HiProd_reg,
1993 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1995 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
1999 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2000 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2002 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2003 DAG.getNode(ISD::OR, MVT::v4i32,
2008 cerr << "CellSPU: Unknown vector multiplication, got "
2009 << MVT::getValueTypeString(Op.getValueType())
//! Lower f32 (and v4f32) ISD::FDIV.
/*!
 Computes A/B as A*(1/B): a reciprocal of B is estimated with
 FPRecipEst and refined with FPInterp, multiplied by A, and then one
 Newton-Raphson style correction is applied:
   result = A*rB + rB*(A - B*(A*rB))
 NOTE(review): the CopyToReg chain is rooted at DAG.getEntryNode()
 rather than any incoming chain; FDIV has no chain operand, but the
 TODO below suggests the chaining was never settled -- confirm.
 */
2018 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2019 MachineFunction &MF = DAG.getMachineFunction();
2020 SSARegMap *RegMap = MF.getSSARegMap();
2022 SDOperand A = Op.getOperand(0);
2023 SDOperand B = Op.getOperand(1);
2024 unsigned VT = Op.getValueType();
// VRegBR holds the refined reciprocal of B; VRegC holds A * (1/B).
2026 unsigned VRegBR, VRegC;
// Scalar f32 uses the 32-bit FP register class; the vector form uses VECREG.
2028 if (VT == MVT::f32) {
2029 VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2030 VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2032 VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2033 VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2035 // TODO: make sure we're feeding FPInterp the right arguments
2036 // Right now: fi B, frest(B)
2039 // (Floating Interpolate (FP Reciprocal Estimate B))
2041 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2042 DAG.getNode(SPUISD::FPInterp, VT, B,
2043 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2045 // Computes A * BRcpl and stores in a temporary register
2047 DAG.getCopyToReg(BRcpl, VRegC,
2048 DAG.getNode(ISD::FMUL, VT, A,
2049 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2050 // What's the Chain variable do? It's magic!
2051 // TODO: set Chain = Op(0).getEntryNode()
// Refinement step: A*rB + rB*(A - B*(A*rB)).
2053 return DAG.getNode(ISD::FADD, VT,
2054 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2055 DAG.getNode(ISD::FMUL, VT,
2056 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2057 DAG.getNode(ISD::FSUB, VT, A,
2058 DAG.getNode(ISD::FMUL, VT, B,
2059 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2062 // Expands double-precision FDIV
2063 // Expects two doubles as inputs X and Y, does a floating point
2064 // reciprocal estimate, and three iterations of Newton-Raphson
2065 // to increase accuracy.
2066 //static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
2067 // MachineFunction &MF = DAG.getMachineFunction();
2068 // SSARegMap *RegMap = MF.getSSARegMap();
2070 // SDOperand X = Op.getOperand(0);
2071 // SDOperand Y = Op.getOperand(1);
//! Lower ISD::EXTRACT_VECTOR_ELT (constant index only).
/*!
 Element 0 of an i32/i64 vector already lives in the preferred slot,
 so it is read directly with EXTRACT_ELT0.  Otherwise a SHUFB mask is
 built that rotates the requested element into the preferred slot,
 and the result is extracted from slot 0.
 */
2074 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2075 unsigned VT = Op.getValueType();
2076 SDOperand N = Op.getOperand(0);
2077 SDOperand Elt = Op.getOperand(1);
2078 SDOperand ShufMask[16];
2079 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2081 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2083 int EltNo = (int) C->getValue();
// Range-check the slot number against the element count for each type:
// valid slots are 0..15 (i8), 0..7 (i16), 0..3 (i32) and 0..1 (i64).
2086 if (VT == MVT::i8 && EltNo >= 16)
2087 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2088 else if (VT == MVT::i16 && EltNo >= 8)
2089 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2090 else if (VT == MVT::i32 && EltNo >= 4)
2091 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2092 else if (VT == MVT::i64 && EltNo >= 2)
2093 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2095 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2096 // i32 and i64: Element 0 is the preferred slot
2097 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2100 // Need to generate shuffle mask and extract:
2101 int prefslot_begin, prefslot_end;
2102 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2106 prefslot_begin = prefslot_end = 3;
2110 prefslot_begin = 2; prefslot_end = 3;
2114 prefslot_begin = 0; prefslot_end = 3;
2118 prefslot_begin = 0; prefslot_end = 7;
2123 for (int i = 0; i < 16; ++i) {
2124 // zero fill upper part of preferred slot, don't care about the
2126 unsigned int mask_val;
2128 if (i <= prefslot_end) {
2130 ((i < prefslot_begin)
2132 : elt_byte + (i - prefslot_begin));
2134 ShufMask[i] = DAG.getConstant(mask_val, MVT::i16);
// Bytes past the preferred slot repeat the slot's pattern (don't-care).
2136 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2139 SDOperand ShufMaskVec =
2140 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2142 sizeof(ShufMask) / sizeof(ShufMask[0]));
2144 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2145 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2146 N, N, ShufMaskVec));
//! Lower ISD::INSERT_VECTOR_ELT (constant index only).
/*!
 Splats the scalar with SCALAR_TO_VECTOR and merges it into the source
 vector via SHUFB, with an INSERT_MASK derived from a 16-byte aligned
 base address plus the constant element offset.
 NOTE(review): cast<ConstantSDNode> asserts internally and never
 returns null, so the assert on CN below can never fire; dyn_cast
 would make the check meaningful.
 */
2150 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2151 SDOperand VecOp = Op.getOperand(0);
2152 SDOperand ValOp = Op.getOperand(1);
2153 SDOperand IdxOp = Op.getOperand(2);
2154 MVT::ValueType VT = Op.getValueType();
2156 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2157 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2159 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2160 // Use $2 because it's always 16-byte aligned and it's available:
2161 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2164 DAG.getNode(SPUISD::SHUFB, VT,
2165 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2167 DAG.getNode(SPUISD::INSERT_MASK, VT,
2168 DAG.getNode(ISD::ADD, PtrVT,
2170 DAG.getConstant(CN->getValue(),
//! Lower i8 arithmetic by promoting to i16.
/*!
 SPU has no 8-bit forms for these operations, so each arm widens the
 operands to i16 (sign- or zero-extending as appropriate for the
 operation), performs the operation at i16 via \arg Opc, and truncates
 the result back to i8.  Constants are simply re-materialized at i16.
 (The case labels are not visible in this excerpt; per-arm notes below
 are inferred from the extension kinds and hedged accordingly.)
 */
2176 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2177 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2179 assert(Op.getValueType() == MVT::i8);
2182 assert(0 && "Unhandled i8 math operator");
2186 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2188 SDOperand N1 = Op.getOperand(1);
// Both operands sign-extended; subtraction is sign-agnostic after
// truncation, so this is safe.
2189 N0 = (N0.getOpcode() != ISD::Constant
2190 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2191 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2192 N1 = (N1.getOpcode() != ISD::Constant
2193 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2194 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2195 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2196 DAG.getNode(Opc, MVT::i16, N0, N1));
// This arm duplicates the low byte into the high byte
// (ExpandArg = N0 | (N0 << 8)) before applying Opc -- consistent with
// a rotate expansion, where bits leaving one byte re-enter the other.
// TODO confirm the opcode; the case label is not visible here.
2200 SDOperand N1 = Op.getOperand(1);
2202 N0 = (N0.getOpcode() != ISD::Constant
2203 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2204 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2205 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2206 N1 = (N1.getOpcode() != ISD::Constant
2207 ? DAG.getNode(N1Opc, MVT::i16, N1)
2208 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2209 SDOperand ExpandArg =
2210 DAG.getNode(ISD::OR, MVT::i16, N0,
2211 DAG.getNode(ISD::SHL, MVT::i16,
2212 N0, DAG.getConstant(8, MVT::i16)));
2213 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2214 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Zero-extend arm: the promoted high bits must be zero for Opc to
// produce the correct low byte (logical-shift style behavior).
2218 SDOperand N1 = Op.getOperand(1);
2220 N0 = (N0.getOpcode() != ISD::Constant
2221 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2222 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2223 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2224 N1 = (N1.getOpcode() != ISD::Constant
2225 ? DAG.getNode(N1Opc, MVT::i16, N1)
2226 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2227 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2228 DAG.getNode(Opc, MVT::i16, N0, N1));
// Sign-extend arm: the promoted high bits replicate the sign bit
// (arithmetic-shift style behavior).
2231 SDOperand N1 = Op.getOperand(1);
2233 N0 = (N0.getOpcode() != ISD::Constant
2234 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2235 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2236 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2237 N1 = (N1.getOpcode() != ISD::Constant
2238 ? DAG.getNode(N1Opc, MVT::i16, N1)
2239 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2240 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2241 DAG.getNode(Opc, MVT::i16, N0, N1));
// Second sign-extend arm, identical in structure to the previous one.
2244 SDOperand N1 = Op.getOperand(1);
2246 N0 = (N0.getOpcode() != ISD::Constant
2247 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2248 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2249 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2250 N1 = (N1.getOpcode() != ISD::Constant
2251 ? DAG.getNode(N1Opc, MVT::i16, N1)
2252 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2253 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2254 DAG.getNode(Opc, MVT::i16, N0, N1));
2262 //! Lower byte immediate operations for v16i8 vectors:
/*!
 When one operand of a v16i8 logical operation is a splatted constant
 BUILD_VECTOR (possibly hidden behind a BIT_CONVERT), rebuild it as a
 BUILD_VECTOR of target constants so the instruction selector can
 match the byte-immediate instruction forms (ANDBI/ORBI/XORBI).
 */
2264 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2267 MVT::ValueType VT = Op.getValueType();
// Assume the constant vector is operand 0; otherwise look through a
// BIT_CONVERT and/or swap to operand 1.
2269 ConstVec = Op.getOperand(0);
2270 Arg = Op.getOperand(1);
2271 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2272 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2273 ConstVec = ConstVec.getOperand(0);
2275 ConstVec = Op.getOperand(1);
2276 Arg = Op.getOperand(0);
2277 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2278 ConstVec = ConstVec.getOperand(0);
2283 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2284 uint64_t VectorBits[2];
2285 uint64_t UndefBits[2];
2286 uint64_t SplatBits, SplatUndef;
// Only rewrite when every element is the same constant (a splat).
2289 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2290 && isConstantSplat(VectorBits, UndefBits,
2291 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2292 SplatBits, SplatUndef, SplatSize)) {
2293 SDOperand tcVec[16];
2294 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2295 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2297 // Turn the BUILD_VECTOR into a set of target constants:
2298 for (size_t i = 0; i < tcVecSize; ++i)
// Re-emit the same logical opcode with the target-constant vector.
2301 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2302 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2309 //! Lower i32 multiplication
/*!
 SPU multiplies are 16x16->32, so a full 32-bit multiply is composed
 from partial products:
   rA*rB = (MPYH(rA,rB) + MPYH(rB,rA)) + MPYU(rA,rB)
 i.e. both cross high*low products plus the unsigned low*low product.
 */
2310 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
// Diagnostic for value types this routine does not handle:
2314 cerr << "CellSPU: Unknown LowerMUL value type, got "
2315 << MVT::getValueTypeString(Op.getValueType())
2321 SDOperand rA = Op.getOperand(0);
2322 SDOperand rB = Op.getOperand(1);
2324 return DAG.getNode(ISD::ADD, MVT::i32,
2325 DAG.getNode(ISD::ADD, MVT::i32,
2326 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2327 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2328 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2335 //! Custom lowering for CTPOP (count population)
/*!
2337 Custom lowering code that counts the number of ones in the input
2338 operand. SPU has such an instruction, but it counts the number of
2339 ones per byte, which then have to be accumulated.
 */
2341 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2342 unsigned VT = Op.getValueType();
// Vector type with the same 128-bit total width as the register:
2343 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// i8: a single per-byte CNTB count in the preferred slot is the answer.
2347 SDOperand N = Op.getOperand(0);
2348 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2350 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2351 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2353 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16: CNTB counts per byte, so fold the two byte counts together
// with ((x >> 8) + x) and mask off the upper byte's contribution.
// NOTE(review): the mask is 0x0f, but a 16-bit popcount can be 16
// (all bits set), which 0x0f would truncate to 0 -- suspect this
// should be 0x1f; confirm against the hardware behavior.
2357 MachineFunction &MF = DAG.getMachineFunction();
2358 SSARegMap *RegMap = MF.getSSARegMap();
2360 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
2362 SDOperand N = Op.getOperand(0);
2363 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2364 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2365 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2367 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2368 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2370 // CNTB_result becomes the chain to which all of the virtual registers
2371 // CNTB_reg, SUM1_reg become associated:
2372 SDOperand CNTB_result =
2373 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2375 SDOperand CNTB_rescopy =
2376 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2378 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2380 return DAG.getNode(ISD::AND, MVT::i16,
2381 DAG.getNode(ISD::ADD, MVT::i16,
2382 DAG.getNode(ISD::SRL, MVT::i16,
// i32: two shift/add reduction steps (by 16 then by 8) fold the four
// per-byte counts into the low byte, then mask with 0xff.
2389 MachineFunction &MF = DAG.getMachineFunction();
2390 SSARegMap *RegMap = MF.getSSARegMap();
2392 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2393 unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2395 SDOperand N = Op.getOperand(0);
2396 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2397 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2398 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2399 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2401 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2402 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2404 // CNTB_result becomes the chain to which all of the virtual registers
2405 // CNTB_reg, SUM1_reg become associated:
2406 SDOperand CNTB_result =
2407 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2409 SDOperand CNTB_rescopy =
2410 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2413 DAG.getNode(ISD::SRL, MVT::i32,
2414 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2417 DAG.getNode(ISD::ADD, MVT::i32,
2418 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2420 SDOperand Sum1_rescopy =
2421 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1)
2424 DAG.getNode(ISD::SRL, MVT::i32,
2425 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2428 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2429 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2431 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2441 /// LowerOperation - Provide custom lowering hooks for some operations.
/// Dispatches each custom-lowered opcode to its Lower* helper; opcodes
/// that reach the default case dump a diagnostic.
2444 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2446 switch (Op.getOpcode()) {
// Unhandled opcodes fall through to this diagnostic dump:
2448 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2449 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2450 cerr << "*Op.Val:\n";
2457 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2459 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2460 case ISD::ConstantPool:
2461 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2462 case ISD::GlobalAddress:
2463 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2464 case ISD::JumpTable:
2465 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2467 return LowerConstant(Op, DAG);
2468 case ISD::ConstantFP:
2469 return LowerConstantFP(Op, DAG);
2470 case ISD::FORMAL_ARGUMENTS:
2471 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2473 return LowerCALL(Op, DAG);
2475 return LowerRET(Op, DAG, getTargetMachine());
// i8 arithmetic is promoted to i16 (see LowerI8Math):
2484 return LowerI8Math(Op, DAG, Op.getOpcode());
2486 // Vector-related lowering.
2487 case ISD::BUILD_VECTOR:
2488 return LowerBUILD_VECTOR(Op, DAG);
2489 case ISD::SCALAR_TO_VECTOR:
2490 return LowerSCALAR_TO_VECTOR(Op, DAG);
2491 case ISD::VECTOR_SHUFFLE:
2492 return LowerVECTOR_SHUFFLE(Op, DAG);
2493 case ISD::EXTRACT_VECTOR_ELT:
2494 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2495 case ISD::INSERT_VECTOR_ELT:
2496 return LowerINSERT_VECTOR_ELT(Op, DAG);
2498 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2502 return LowerByteImmed(Op, DAG);
2504 // Vector and i8 multiply:
2506 if (MVT::isVector(Op.getValueType()))
2507 return LowerVectorMUL(Op, DAG);
2508 else if (Op.getValueType() == MVT::i8)
2509 return LowerI8Math(Op, DAG, Op.getOpcode());
2511 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
// FDIV: only f32/v4f32 supported; the f64 path is still disabled.
2514 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2515 return LowerFDIVf32(Op, DAG);
2516 // else if (Op.getValueType() == MVT::f64)
2517 // return LowerFDIVf64(Op, DAG);
2519 assert(0 && "Calling FDIV on unsupported MVT");
2522 return LowerCTPOP(Op, DAG);
2528 //===----------------------------------------------------------------------===//
2529 // Other Lowering Code
2530 //===----------------------------------------------------------------------===//
/// Custom-inserter hook: expand a MachineInstr flagged for custom DAG
/// scheduling insertion at the end of \arg BB.  (Body not shown here;
/// presumably trivial for SPU -- TODO confirm.)
2533 SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2534 MachineBasicBlock *BB)
2539 //===----------------------------------------------------------------------===//
2540 // Target Optimization Hooks
2541 //===----------------------------------------------------------------------===//
/// Target-specific DAG combines: simplifies the shift/rotate-by-
/// immediate node forms when the shifted value or the amount is the
/// constant zero.
2544 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2547 TargetMachine &TM = getTargetMachine();
2548 SelectionDAG &DAG = DCI.DAG;
2550 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2552 switch (N->getOpcode()) {
2555 // Look for obvious optimizations for shift left:
2556 // a) Replace 0 << V with 0
2557 // b) Replace V << 0 with V
2559 // N.B: llvm will generate an undef node if the shift amount is greater than
2560 // 15 (e.g.: V << 16), which will naturally trigger an assert.
2563 case SPU::SHLQBIIvec:
2565 case SPU::ROTHIr16_i32:
2567 case SPU::ROTIr32_i16:
2568 case SPU::ROTQBYIvec:
2569 case SPU::ROTQBYBIvec:
2570 case SPU::ROTQBIIvec:
2571 case SPU::ROTHMIr16:
2573 case SPU::ROTQMBYIvec: {
// NOTE(review): the opcode is checked first, so the cast<> below is
// safe -- but cast<> never returns null, making the inner if
// redundant; dyn_cast (without the opcode pre-check) was probably
// the intended idiom.
2574 if (N0.getOpcode() == ISD::Constant) {
2575 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2576 if (C->getValue() == 0) // 0 << V -> 0.
2580 SDOperand N1 = N->getOperand(1);
2581 if (N1.getOpcode() == ISD::Constant) {
2582 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2583 if (C->getValue() == 0) // V << 0 -> V
2594 //===----------------------------------------------------------------------===//
2595 // Inline Assembly Support
2596 //===----------------------------------------------------------------------===//
2598 /// getConstraintType - Given a constraint letter, return the type of
2599 /// constraint it is for this target.
2600 SPUTargetLowering::ConstraintType
2601 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2602 if (ConstraintLetter.size() == 1) {
2603 switch (ConstraintLetter[0]) {
// Register-class constraint letters (case labels elided above) all
// fall through to:
2610 return C_RegisterClass;
// Anything else defers to the generic TargetLowering handling.
2613 return TargetLowering::getConstraintType(ConstraintLetter);
/// Map a single-letter inline-asm register constraint (letter set
/// modeled on the RS6000/GCC letters, per the comment below) to the
/// matching SPU register class for the requested value type.
2616 std::pair<unsigned, const TargetRegisterClass*>
2617 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2618 MVT::ValueType VT) const
2620 if (Constraint.size() == 1) {
2621 // GCC RS6000 Constraint Letters
2622 switch (Constraint[0]) {
// Integer constraint: 64-bit types get R64C, everything else R32C.
2626 return std::make_pair(0U, SPU::R64CRegisterClass);
2627 return std::make_pair(0U, SPU::R32CRegisterClass);
// Floating-point constraint: f32 -> R32FP, f64 -> R64FP.
2630 return std::make_pair(0U, SPU::R32FPRegisterClass);
2631 else if (VT == MVT::f64)
2632 return std::make_pair(0U, SPU::R64FPRegisterClass);
// General-purpose fallback class:
2635 return std::make_pair(0U, SPU::GPRCRegisterClass);
2639 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
/// Compute known-zero/known-one bits for SPU-specific target nodes.
/// (Body not shown here; presumably the default no-op -- TODO confirm.)
2643 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2645 uint64_t &KnownZero,
2647 const SelectionDAG &DAG,
2648 unsigned Depth ) const {
2653 // LowerAsmOperandForConstraint
/// Lower an inline-asm operand for the given constraint letter.
/// Currently delegates entirely to the TargetLowering base class.
2655 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2656 char ConstraintLetter,
2657 std::vector<SDOperand> &Ops,
2658 SelectionDAG &DAG) {
2659 // Default, for the time being, to the base class handler
2660 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2663 /// isLegalAddressImmediate - Return true if the integer value can be used
2664 /// as the offset of the target addressing mode.
2665 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2666 // SPU's addresses are 256K:
// Accepts offsets in the open interval (-2^18, 2^18-1).
// NOTE(review): both bounds are exclusive, so -262144 and 262142/3 at
// the edges are rejected asymmetrically -- confirm this is intended
// rather than `V >= -(1 << 18) && V < (1 << 18)`.
2667 return (V > -(1 << 18) && V < (1 << 18) - 1);
2670 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {