1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "llvm/ADT/VectorExtras.h"
18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
// NOTE(review): file-scope mutable map, lazily filled the first time
// getTargetNodeName() runs; not thread-safe, but only used for debug names.
38 std::map<unsigned, const char *> node_names;
40 //! MVT::ValueType mapping to useful data for Cell SPU
41 struct valtype_map_s {
// The value type this entry describes.
42 const MVT::ValueType valtype;
// Byte offset of the type's "preferred slot" within a 16-byte SPU register;
// used by AlignedLoad below to adjust rotation amounts.
43 const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
// Entry count for valtype_map; loop bound in getValueTypeMapEntry().
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Look up the valtype_map entry for VT. Returns a pointer into valtype_map,
// or 0 when VT has no entry (the diagnostic below reports that case).
59 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
60 const valtype_map_s *retval = 0;
// Linear scan is fine here: n_valtype_map is a small compile-time constant.
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
71 cerr << "getValueTypeMapEntry returns NULL for "
72 << MVT::getValueTypeString(VT)
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
88 bool isMemoryOperand(const SDOperand &Op)
90 const unsigned Opc = Op.getOpcode();
// Accept both the pre-selection (ISD::*) and post-selection (ISD::Target*)
// flavors of each address node, plus SPU's own A-form address node.
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
// Constructor: registers the SPU register classes and declares, per
// (opcode, type) pair, whether the operation is Legal, Custom lowered,
// Promoted, or Expanded during SelectionDAG legalization.
105 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
106 : TargetLowering(TM),
109 // Fold away setcc operations if possible.
112 // Use _setjmp/_longjmp instead of setjmp/longjmp.
113 setUseUnderscoreSetJmp(true);
114 setUseUnderscoreLongJmp(true);
116 // Set up the SPU's register classes:
117 // NOTE: i8 register class is not registered because we cannot determine when
118 // we need to zero or sign extend for custom-lowered loads and stores.
119 // NOTE: Ignore the previous note. For now. :-)
120 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
121 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
122 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
123 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
124 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
125 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
126 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
128 // SPU has no sign or zero extended loads for i1, i8, i16:
129 setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
130 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
131 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
132 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
133 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
134 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
135 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
136 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
138 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
139 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
140 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
141 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
142 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
143 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
144 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
145 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
147 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
148 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
149 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
151 // SPU constant load actions are custom lowered:
152 setOperationAction(ISD::Constant, MVT::i64, Custom);
153 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
154 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
156 // SPU's loads and stores have to be custom lowered:
157 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
159 setOperationAction(ISD::LOAD, sctype, Custom);
160 setOperationAction(ISD::STORE, sctype, Custom);
163 // SPU supports BRCOND, although DAGCombine will convert BRCONDs
164 // into BR_CCs. BR_CC instructions are custom selected in
166 setOperationAction(ISD::BRCOND, MVT::Other, Legal);
168 // Expand the jumptable branches
169 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
170 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
171 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
173 // SPU has no intrinsics for these particular operations:
174 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
175 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
176 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
178 // SPU has no SREM/UREM instructions (comment previously said "PowerPC" --
178 // copy-paste from the PPC backend; the settings themselves are correct)
179 setOperationAction(ISD::SREM, MVT::i32, Expand);
180 setOperationAction(ISD::UREM, MVT::i32, Expand);
181 setOperationAction(ISD::SREM, MVT::i64, Expand);
182 setOperationAction(ISD::UREM, MVT::i64, Expand);
184 // We don't support sin/cos/sqrt/fmod
185 setOperationAction(ISD::FSIN , MVT::f64, Expand);
186 setOperationAction(ISD::FCOS , MVT::f64, Expand);
187 setOperationAction(ISD::FREM , MVT::f64, Expand);
188 setOperationAction(ISD::FSIN , MVT::f32, Expand);
189 setOperationAction(ISD::FCOS , MVT::f32, Expand);
190 setOperationAction(ISD::FREM , MVT::f32, Expand);
192 // If we're enabling GP optimizations, use hardware square root
193 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
194 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
196 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
197 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
199 // SPU can do rotate right and left, so legalize it... but customize for i8
200 // because instructions don't exist.
201 setOperationAction(ISD::ROTR, MVT::i32, Legal);
202 setOperationAction(ISD::ROTR, MVT::i16, Legal);
203 setOperationAction(ISD::ROTR, MVT::i8, Custom);
204 setOperationAction(ISD::ROTL, MVT::i32, Legal);
205 setOperationAction(ISD::ROTL, MVT::i16, Legal);
206 setOperationAction(ISD::ROTL, MVT::i8, Custom);
207 // SPU has no native version of shift left/right for i8
208 setOperationAction(ISD::SHL, MVT::i8, Custom);
209 setOperationAction(ISD::SRL, MVT::i8, Custom);
210 setOperationAction(ISD::SRA, MVT::i8, Custom);
212 // Custom lower i32 multiplications
213 setOperationAction(ISD::MUL, MVT::i32, Custom);
215 // Need to custom handle (some) common i8 math ops
216 setOperationAction(ISD::SUB, MVT::i8, Custom);
217 setOperationAction(ISD::MUL, MVT::i8, Custom);
219 // SPU does not have BSWAP. It does have i32 support CTLZ.
220 // CTPOP has to be custom lowered.
221 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
222 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
224 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
225 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
226 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
227 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
229 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
230 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
232 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
234 // SPU does not have select or setcc
235 setOperationAction(ISD::SELECT, MVT::i1, Expand);
236 setOperationAction(ISD::SELECT, MVT::i8, Expand);
237 setOperationAction(ISD::SELECT, MVT::i16, Expand);
238 setOperationAction(ISD::SELECT, MVT::i32, Expand);
239 setOperationAction(ISD::SELECT, MVT::i64, Expand);
240 setOperationAction(ISD::SELECT, MVT::f32, Expand);
241 setOperationAction(ISD::SELECT, MVT::f64, Expand);
243 setOperationAction(ISD::SETCC, MVT::i1, Expand);
244 setOperationAction(ISD::SETCC, MVT::i8, Expand);
245 setOperationAction(ISD::SETCC, MVT::i16, Expand);
246 setOperationAction(ISD::SETCC, MVT::i32, Expand);
247 setOperationAction(ISD::SETCC, MVT::i64, Expand);
248 setOperationAction(ISD::SETCC, MVT::f32, Expand);
249 setOperationAction(ISD::SETCC, MVT::f64, Expand);
251 // SPU has a legal FP -> signed INT instruction
252 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
// NOTE(review): FP_TO_SINT/i64 and FP_TO_UINT/i32 are set again further
// below (lines 310 and 314); the later calls win. Harmless for i64/Custom
// (same value), but i32 FP_TO_UINT ends up Promote, not the Legal set here.
253 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
254 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
255 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
257 // FDIV on SPU requires custom lowering
258 setOperationAction(ISD::FDIV, MVT::f32, Custom);
259 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
261 // SPU has [U|S]INT_TO_FP
262 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
263 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
264 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
265 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
266 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
267 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
268 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
269 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
// i32/f32 and i64/f64 share register widths, so bitcasts are free:
271 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
272 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
273 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
274 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
276 // We cannot sextinreg(i1). Expand to shifts.
277 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
279 // Support label based line numbers.
280 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
281 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
283 // We want to legalize GlobalAddress and ConstantPool nodes into the
284 // appropriate instructions to materialize the address.
285 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
286 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
287 setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
288 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
289 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
290 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
291 setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
292 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
294 // RET must be custom lowered, to meet ABI requirements
295 setOperationAction(ISD::RET, MVT::Other, Custom);
297 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
298 setOperationAction(ISD::VASTART , MVT::Other, Custom);
300 // Use the default implementation.
301 setOperationAction(ISD::VAARG , MVT::Other, Expand);
302 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
303 setOperationAction(ISD::VAEND , MVT::Other, Expand);
304 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
305 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
306 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
307 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
309 // Cell SPU has instructions for converting between i64 and fp.
310 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
311 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
313 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
314 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
316 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
317 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
319 // First set operation action for all vector types to expand. Then we
320 // will selectively turn on ones that can be effectively codegen'd.
321 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
322 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
323 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
324 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
325 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
326 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
328 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
329 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
330 // add/sub are legal for all supported vector VT's.
331 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
332 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
333 // mul has to be custom lowered.
334 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
336 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
337 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
338 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
339 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
340 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
341 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
343 // These operations need to be expanded:
344 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
345 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
346 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
347 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
348 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
350 // Custom lower build_vector, constant pool spills, insert and
351 // extract vector elements:
352 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
353 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
354 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
355 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
356 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
357 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
// Per-type overrides of the generic vector loop above:
360 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
361 setOperationAction(ISD::AND, MVT::v16i8, Custom);
362 setOperationAction(ISD::OR, MVT::v16i8, Custom);
363 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
364 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
366 setSetCCResultType(MVT::i32);
367 setShiftAmountType(MVT::i32);
368 setSetCCResultContents(ZeroOrOneSetCCResult);
370 setStackPointerRegisterToSaveRestore(SPU::R1);
372 // We have target-specific dag combine patterns for the following nodes:
373 // e.g., setTargetDAGCombine(ISD::SUB);
375 computeRegisterProperties();
// Return the human-readable name of a SPUISD target node opcode, or 0 for
// unknown opcodes. The name table is built lazily on first call.
// NOTE(review): lazy init of the file-scope node_names map is not
// thread-safe; fine for single-threaded codegen/debug use.
379 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
381 if (node_names.empty()) {
382 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
383 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
384 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
385 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
386 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
387 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
388 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
389 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
390 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
391 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
392 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
393 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
394 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
395 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
396 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
397 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
398 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
399 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
400 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
401 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
402 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
403 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
404 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
405 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
406 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
407 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
408 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
409 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
410 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
411 "SPUISD::ROTBYTES_RIGHT_Z";
412 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
413 "SPUISD::ROTBYTES_RIGHT_S";
414 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
415 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
416 "SPUISD::ROTBYTES_LEFT_CHAINED";
417 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
418 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
419 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
420 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
421 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
422 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
425 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
427 return ((i != node_names.end()) ? i->second : 0);
430 //===----------------------------------------------------------------------===//
431 // Calling convention code:
432 //===----------------------------------------------------------------------===//
434 #include "SPUGenCallingConv.inc"
436 //===----------------------------------------------------------------------===//
437 // LowerOperation implementation
438 //===----------------------------------------------------------------------===//
440 /// Aligned load common code for CellSPU
442 \param[in] Op The SelectionDAG load or store operand
443 \param[in] DAG The selection DAG
444 \param[in] ST CellSPU subtarget information structure
445 \param[in,out] alignment Caller initializes this to the load or store node's
446 value from getAlignment(), may be updated while generating the aligned load
447 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
448 offset (divisible by 16, modulo 16 == 0)
449 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
450 offset of the preferred slot (modulo 16 != 0)
451 \param[in,out] VT Caller initializes this value type to the the load or store
452 node's loaded or stored value type; may be updated if an i1-extended load or
454 \param[out] was16aligned true if the base pointer had 16-byte alignment,
455 otherwise false. Can help to determine if the chunk needs to be rotated.
457 Both load and store lowering load a block of data aligned on a 16-byte
458 boundary. This is the common aligned load code shared between both.
// NOTE(review): the LSN (LoadSDNode/StoreSDNode) parameter referenced in the
// body is declared on a line not shown here; callers pass the load/store node
// between ST and alignment -- confirm against the full source.
461 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
463 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
464 MVT::ValueType &VT, bool &was16aligned)
466 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
467 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
468 SDOperand basePtr = LSN->getBasePtr();
469 SDOperand chain = LSN->getChain();
// base + constant-offset addressing (typical getelementptr output): peel the
// constant off and fold it into alignOffs/prefSlotOffs.
471 if (basePtr.getOpcode() == ISD::ADD) {
472 SDOperand Op1 = basePtr.Val->getOperand(1);
474 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
475 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.Val->getOperand(1));
477 alignOffs = (int) CN->getValue();
478 prefSlotOffs = (int) (alignOffs & 0xf);
480 // Adjust the rotation amount to ensure that the final result ends up in
481 // the preferred slot:
482 prefSlotOffs -= vtm->prefslot_byte;
483 basePtr = basePtr.getOperand(0);
485 // Modify alignment, since the ADD is likely from getElementPtr:
486 switch (basePtr.getOpcode()) {
487 case ISD::GlobalAddress:
488 case ISD::TargetGlobalAddress: {
489 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(basePtr.Val);
490 const GlobalValue *GV = GN->getGlobal();
491 alignment = GV->getAlignment();
// No constant offset: rotation only needs to compensate for the type's
// preferred-slot byte position.
497 prefSlotOffs = -vtm->prefslot_byte;
501 prefSlotOffs = -vtm->prefslot_byte;
504 if (alignment == 16) {
505 // Realign the base pointer as a D-Form address:
506 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
507 if (isMemoryOperand(basePtr)) {
508 SDOperand Zero = DAG.getConstant(0, PtrVT);
509 unsigned Opc = (!ST->usingLargeMem()
511 : SPUISD::XFormAddr);
512 basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero);
514 basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
515 basePtr, DAG.getConstant((alignOffs & ~0xf), PtrVT));
518 // Emit the vector load:
// The whole 16-byte chunk is loaded as v16i8; the caller extracts/rotates.
520 return DAG.getLoad(MVT::v16i8, chain, basePtr,
521 LSN->getSrcValue(), LSN->getSrcValueOffset(),
522 LSN->isVolatile(), 16);
525 // Unaligned load or we're using the "large memory" model, which means that
526 // we have to be very pessimistic:
527 if (isMemoryOperand(basePtr)) {
528 basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
532 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT));
533 was16aligned = false;
534 return DAG.getLoad(MVT::v16i8, chain, basePtr,
535 LSN->getSrcValue(), LSN->getSrcValueOffset(),
536 LSN->isVolatile(), 16);
539 /// Custom lower loads for CellSPU
541 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
542 within a 16-byte block, we have to rotate to extract the requested element.
545 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
546 LoadSDNode *LN = cast<LoadSDNode>(Op);
547 SDOperand the_chain = LN->getChain();
548 MVT::ValueType VT = LN->getLoadedVT();
549 MVT::ValueType OpVT = Op.Val->getValueType(0);
550 ISD::LoadExtType ExtType = LN->getExtensionType();
551 unsigned alignment = LN->getAlignment();
554 // For an extending load of an i1 variable, just call it i8 (or whatever we
555 // were passed) and make it zero-extended:
558 ExtType = ISD::ZEXTLOAD;
// Only the UNINDEXED addressing mode is handled; other modes fall through
// to the diagnostic at the bottom of the switch.
561 switch (LN->getAddressingMode()) {
562 case ISD::UNINDEXED: {
// Load the whole enclosing 16-byte chunk; offset/rotamt locate the value
// within it (see AlignedLoad above).
566 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
571 the_chain = result.getValue(1);
572 // Rotate the chunk if necessary
575 if (rotamt != 0 || !was16aligned) {
576 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
// 16-byte-aligned base: rotate by the constant byte amount...
581 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
// ...otherwise rotate by (basePtr + rotamt), computed at run time.
583 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
584 LoadSDNode *LN1 = cast<LoadSDNode>(result);
587 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
588 DAG.getConstant(rotamt, PtrVT));
591 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
592 the_chain = result.getValue(1);
595 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
597 MVT::ValueType vecVT = MVT::v16i8;
599 // Convert the loaded v16i8 vector to the appropriate vector type
600 // specified by the operand:
603 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
605 vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
608 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
609 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
610 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
611 the_chain = result.getValue(1);
613 // Handle the sign and zero-extending loads for i1 and i8:
616 if (ExtType == ISD::SEXTLOAD) {
617 NewOpC = (OpVT == MVT::i1
618 ? SPUISD::EXTRACT_I1_SEXT
619 : SPUISD::EXTRACT_I8_SEXT);
621 assert(ExtType == ISD::ZEXTLOAD);
622 NewOpC = (OpVT == MVT::i1
623 ? SPUISD::EXTRACT_I1_ZEXT
624 : SPUISD::EXTRACT_I8_ZEXT);
627 result = DAG.getNode(NewOpC, OpVT, result);
// Wrap scalar + chain in LDRESULT so both values survive legalization.
630 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
631 SDOperand retops[2] = { result, the_chain };
633 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
640 case ISD::LAST_INDEXED_MODE:
641 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
643 cerr << (unsigned) LN->getAddressingMode() << "\n";
651 /// Custom lower stores for CellSPU
653 All CellSPU stores are aligned to 16-byte boundaries, so for elements
654 within a 16-byte block, we have to generate a shuffle to insert the
655 requested element into its place, then store the resulting block.
658 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
659 StoreSDNode *SN = cast<StoreSDNode>(Op);
660 SDOperand Value = SN->getValue();
661 MVT::ValueType VT = Value.getValueType();
662 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
663 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
664 unsigned alignment = SN->getAlignment();
666 switch (SN->getAddressingMode()) {
667 case ISD::UNINDEXED: {
668 int chunk_offset, slot_offset;
671 // The vector type we really want to load from the 16-byte chunk, except
672 // in the case of MVT::i1, which has to be v16i8.
673 unsigned vecVT, stVecVT = MVT::v16i8;
676 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
677 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// Read-modify-write: load the enclosing 16-byte chunk first.
679 SDOperand alignLoadVec =
680 AlignedLoad(Op, DAG, ST, SN, alignment,
681 chunk_offset, slot_offset, VT, was16aligned);
683 if (alignLoadVec.Val == 0)
686 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
687 SDOperand basePtr = LN->getBasePtr();
688 SDOperand the_chain = alignLoadVec.getValue(1);
689 SDOperand theValue = SN->getValue();
693 && (theValue.getOpcode() == ISD::AssertZext
694 || theValue.getOpcode() == ISD::AssertSext)) {
695 // Drill down and get the value for zero- and sign-extended
697 theValue = theValue.getOperand(0);
702 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
703 SDOperand insertEltPtr;
704 SDOperand insertEltOp;
706 // If the base pointer is already a D-form address, then just create
707 // a new D-form address with a slot offset and the orignal base pointer.
708 // Otherwise generate a D-form address with the slot offset relative
709 // to the stack pointer, which is always aligned.
710 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
711 DEBUG(basePtr.Val->dump(&DAG));
714 if (basePtr.getOpcode() == SPUISD::DFormAddr) {
715 insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
716 basePtr.getOperand(0),
718 } else if (basePtr.getOpcode() == SPUISD::XFormAddr ||
719 (basePtr.getOpcode() == ISD::ADD
720 && basePtr.getOperand(0).getOpcode() == SPUISD::XFormAddr)) {
721 insertEltPtr = basePtr;
723 insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
724 DAG.getRegister(SPU::R1, PtrVT),
// Build the shuffle control word (insertion mask), then shuffle the new
// scalar into its slot within the previously loaded chunk.
728 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
729 result = DAG.getNode(SPUISD::SHUFB, vecVT,
730 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
732 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
734 result = DAG.getStore(the_chain, result, basePtr,
735 LN->getSrcValue(), LN->getSrcValueOffset(),
736 LN->isVolatile(), LN->getAlignment());
745 case ISD::LAST_INDEXED_MODE:
// NOTE(review): copy-paste from LowerLOAD -- the message below should say
// "LowerSTORE: Got a StoreSDNode ...". Left unchanged here (runtime string).
746 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
748 cerr << (unsigned) SN->getAddressingMode() << "\n";
756 /// Generate the address of a constant pool entry.
// Under -static with small memory, the target constant-pool node is usable
// directly (A-form); with large memory an X-form address is built. Other
// relocation models are rejected below.
758 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
759 MVT::ValueType PtrVT = Op.getValueType();
760 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
761 Constant *C = CP->getConstVal();
762 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
763 SDOperand Zero = DAG.getConstant(0, PtrVT);
764 const TargetMachine &TM = DAG.getTarget();
766 if (TM.getRelocationModel() == Reloc::Static) {
767 if (!ST->usingLargeMem()) {
768 // Just return the SDOperand with the constant pool address in it.
772 // Generate hi/lo address pair
773 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
774 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
776 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
778 return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero);
784 "LowerConstantPool: Relocation model other than static not supported.");
// Generate the address of a jump table entry. Static relocation only:
// small-memory targets use the jump-table node directly; large-memory
// targets wrap it in an X-form address.
789 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
790 MVT::ValueType PtrVT = Op.getValueType();
791 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
792 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
793 SDOperand Zero = DAG.getConstant(0, PtrVT);
794 const TargetMachine &TM = DAG.getTarget();
796 if (TM.getRelocationModel() == Reloc::Static) {
797 return (!ST->usingLargeMem()
799 : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero));
803 "LowerJumpTable: Relocation model other than static not supported.");
// Materialize a global variable's address. Static relocation only; the
// large-memory model routes through an X-form address node.
808 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
809 MVT::ValueType PtrVT = Op.getValueType();
810 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
811 GlobalValue *GV = GSDN->getGlobal();
812 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
813 const TargetMachine &TM = DAG.getTarget();
814 SDOperand Zero = DAG.getConstant(0, PtrVT);
816 if (TM.getRelocationModel() == Reloc::Static) {
817 return (!ST->usingLargeMem()
819 : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero));
821 cerr << "LowerGlobalAddress: Relocation model other than static not "
830 //! Custom lower i64 integer constants
832 This code inserts all of the necessary juggling that needs to occur to load
833 a 64-bit constant into a register.
836 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
837 unsigned VT = Op.getValueType();
838 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
840 if (VT == MVT::i64) {
// Splat the constant into both lanes of a v2i64, then extract element 0 --
// the SPU way of getting a 64-bit immediate into a register.
841 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
842 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
843 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// Any other type reaching here is a lowering bug; report it.
846 cerr << "LowerConstant: unhandled constant type "
847 << MVT::getValueTypeString(VT)
856 //! Custom lower single precision floating point constants
858 "float" immediates can be lowered as if they were unsigned 32-bit integers.
859 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
863 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
864 unsigned VT = Op.getValueType();
865 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
868 "LowerConstantFP: Node is not ConstantFPSDNode");
870 if (VT == MVT::f32) {
871 float targetConst = FP->getValueAPF().convertToFloat();
872 return DAG.getNode(SPUISD::SFPConstant, VT,
873 DAG.getTargetConstantFP(targetConst, VT));
874 } else if (VT == MVT::f64) {
// f64: reuse the i64 constant path, then bitcast the result back to double.
875 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
876 return DAG.getNode(ISD::BIT_CONVERT, VT,
877 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
// Lower ISD::FORMAL_ARGUMENTS for the SPU: copy incoming register arguments
// into fresh virtual registers (one register class per value type), load any
// remaining arguments from fixed stack slots, and — for varargs functions —
// spill the unused argument registers so va_arg can walk them in memory.
// Returns a MERGE_VALUES node of all argument values plus the chain.
884 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
886 MachineFunction &MF = DAG.getMachineFunction();
887 MachineFrameInfo *MFI = MF.getFrameInfo();
888 MachineRegisterInfo &RegInfo = MF.getRegInfo();
889 SmallVector<SDOperand, 8> ArgValues;
890 SDOperand Root = Op.getOperand(0);
891 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
893 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
894 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Arguments on the stack start just past the minimal ABI linkage area.
896 unsigned ArgOffset = SPUFrameInfo::minStackSize();
897 unsigned ArgRegIdx = 0;
898 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
900 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
902 // Add DAG nodes to load the arguments or copy them out of registers.
903 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
905 bool needsLoad = false;
906 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
907 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
911 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
912 << MVT::getValueTypeString(ObjectVT)
// Each value type below binds the incoming physical argument register to a
// virtual register of the matching SPU register class.
// i8 arguments:
917 if (!isVarArg && ArgRegIdx < NumArgRegs) {
918 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
919 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
920 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
// i16 arguments:
927 if (!isVarArg && ArgRegIdx < NumArgRegs) {
928 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
929 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
930 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
// i32 arguments:
937 if (!isVarArg && ArgRegIdx < NumArgRegs) {
938 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
939 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
940 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
// i64 arguments:
947 if (!isVarArg && ArgRegIdx < NumArgRegs) {
948 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
949 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
950 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
// f32 arguments:
957 if (!isVarArg && ArgRegIdx < NumArgRegs) {
958 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
959 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
960 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
// f64 arguments:
967 if (!isVarArg && ArgRegIdx < NumArgRegs) {
968 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
969 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
970 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// Vector arguments (copied at their vector value type):
981 if (!isVarArg && ArgRegIdx < NumArgRegs) {
982 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
983 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
984 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
992 // We need to load the argument to a virtual register if we determined above
993 // that we ran out of physical registers of the appropriate type
995 // If the argument is actually used, emit a load from the right stack
997 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
998 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
999 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1000 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1002 // Don't emit a dead load; substitute UNDEF for an unused argument.
1003 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1006 ArgOffset += StackSlotSize;
1009 ArgValues.push_back(ArgVal);
1012 // If the function takes variable number of arguments, make a frame index for
1013 // the start of the first vararg value... for expansion of llvm.va_start.
1015 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1017 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1018 // If this function is vararg, store any remaining integer argument regs to
1019 // their spots on the stack so that they may be loaded by dereferencing the
1020 // result of va_next.
1021 SmallVector<SDOperand, 8> MemOps;
1022 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1023 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1024 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1025 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1026 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1027 MemOps.push_back(Store);
1028 // Increment the address by the pointer size for the next argument to store
1029 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1030 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Tie all vararg spill stores together so they are not reordered away.
1032 if (!MemOps.empty())
1033 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1036 ArgValues.push_back(Root);
1038 // Return the new list of results.
1039 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1040 Op.Val->value_end());
1041 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1044 /// isLSAAddress - Return the immediate to use if the specified
1045 /// value is representable as a LSA address.
///
/// A constant qualifies when its low 2 bits are zero (word aligned) and the
/// remaining value fits in a sign-extended 16-bit field (top 14 bits are a
/// sign extension). On success, returns the constant shifted right by 2 as
/// an i32 node; otherwise returns 0.
1046 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1047 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1050 int Addr = C->getValue();
1051 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1052 (Addr << 14 >> 14) != Addr)
1053 return 0; // Top 14 bits have to be sext of immediate.
1055 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
// Lower ISD::CALL for the SPU: distribute outgoing arguments between the
// argument registers and fixed stack slots, pick the call addressing form
// (PC-relative BRSL, absolute BRASL/A-form, or indirect X-form for large
// memory mode), emit the call node bracketed by CALLSEQ_START/CALLSEQ_END,
// and copy any return values out of R3/R4.
1060 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1061 SDOperand Chain = Op.getOperand(0);
1063 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1064 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1066 SDOperand Callee = Op.getOperand(4);
// CALL operands: chain, CC, isVarArg, isTailCall, callee, then (arg, flag)
// pairs — hence the "- 5) / 2" below.
1067 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1068 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1069 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1070 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1072 // Handy pointer type
1073 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1075 // Accumulate how many bytes are to be pushed on the stack, including the
1076 // linkage area, and parameter passing area. According to the SPU ABI,
1077 // we minimally need space for [LR] and [SP]
1078 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1080 // Set up a copy of the stack pointer for use loading and storing any
1081 // arguments that may not fit in the registers available for argument
1083 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1085 // Figure out which arguments are going to go in registers, and which in
1087 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1088 unsigned ArgRegIdx = 0;
1090 // Keep track of registers passing arguments
1091 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1092 // And the arguments passed on the stack
1093 SmallVector<SDOperand, 8> MemOpChains;
1095 for (unsigned i = 0; i != NumOps; ++i) {
1096 SDOperand Arg = Op.getOperand(5+2*i);
1098 // PtrOff will be used to store the current argument to the stack if a
1099 // register cannot be found for it.
1100 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1101 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1103 switch (Arg.getValueType()) {
1104 default: assert(0 && "Unexpected ValueType for argument!");
// Each value-type group: use the next free argument register if any,
// otherwise spill the argument to its stack slot.
1108 if (ArgRegIdx != NumArgRegs) {
1109 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1111 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1112 ArgOffset += StackSlotSize;
1117 if (ArgRegIdx != NumArgRegs) {
1118 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1120 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1121 ArgOffset += StackSlotSize;
1128 if (ArgRegIdx != NumArgRegs) {
1129 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1131 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1132 ArgOffset += StackSlotSize;
1138 // Update number of stack bytes actually used, insert a call sequence start
1139 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1140 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1142 if (!MemOpChains.empty()) {
1143 // Adjust the stack pointer for the stack arguments.
1144 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1145 &MemOpChains[0], MemOpChains.size());
1148 // Build a sequence of copy-to-reg nodes chained together with token chain
1149 // and flag operands which copy the outgoing args into the appropriate regs.
1151 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1152 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1154 InFlag = Chain.getValue(1);
1157 std::vector<MVT::ValueType> NodeTys;
1158 NodeTys.push_back(MVT::Other); // Returns a chain
1159 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1161 SmallVector<SDOperand, 8> Ops;
1162 unsigned CallOpc = SPUISD::CALL;
1164 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1165 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1166 // node so that legalize doesn't hack it.
1167 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1168 GlobalValue *GV = G->getGlobal();
1169 unsigned CalleeVT = Callee.getValueType();
1170 SDOperand Zero = DAG.getConstant(0, PtrVT);
1171 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1173 if (!ST->usingLargeMem()) {
1174 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1175 // style calls, otherwise, external symbols are BRASL calls. This assumes
1176 // that declared/defined symbols are in the same compilation unit and can
1177 // be reached through PC-relative jumps.
1180 // This may be an unsafe assumption for JIT and really large compilation
1182 if (GV->isDeclaration()) {
1183 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1185 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1188 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1190 Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero);
1192 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1193 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1194 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1195 // If this is an absolute destination address that appears to be a legal
1196 // local store address, use the munged value.
1197 Callee = SDOperand(Dest, 0);
1200 Ops.push_back(Chain);
1201 Ops.push_back(Callee);
1203 // Add argument registers to the end of the list so that they are known live
1205 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1206 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1207 RegsToPass[i].second.getValueType()));
1210 Ops.push_back(InFlag);
1211 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1212 InFlag = Chain.getValue(1);
1214 SDOperand ResultVals[3];
1215 unsigned NumResults = 0;
1218 // If the call has results, copy the values out of the ret val registers.
1219 switch (Op.Val->getValueType(0)) {
1220 default: assert(0 && "Unexpected ret value!");
1221 case MVT::Other: break;
// Two i32 results are returned in R4 (second) and R3 (first); a single
// result of any other handled type comes back in R3.
1223 if (Op.Val->getValueType(1) == MVT::i32) {
1224 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1225 ResultVals[0] = Chain.getValue(0);
1226 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1227 Chain.getValue(2)).getValue(1);
1228 ResultVals[1] = Chain.getValue(0);
1230 NodeTys.push_back(MVT::i32);
1232 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1233 ResultVals[0] = Chain.getValue(0);
1236 NodeTys.push_back(MVT::i32);
1239 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1240 ResultVals[0] = Chain.getValue(0);
1242 NodeTys.push_back(MVT::i64);
1246 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1247 InFlag).getValue(1);
1248 ResultVals[0] = Chain.getValue(0);
1250 NodeTys.push_back(Op.Val->getValueType(0));
1257 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1258 InFlag).getValue(1);
1259 ResultVals[0] = Chain.getValue(0);
1261 NodeTys.push_back(Op.Val->getValueType(0));
1265 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1266 DAG.getConstant(NumStackBytes, PtrVT));
1267 NodeTys.push_back(MVT::Other);
1269 // If the function returns void, just return the chain.
1270 if (NumResults == 0)
1273 // Otherwise, merge everything together with a MERGE_VALUES node.
1274 ResultVals[NumResults++] = Chain;
1275 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1276 ResultVals, NumResults);
1277 return Res.getValue(Op.ResNo);
// Lower ISD::RET: run the RetCC_SPU calling convention to assign return
// values to registers, mark those registers live-out, copy each returned
// value into its assigned register, and emit a SPUISD::RET_FLAG node
// (glued to the last copy when there are return values).
1281 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1282 SmallVector<CCValAssign, 16> RVLocs;
1283 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1284 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1285 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1286 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1288 // If this is the first return lowered for this function, add the regs to the
1289 // liveout set for the function.
1290 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1291 for (unsigned i = 0; i != RVLocs.size(); ++i)
1292 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1295 SDOperand Chain = Op.getOperand(0);
1298 // Copy the result values into the output registers.
1299 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1300 CCValAssign &VA = RVLocs[i];
1301 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands come in (value, flag) pairs after the chain: i*2+1 picks
// the i-th returned value.
1302 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1303 Flag = Chain.getValue(1);
1307 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1309 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1313 //===----------------------------------------------------------------------===//
1314 // Vector related lowering:
1315 //===----------------------------------------------------------------------===//
// Scan a BUILD_VECTOR node for a single repeated non-undef value.
// Returns the ConstantSDNode if every non-undef element is the same
// constant, or 0 when the elements differ, the common value is not a
// constant, or all elements are undef.
1317 static ConstantSDNode *
1318 getVecImm(SDNode *N) {
1319 SDOperand OpVal(0, 0);
1321 // Check to see if this buildvec has a single non-undef value in its elements.
1322 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1323 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1325 OpVal = N->getOperand(i);
1326 else if (OpVal != N->getOperand(i))
1330 if (OpVal.Val != 0) {
1331 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1336 return 0; // All UNDEF: use implicit def.; not Constant node
1339 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1340 /// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant as an SDOperand of the requested ValueType.
1342 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1343 MVT::ValueType ValueType) {
1344 if (ConstantSDNode *CN = getVecImm(N)) {
1345 uint64_t Value = CN->getValue();
// 0x3ffff == (1 << 18) - 1: the largest unsigned 18-bit immediate.
1346 if (Value <= 0x3ffff)
1347 return DAG.getConstant(Value, ValueType);
1353 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1354 /// and the value fits into a signed 16-bit constant, and if so, return the
/// constant as an SDOperand of the requested ValueType. Each branch checks
/// that the value survives a round-trip through sign-extension from 16 bits
/// at that ValueType's width.
1356 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1357 MVT::ValueType ValueType) {
1358 if (ConstantSDNode *CN = getVecImm(N)) {
1359 if (ValueType == MVT::i32) {
1360 int Value = (int) CN->getValue();
1361 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1363 if (Value == SExtValue)
1364 return DAG.getConstant(Value, ValueType);
1365 } else if (ValueType == MVT::i16) {
1366 short Value = (short) CN->getValue();
1367 int SExtValue = ((int) Value << 16) >> 16;
1369 if (Value == (short) SExtValue)
1370 return DAG.getConstant(Value, ValueType);
1371 } else if (ValueType == MVT::i64) {
1372 int64_t Value = CN->getValue();
1373 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1375 if (Value == SExtValue)
1376 return DAG.getConstant(Value, ValueType);
1383 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1384 /// and the value fits into a signed 10-bit constant, and if so, return the
/// constant as an SDOperand of the requested ValueType (i32 or i16).
1386 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1387 MVT::ValueType ValueType) {
1388 if (ConstantSDNode *CN = getVecImm(N)) {
1389 int Value = (int) CN->getValue();
// isS10Constant checks the signed 10-bit range; for i16 the value is
// truncated to short first so the range check matches the element width.
1390 if ((ValueType == MVT::i32 && isS10Constant(Value))
1391 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1392 return DAG.getConstant(Value, ValueType);
1398 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1399 /// and the value fits into a signed 8-bit constant, and if so, return the
/// constant as an SDOperand of the requested ValueType (i16 or i8).
1402 /// @note: The incoming vector is v16i8 because that's the only way we can load
1403 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same, and only then accept the low byte as the 8-bit immediate.
1405 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1406 MVT::ValueType ValueType) {
1407 if (ConstantSDNode *CN = getVecImm(N)) {
1408 int Value = (int) CN->getValue();
1409 if (ValueType == MVT::i16
1410 && Value <= 0xffff /* truncated from uint64_t */
1411 && ((short) Value >> 8) == ((short) Value & 0xff))
1412 return DAG.getConstant(Value & 0xff, ValueType);
1413 else if (ValueType == MVT::i8
1414 && (Value & 0xff) == Value)
1415 return DAG.getConstant(Value, ValueType);
1421 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1422 /// and the value fits into a signed 16-bit constant, and if so, return the
/// value shifted into the low 16 bits (the ILHU immediate encoding, which
/// loads the halfword into the upper 16 bits of each word).
1424 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1425 MVT::ValueType ValueType) {
1426 if (ConstantSDNode *CN = getVecImm(N)) {
1427 uint64_t Value = CN->getValue();
// Accept only values whose low 16 bits are zero, i.e. representable as
// (imm << 16).
1428 if ((ValueType == MVT::i32
1429 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1430 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1431 return DAG.getConstant(Value >> 16, ValueType);
1437 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
///
/// Returns the splatted element value as an i32 constant, or an empty
/// SDOperand when the vector is not a uniform constant splat.
1438 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1439 if (ConstantSDNode *CN = getVecImm(N)) {
1440 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1446 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1447 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1448 if (ConstantSDNode *CN = getVecImm(N)) {
1449 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1455 // If this is a vector of constants or undefs, get the bits. A bit in
1456 // UndefBits is set if the corresponding element of the vector is an
1457 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1458 // zero. Return true if this is not an array of constants, false if it is.
1460 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1461 uint64_t UndefBits[2]) {
1462 // Start with zero'd results.
1463 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
// Element size in bits determines how many elements pack into each of the
// two uint64_t halves of the 128-bit vector.
1465 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1466 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1467 SDOperand OpVal = BV->getOperand(i);
1469 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1470 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1472 uint64_t EltBits = 0;
1473 if (OpVal.getOpcode() == ISD::UNDEF) {
// Record an all-ones mask for this element in UndefBits.
1474 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1475 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1477 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1478 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1479 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants contribute their raw IEEE bit patterns.
1480 const APFloat &apf = CN->getValueAPF();
1481 EltBits = (CN->getValueType(0) == MVT::f32
1482 ? FloatToBits(apf.convertToFloat())
1483 : DoubleToBits(apf.convertToDouble()));
1485 // Nonconstant element.
1489 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1492 //printf("%llx %llx %llx %llx\n",
1493 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1497 /// If this is a splat (repetition) of a value across the whole vector, return
1498 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1499 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1500 /// SplatSize = 1 byte.
///
/// The algorithm repeatedly folds the value in half (64 -> 32 -> 16 -> 8
/// bits), at each step OR-ing defined bits and AND-ing undef masks, and
/// stops at the first width where the two halves disagree or where
/// MinSplatBits forbids going smaller.
1501 static bool isConstantSplat(const uint64_t Bits128[2],
1502 const uint64_t Undef128[2],
1504 uint64_t &SplatBits, uint64_t &SplatUndef,
1506 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1507 // the same as the lower 64-bits, ignoring undefs.
1508 uint64_t Bits64 = Bits128[0] | Bits128[1];
1509 uint64_t Undef64 = Undef128[0] & Undef128[1];
1510 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1511 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1512 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1513 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1515 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1516 if (MinSplatBits < 64) {
1518 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1520 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1521 if (MinSplatBits < 32) {
1523 // If the top 16-bits are different than the lower 16-bits, ignoring
1524 // undefs, we have an i32 splat.
1525 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1526 if (MinSplatBits < 16) {
1527 // If the top 8-bits are different than the lower 8-bits, ignoring
1528 // undefs, we have an i16 splat.
1529 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1530 // Otherwise, we have an 8-bit splat.
1531 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1532 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1538 SplatUndef = Undef16;
1545 SplatUndef = Undef32;
// 64-bit splat: the two 128-bit halves matched but nothing smaller did.
1551 SplatBits = Bits128[0];
1552 SplatUndef = Undef128[0];
1558 return false; // Can't be a splat if two pieces don't match.
1561 // If this is a case we can't handle, return null and let the default
1562 // expansion code take care of it. If we CAN select this case, and if it
1563 // selects to a single instruction, return Op. Otherwise, if we can codegen
1564 // this case more efficiently than a constant pool load, lower it to the
1565 // sequence of ops that should be used.
1566 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1567 MVT::ValueType VT = Op.getValueType();
1568 // If this is a vector of constants or undefs, get the bits. A bit in
1569 // UndefBits is set if the corresponding element of the vector is an
1570 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1572 uint64_t VectorBits[2];
1573 uint64_t UndefBits[2];
1574 uint64_t SplatBits, SplatUndef;
1576 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1577 || !isConstantSplat(VectorBits, UndefBits,
1578 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1579 SplatBits, SplatUndef, SplatSize))
1580 return SDOperand(); // Not a constant vector, not a splat.
// (v4f32 splat case — case label elided in this view; TODO confirm)
1585 uint32_t Value32 = SplatBits;
1586 assert(SplatSize == 4
1587 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1588 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1589 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1590 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1591 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// (v2f64 splat case — case label elided in this view; TODO confirm)
1595 uint64_t f64val = SplatBits;
1596 assert(SplatSize == 8
1597 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1598 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1599 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1600 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1601 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1605 // 8-bit constants have to be expanded to 16-bits
1606 unsigned short Value16 = SplatBits | (SplatBits << 8);
1608 for (int i = 0; i < 8; ++i)
1609 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1610 return DAG.getNode(ISD::BIT_CONVERT, VT,
1611 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// 16-bit splat: widen an 8-bit splat pattern to a halfword if needed.
1614 unsigned short Value16;
1616 Value16 = (unsigned short) (SplatBits & 0xffff);
1618 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1619 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1621 for (int i = 0; i < 8; ++i) Ops[i] = T;
1622 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// 32-bit splat: straightforward 4-way BUILD_VECTOR of the element.
1625 unsigned int Value = SplatBits;
1626 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1627 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// 64-bit splat: split into 32-bit halves and build a SHUFB mask that can
// synthesize special half-patterns (0, ~0, sign bit) without materializing
// them as constants.
1630 uint64_t val = SplatBits;
1631 uint32_t upper = uint32_t(val >> 32);
1632 uint32_t lower = uint32_t(val);
1637 SmallVector<SDOperand, 16> ShufBytes;
1639 bool upper_special, lower_special;
1641 // NOTE: This code creates common-case shuffle masks that can be easily
1642 // detected as common expressions. It is not attempting to create highly
1643 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1645 // Detect if the upper or lower half is a special shuffle mask pattern:
1646 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1647 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1649 // Create lower vector if not a special pattern
1650 if (!lower_special) {
1651 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1652 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1653 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1654 LO32C, LO32C, LO32C, LO32C));
1657 // Create upper vector if not a special pattern
1658 if (!upper_special) {
1659 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1660 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1661 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1662 HI32C, HI32C, HI32C, HI32C));
1665 // If either upper or lower are special, then the two input operands are
1666 // the same (basically, one of them is a "don't care")
1671 if (lower_special && upper_special) {
1672 // Unhappy situation... both upper and lower are special, so punt with
1673 // a target constant:
1674 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1675 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte shuffle control word, one byte per (word, byte) slot.
1679 for (int i = 0; i < 4; ++i) {
1680 for (int j = 0; j < 4; ++j) {
1682 bool process_upper, process_lower;
// Even words come from the upper half, odd words from the lower half.
1685 process_upper = (upper_special && (i & 1) == 0);
1686 process_lower = (lower_special && (i & 1) == 1);
1688 if (process_upper || process_lower) {
1689 if ((process_upper && upper == 0)
1690 || (process_lower && lower == 0))
1692 else if ((process_upper && upper == 0xffffffff)
1693 || (process_lower && lower == 0xffffffff))
1695 else if ((process_upper && upper == 0x80000000)
1696 || (process_lower && lower == 0x80000000))
1697 val = (j == 0 ? 0xe0 : 0x80);
1699 val = i * 4 + j + ((i & 1) * 16);
1701 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1705 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1706 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1707 &ShufBytes[0], ShufBytes.size()));
1709 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1710 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1711 return DAG.getNode(ISD::BIT_CONVERT, VT,
1712 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1713 Zero, Zero, Zero, Zero));
1721 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1722 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1723 /// permutation vector, V3, is monotonically increasing with one "exception"
1724 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1725 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1726 /// In either case, the net result is going to eventually invoke SHUFB to
1727 /// permute/shuffle the bytes from V1 and V2.
1729 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1730 /// control word for byte/halfword/word insertion. This takes care of a single
1731 /// element move from V2 into V1.
1733 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1734 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1735 SDOperand V1 = Op.getOperand(0);
1736 SDOperand V2 = Op.getOperand(1);
1737 SDOperand PermMask = Op.getOperand(2);
// Shuffling against undef is a shuffle of V1 with itself.
1739 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1741 // If we have a single element being moved from V1 to V2, this can be handled
1742 // using the C*[DX] compute mask instructions, but the vector elements have
1743 // to be monotonically increasing with one exception element.
1744 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1745 unsigned EltsFromV2 = 0;
1747 unsigned V2EltIdx0 = 0;
1748 unsigned CurrElt = 0;
1749 bool monotonic = true;
// V2EltIdx0 is set per element type to the mask index where V2's elements
// begin (assignments elided in this view).
1750 if (EltVT == MVT::i8)
1752 else if (EltVT == MVT::i16)
1754 else if (EltVT == MVT::i32)
1757 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask: bail as soon as it is non-monotonic or pulls more than
// one element from V2.
1759 for (unsigned i = 0, e = PermMask.getNumOperands();
1760 EltsFromV2 <= 1 && monotonic && i != e;
1763 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1766 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1768 if (SrcElt >= V2EltIdx0) {
1770 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1771 } else if (CurrElt != SrcElt) {
1778 if (EltsFromV2 == 1 && monotonic) {
1779 // Compute mask and shuffle
1780 MachineFunction &MF = DAG.getMachineFunction();
1781 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1782 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1783 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1784 // Initialize temporary register to 0
1785 SDOperand InitTempReg =
1786 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1787 // Copy register's contents as index in INSERT_MASK:
1788 SDOperand ShufMaskOp =
1789 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1790 DAG.getTargetConstant(V2Elt, MVT::i32),
1791 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1792 // Use shuffle mask in SHUFB synthetic instruction:
1793 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1795 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1796 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1798 SmallVector<SDOperand, 16> ResultMask;
1799 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1801 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1804 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices.
1806 for (unsigned j = 0; j != BytesPerElement; ++j) {
1807 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1812 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1813 &ResultMask[0], ResultMask.size());
1814 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower ISD::SCALAR_TO_VECTOR: a constant scalar becomes a BUILD_VECTOR of
// n_copies replicas (which later simplifies to a vector register load);
// any other scalar is promoted into a vector via SPUISD::PROMOTE_SCALAR.
1818 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1819 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1821 if (Op0.Val->getOpcode() == ISD::Constant) {
1822 // For a constant, build the appropriate constant vector, which will
1823 // eventually simplify to a vector register load.
1825 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1826 SmallVector<SDOperand, 16> ConstVecValues;
1830 // Create a constant vector:
1831 switch (Op.getValueType()) {
1832 default: assert(0 && "Unexpected constant value type in "
1833 "LowerSCALAR_TO_VECTOR");
1834 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1835 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1836 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1837 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1838 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1839 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant across every element.
1842 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1843 for (size_t j = 0; j < n_copies; ++j)
1844 ConstVecValues.push_back(CValue);
1846 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1847 &ConstVecValues[0], ConstVecValues.size());
1849 // Otherwise, copy the value from one register to another:
1850 switch (Op0.getValueType()) {
1851 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1858 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
//! Lower vector multiplication, which SPU has no single instruction for.
//  The product is synthesized per element type from the SPU partial-product
//  nodes MPYH (multiply high halves), MPYU (unsigned multiply low halves) and
//  MPY (16x16 multiply), selected on Op's vector value type.
//  NOTE(review): several interior lines (case labels, braces, a few operands)
//  are elided in this excerpt; comments describe only the visible code.
1865 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1866 switch (Op.getValueType()) {
// (elided case label, presumably MVT::v4i32)
// v4i32: full 32-bit product = MPYU(rA,rB) + MPYH(rA,rB) + MPYH(rB,rA),
// accumulating both cross high-half partial products onto the low product.
1868 SDOperand rA = Op.getOperand(0);
1869 SDOperand rB = Op.getOperand(1);
1870 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1871 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1872 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1873 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1875 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
// (elided case label, presumably MVT::v8i16)
1879 // Multiply two v8i16 vectors (pipeline friendly version):
1880 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1881 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1882 // c) Use SELB to select upper and lower halves from the intermediate results
1884 // NOTE: We really want to move the FSMBI to earlier to actually get the
1885 // dual-issue. This code does manage to do this, even if it's a little on
1888 MachineFunction &MF = DAG.getMachineFunction();
1889 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1890 SDOperand Chain = Op.getOperand(0);
// NOTE(review): Chain and rA are both Op.getOperand(0); for a MUL node
// operand 0 is a value, not a chain — looks intentional-but-odd, confirm.
1891 SDOperand rA = Op.getOperand(0);
1892 SDOperand rB = Op.getOperand(1);
// Pin the FSMBI mask and the high product in virtual registers so the
// copies order the FSMBI early (see NOTE above about dual-issue).
1893 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1894 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
// (elided: SDOperand FSMBOp = ...) 0xcccc selects alternating 16-bit fields.
1897 DAG.getCopyToReg(Chain, FSMBIreg,
1898 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1899 DAG.getConstant(0xcccc, MVT::i32)));
// (elided: SDOperand HHProd = ...) high-half product via MPYHH.
1902 DAG.getCopyToReg(FSMBOp, HiProdReg,
1903 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1905 SDOperand HHProd_v4i32 =
1906 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1907 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
// SELB merges low product (MPY) with the shifted high product using the
// FSMBI-generated byte mask.
1909 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1910 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1911 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1912 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1914 DAG.getConstant(16, MVT::i16))),
1915 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
// (elided case label, presumably MVT::v16i8)
1918 // This M00sE is N@stI! (apologies to Monty Python)
1920 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1921 // is to break it all apart, sign extend, and reassemble the various
1922 // intermediate products.
1924 MachineFunction &MF = DAG.getMachineFunction();
1925 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1926 SDOperand Chain = Op.getOperand(0);
1927 SDOperand rA = Op.getOperand(0);
1928 SDOperand rB = Op.getOperand(1);
1929 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1930 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1932 unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1933 unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1934 unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
// (elided: SDOperand LLProd = ...) low-byte x low-byte products as v8i16.
1937 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1938 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1939 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
// Arithmetic shift right by 8 sign-extends the high byte of each halfword.
1941 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1943 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
// (elided: SDOperand LHProd = ...) high-byte product, shifted back up by 8.
1946 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1947 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
// 0x2222 mask selects every other byte for the SELB merge below.
1949 SDOperand FSMBdef_2222 =
1950 DAG.getCopyToReg(Chain, FSMBreg_2222,
1951 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1952 DAG.getConstant(0x2222, MVT::i32)));
1954 SDOperand FSMBuse_2222 =
1955 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1957 SDOperand LoProd_1 =
1958 DAG.getCopyToReg(Chain, LoProd_reg,
1959 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
// (elided: FSMBuse_2222 operand and close of the SELB call)
1962 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
// (elided: SDOperand LoProd = ...) mask to the low 16 bits per word.
1965 DAG.getNode(ISD::AND, MVT::v4i32,
1966 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1967 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1968 LoProdMask, LoProdMask,
1969 LoProdMask, LoProdMask));
// (elided: SDOperand rAH = ...) upper halfwords, sign extended by >>16.
1972 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1973 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
// (elided: SDOperand rBH = ...)
1976 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1977 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
// (elided: SDOperand HLProd = ...) products of the upper-half low bytes.
1980 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1981 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1982 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1984 SDOperand HHProd_1 =
1985 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1986 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1987 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1989 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
// (elided: SDOperand HHProd = ...) merge HL and shifted HH products.
1992 DAG.getCopyToReg(Chain, HiProd_reg,
1993 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1995 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
// (elided: FSMBuse_2222 operand and close of the SELB call)
1999 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2000 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
// Final result: OR the low and high partial products back into v16i8.
2002 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2003 DAG.getNode(ISD::OR, MVT::v4i32,
// (elided default case: report the unhandled type and abort)
2008 cerr << "CellSPU: Unknown vector multiplication, got "
2009 << MVT::getValueTypeString(Op.getValueType())
//! Lower f32 (and v4f32) FDIV via reciprocal estimate plus one refinement.
//  Computes A/B as AxBRcpl + BRcpl*(A - B*AxBRcpl), where BRcpl is the
//  FPInterp/FPRecipEst approximation of 1/B and AxBRcpl = A*BRcpl.
//  NOTE(review): the `} else {` between the f32 and vector register-class
//  branches is elided in this excerpt.
2018 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2019 MachineFunction &MF = DAG.getMachineFunction();
2020 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2022 SDOperand A = Op.getOperand(0);
2023 SDOperand B = Op.getOperand(1);
2024 unsigned VT = Op.getValueType();
// Virtual registers holding the reciprocal estimate (VRegBR) and the
// initial quotient estimate (VRegC); class depends on scalar vs vector VT.
2026 unsigned VRegBR, VRegC;
2028 if (VT == MVT::f32) {
2029 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2030 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2032 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2033 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2035 // TODO: make sure we're feeding FPInterp the right arguments
2036 // Right now: fi B, frest(B)
// (elided: SDOperand BRcpl = ...)
2039 // (Floating Interpolate (FP Reciprocal Estimate B))
2041 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2042 DAG.getNode(SPUISD::FPInterp, VT, B,
2043 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2045 // Computes A * BRcpl and stores in a temporary register
// (elided: SDOperand AxBRcpl = ...)
2047 DAG.getCopyToReg(BRcpl, VRegC,
2048 DAG.getNode(ISD::FMUL, VT, A,
2049 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2050 // What's the Chain variable do? It's magic!
2051 // TODO: set Chain = Op(0).getEntryNode()
// Refinement step: result = AxBRcpl + BRcpl*(A - B*AxBRcpl).
2053 return DAG.getNode(ISD::FADD, VT,
2054 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2055 DAG.getNode(ISD::FMUL, VT,
2056 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2057 DAG.getNode(ISD::FSUB, VT, A,
2058 DAG.getNode(ISD::FMUL, VT, B,
2059 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
//! Lower EXTRACT_VECTOR_ELT with a constant index.
//  Element 0 of i32/i64 vectors lives in the SPU "preferred slot" and is
//  extracted directly; otherwise a SHUFB mask is built to rotate the wanted
//  element into the preferred slot first.
//  NOTE(review): the switch selecting prefslot_begin/end per type and a few
//  other interior lines are elided in this excerpt.
2062 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2063 unsigned VT = Op.getValueType();
2064 SDOperand N = Op.getOperand(0);
2065 SDOperand Elt = Op.getOperand(1);
2066 SDOperand ShufMask[16];
2067 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2069 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2071 int EltNo = (int) C->getValue();
// Bounds checks per element type.
// NOTE(review): the i32/i64 assert strings say "> 4" and "> 2" but the
// conditions reject slots > 3 and > 1 respectively — messages are off by one.
2074 if (VT == MVT::i8 && EltNo >= 16)
2075 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2076 else if (VT == MVT::i16 && EltNo >= 8)
2077 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2078 else if (VT == MVT::i32 && EltNo >= 4)
2079 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2080 else if (VT == MVT::i64 && EltNo >= 2)
2081 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2083 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2084 // i32 and i64: Element 0 is the preferred slot
2085 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2088 // Need to generate shuffle mask and extract:
2089 int prefslot_begin = -1, prefslot_end = -1;
2090 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
// (elided switch on VT: byte range of the preferred slot for each type)
2094 prefslot_begin = prefslot_end = 3;
2098 prefslot_begin = 2; prefslot_end = 3;
2102 prefslot_begin = 0; prefslot_end = 3;
2106 prefslot_begin = 0; prefslot_end = 7;
2111 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2112 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build the 16-byte SHUFB mask that moves the selected element's bytes
// into the preferred slot.
2114 for (int i = 0; i < 16; ++i) {
2115 // zero fill upper part of preferred slot, don't care about the
2117 unsigned int mask_val;
2119 if (i <= prefslot_end) {
2121 ((i < prefslot_begin)
2123 : elt_byte + (i - prefslot_begin));
2125 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
// Bytes past the preferred slot just replicate the pattern ("don't care").
2127 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2130 SDOperand ShufMaskVec =
2131 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2133 sizeof(ShufMask) / sizeof(ShufMask[0]));
2135 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2136 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2137 N, N, ShufMaskVec));
//! Lower INSERT_VECTOR_ELT with a constant index.
//  Builds a SHUFB that merges the scalar (promoted via SCALAR_TO_VECTOR)
//  into VecOp at the byte offset derived from the constant index, using an
//  INSERT_MASK control vector.
//  NOTE(review): the tail of this function (rest of the expression and the
//  return) is elided in this excerpt.
2141 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2142 SDOperand VecOp = Op.getOperand(0);
2143 SDOperand ValOp = Op.getOperand(1);
2144 SDOperand IdxOp = Op.getOperand(2);
2145 MVT::ValueType VT = Op.getValueType();
2147 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2148 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2150 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2151 // Use $2 because it's always 16-byte aligned and it's available:
2152 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
// (elided: SDOperand result = ...)
2155 DAG.getNode(SPUISD::SHUFB, VT,
2156 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2158 DAG.getNode(SPUISD::INSERT_MASK, VT,
2159 DAG.getNode(ISD::ADD, PtrVT,
2161 DAG.getConstant(CN->getValue(),
//! Lower i8 arithmetic the SPU cannot do natively.
//  Each operation promotes its operands to i16 (sign- or zero-extended as
//  appropriate for Opc), performs the operation at i16, and truncates the
//  result back to i8.
//  NOTE(review): the case labels of the switch on Opc are elided in this
//  excerpt; the per-case comments below are inferred from the extension
//  kinds used and should be confirmed against the full source.
2167 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2168 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2170 assert(Op.getValueType() == MVT::i8);
// (elided: switch (Opc) { default:)
2173 assert(0 && "Unhandled i8 math operator");
// (elided case label — a signed add/sub style op, per SIGN_EXTEND below)
2177 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2179 SDOperand N1 = Op.getOperand(1);
2180 N0 = (N0.getOpcode() != ISD::Constant
2181 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2182 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2183 N1 = (N1.getOpcode() != ISD::Constant
2184 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2185 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2186 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2187 DAG.getNode(Opc, MVT::i16, N0, N1));
// (elided case label — rotate-style op: N0 is replicated into both bytes
// via the OR/SHL "ExpandArg" so the rotate wraps correctly at 8 bits)
2191 SDOperand N1 = Op.getOperand(1);
2193 N0 = (N0.getOpcode() != ISD::Constant
2194 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2195 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2196 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2197 N1 = (N1.getOpcode() != ISD::Constant
2198 ? DAG.getNode(N1Opc, MVT::i16, N1)
2199 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2200 SDOperand ExpandArg =
2201 DAG.getNode(ISD::OR, MVT::i16, N0,
2202 DAG.getNode(ISD::SHL, MVT::i16,
2203 N0, DAG.getConstant(8, MVT::i16)));
2204 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2205 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// (elided case label — unsigned shift: zero-extend both operands)
2209 SDOperand N1 = Op.getOperand(1);
2211 N0 = (N0.getOpcode() != ISD::Constant
2212 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2213 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2214 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2215 N1 = (N1.getOpcode() != ISD::Constant
2216 ? DAG.getNode(N1Opc, MVT::i16, N1)
2217 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2218 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2219 DAG.getNode(Opc, MVT::i16, N0, N1));
// (elided case label — signed shift: sign-extend N0)
2222 SDOperand N1 = Op.getOperand(1);
2224 N0 = (N0.getOpcode() != ISD::Constant
2225 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2226 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2227 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2228 N1 = (N1.getOpcode() != ISD::Constant
2229 ? DAG.getNode(N1Opc, MVT::i16, N1)
2230 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2231 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2232 DAG.getNode(Opc, MVT::i16, N0, N1));
// (elided case label — signed multiply)
2235 SDOperand N1 = Op.getOperand(1);
2237 N0 = (N0.getOpcode() != ISD::Constant
2238 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2239 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2240 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2241 N1 = (N1.getOpcode() != ISD::Constant
2242 ? DAG.getNode(N1Opc, MVT::i16, N1)
2243 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2244 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2245 DAG.getNode(Opc, MVT::i16, N0, N1));
2253 //! Lower byte immediate operations for v16i8 vectors:
//  If one operand of the logical op is a splatted constant BUILD_VECTOR
//  (possibly behind a BIT_CONVERT), rebuild it from target constants so the
//  instruction selector can match the immediate forms (ANDBI/ORBI/XORBI).
//  NOTE(review): several interior lines (declarations of ConstVec/Arg,
//  closing braces, the fall-through return) are elided in this excerpt.
2255 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2258 MVT::ValueType VT = Op.getValueType();
// Assume the constant vector is operand 0; swap below if it is not.
2260 ConstVec = Op.getOperand(0);
2261 Arg = Op.getOperand(1);
2262 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2263 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2264 ConstVec = ConstVec.getOperand(0);
// Otherwise try operand 1 as the constant side.
2266 ConstVec = Op.getOperand(1);
2267 Arg = Op.getOperand(0);
2268 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2269 ConstVec = ConstVec.getOperand(0);
2274 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2275 uint64_t VectorBits[2];
2276 uint64_t UndefBits[2];
2277 uint64_t SplatBits, SplatUndef;
// Only rewrite when the build vector is a constant splat.
2280 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2281 && isConstantSplat(VectorBits, UndefBits,
2282 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2283 SplatBits, SplatUndef, SplatSize)) {
2284 SDOperand tcVec[16];
2285 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2286 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2288 // Turn the BUILD_VECTOR into a set of target constants:
2289 for (size_t i = 0; i < tcVecSize; ++i)
// (elided: tcVec[i] = tc;)
2292 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2293 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2300 //! Lower i32 multiplication
//  Same decomposition as the v4i32 vector case: full 32-bit product =
//  MPYU(rA,rB) + MPYH(rA,rB) + MPYH(rB,rA), since SPU multiplies are 16x16.
//  NOTE(review): the switch on VT (and its case labels) is elided in this
//  excerpt; only the error path and the i32 case body are visible.
2301 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
// (elided default case: report unknown type)
2305 cerr << "CellSPU: Unknown LowerMUL value type, got "
2306 << MVT::getValueTypeString(Op.getValueType())
// (elided: MVT::i32 case label)
2312 SDOperand rA = Op.getOperand(0);
2313 SDOperand rB = Op.getOperand(1);
2315 return DAG.getNode(ISD::ADD, MVT::i32,
2316 DAG.getNode(ISD::ADD, MVT::i32,
2317 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2318 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2319 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2326 //! Custom lowering for CTPOP (count population)
2328 Custom lowering code that counts the number ones in the input
2329 operand. SPU has such an instruction, but it counts the number of
2330 ones per byte, which then have to be accumulated.
//  Strategy: promote the scalar into a vector, run SPU CNTB (count ones per
//  byte), extract the preferred slot, then fold the per-byte counts together
//  with shift/add pairs sized to the element width.
//  NOTE(review): the case labels of the switch on VT are elided in this
//  excerpt; the branches below handle (in order) i8, i16, and i32.
2332 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2333 unsigned VT = Op.getValueType();
2334 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// (elided: switch (VT) { case MVT::i8:)
// i8: CNTB already gives the answer for a single byte.
2338 SDOperand N = Op.getOperand(0);
2339 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2341 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2342 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2344 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// (elided: case MVT::i16:)
// i16: add the two byte counts ((x >> 8) + x) and mask to 4 bits.
2348 MachineFunction &MF = DAG.getMachineFunction();
2349 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2351 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2353 SDOperand N = Op.getOperand(0);
2354 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2355 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2356 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2358 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2359 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2361 // CNTB_result becomes the chain to which all of the virtual registers
2362 // CNTB_reg, SUM1_reg become associated:
2363 SDOperand CNTB_result =
2364 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2366 SDOperand CNTB_rescopy =
2367 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2369 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2371 return DAG.getNode(ISD::AND, MVT::i16,
2372 DAG.getNode(ISD::ADD, MVT::i16,
2373 DAG.getNode(ISD::SRL, MVT::i16,
// (elided: remaining operands of the i16 shift/add/mask expression)
// (elided: case MVT::i32:)
// i32: two shift/add rounds ((x>>16)+x, then (y>>8)+y), masked to 8 bits.
2380 MachineFunction &MF = DAG.getMachineFunction();
2381 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2383 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2384 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2386 SDOperand N = Op.getOperand(0);
2387 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2388 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2389 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2390 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2392 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2393 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2395 // CNTB_result becomes the chain to which all of the virtual registers
2396 // CNTB_reg, SUM1_reg become associated:
2397 SDOperand CNTB_result =
2398 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2400 SDOperand CNTB_rescopy =
2401 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// (elided: SDOperand Comp1 = ...)
2404 DAG.getNode(ISD::SRL, MVT::i32,
2405 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
// (elided: SDOperand Sum1 = ...)
2408 DAG.getNode(ISD::ADD, MVT::i32,
2409 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2411 SDOperand Sum1_rescopy =
2412 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// (elided: SDOperand Comp2 = ...)
2415 DAG.getNode(ISD::SRL, MVT::i32,
2416 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
// (elided: Shift2 operand; SDOperand Sum2 = ...)
2419 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2420 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2422 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2432 /// LowerOperation - Provide custom lowering hooks for some operations.
/// Dispatches each custom-lowered opcode to the static helpers above;
/// unhandled opcodes fall into the default which prints diagnostics.
/// NOTE(review): the return type line and most `case ISD::…:` labels are
/// elided in this excerpt; the dispatch targets identify the intended cases.
2435 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2437 switch (Op.getOpcode()) {
// (elided: default:) diagnostic for opcodes with no custom lowering.
2439 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2440 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2441 cerr << "*Op.Val:\n";
// Memory / address-forming nodes:
2448 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2450 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2451 case ISD::ConstantPool:
2452 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2453 case ISD::GlobalAddress:
2454 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2455 case ISD::JumpTable:
2456 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2458 return LowerConstant(Op, DAG);
2459 case ISD::ConstantFP:
2460 return LowerConstantFP(Op, DAG);
2461 case ISD::FORMAL_ARGUMENTS:
2462 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2464 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2466 return LowerRET(Op, DAG, getTargetMachine());
// i8 math ops (elided case labels) all route through LowerI8Math:
2475 return LowerI8Math(Op, DAG, Op.getOpcode());
2477 // Vector-related lowering.
2478 case ISD::BUILD_VECTOR:
2479 return LowerBUILD_VECTOR(Op, DAG);
2480 case ISD::SCALAR_TO_VECTOR:
2481 return LowerSCALAR_TO_VECTOR(Op, DAG);
2482 case ISD::VECTOR_SHUFFLE:
2483 return LowerVECTOR_SHUFFLE(Op, DAG);
2484 case ISD::EXTRACT_VECTOR_ELT:
2485 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2486 case ISD::INSERT_VECTOR_ELT:
2487 return LowerINSERT_VECTOR_ELT(Op, DAG);
2489 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2493 return LowerByteImmed(Op, DAG);
2495 // Vector and i8 multiply:
2497 if (MVT::isVector(Op.getValueType()))
2498 return LowerVectorMUL(Op, DAG);
2499 else if (Op.getValueType() == MVT::i8)
2500 return LowerI8Math(Op, DAG, Op.getOpcode());
2502 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
// (elided: case ISD::FDIV:)
2505 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2506 return LowerFDIVf32(Op, DAG);
2507 // else if (Op.getValueType() == MVT::f64)
2508 // return LowerFDIVf64(Op, DAG);
2510 assert(0 && "Calling FDIV on unsupported MVT");
2513 return LowerCTPOP(Op, DAG);
2519 //===----------------------------------------------------------------------===//
2520 // Other Lowering Code
2521 //===----------------------------------------------------------------------===//
// Target hook for expanding custom-inserted MachineInstrs at block end.
// NOTE(review): the return type line and the method body (original lines
// 2526-2529) are elided in this excerpt; only the signature is visible.
2524 SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2525 MachineBasicBlock *BB)
2530 //===----------------------------------------------------------------------===//
2531 // Target Optimization Hooks
2532 //===----------------------------------------------------------------------===//
// Target-specific DAG combines. The visible logic folds trivial shifts and
// rotates: 0 << V -> 0 and V << 0 -> V, over a list of SPU shift/rotate
// machine opcodes.
// NOTE(review): the return type line, several case labels, the returned
// values inside the folds, and the closing braces are elided in this excerpt.
2535 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2538 TargetMachine &TM = getTargetMachine();
2539 SelectionDAG &DAG = DCI.DAG;
2541 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2543 switch (N->getOpcode()) {
2546 // Look for obvious optimizations for shift left:
2547 // a) Replace 0 << V with 0
2548 // b) Replace V << 0 with V
2550 // N.B: llvm will generate an undef node if the shift amount is greater than
2551 // 15 (e.g.: V << 16), which will naturally trigger an assert.
// SPU shift/rotate opcodes that share the same trivial folds:
2554 case SPU::SHLQBIIvec:
2556 case SPU::ROTHIr16_i32:
2558 case SPU::ROTIr32_i16:
2559 case SPU::ROTQBYIvec:
2560 case SPU::ROTQBYBIvec:
2561 case SPU::ROTQBIIvec:
2562 case SPU::ROTHMIr16:
2564 case SPU::ROTQMBYIvec: {
2565 if (N0.getOpcode() == ISD::Constant) {
2566 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2567 if (C->getValue() == 0) // 0 << V -> 0.
// (elided: return of the zero operand)
2571 SDOperand N1 = N->getOperand(1);
2572 if (N1.getOpcode() == ISD::Constant) {
2573 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2574 if (C->getValue() == 0) // V << 0 -> V
// (elided: return of N0 and closing braces)
2585 //===----------------------------------------------------------------------===//
2586 // Inline Assembly Support
2587 //===----------------------------------------------------------------------===//
2589 /// getConstraintType - Given a constraint letter, return the type of
2590 /// constraint it is for this target.
2591 SPUTargetLowering::ConstraintType
2592 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2593 if (ConstraintLetter.size() == 1) {
2594 switch (ConstraintLetter[0]) {
2601 return C_RegisterClass;
2604 return TargetLowering::getConstraintType(ConstraintLetter);
// Map a single-letter inline-asm constraint (plus value type) to a SPU
// register class: 64-bit vs 32-bit integer classes, f32/f64 FP classes, and
// the general GPRC class; unknown constraints defer to the base class.
// NOTE(review): the case labels and the VT conditionals guarding the first
// two returns are elided in this excerpt.
2607 std::pair<unsigned, const TargetRegisterClass*>
2608 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2609 MVT::ValueType VT) const
2611 if (Constraint.size() == 1) {
2612 // GCC RS6000 Constraint Letters
2613 switch (Constraint[0]) {
// (elided: case labels; a VT check selects between R64C and R32C)
2617 return std::make_pair(0U, SPU::R64CRegisterClass);
2618 return std::make_pair(0U, SPU::R32CRegisterClass);
// (elided: case label and `if (VT == MVT::f32)`)
2621 return std::make_pair(0U, SPU::R32FPRegisterClass);
2622 else if (VT == MVT::f64)
2623 return std::make_pair(0U, SPU::R64FPRegisterClass);
// (elided: case label for the general-purpose class)
2626 return std::make_pair(0U, SPU::GPRCRegisterClass);
2630 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
// Report known-zero/known-one bits for SPU-specific nodes to the DAG
// combiner.
// NOTE(review): the return type line, the Mask/KnownOne parameter lines, and
// the body (original lines 2640-2643) are elided in this excerpt.
2634 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2636 uint64_t &KnownZero,
2638 const SelectionDAG &DAG,
2639 unsigned Depth ) const {
2644 // LowerAsmOperandForConstraint
// Lower an inline-asm operand for the given constraint letter. SPU defines
// no target-specific handling; everything is delegated to the base class.
// NOTE(review): the return type line (original 2645) is elided here.
2646 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2647 char ConstraintLetter,
2648 std::vector<SDOperand> &Ops,
2649 SelectionDAG &DAG) {
2650 // Default, for the time being, to the base class handler
2651 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2654 /// isLegalAddressImmediate - Return true if the integer value can be used
2655 /// as the offset of the target addressing mode.
2656 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2657 // SPU's addresses are 256K:
// NOTE(review): both bounds are strict, so the accepted range is
// [-262143, 262142] rather than a symmetric 18-bit range — confirm this
// asymmetry is intentional before tightening or relaxing it.
2658 return (V > -(1 << 18) && V < (1 << 18) - 1);
2661 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {