//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "NVPTXISelLowering.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalValue.h"
#include "llvm/Module.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCSectionELF.h"
#include <sstream>

#define DEBUG_TYPE "nvptx-lower"

using namespace llvm;
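// Each lowered call site gets a unique number. It is used to name the
// prototype label ("prototype_<n>") emitted for indirect calls, and is
// bumped after each CALLSEQ_END (see LowerCall below).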
static unsigned int uniqueCallSite = 0;
static cl::opt<bool>
RetainVectorOperands("nvptx-codegen-vectors",
                     cl::desc("NVPTX Specific: Retain LLVM's vectors "
                              "and generate PTX vectors"),
                     cl::init(true));

static cl::opt<bool>
sched4reg("nvptx-sched4reg",
          cl::desc("NVPTX Specific: schedule for register pressure"),
          cl::init(false));
// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
    : TargetLowering(TM, new NVPTXTargetObjectFile()),
      nvTM(&TM),
      nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy, or memmove.
  maxStoresPerMemset = (unsigned)0xFFFFFFFF;
  maxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
  maxStoresPerMemmove = (unsigned)0xFFFFFFFF;
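  // (PTX provides no standard C library to call into, so these intrinsics
  // must always be expanded inline; e.g. a small memset becomes a short
  // sequence of st instructions.)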
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
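  // With this setting, a "true" i1 value held in a wider register is
  // all-ones (-1) rather than 1.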
  // Jump is expensive. Don't create extra control flow for 'and'/'or' in
  // condition branches.
  setJumpIsExpensive(true);
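  // (e.g. "if (a && b)" is then lowered as one branch on (a & b) instead of
  // two separate conditional branches; illustrative, the actual decision is
  // made by the generic DAG builder when it queries JumpIsExpensive.)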
  // By default, use the Source scheduling.
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);
  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
  if (RetainVectorOperands) {
    addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
    addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
    addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
    addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
    addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
    addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
    addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
    addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
    addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
    addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8, Custom);

    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8, Custom);
  }
  // Operations not directly supported by NVPTX.
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (nvptxSubtarget.hasROT64()) {
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  }
  else {
    setOperationAction(ISD::ROTL, MVT::i64, Expand);
    setOperationAction(ISD::ROTR, MVT::i64, Expand);
  }
  if (nvptxSubtarget.hasROT32()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
  }
  else {
    setOperationAction(ISD::ROTL, MVT::i32, Expand);
    setOperationAction(ISD::ROTR, MVT::i32, Expand);
  }
  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  // Indirect branch is not supported.
  // This also disables Jump Table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  // We want to legalize constant-related memmove and memcpy intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  // Turn FP extload into load/fextend.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  // Turn FP truncstore into trunc + store.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load / store predicate registers.
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i64, MVT::i1, Expand);
  setTruncStoreAction(MVT::i32, MVT::i1, Expand);
  setTruncStoreAction(MVT::i16, MVT::i1, Expand);
  setTruncStoreAction(MVT::i8, MVT::i1, Expand);
  // This is legal in NVPTX.
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);

  // TRAP can be lowered to PTX trap.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  // By default, CONCAT_VECTORS is implemented via store/load
  // through the stack. It is slow and uses local memory. We need
  // to custom-lower them.
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8, Custom);
  // Expand vector int-to-float and float-to-int conversions.
  //  - For SINT_TO_FP and UINT_TO_FP, the src type
  //    (Node->getOperand(0).getValueType())
  //    is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
  //    the dest type (Node->getValueType(0)) is used.
  //
  // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
  // case, and SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the
  // scalar case. That is why v4i32 or v2i32 are used here.
  //
  // The expansion for vectors happens in VectorLegalizer::LegalizeOp()
  // (LegalizeVectorOps.cpp).
  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
  // Now deduce the information based on the preceding declarations.
  computeRegisterProperties();
}
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case NVPTXISD::CALL:               return "NVPTXISD::CALL";
  case NVPTXISD::RET_FLAG:           return "NVPTXISD::RET_FLAG";
  case NVPTXISD::Wrapper:            return "NVPTXISD::Wrapper";
  case NVPTXISD::NVBuiltin:          return "NVPTXISD::NVBuiltin";
  case NVPTXISD::DeclareParam:       return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam: return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet:         return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareRetParam:    return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall:          return "NVPTXISD::PrintCall";
  case NVPTXISD::LoadParam:          return "NVPTXISD::LoadParam";
  case NVPTXISD::StoreParam:         return "NVPTXISD::StoreParam";
  case NVPTXISD::StoreParamS32:      return "NVPTXISD::StoreParamS32";
  case NVPTXISD::StoreParamU32:      return "NVPTXISD::StoreParamU32";
  case NVPTXISD::MoveToParam:        return "NVPTXISD::MoveToParam";
  case NVPTXISD::CallArgBegin:       return "NVPTXISD::CallArgBegin";
  case NVPTXISD::CallArg:            return "NVPTXISD::CallArg";
  case NVPTXISD::LastCallArg:        return "NVPTXISD::LastCallArg";
  case NVPTXISD::CallArgEnd:         return "NVPTXISD::CallArgEnd";
  case NVPTXISD::CallVoid:           return "NVPTXISD::CallVoid";
  case NVPTXISD::CallVal:            return "NVPTXISD::CallVal";
  case NVPTXISD::CallSymbol:         return "NVPTXISD::CallSymbol";
  case NVPTXISD::Prototype:          return "NVPTXISD::Prototype";
  case NVPTXISD::MoveParam:          return "NVPTXISD::MoveParam";
  case NVPTXISD::MoveRetval:         return "NVPTXISD::MoveRetval";
  case NVPTXISD::MoveToRetval:       return "NVPTXISD::MoveToRetval";
  case NVPTXISD::StoreRetval:        return "NVPTXISD::StoreRetval";
  case NVPTXISD::PseudoUseParam:     return "NVPTXISD::PseudoUseParam";
  case NVPTXISD::RETURN:             return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin:       return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd:         return "NVPTXISD::CallSeqEnd";
  }
}
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
  return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
}
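// Build the ".callprototype" declaration that PTX requires before an
// indirect call. For example (illustrative), a callee taking one i32
// parameter and returning an i32 under the ABI yields:
//   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _);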
std::string NVPTXTargetLowering::getPrototype(Type *retTy,
                                              const ArgListTy &Args,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                              unsigned retAlignment) const {
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";
  if (retTy->getTypeID() == Type::VoidTyID)
    O << "()";
  else {
    O << "(";
    if (isABI) {
      if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
        unsigned size = 0;
        if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
          size = ITy->getBitWidth();
          if (size < 32) size = 32;
        }
        else {
          assert(retTy->isFloatingPointTy() &&
                 "Floating point type expected here");
          size = retTy->getPrimitiveSizeInBits();
        }
        O << ".param .b" << size << " _";
      }
      else if (isa<PointerType>(retTy))
        O << ".param .b" << getPointerTy().getSizeInBits() << " _";
      else {
        if ((retTy->getTypeID() == Type::StructTyID) ||
            isa<VectorType>(retTy)) {
          SmallVector<EVT, 16> vtparts;
          ComputeValueVTs(*this, retTy, vtparts);
          unsigned totalsz = 0;
          for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
            unsigned elems = 1;
            EVT elemtype = vtparts[i];
            if (vtparts[i].isVector()) {
              elems = vtparts[i].getVectorNumElements();
              elemtype = vtparts[i].getVectorElementType();
            }
            for (unsigned j=0, je=elems; j!=je; ++j) {
              unsigned sz = elemtype.getSizeInBits();
              if (elemtype.isInteger() && (sz < 8)) sz = 8;
              totalsz += sz/8;
            }
          }
          O << ".param .align " << retAlignment
            << " .b8 _[" << totalsz << "]";
        }
        else
          assert(false && "Unknown return type");
      }
    }
    else {
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*this, retTy, vtparts);
      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }
        for (unsigned j=0, je=elems; j!=je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
          O << ".reg .b" << sz << " _";
          if (j<je-1) O << ", ";
        }
        if (i < e-1)
          O << ", ";
      }
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;
  MVT thePointerTy = getPointerTy();
  for (unsigned i=0,e=Args.size(); i!=e; ++i) {
    const Type *Ty = Args[i].Ty;
    if (!first)
      O << ", ";
    first = false;

    if (Outs[i].Flags.isByVal() == false) {
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32) sz = 32;
      }
      else if (isa<PointerType>(Ty))
        sz = thePointerTy.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      if (isABI)
        O << ".param .b" << sz << " ";
      else
        O << ".reg .b" << sz << " ";
      continue;
    }
    const PointerType *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy &&
           "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    if (isABI) {
      unsigned align = Outs[i].Flags.getByValAlign();
      unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
      O << ".param .align " << align
        << " .b8 _";
      O << "[" << sz << "]";
      continue;
    }
    else {
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*this, ETy, vtparts);
      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }
        for (unsigned j=0,je=elems; j!=je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
          O << ".reg .b" << sz << " ";
          if (j<je-1) O << ", ";
        }
        if (i < e-1)
          O << ", ";
      }
      continue;
    }
  }
  O << ");";
  return O.str();
}
SDValue
NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                               SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  DebugLoc &dl = CLI.DL;
  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  ArgListTy &Args = CLI.Args;
  Type *retTy = CLI.RetTy;
  ImmutableCallSite *CS = CLI.CS;
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  SDValue tempChain = Chain;
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getIntPtrConstant(uniqueCallSite, true));
  SDValue InFlag = Chain.getValue(1);

  assert((Outs.size() == Args.size()) &&
         "Unexpected number of arguments to function call");
  unsigned paramCount = 0;
  // Declare the .param or .reg spaces needed to pass values to the function.
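  // Roughly, for an i32 argument under the ABI this produces PTX like
  // (illustrative):
  //   .param .b32 param0;
  //   st.param.b32 [param0+0], %r1;
  // while in non-ABI mode a .reg declaration and a move are used instead.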
  for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
    EVT VT = Outs[i].VT;

    if (Outs[i].Flags.isByVal() == false) {
      // Plain scalar:
      // for ABI,    declare .param .b<size> .param<n>;
      // for nonABI, declare .reg .b<size> .param<n>;
      unsigned isReg = 1;
      if (isABI)
        isReg = 0;
      unsigned sz = VT.getSizeInBits();
      if (VT.isInteger() && (sz < 32)) sz = 32;
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareParamOps[] = { Chain,
                                    DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(sz, MVT::i32),
                                    DAG.getConstant(isReg, MVT::i32),
                                    InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                          DeclareParamOps, 5);
      InFlag = Chain.getValue(1);
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                 DAG.getConstant(0, MVT::i32), OutVals[i],
                                 InFlag };
      unsigned opcode = NVPTXISD::StoreParam;
      if (isReg)
        opcode = NVPTXISD::MoveToParam;
      else {
        if (Outs[i].Flags.isZExt())
          opcode = NVPTXISD::StoreParamU32;
        else if (Outs[i].Flags.isSExt())
          opcode = NVPTXISD::StoreParamS32;
      }
      Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);

      InFlag = Chain.getValue(1);
      ++paramCount;
      continue;
    }
    // struct or vector
    SmallVector<EVT, 16> vtparts;
    const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
    assert(PTy &&
           "Type of a byval parameter should be pointer");
    ComputeValueVTs(*this, PTy->getElementType(), vtparts);
    if (isABI) {
      // declare .param .align 16 .b8 .param<n>[<size>];
      unsigned sz = Outs[i].Flags.getByValSize();
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      // The ByValAlign in Outs[i].Flags is always set at this point, so we
      // don't need to worry about natural alignment or not.
      // See TargetLowering::LowerCallTo().
      SDValue DeclareParamOps[] = { Chain,
                       DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
                                    DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(sz, MVT::i32),
                                    InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                          DeclareParamOps, 5);
      InFlag = Chain.getValue(1);
      unsigned curOffset = 0;
      for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
        unsigned elems = 1;
        EVT elemtype = vtparts[j];
        if (vtparts[j].isVector()) {
          elems = vtparts[j].getVectorNumElements();
          elemtype = vtparts[j].getVectorElementType();
        }
        for (unsigned k=0,ke=elems; k!=ke; ++k) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 8)) sz = 8;
          SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                        OutVals[i],
                                        DAG.getConstant(curOffset,
                                                        getPointerTy()));
          SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                MachinePointerInfo(), false, false, false, 0);
          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount,
                                                            MVT::i32),
                                     DAG.getConstant(curOffset, MVT::i32),
                                     theVal, InFlag };
          Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                              CopyParamOps, 5);
          InFlag = Chain.getValue(1);
          curOffset += sz/8;
        }
      }
      ++paramCount;
      continue;
    }
    // Non-ABI, struct or vector:
    // declare a bunch of .reg .b<size> .param<n>
    unsigned curOffset = 0;
    for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
      unsigned elems = 1;
      EVT elemtype = vtparts[j];
      if (vtparts[j].isVector()) {
        elems = vtparts[j].getVectorNumElements();
        elemtype = vtparts[j].getVectorElementType();
      }
      for (unsigned k=0,ke=elems; k!=ke; ++k) {
        unsigned sz = elemtype.getSizeInBits();
        if (elemtype.isInteger() && (sz < 32)) sz = 32;
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount,
                                                             MVT::i32),
                                      DAG.getConstant(sz, MVT::i32),
                                      DAG.getConstant(1, MVT::i32),
                                      InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                            DeclareParamOps, 5);
        InFlag = Chain.getValue(1);
        SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
                                      DAG.getConstant(curOffset,
                                                      getPointerTy()));
        SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                MachinePointerInfo(), false, false, false, 0);
        SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                   DAG.getConstant(0, MVT::i32), theVal,
                                   InFlag };
        Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
                            CopyParamOps, 5);
        InFlag = Chain.getValue(1);
        curOffset += sz/8;
        ++paramCount;
      }
    }
  }
  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
  unsigned retAlignment = 0;

  // Handle Result
  unsigned retCount = 0;
  if (Ins.size() > 0) {
    SmallVector<EVT, 16> resvtparts;
    ComputeValueVTs(*this, retTy, resvtparts);

    // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
    // individual .reg .b<size> func_retval<0..> for non-ABI.
    unsigned resultsz = 0;
    for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) {
      unsigned elems = 1;
      EVT elemtype = resvtparts[i];
      if (resvtparts[i].isVector()) {
        elems = resvtparts[i].getVectorNumElements();
        elemtype = resvtparts[i].getVectorElementType();
      }
      for (unsigned j=0,je=elems; j!=je; ++j) {
        unsigned sz = elemtype.getSizeInBits();
        if (isABI == false) {
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
        }
        else {
          if (elemtype.isInteger() && (sz < 8)) sz = 8;
        }
        if (isABI == false) {
          SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
                                      DAG.getConstant(sz, MVT::i32),
                                      DAG.getConstant(retCount, MVT::i32),
                                      InFlag };
          Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                              DeclareRetOps, 5);
          InFlag = Chain.getValue(1);
          ++retCount;
        }
        resultsz += sz;
      }
    }
    if (isABI) {
      if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
          retTy->isPointerTy()) {
        // Scalar needs to be at least 32bit wide.
        if (resultsz < 32)
          resultsz = 32;
        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                                    DAG.getConstant(resultsz, MVT::i32),
                                    DAG.getConstant(0, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                            DeclareRetOps, 5);
        InFlag = Chain.getValue(1);
      }
      else {
        if (Func) { // direct call
          if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
            retAlignment = getDataLayout()->getABITypeAlignment(retTy);
        } else { // indirect call
          const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
          if (!llvm::getAlign(*CallI, 0, retAlignment))
            retAlignment = getDataLayout()->getABITypeAlignment(retTy);
        }
        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment,
                                                           MVT::i32),
                                    DAG.getConstant(resultsz/8, MVT::i32),
                                    DAG.getConstant(0, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
                            DeclareRetOps, 5);
        InFlag = Chain.getValue(1);
      }
    }
  }

  if (!Func) {
    // This is the indirect function call case: PTX requires a prototype of
    // the callee's signature, e.g.
    //   proto_0 : .callprototype (.param .b32 _) _ (.param .b32 _);
    // to be emitted, and the prototype label has to be used as the last arg
    // of the call instruction.
    // The prototype is embedded in a string and put as the operand for an
    // INLINEASM SDNode.
    SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
    const char *asmstr = nvTM->getManagedStrPool()->
        getManagedString(proto_string.c_str())->c_str();
    SDValue InlineAsmOps[] = { Chain,
                               DAG.getTargetExternalSymbol(asmstr,
                                                           getPointerTy()),
                               DAG.getMDNode(0),
                               DAG.getTargetConstant(0, MVT::i32), InFlag };
    Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
    InFlag = Chain.getValue(1);
  }
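  // The pseudo-instructions below cooperate to print one complete PTX call,
  // roughly (illustrative):
  //   call (retval0), <callee>, (param0, ..., param<n-1>), prototype_<k>;
  // where the trailing prototype label is only present for indirect calls.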
  // Op to just print "call"
  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue PrintCallOps[] = { Chain,
                             DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1)
                                                   : retCount, MVT::i32),
                             InFlag };
  Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
                      dl, PrintCallVTs, PrintCallOps, 3);
  InFlag = Chain.getValue(1);
  // Ops to print out the function name
  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
  InFlag = Chain.getValue(1);
  // Ops to print out the param list
  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgBeginOps[] = { Chain, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
                      CallArgBeginOps, 2);
  InFlag = Chain.getValue(1);
  for (unsigned i=0, e=paramCount; i!=e; ++i) {
    unsigned opcode;
    if (i==(e-1))
      opcode = NVPTXISD::LastCallArg;
    else
      opcode = NVPTXISD::CallArg;
    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                             DAG.getConstant(i, MVT::i32),
                             InFlag };
    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
    InFlag = Chain.getValue(1);
  }
  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgEndOps[] = { Chain,
                              DAG.getConstant(Func ? 1 : 0, MVT::i32),
                              InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
                      3);
  InFlag = Chain.getValue(1);

  if (!Func) {
    SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue PrototypeOps[] = { Chain,
                               DAG.getConstant(uniqueCallSite, MVT::i32),
                               InFlag };
    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
    InFlag = Chain.getValue(1);
  }
  // Generate loads from param memory/moves from registers for result
  if (Ins.size() > 0) {
    if (isABI) {
      unsigned resoffset = 0;
      for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
        unsigned sz = Ins[i].VT.getSizeInBits();
        if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
        std::vector<EVT> LoadRetVTs;
        LoadRetVTs.push_back(Ins[i].VT);
        LoadRetVTs.push_back(MVT::Other);
        LoadRetVTs.push_back(MVT::Glue);
        std::vector<SDValue> LoadRetOps;
        LoadRetOps.push_back(Chain);
        LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
        LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
        LoadRetOps.push_back(InFlag);
        SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
                                     &LoadRetOps[0], LoadRetOps.size());
        Chain = retval.getValue(1);
        InFlag = retval.getValue(2);
        InVals.push_back(retval);
        resoffset += sz/8;
      }
    }
    else {
      SmallVector<EVT, 16> resvtparts;
      ComputeValueVTs(*this, retTy, resvtparts);

      assert(Ins.size() == resvtparts.size() &&
             "Unexpected number of return values in non-ABI case");
      unsigned paramNum = 0;
      for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
        assert(EVT(Ins[i].VT) == resvtparts[i] &&
               "Unexpected EVT type in non-ABI case");
        unsigned numelems = 1;
        EVT elemtype = Ins[i].VT;
        if (Ins[i].VT.isVector()) {
          numelems = Ins[i].VT.getVectorNumElements();
          elemtype = Ins[i].VT.getVectorElementType();
        }
        std::vector<SDValue> tempRetVals;
        for (unsigned j=0; j<numelems; ++j) {
          std::vector<EVT> MoveRetVTs;
          MoveRetVTs.push_back(elemtype);
          MoveRetVTs.push_back(MVT::Other);
          MoveRetVTs.push_back(MVT::Glue);
          std::vector<SDValue> MoveRetOps;
          MoveRetOps.push_back(Chain);
          MoveRetOps.push_back(DAG.getConstant(0, MVT::i32));
          MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32));
          MoveRetOps.push_back(InFlag);
          SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
                                       &MoveRetOps[0], MoveRetOps.size());
          Chain = retval.getValue(1);
          InFlag = retval.getValue(2);
          tempRetVals.push_back(retval);
          ++paramNum;
        }
        if (Ins[i].VT.isVector())
          InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
                                       &tempRetVals[0], tempRetVals.size()));
        else
          InVals.push_back(tempRetVals[0]);
      }
    }
  }
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getIntPtrConstant(uniqueCallSite, true),
                             DAG.getIntPtrConstant(uniqueCallSite+1, true),
                             InFlag);
  uniqueCallSite++;

  // Set isTailCall to false for now, until we figure out how to express
  // tail call optimization in PTX.
  isTailCall = false;
  return Chain;
}
// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
// (see LegalizeDAG.cpp). This is slow and uses local memory. We use
// extract/insert/build vector just as what LegalizeOp() does in LLVM 2.5.
SDValue NVPTXTargetLowering::
LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  DebugLoc dl = Node->getDebugLoc();
  SmallVector<SDValue, 8> Ops;
  unsigned NumOperands = Node->getNumOperands();
  for (unsigned i=0; i < NumOperands; ++i) {
    SDValue SubOp = Node->getOperand(i);
    EVT VVT = SubOp.getNode()->getValueType(0);
    EVT EltVT = VVT.getVectorElementType();
    unsigned NumSubElem = VVT.getVectorNumElements();
    for (unsigned j=0; j < NumSubElem; ++j) {
      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
                                DAG.getIntPtrConstant(j)));
    }
  }
  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
                     &Ops[0], Ops.size());
}
SDValue NVPTXTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::RETURNADDR: return SDValue();
  case ISD::FRAMEADDR:  return SDValue();
  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN: return Op;
  case ISD::BUILD_VECTOR:
  case ISD::EXTRACT_SUBVECTOR:
    return Op;
  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  default:
    llvm_unreachable("Custom lowering not defined for operation");
  }
}
// v = ld i1* addr
//   =>
// v1 = ld i8* addr
// v = trunc v1 to i1
SDValue NVPTXTargetLowering::
LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  LoadSDNode *LD = cast<LoadSDNode>(Node);
  DebugLoc dl = Node->getDebugLoc();
  ISD::LoadExtType ExtType = LD->getExtensionType();
  assert(ExtType == ISD::NON_EXTLOAD);
  EVT VT = Node->getValueType(0);
  assert(VT == MVT::i1 && "Custom lowering for i1 load only");
  SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
                              LD->getPointerInfo(),
                              LD->isVolatile(), LD->isNonTemporal(),
                              LD->isInvariant(),
                              LD->getAlignment());
  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
  // The legalizer (the caller) is expecting two values from the legalized
  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
  // in LegalizeDAG.cpp which also uses MergeValues.
  SDValue Ops[] = {result, LD->getChain()};
  return DAG.getMergeValues(Ops, 2, dl);
}
// st i1 v, addr
//    =>
// v1 = zxt v to i8
// st i8, addr
SDValue NVPTXTargetLowering::
LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  DebugLoc dl = Node->getDebugLoc();
  StoreSDNode *ST = cast<StoreSDNode>(Node);
  SDValue Tmp1 = ST->getChain();
  SDValue Tmp2 = ST->getBasePtr();
  SDValue Tmp3 = ST->getValue();
  EVT VT = Tmp3.getValueType();
  assert(VT == MVT::i1 && "Custom lowering for i1 store only");
  unsigned Alignment = ST->getAlignment();
  bool isVolatile = ST->isVolatile();
  bool isNonTemporal = ST->isNonTemporal();
  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3);
  SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
                                ST->getPointerInfo(), isVolatile,
                                isNonTemporal, Alignment);
  return Result;
}
SDValue
NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
                                EVT v) const {
  std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
  std::stringstream suffix;
  suffix << idx;
  *name += suffix.str();
  return DAG.getTargetExternalSymbol(name->c_str(), v);
}
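// e.g. getExtSymb(DAG, ".PARAM", 3) produces the external symbol ".PARAM3".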
SDValue
NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
  return getExtSymb(DAG, ".PARAM", idx, v);
}

SDValue
NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
  return getExtSymb(DAG, ".HLPPARAM", idx);
}
// Check to see if the kernel argument is image*_t or sampler_t
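// (e.g. an OpenCL image2d_t kernel argument arrives here as a pointer whose
// pointee is a named struct type such as "struct._image2d_t").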
bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
  static const char *const specialTypes[] = {
    "struct._image2d_t",
    "struct._image3d_t",
    "struct._sampler_t"
  };

  const Type *Ty = arg->getType();
  const PointerType *PTy = dyn_cast<PointerType>(Ty);

  if (!PTy)
    return false;

  if (!context)
    return false;

  const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
  const std::string TypeName = STy ? STy->getName() : "";

  for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
    if (TypeName == specialTypes[i])
      return true;

  return false;
}
SDValue
NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
                                          CallingConv::ID CallConv,
                                          bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                          DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const DataLayout *TD = getDataLayout();

  const Function *F = MF.getFunction();
  const AttrListPtr &PAL = F->getAttributes();

  SDValue Root = DAG.getRoot();
  std::vector<SDValue> OutChains;

  bool isKernel = llvm::isKernelFunction(*F);
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I) {
    theArgs.push_back(I);
    argTypes.push_back(I->getType());
  }
  assert(argTypes.size() == Ins.size() &&
         "Ins types and function types did not match");

  int idx = 0;
  for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) {
    Type *Ty = argTypes[i];
    EVT ObjectVT = getValueType(Ty);
    assert(ObjectVT == Ins[i].VT &&
           "Ins type did not match function type");
    // If the kernel argument is image*_t or sampler_t, convert it to
    // an i32 constant holding the parameter position. This can later be
    // matched in the AsmPrinter to output the correct mangled name.
    if (isImageOrSamplerVal(theArgs[i],
                            (theArgs[i]->getParent() ?
                             theArgs[i]->getParent()->getParent() : 0))) {
      assert(isKernel && "Only kernels can have image/sampler params");
      InVals.push_back(DAG.getConstant(i+1, MVT::i32));
      continue;
    }
    if (theArgs[i]->use_empty()) {
      // argument is dead
      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
      continue;
    }
    // In the following cases, assign a node order of "idx+1" to newly-created
    // nodes. The SDNodes for params have to appear in the same order as their
    // order of appearance in the original function. "idx+1" holds that order.
    if (PAL.getParamAttributes(i+1).hasAttribute(Attributes::ByVal) == false) {
      if (isABI || isKernel) {
        // If ABI, load from the param symbol.
        SDValue Arg = getParamSymbol(DAG, idx);
        Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT(
            F->getContext()),
            llvm::ADDRESS_SPACE_PARAM));
        SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
                                MachinePointerInfo(srcValue), false, false,
                                false,
                                TD->getABITypeAlignment(ObjectVT.getTypeForEVT(
                                    F->getContext())));
        if (p.getNode())
          DAG.AssignOrdering(p.getNode(), idx+1);
        InVals.push_back(p);
      }
      else {
        // If no ABI, just move the param symbol.
        SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
        SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
        if (p.getNode())
          DAG.AssignOrdering(p.getNode(), idx+1);
        InVals.push_back(p);
      }
      continue;
    }
    // Param has ByVal attribute
    if (isABI || isKernel) {
      // Return MoveParam(param symbol).
      // Ideally, the param symbol can be returned directly,
      // but when SDNode builder decides to use it in a CopyToReg(),
      // machine instruction fails because TargetExternalSymbol
      // (not lowered) is target dependent, and CopyToReg assumes
      // the source is lowered.
      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
      SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
      if (p.getNode())
        DAG.AssignOrdering(p.getNode(), idx+1);
      if (isKernel)
        InVals.push_back(p);
      else {
        SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
                    DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
                                 p);
        InVals.push_back(p2);
      }
    } else {
      // Have to move a set of param symbols to registers and
      // store them locally and return the local pointer in InVals.
      const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
      assert(elemPtrType &&
             "Byval parameter should be a pointer type");
      Type *elemType = elemPtrType->getElementType();
      // Compute the constituent parts.
      SmallVector<EVT, 16> vtparts;
      SmallVector<uint64_t, 16> offsets;
      ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
      unsigned totalsize = 0;
      for (unsigned j=0, je=vtparts.size(); j!=je; ++j)
        totalsize += vtparts[j].getStoreSizeInBits();
      SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()->
                                     CreateStackObject(totalsize/8, 16, false),
                                            getPointerTy());
      unsigned sizesofar = 0;
      std::vector<SDValue> theChains;
      for (unsigned j=0, je=vtparts.size(); j!=je; ++j) {
        unsigned numElems = 1;
        if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements();
        for (unsigned k=0, ke=numElems; k!=ke; ++k) {
          EVT tmpvt = vtparts[j];
          if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
          SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
                                    getParamSymbol(DAG, idx, tmpvt));
          SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
                                     DAG.getConstant(sizesofar, getPointerTy()));
          theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
                                        MachinePointerInfo(), false, false, 0));
          sizesofar += tmpvt.getStoreSizeInBits()/8;
        }
      }

      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0],
                          theChains.size());
      InVals.push_back(localcopy);
    }
  }
  // Clang will check explicit VarArg and issue an error if any. However,
  // Clang will let code with an implicit var arg like f() pass. We treat
  // this case as if the arg list is empty.
  //if (F.isVarArg()) {
  //  assert(0 && "VarArg not supported yet!");
  //}
  if (!OutChains.empty())
    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                            &OutChains[0], OutChains.size()));

  return Chain;
}
SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 DebugLoc dl, SelectionDAG &DAG) const {

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  unsigned sizesofar = 0;
  unsigned idx = 0;
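  // Each scalar (or vector element) of the return value is copied into the
  // PTX return slot; under the ABI this prints roughly as (illustrative)
  //   st.param.b32 [func_retval0+<offset>], %r<x>;
  // while non-ABI mode moves into individual return registers indexed by idx.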
  for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
    SDValue theVal = OutVals[i];
    EVT theValType = theVal.getValueType();
    unsigned numElems = 1;
    if (theValType.isVector()) numElems = theValType.getVectorNumElements();
    for (unsigned j=0,je=numElems; j!=je; ++j) {
      SDValue tmpval = theVal;
      if (theValType.isVector())
        tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                             theValType.getVectorElementType(),
                             tmpval, DAG.getIntPtrConstant(j));
      Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval,
                          dl, MVT::Other,
                          Chain,
                          DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
                          tmpval);
      if (theValType.isVector())
        sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8;
      else
        sizesofar += theValType.getStoreSizeInBits()/8;
      ++idx;
    }
  }
  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}
void
NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const
{
  if (Constraint.length() > 1)
    return;
  else
    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
// NVPTX supports vectors of legal types of any length in intrinsics because
// the NVPTX-specific type legalizer will legalize them to the PTX-supported
// length.
bool
NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
  if (isTypeLegal(VT))
    return true;
  if (VT.isVector()) {
    MVT eVT = VT.getVectorElementType();
    if (isTypeLegal(eVT))
      return true;
  }
  return false;
}
// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic because we need the information that is only available in
// the "Value" type of the destination pointer. In particular, the address
// space information.
bool
NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
                                        unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;

  case Intrinsic::nvvm_atomic_load_add_f32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::f32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;
  case Intrinsic::nvvm_atomic_load_inc_32:
  case Intrinsic::nvvm_atomic_load_dec_32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
      Info.memVT = MVT::i32;
    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
      Info.memVT = getPointerTy();
    else
      Info.memVT = MVT::f32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 0;
    return true;
  }
  return false;
}
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
/// Used to guide target-specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp).
bool
NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                           Type *Ty) const {
  // AddrMode - This represents an addressing mode of:
  //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
  //
  // The legal address modes are
  // - [avar]
  // - [areg]
  // - [areg+immoff]
  // - [immAddr]
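  //
  // In PTX syntax these look roughly like (illustrative):
  //   ld.global.f32 %f1, [foo];      // avar
  //   ld.global.f32 %f1, [%r1];      // areg
  //   ld.global.f32 %f1, [%r1+4];    // areg+immoff
  //   ld.global.f32 %f1, [12];       // immAddr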
  if (AM.BaseGV) {
    if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
      return false;
    return true;
  }

  switch (AM.Scale) {
  case 0: // "r", "r+i" or "i" is allowed
    break;
  case 1:
    if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
      return false;
    // Otherwise we have r+i.
    break;
  default:
    // No scale > 1 is allowed.
    return false;
  }
  return true;
}
//===----------------------------------------------------------------------===//
//                     NVPTX Inline Assembly Support
//===----------------------------------------------------------------------===//
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
NVPTXTargetLowering::ConstraintType
NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'r':
    case 'h':
    case 'c':
    case 'l':
    case 'f':
    case 'd':
    case '0':
    case 'N':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
std::pair<unsigned, const TargetRegisterClass*>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                  EVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'c':
      return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
    case 'h':
      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
    case 'r':
      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
    case 'l':
    case 'N':
      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
    case 'f':
      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
    case 'd':
      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
  return 4;
}