1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
25 #define DEBUG_TYPE "nvptx-isel"
// Backing storage for the -nvptx-fma-level option (cl::location below writes
// into it). 0 = no FMA contraction, 1 = contract, 2 = contract aggressively.
27 unsigned FMAContractLevel = 0;
// NOTE(review): the cl::opt declarations below appear truncated by extraction —
// the trailing cl::init(...) / closing parens are not visible in this chunk.
29 static cl::opt<unsigned, true>
30 FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
31 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
32 " 1: do it 2: do it aggressively"),
33 cl::location(FMAContractLevel),
// Selects the f32 division flavor; queried by getDivF32Level() below.
36 static cl::opt<int> UsePrecDivF32(
37 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
// NOTE(review): "avaiable" typo in the help string (should be "available").
38 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
39 " IEEE Compliant F32 div.rnd if avaiable."),
// Selects precise vs. approximate f32 sqrt; queried by usePrecSqrtF32().
43 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
44 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
// Forces f32 subnormal flushing; queried by useF32FTZ() below.
48 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
49 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
53 /// createNVPTXISelDag - This pass converts a legalized DAG into a
54 /// NVPTX-specific DAG, ready for instruction scheduling.
55 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
56 llvm::CodeGenOpt::Level OptLevel) {
// Factory entry point: ownership of the new pass transfers to the caller
// (the pass manager deletes it). NOTE(review): the closing brace of this
// function is not visible in this extraction.
57 return new NVPTXDAGToDAGISel(TM, OptLevel);
// Constructor: caches per-function selection policy flags derived from the
// optimization level, subtarget FMA support, and the -nvptx-fma-level option.
60 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
61 CodeGenOpt::Level OptLevel)
62 : SelectionDAGISel(tm, OptLevel),
63 Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
// Plain FMA contraction is allowed at -O1+ when the subtarget has the
// instruction and the option is at least level 1.
65 doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
66 doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
// NOTE(review): the assignment targets for the next two expressions (the
// aggressive-contraction flags, level == 2) were lost in extraction — the
// original lines 67/69 with the "doFMAF32AGG ="-style LHS are missing here.
68 (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
70 (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
72 allowFMA = (FMAContractLevel >= 1);
// Wide-multiply selection is only worthwhile when optimizing.
74 doMulWide = (OptLevel > 0);
// Returns the f32 division lowering level (0 = div.approx, 1 = div.full,
// 2 = IEEE-compliant div.rnd), preferring an explicit command-line choice.
// NOTE(review): the actual return statements are missing from this extraction;
// only the condition scaffolding is visible.
77 int NVPTXDAGToDAGISel::getDivF32Level() const {
78 if (UsePrecDivF32.getNumOccurrences() > 0) {
79 // If nvptx-prec-div32=N is used on the command-line, always honor it
82 // Otherwise, use div.approx if fast math is enabled
83 if (TM.Options.UnsafeFPMath)
// Returns true when precise f32 sqrt (sqrt.rn) should be emitted instead of
// sqrt.approx. Command-line option wins; otherwise fast-math enables the
// approximate form. NOTE(review): the final return(s) after the UnsafeFPMath
// check are missing from this extraction.
90 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
91 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
92 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
93 return UsePrecSqrtF32;
95 // Otherwise, use sqrt.approx if fast math is enabled
96 if (TM.Options.UnsafeFPMath)
// Returns true when f32 subnormals should be flushed to zero. Precedence:
// the -nvptx-f32ftz command-line flag, then the function-level
// "nvptx-f32ftz" string attribute. NOTE(review): the returns inside the first
// if and the fall-through tail are missing from this extraction.
103 bool NVPTXDAGToDAGISel::useF32FTZ() const {
104 if (FtzEnabled.getNumOccurrences() > 0) {
105 // If nvptx-f32ftz is used on the command-line, always honor it
108 const Function *F = MF->getFunction();
109 // Otherwise, check for an nvptx-f32ftz attribute on the function
110 if (F->hasFnAttribute("nvptx-f32ftz"))
// Attribute value is a string; only the literal "true" enables FTZ.
111 return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
113 .getValueAsString() == "true");
119 /// Select - Select instructions not customized! Used for
120 /// expanded, promoted and normal instructions.
// Top-level dispatcher: routes each SDNode opcode to the dedicated Select*
// helper; anything unhandled falls through to the TableGen'd SelectCode().
// NOTE(review): this extraction dropped several lines — the case labels for
// the first two arms (presumably ISD::LOAD / ISD::STORE), the per-case
// break statements, and the closing of the switch are not visible.
121 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
123 if (N->isMachineOpcode()) {
125 return nullptr; // Already selected.
128 SDNode *ResNode = nullptr;
129 switch (N->getOpcode()) {
// Scalar load/store (case labels missing in this extraction).
131 ResNode = SelectLoad(N);
134 ResNode = SelectStore(N);
// Vector loads.
136 case NVPTXISD::LoadV2:
137 case NVPTXISD::LoadV4:
138 ResNode = SelectLoadVector(N);
// Read-only-cache (ldg) and uniform (ldu) vector loads.
140 case NVPTXISD::LDGV2:
141 case NVPTXISD::LDGV4:
142 case NVPTXISD::LDUV2:
143 case NVPTXISD::LDUV4:
144 ResNode = SelectLDGLDU(N);
// Vector stores.
146 case NVPTXISD::StoreV2:
147 case NVPTXISD::StoreV4:
148 ResNode = SelectStoreVector(N);
// Parameter-space loads (call lowering support).
150 case NVPTXISD::LoadParam:
151 case NVPTXISD::LoadParamV2:
152 case NVPTXISD::LoadParamV4:
153 ResNode = SelectLoadParam(N);
// Return-value stores.
155 case NVPTXISD::StoreRetval:
156 case NVPTXISD::StoreRetvalV2:
157 case NVPTXISD::StoreRetvalV4:
158 ResNode = SelectStoreRetval(N);
// Outgoing-argument stores.
160 case NVPTXISD::StoreParam:
161 case NVPTXISD::StoreParamV2:
162 case NVPTXISD::StoreParamV4:
163 case NVPTXISD::StoreParamS32:
164 case NVPTXISD::StoreParamU32:
165 ResNode = SelectStoreParam(N);
// Intrinsics, with and without a chain.
167 case ISD::INTRINSIC_WO_CHAIN:
168 ResNode = SelectIntrinsicNoChain(N);
170 case ISD::INTRINSIC_W_CHAIN:
171 ResNode = SelectIntrinsicChain(N);
// Texture fetch intrinsics — all 1D/2D/3D, array, level and gradient
// variants share one handler.
173 case NVPTXISD::Tex1DFloatI32:
174 case NVPTXISD::Tex1DFloatFloat:
175 case NVPTXISD::Tex1DFloatFloatLevel:
176 case NVPTXISD::Tex1DFloatFloatGrad:
177 case NVPTXISD::Tex1DI32I32:
178 case NVPTXISD::Tex1DI32Float:
179 case NVPTXISD::Tex1DI32FloatLevel:
180 case NVPTXISD::Tex1DI32FloatGrad:
181 case NVPTXISD::Tex1DArrayFloatI32:
182 case NVPTXISD::Tex1DArrayFloatFloat:
183 case NVPTXISD::Tex1DArrayFloatFloatLevel:
184 case NVPTXISD::Tex1DArrayFloatFloatGrad:
185 case NVPTXISD::Tex1DArrayI32I32:
186 case NVPTXISD::Tex1DArrayI32Float:
187 case NVPTXISD::Tex1DArrayI32FloatLevel:
188 case NVPTXISD::Tex1DArrayI32FloatGrad:
189 case NVPTXISD::Tex2DFloatI32:
190 case NVPTXISD::Tex2DFloatFloat:
191 case NVPTXISD::Tex2DFloatFloatLevel:
192 case NVPTXISD::Tex2DFloatFloatGrad:
193 case NVPTXISD::Tex2DI32I32:
194 case NVPTXISD::Tex2DI32Float:
195 case NVPTXISD::Tex2DI32FloatLevel:
196 case NVPTXISD::Tex2DI32FloatGrad:
197 case NVPTXISD::Tex2DArrayFloatI32:
198 case NVPTXISD::Tex2DArrayFloatFloat:
199 case NVPTXISD::Tex2DArrayFloatFloatLevel:
200 case NVPTXISD::Tex2DArrayFloatFloatGrad:
201 case NVPTXISD::Tex2DArrayI32I32:
202 case NVPTXISD::Tex2DArrayI32Float:
203 case NVPTXISD::Tex2DArrayI32FloatLevel:
204 case NVPTXISD::Tex2DArrayI32FloatGrad:
205 case NVPTXISD::Tex3DFloatI32:
206 case NVPTXISD::Tex3DFloatFloat:
207 case NVPTXISD::Tex3DFloatFloatLevel:
208 case NVPTXISD::Tex3DFloatFloatGrad:
209 case NVPTXISD::Tex3DI32I32:
210 case NVPTXISD::Tex3DI32Float:
211 case NVPTXISD::Tex3DI32FloatLevel:
212 case NVPTXISD::Tex3DI32FloatGrad:
213 ResNode = SelectTextureIntrinsic(N);
// Surface load intrinsics (trap-on-out-of-bounds variants), 1D/2D/3D,
// scalar and v2/v4, i8/i16/i32 — one shared handler.
215 case NVPTXISD::Suld1DI8Trap:
216 case NVPTXISD::Suld1DI16Trap:
217 case NVPTXISD::Suld1DI32Trap:
218 case NVPTXISD::Suld1DV2I8Trap:
219 case NVPTXISD::Suld1DV2I16Trap:
220 case NVPTXISD::Suld1DV2I32Trap:
221 case NVPTXISD::Suld1DV4I8Trap:
222 case NVPTXISD::Suld1DV4I16Trap:
223 case NVPTXISD::Suld1DV4I32Trap:
224 case NVPTXISD::Suld1DArrayI8Trap:
225 case NVPTXISD::Suld1DArrayI16Trap:
226 case NVPTXISD::Suld1DArrayI32Trap:
227 case NVPTXISD::Suld1DArrayV2I8Trap:
228 case NVPTXISD::Suld1DArrayV2I16Trap:
229 case NVPTXISD::Suld1DArrayV2I32Trap:
230 case NVPTXISD::Suld1DArrayV4I8Trap:
231 case NVPTXISD::Suld1DArrayV4I16Trap:
232 case NVPTXISD::Suld1DArrayV4I32Trap:
233 case NVPTXISD::Suld2DI8Trap:
234 case NVPTXISD::Suld2DI16Trap:
235 case NVPTXISD::Suld2DI32Trap:
236 case NVPTXISD::Suld2DV2I8Trap:
237 case NVPTXISD::Suld2DV2I16Trap:
238 case NVPTXISD::Suld2DV2I32Trap:
239 case NVPTXISD::Suld2DV4I8Trap:
240 case NVPTXISD::Suld2DV4I16Trap:
241 case NVPTXISD::Suld2DV4I32Trap:
242 case NVPTXISD::Suld2DArrayI8Trap:
243 case NVPTXISD::Suld2DArrayI16Trap:
244 case NVPTXISD::Suld2DArrayI32Trap:
245 case NVPTXISD::Suld2DArrayV2I8Trap:
246 case NVPTXISD::Suld2DArrayV2I16Trap:
247 case NVPTXISD::Suld2DArrayV2I32Trap:
248 case NVPTXISD::Suld2DArrayV4I8Trap:
249 case NVPTXISD::Suld2DArrayV4I16Trap:
250 case NVPTXISD::Suld2DArrayV4I32Trap:
251 case NVPTXISD::Suld3DI8Trap:
252 case NVPTXISD::Suld3DI16Trap:
253 case NVPTXISD::Suld3DI32Trap:
254 case NVPTXISD::Suld3DV2I8Trap:
255 case NVPTXISD::Suld3DV2I16Trap:
256 case NVPTXISD::Suld3DV2I32Trap:
257 case NVPTXISD::Suld3DV4I8Trap:
258 case NVPTXISD::Suld3DV4I16Trap:
259 case NVPTXISD::Suld3DV4I32Trap:
260 ResNode = SelectSurfaceIntrinsic(N);
// Bit-field extract (case label missing in this extraction).
266 ResNode = SelectBFE(N);
268 case ISD::ADDRSPACECAST:
269 ResNode = SelectAddrSpaceCast(N);
// Fallback: auto-generated matcher handles everything else.
276 return SelectCode(N);
// Dispatches chained intrinsics. Operand 1 of an INTRINSIC_W_CHAIN node is
// the intrinsic ID; only the ldg/ldu global-load family is handled here.
// NOTE(review): the switch header and the default/fallthrough return were
// dropped by extraction.
279 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
280 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
284 case Intrinsic::nvvm_ldg_global_f:
285 case Intrinsic::nvvm_ldg_global_i:
286 case Intrinsic::nvvm_ldg_global_p:
287 case Intrinsic::nvvm_ldu_global_f:
288 case Intrinsic::nvvm_ldu_global_i:
289 case Intrinsic::nvvm_ldu_global_p:
290 return SelectLDGLDU(N);
// Maps the LLVM address space of a memory node's pointer operand to the
// PTX ld/st instruction address-space code. Falls back to GENERIC when the
// underlying IR value (or its pointer type) is unavailable or unrecognized.
294 static unsigned int getCodeAddrSpace(MemSDNode *N,
295 const NVPTXSubtarget &Subtarget) {
296 const Value *Src = N->getMemOperand()->getValue();
// NOTE(review): the null-check guarding this early return was dropped by
// extraction (original line ~298).
299 return NVPTX::PTXLdStInstCode::GENERIC;
301 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
302 switch (PT->getAddressSpace()) {
303 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
304 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
305 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
306 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
307 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
308 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
// Unknown address space: conservatively treat as generic.
312 return NVPTX::PTXLdStInstCode::GENERIC;
// Dispatches chain-free intrinsics. Operand 0 of an INTRINSIC_WO_CHAIN node
// is the intrinsic ID; only texsurf_handle_internal is custom-selected.
// NOTE(review): switch header and default return dropped by extraction.
315 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
316 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
320 case Intrinsic::nvvm_texsurf_handle_internal:
321 return SelectTexSurfHandle(N);
// Materializes an i64 texture/surface handle from the global symbol wrapped
// in operand 1. NOTE(review): the final operand of getMachineNode (the
// global symbol from GlobalVal) is on a line dropped by extraction.
325 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
326 // Op 0 is the intrinsic ID
327 SDValue Wrapper = N->getOperand(1);
328 SDValue GlobalVal = Wrapper.getOperand(0);
329 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
// Lowers ISD::ADDRSPACECAST to the appropriate cvta/cvta.to instruction.
// Casts are only legal between the generic space and a specific space; the
// direction decides cvta (to generic) vs. cvta.to (from generic), and the
// subtarget pointer width picks the 32- vs. 64-bit opcode.
// NOTE(review): the `unsigned Opc;` declaration, per-case `break`s, and the
// `} else {` separating the two directions were dropped by extraction.
333 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
334 SDValue Src = N->getOperand(0);
335 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
336 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
337 unsigned DstAddrSpace = CastN->getDestAddressSpace();
339 assert(SrcAddrSpace != DstAddrSpace &&
340 "addrspacecast must be between different address spaces");
342 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
343 // Specific to generic
345 switch (SrcAddrSpace) {
346 default: report_fatal_error("Bad address space in addrspacecast");
347 case ADDRESS_SPACE_GLOBAL:
348 Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
349 : NVPTX::cvta_global_yes;
351 case ADDRESS_SPACE_SHARED:
352 Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
353 : NVPTX::cvta_shared_yes;
355 case ADDRESS_SPACE_CONST:
356 Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
357 : NVPTX::cvta_const_yes;
359 case ADDRESS_SPACE_LOCAL:
360 Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
361 : NVPTX::cvta_local_yes;
364 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
366 // Generic to specific
// Specific-to-specific casts are rejected outright.
367 if (SrcAddrSpace != 0)
368 report_fatal_error("Cannot cast between two non-generic address spaces");
370 switch (DstAddrSpace) {
371 default: report_fatal_error("Bad address space in addrspacecast");
372 case ADDRESS_SPACE_GLOBAL:
373 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
374 : NVPTX::cvta_to_global_yes;
376 case ADDRESS_SPACE_SHARED:
377 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
378 : NVPTX::cvta_to_shared_yes;
380 case ADDRESS_SPACE_CONST:
381 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
382 : NVPTX::cvta_to_const_yes;
384 case ADDRESS_SPACE_LOCAL:
385 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
386 : NVPTX::cvta_to_local_yes;
389 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
// Selects a scalar LoadSDNode into an NVPTX LD_* machine node. The four
// addressing arms (tried in order) are: direct address (avar), symbol+imm
// (asi), register+imm (ari / ari_64), and plain register (areg / areg_64).
// Each arm picks an opcode by result type, then builds the operand list:
// volatile flag, address-space code, vector code, from-type, from-width,
// address operands, chain.
// NOTE(review): this extraction dropped the switch headers, per-type case
// labels, default cases, and many closing braces throughout; the visible
// Opcode assignments follow the i8/i16/i32/i64/f32/f64 order.
393 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
395 LoadSDNode *LD = cast<LoadSDNode>(N);
396 EVT LoadedVT = LD->getMemoryVT();
397 SDNode *NVPTXLD = nullptr;
399 // do not support pre/post inc/dec
403 if (!LoadedVT.isSimple())
406 // Address Space Setting
407 unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
410 // - .volatile is only availalble for .global and .shared
// Drop the volatile flag for address spaces where PTX cannot express it.
411 bool isVolatile = LD->isVolatile();
412 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
413 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
414 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
418 MVT SimpleVT = LoadedVT.getSimpleVT();
419 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
420 if (SimpleVT.isVector()) {
421 unsigned num = SimpleVT.getVectorNumElements();
423 vecType = NVPTX::PTXLdStInstCode::V2;
425 vecType = NVPTX::PTXLdStInstCode::V4;
430 // Type Setting: fromType + fromTypeWidth
432 //   Sign  : ISD::SEXTLOAD
433 //   Unsign: ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
435 //   Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
436 MVT ScalarVT = SimpleVT.getScalarType();
437 // Read at least 8 bits (predicates are stored as 8-bit values)
438 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
439 unsigned int fromType;
440 if ((LD->getExtensionType() == ISD::SEXTLOAD))
441 fromType = NVPTX::PTXLdStInstCode::Signed;
442 else if (ScalarVT.isFloatingPoint())
443 fromType = NVPTX::PTXLdStInstCode::Float;
445 fromType = NVPTX::PTXLdStInstCode::Unsigned;
447 // Create the machine instruction DAG
448 SDValue Chain = N->getOperand(0);
449 SDValue N1 = N->getOperand(1);
451 SDValue Offset, Base;
453 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
// Arm 1: direct address.
455 if (SelectDirectAddr(N1, Addr)) {
458 Opcode = NVPTX::LD_i8_avar;
461 Opcode = NVPTX::LD_i16_avar;
464 Opcode = NVPTX::LD_i32_avar;
467 Opcode = NVPTX::LD_i64_avar;
470 Opcode = NVPTX::LD_f32_avar;
473 Opcode = NVPTX::LD_f64_avar;
478 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
479 getI32Imm(vecType), getI32Imm(fromType),
480 getI32Imm(fromTypeWidth), Addr, Chain };
481 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Arm 2: symbol + immediate offset.
482 } else if (Subtarget.is64Bit()
483 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
484 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
487 Opcode = NVPTX::LD_i8_asi;
490 Opcode = NVPTX::LD_i16_asi;
493 Opcode = NVPTX::LD_i32_asi;
496 Opcode = NVPTX::LD_i64_asi;
499 Opcode = NVPTX::LD_f32_asi;
502 Opcode = NVPTX::LD_f64_asi;
507 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
508 getI32Imm(vecType), getI32Imm(fromType),
509 getI32Imm(fromTypeWidth), Base, Offset, Chain };
510 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Arm 3: register + immediate offset; 64-bit pointers use the _64 opcodes.
511 } else if (Subtarget.is64Bit()
512 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
513 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
514 if (Subtarget.is64Bit()) {
517 Opcode = NVPTX::LD_i8_ari_64;
520 Opcode = NVPTX::LD_i16_ari_64;
523 Opcode = NVPTX::LD_i32_ari_64;
526 Opcode = NVPTX::LD_i64_ari_64;
529 Opcode = NVPTX::LD_f32_ari_64;
532 Opcode = NVPTX::LD_f64_ari_64;
540 Opcode = NVPTX::LD_i8_ari;
543 Opcode = NVPTX::LD_i16_ari;
546 Opcode = NVPTX::LD_i32_ari;
549 Opcode = NVPTX::LD_i64_ari;
552 Opcode = NVPTX::LD_f32_ari;
555 Opcode = NVPTX::LD_f64_ari;
561 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
562 getI32Imm(vecType), getI32Imm(fromType),
563 getI32Imm(fromTypeWidth), Base, Offset, Chain };
564 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Arm 4 (fallback): address entirely in a register; address operand is N1.
566 if (Subtarget.is64Bit()) {
569 Opcode = NVPTX::LD_i8_areg_64;
572 Opcode = NVPTX::LD_i16_areg_64;
575 Opcode = NVPTX::LD_i32_areg_64;
578 Opcode = NVPTX::LD_i64_areg_64;
581 Opcode = NVPTX::LD_f32_areg_64;
584 Opcode = NVPTX::LD_f64_areg_64;
592 Opcode = NVPTX::LD_i8_areg;
595 Opcode = NVPTX::LD_i16_areg;
598 Opcode = NVPTX::LD_i32_areg;
601 Opcode = NVPTX::LD_i64_areg;
604 Opcode = NVPTX::LD_f32_areg;
607 Opcode = NVPTX::LD_f64_areg;
613 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
614 getI32Imm(vecType), getI32Imm(fromType),
615 getI32Imm(fromTypeWidth), N1, Chain };
616 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Attach the original memory operand so later passes see the right MMO.
620 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
621 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
622 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Selects NVPTXISD::LoadV2/LoadV4 nodes into LDV_* machine nodes. Mirrors
// SelectLoad's four addressing arms (avar / asi / ari / areg, with _64
// variants for 64-bit pointers); within each arm the opcode is chosen first
// by v2-vs-v4, then by element type. v4 has no i64/f64 forms.
// NOTE(review): this extraction dropped switch headers, per-type case
// labels, default cases, breaks, and closing braces throughout; the visible
// Opcode assignments follow the i8/i16/i32(/i64)/f32(/f64) order.
628 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
630 SDValue Chain = N->getOperand(0);
631 SDValue Op1 = N->getOperand(1);
632 SDValue Addr, Offset, Base;
636 MemSDNode *MemSD = cast<MemSDNode>(N);
637 EVT LoadedVT = MemSD->getMemoryVT();
639 if (!LoadedVT.isSimple())
642 // Address Space Setting
643 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
646 // - .volatile is only availalble for .global and .shared
// Drop the volatile flag for address spaces where PTX cannot express it.
647 bool IsVolatile = MemSD->isVolatile();
648 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
649 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
650 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
654 MVT SimpleVT = LoadedVT.getSimpleVT();
656 // Type Setting: fromType + fromTypeWidth
658 //   Sign  : ISD::SEXTLOAD
659 //   Unsign: ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
661 //   Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
662 MVT ScalarVT = SimpleVT.getScalarType();
663 // Read at least 8 bits (predicates are stored as 8-bit values)
664 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
665 unsigned int FromType;
666 // The last operand holds the original LoadSDNode::getExtensionType() value
667 unsigned ExtensionType = cast<ConstantSDNode>(
668 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
669 if (ExtensionType == ISD::SEXTLOAD)
670 FromType = NVPTX::PTXLdStInstCode::Signed;
671 else if (ScalarVT.isFloatingPoint())
672 FromType = NVPTX::PTXLdStInstCode::Float;
674 FromType = NVPTX::PTXLdStInstCode::Unsigned;
// Vector-width code for the ld instruction (V2 or V4).
678 switch (N->getOpcode()) {
679 case NVPTXISD::LoadV2:
680 VecType = NVPTX::PTXLdStInstCode::V2;
682 case NVPTXISD::LoadV4:
683 VecType = NVPTX::PTXLdStInstCode::V4;
689 EVT EltVT = N->getValueType(0);
// Arm 1: direct address (avar).
691 if (SelectDirectAddr(Op1, Addr)) {
692 switch (N->getOpcode()) {
695 case NVPTXISD::LoadV2:
696 switch (EltVT.getSimpleVT().SimpleTy) {
700 Opcode = NVPTX::LDV_i8_v2_avar;
703 Opcode = NVPTX::LDV_i16_v2_avar;
706 Opcode = NVPTX::LDV_i32_v2_avar;
709 Opcode = NVPTX::LDV_i64_v2_avar;
712 Opcode = NVPTX::LDV_f32_v2_avar;
715 Opcode = NVPTX::LDV_f64_v2_avar;
719 case NVPTXISD::LoadV4:
720 switch (EltVT.getSimpleVT().SimpleTy) {
724 Opcode = NVPTX::LDV_i8_v4_avar;
727 Opcode = NVPTX::LDV_i16_v4_avar;
730 Opcode = NVPTX::LDV_i32_v4_avar;
733 Opcode = NVPTX::LDV_f32_v4_avar;
739 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
740 getI32Imm(VecType), getI32Imm(FromType),
741 getI32Imm(FromTypeWidth), Addr, Chain };
742 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Arm 2: symbol + immediate offset (asi).
743 } else if (Subtarget.is64Bit()
744 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
745 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
746 switch (N->getOpcode()) {
749 case NVPTXISD::LoadV2:
750 switch (EltVT.getSimpleVT().SimpleTy) {
754 Opcode = NVPTX::LDV_i8_v2_asi;
757 Opcode = NVPTX::LDV_i16_v2_asi;
760 Opcode = NVPTX::LDV_i32_v2_asi;
763 Opcode = NVPTX::LDV_i64_v2_asi;
766 Opcode = NVPTX::LDV_f32_v2_asi;
769 Opcode = NVPTX::LDV_f64_v2_asi;
773 case NVPTXISD::LoadV4:
774 switch (EltVT.getSimpleVT().SimpleTy) {
778 Opcode = NVPTX::LDV_i8_v4_asi;
781 Opcode = NVPTX::LDV_i16_v4_asi;
784 Opcode = NVPTX::LDV_i32_v4_asi;
787 Opcode = NVPTX::LDV_f32_v4_asi;
793 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
794 getI32Imm(VecType), getI32Imm(FromType),
795 getI32Imm(FromTypeWidth), Base, Offset, Chain };
796 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Arm 3: register + immediate offset (ari / ari_64).
797 } else if (Subtarget.is64Bit()
798 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
799 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
800 if (Subtarget.is64Bit()) {
801 switch (N->getOpcode()) {
804 case NVPTXISD::LoadV2:
805 switch (EltVT.getSimpleVT().SimpleTy) {
809 Opcode = NVPTX::LDV_i8_v2_ari_64;
812 Opcode = NVPTX::LDV_i16_v2_ari_64;
815 Opcode = NVPTX::LDV_i32_v2_ari_64;
818 Opcode = NVPTX::LDV_i64_v2_ari_64;
821 Opcode = NVPTX::LDV_f32_v2_ari_64;
824 Opcode = NVPTX::LDV_f64_v2_ari_64;
828 case NVPTXISD::LoadV4:
829 switch (EltVT.getSimpleVT().SimpleTy) {
833 Opcode = NVPTX::LDV_i8_v4_ari_64;
836 Opcode = NVPTX::LDV_i16_v4_ari_64;
839 Opcode = NVPTX::LDV_i32_v4_ari_64;
842 Opcode = NVPTX::LDV_f32_v4_ari_64;
848 switch (N->getOpcode()) {
851 case NVPTXISD::LoadV2:
852 switch (EltVT.getSimpleVT().SimpleTy) {
856 Opcode = NVPTX::LDV_i8_v2_ari;
859 Opcode = NVPTX::LDV_i16_v2_ari;
862 Opcode = NVPTX::LDV_i32_v2_ari;
865 Opcode = NVPTX::LDV_i64_v2_ari;
868 Opcode = NVPTX::LDV_f32_v2_ari;
871 Opcode = NVPTX::LDV_f64_v2_ari;
875 case NVPTXISD::LoadV4:
876 switch (EltVT.getSimpleVT().SimpleTy) {
880 Opcode = NVPTX::LDV_i8_v4_ari;
883 Opcode = NVPTX::LDV_i16_v4_ari;
886 Opcode = NVPTX::LDV_i32_v4_ari;
889 Opcode = NVPTX::LDV_f32_v4_ari;
896 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
897 getI32Imm(VecType), getI32Imm(FromType),
898 getI32Imm(FromTypeWidth), Base, Offset, Chain };
900 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Arm 4 (fallback): address entirely in a register (areg / areg_64); the
// address operand is Op1 itself.
902 if (Subtarget.is64Bit()) {
903 switch (N->getOpcode()) {
906 case NVPTXISD::LoadV2:
907 switch (EltVT.getSimpleVT().SimpleTy) {
911 Opcode = NVPTX::LDV_i8_v2_areg_64;
914 Opcode = NVPTX::LDV_i16_v2_areg_64;
917 Opcode = NVPTX::LDV_i32_v2_areg_64;
920 Opcode = NVPTX::LDV_i64_v2_areg_64;
923 Opcode = NVPTX::LDV_f32_v2_areg_64;
926 Opcode = NVPTX::LDV_f64_v2_areg_64;
930 case NVPTXISD::LoadV4:
931 switch (EltVT.getSimpleVT().SimpleTy) {
935 Opcode = NVPTX::LDV_i8_v4_areg_64;
938 Opcode = NVPTX::LDV_i16_v4_areg_64;
941 Opcode = NVPTX::LDV_i32_v4_areg_64;
944 Opcode = NVPTX::LDV_f32_v4_areg_64;
950 switch (N->getOpcode()) {
953 case NVPTXISD::LoadV2:
954 switch (EltVT.getSimpleVT().SimpleTy) {
958 Opcode = NVPTX::LDV_i8_v2_areg;
961 Opcode = NVPTX::LDV_i16_v2_areg;
964 Opcode = NVPTX::LDV_i32_v2_areg;
967 Opcode = NVPTX::LDV_i64_v2_areg;
970 Opcode = NVPTX::LDV_f32_v2_areg;
973 Opcode = NVPTX::LDV_f64_v2_areg;
977 case NVPTXISD::LoadV4:
978 switch (EltVT.getSimpleVT().SimpleTy) {
982 Opcode = NVPTX::LDV_i8_v4_areg;
985 Opcode = NVPTX::LDV_i16_v4_areg;
988 Opcode = NVPTX::LDV_i32_v4_areg;
991 Opcode = NVPTX::LDV_f32_v4_areg;
998 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
999 getI32Imm(VecType), getI32Imm(FromType),
1000 getI32Imm(FromTypeWidth), Op1, Chain };
1001 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Attach the original memory operand so later passes see the right MMO.
1004 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1005 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1006 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1011 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1013 SDValue Chain = N->getOperand(0);
1018 // If this is an LDG intrinsic, the address is the third operand. Its its an
1019 // LDG/LDU SD node (from custom vector handling), then its the second operand
1020 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1021 Op1 = N->getOperand(2);
1022 Mem = cast<MemIntrinsicSDNode>(N);
1023 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1027 case Intrinsic::nvvm_ldg_global_f:
1028 case Intrinsic::nvvm_ldg_global_i:
1029 case Intrinsic::nvvm_ldg_global_p:
1032 case Intrinsic::nvvm_ldu_global_f:
1033 case Intrinsic::nvvm_ldu_global_i:
1034 case Intrinsic::nvvm_ldu_global_p:
1039 Op1 = N->getOperand(1);
1040 Mem = cast<MemSDNode>(N);
1046 SDValue Base, Offset, Addr;
1048 EVT EltVT = Mem->getMemoryVT();
1049 if (EltVT.isVector()) {
1050 EltVT = EltVT.getVectorElementType();
1053 if (SelectDirectAddr(Op1, Addr)) {
1054 switch (N->getOpcode()) {
1057 case ISD::INTRINSIC_W_CHAIN:
1059 switch (EltVT.getSimpleVT().SimpleTy) {
1063 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1066 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1069 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1072 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1075 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1078 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1082 switch (EltVT.getSimpleVT().SimpleTy) {
1086 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1089 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1092 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1095 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1098 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1101 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1106 case NVPTXISD::LDGV2:
1107 switch (EltVT.getSimpleVT().SimpleTy) {
1111 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1114 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1117 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1120 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1123 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1126 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1130 case NVPTXISD::LDUV2:
1131 switch (EltVT.getSimpleVT().SimpleTy) {
1135 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1138 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1141 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1144 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1147 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1150 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1154 case NVPTXISD::LDGV4:
1155 switch (EltVT.getSimpleVT().SimpleTy) {
1159 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1162 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1165 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1168 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1172 case NVPTXISD::LDUV4:
1173 switch (EltVT.getSimpleVT().SimpleTy) {
1177 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1180 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1183 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1186 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1192 SDValue Ops[] = { Addr, Chain };
1193 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1194 } else if (Subtarget.is64Bit()
1195 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1196 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1197 if (Subtarget.is64Bit()) {
1198 switch (N->getOpcode()) {
1201 case ISD::INTRINSIC_W_CHAIN:
1203 switch (EltVT.getSimpleVT().SimpleTy) {
1207 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1210 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1213 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1216 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1219 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1222 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1226 switch (EltVT.getSimpleVT().SimpleTy) {
1230 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1233 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1236 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1239 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1242 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1245 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1250 case NVPTXISD::LDGV2:
1251 switch (EltVT.getSimpleVT().SimpleTy) {
1255 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1258 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1261 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1264 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1267 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1270 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1274 case NVPTXISD::LDUV2:
1275 switch (EltVT.getSimpleVT().SimpleTy) {
1279 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1282 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1285 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1288 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1291 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1294 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1298 case NVPTXISD::LDGV4:
1299 switch (EltVT.getSimpleVT().SimpleTy) {
1303 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1306 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1309 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1312 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1316 case NVPTXISD::LDUV4:
1317 switch (EltVT.getSimpleVT().SimpleTy) {
1321 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1324 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1327 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1330 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1336 switch (N->getOpcode()) {
1339 case ISD::INTRINSIC_W_CHAIN:
1341 switch (EltVT.getSimpleVT().SimpleTy) {
1345 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1348 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1351 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1354 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1357 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1360 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1364 switch (EltVT.getSimpleVT().SimpleTy) {
1368 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1371 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1374 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1377 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1380 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1383 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1388 case NVPTXISD::LDGV2:
1389 switch (EltVT.getSimpleVT().SimpleTy) {
1393 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1396 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1399 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1402 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1405 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1408 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1412 case NVPTXISD::LDUV2:
1413 switch (EltVT.getSimpleVT().SimpleTy) {
1417 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1420 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1423 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1426 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1429 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1432 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1436 case NVPTXISD::LDGV4:
1437 switch (EltVT.getSimpleVT().SimpleTy) {
1441 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1444 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1447 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1450 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1454 case NVPTXISD::LDUV4:
1455 switch (EltVT.getSimpleVT().SimpleTy) {
1459 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1462 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1465 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1468 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1475 SDValue Ops[] = { Base, Offset, Chain };
1477 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1479 if (Subtarget.is64Bit()) {
1480 switch (N->getOpcode()) {
1483 case ISD::INTRINSIC_W_CHAIN:
1485 switch (EltVT.getSimpleVT().SimpleTy) {
1489 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1492 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1495 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1498 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1501 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1504 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1508 switch (EltVT.getSimpleVT().SimpleTy) {
1512 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1515 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1518 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1521 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1524 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1527 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1532 case NVPTXISD::LDGV2:
1533 switch (EltVT.getSimpleVT().SimpleTy) {
1537 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1540 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1543 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1546 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1549 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1552 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1556 case NVPTXISD::LDUV2:
1557 switch (EltVT.getSimpleVT().SimpleTy) {
1561 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1564 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1567 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1570 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1573 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1576 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1580 case NVPTXISD::LDGV4:
1581 switch (EltVT.getSimpleVT().SimpleTy) {
1585 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1588 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1591 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1594 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1598 case NVPTXISD::LDUV4:
1599 switch (EltVT.getSimpleVT().SimpleTy) {
1603 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1606 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1609 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1612 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1618 switch (N->getOpcode()) {
1621 case ISD::INTRINSIC_W_CHAIN:
1623 switch (EltVT.getSimpleVT().SimpleTy) {
1627 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1630 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1633 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1636 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1639 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1642 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1646 switch (EltVT.getSimpleVT().SimpleTy) {
1650 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1653 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1656 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1659 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1662 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1665 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1670 case NVPTXISD::LDGV2:
1671 switch (EltVT.getSimpleVT().SimpleTy) {
1675 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1678 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1681 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1684 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1687 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1690 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1694 case NVPTXISD::LDUV2:
1695 switch (EltVT.getSimpleVT().SimpleTy) {
1699 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1702 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1705 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1708 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1711 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1714 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1718 case NVPTXISD::LDGV4:
1719 switch (EltVT.getSimpleVT().SimpleTy) {
1723 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1726 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1729 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1732 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1736 case NVPTXISD::LDUV4:
1737 switch (EltVT.getSimpleVT().SimpleTy) {
1741 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1744 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1747 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1750 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1757 SDValue Ops[] = { Op1, Chain };
1758 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1761 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1762 MemRefs0[0] = Mem->getMemOperand();
1763 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select a machine store for a scalar StoreSDNode.
// Picks an NVPTX ST_* machine opcode from the stored value's type
// (i8..i64, f32, f64) crossed with whichever addressing form matches the
// address operand N2, tried in order:
//   avar (direct address), asi (symbol+imm), ari/ari_64 (reg+imm),
//   areg/areg_64 (plain register), with the *_64 variants used on 64-bit
//   subtargets.
// The new node's operands are: value, volatile flag, address-space code,
// vector code, type code, type width, address operand(s), chain.  The
// original memory operand is re-attached at the end.
// NOTE(review): some case labels / break / return lines are elided in this
// excerpt; comments describe only the statements shown.
1768 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1770 StoreSDNode *ST = cast<StoreSDNode>(N);
1771 EVT StoreVT = ST->getMemoryVT();
1772 SDNode *NVPTXST = nullptr;
// Indexed (pre/post increment/decrement) stores are rejected up front.
1774 // do not support pre/post inc/dec
1775 if (ST->isIndexed())
1778 if (!StoreVT.isSimple())
// Encode the PTX state space (.global/.shared/...) for the instruction.
1781 // Address Space Setting
1782 unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
1785 // - .volatile is only available for .global and .shared
1786 bool isVolatile = ST->isVolatile();
1787 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1788 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1789 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector-width code for the PTX instruction: scalar, v2 or v4.
1793 MVT SimpleVT = StoreVT.getSimpleVT();
1794 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1795 if (SimpleVT.isVector()) {
1796 unsigned num = SimpleVT.getVectorNumElements();
1798 vecType = NVPTX::PTXLdStInstCode::V2;
1800 vecType = NVPTX::PTXLdStInstCode::V4;
// Element type code + width in bits; integers are always stored as 'u'.
1805 // Type Setting: toType + toTypeWidth
1806 // - for integer type, always use 'u'
1808 MVT ScalarVT = SimpleVT.getScalarType();
1809 unsigned toTypeWidth = ScalarVT.getSizeInBits();
1810 unsigned int toType;
1811 if (ScalarVT.isFloatingPoint())
1812 toType = NVPTX::PTXLdStInstCode::Float;
1814 toType = NVPTX::PTXLdStInstCode::Unsigned;
// Operands of the store node: chain, stored value (N1), address (N2).
1816 // Create the machine instruction DAG
1817 SDValue Chain = N->getOperand(0);
1818 SDValue N1 = N->getOperand(1);
1819 SDValue N2 = N->getOperand(2);
1821 SDValue Offset, Base;
// Opcode choice is keyed on the type of the value being stored.
1823 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
// Case 1: direct address — ST_*_avar forms.
1825 if (SelectDirectAddr(N2, Addr)) {
1828 Opcode = NVPTX::ST_i8_avar;
1831 Opcode = NVPTX::ST_i16_avar;
1834 Opcode = NVPTX::ST_i32_avar;
1837 Opcode = NVPTX::ST_i64_avar;
1840 Opcode = NVPTX::ST_f32_avar;
1843 Opcode = NVPTX::ST_f64_avar;
1848 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1849 getI32Imm(vecType), getI32Imm(toType),
1850 getI32Imm(toTypeWidth), Addr, Chain };
1851 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Case 2: symbol + immediate offset — ST_*_asi forms (64-bit aware matcher).
1852 } else if (Subtarget.is64Bit()
1853 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1854 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1857 Opcode = NVPTX::ST_i8_asi;
1860 Opcode = NVPTX::ST_i16_asi;
1863 Opcode = NVPTX::ST_i32_asi;
1866 Opcode = NVPTX::ST_i64_asi;
1869 Opcode = NVPTX::ST_f32_asi;
1872 Opcode = NVPTX::ST_f64_asi;
1877 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1878 getI32Imm(vecType), getI32Imm(toType),
1879 getI32Imm(toTypeWidth), Base, Offset, Chain };
1880 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Case 3: register + immediate offset — ST_*_ari / ST_*_ari_64 forms.
1881 } else if (Subtarget.is64Bit()
1882 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1883 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1884 if (Subtarget.is64Bit()) {
1887 Opcode = NVPTX::ST_i8_ari_64;
1890 Opcode = NVPTX::ST_i16_ari_64;
1893 Opcode = NVPTX::ST_i32_ari_64;
1896 Opcode = NVPTX::ST_i64_ari_64;
1899 Opcode = NVPTX::ST_f32_ari_64;
1902 Opcode = NVPTX::ST_f64_ari_64;
1910 Opcode = NVPTX::ST_i8_ari;
1913 Opcode = NVPTX::ST_i16_ari;
1916 Opcode = NVPTX::ST_i32_ari;
1919 Opcode = NVPTX::ST_i64_ari;
1922 Opcode = NVPTX::ST_f32_ari;
1925 Opcode = NVPTX::ST_f64_ari;
1931 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1932 getI32Imm(vecType), getI32Imm(toType),
1933 getI32Imm(toTypeWidth), Base, Offset, Chain };
1934 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Case 4 (fallback): address lives in a register — ST_*_areg / areg_64 forms.
1936 if (Subtarget.is64Bit()) {
1939 Opcode = NVPTX::ST_i8_areg_64;
1942 Opcode = NVPTX::ST_i16_areg_64;
1945 Opcode = NVPTX::ST_i32_areg_64;
1948 Opcode = NVPTX::ST_i64_areg_64;
1951 Opcode = NVPTX::ST_f32_areg_64;
1954 Opcode = NVPTX::ST_f64_areg_64;
1962 Opcode = NVPTX::ST_i8_areg;
1965 Opcode = NVPTX::ST_i16_areg;
1968 Opcode = NVPTX::ST_i32_areg;
1971 Opcode = NVPTX::ST_i64_areg;
1974 Opcode = NVPTX::ST_f32_areg;
1977 Opcode = NVPTX::ST_f64_areg;
1983 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1984 getI32Imm(vecType), getI32Imm(toType),
1985 getI32Imm(toTypeWidth), N2, Chain };
1986 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Re-attach the source store's MachineMemOperand so later passes (e.g.
// scheduling, AA) keep accurate memory information for the machine node.
1990 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1991 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1992 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select a machine store for a vector store node (NVPTXISD::StoreV2/StoreV4).
// Builds the operand list (elements, then the instruction-code immediates,
// then the address operand(s), then the chain) and picks an NVPTX STV_* opcode
// from: element type x vector arity (v2/v4) x addressing form
// (avar / asi / ari / areg, with *_64 variants on 64-bit subtargets).
// Note there is no 64-bit-element v4 form: i64/f64 appear only in the v2
// switches below.  The source node's MachineMemOperand is re-attached at the
// end.
// NOTE(review): some case labels / break / return lines are elided in this
// excerpt; comments describe only the statements shown.
1998 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1999 SDValue Chain = N->getOperand(0);
2000 SDValue Op1 = N->getOperand(1);
2001 SDValue Addr, Offset, Base;
2005 EVT EltVT = Op1.getValueType();
2006 MemSDNode *MemSD = cast<MemSDNode>(N);
2007 EVT StoreVT = MemSD->getMemoryVT();
// Encode the PTX state space; storing to .const memory is a hard error.
2009 // Address Space Setting
2010 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
2012 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2013 report_fatal_error("Cannot store to pointer that points to constant "
2018 // - .volatile is only available for .global and .shared
2019 bool IsVolatile = MemSD->isVolatile();
2020 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2021 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2022 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Element type code + width in bits; integers are always stored as 'u'.
2025 // Type Setting: toType + toTypeWidth
2026 // - for integer type, always use 'u'
2027 assert(StoreVT.isSimple() && "Store value is not simple");
2028 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2029 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2031 if (ScalarVT.isFloatingPoint())
2032 ToType = NVPTX::PTXLdStInstCode::Float;
2034 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2036 SmallVector<SDValue, 12> StOps;
// Gather the element operands; N2 ends up holding the address operand
// (operand 3 for v2 stores, operand 5 for v4 stores).
2040 switch (N->getOpcode()) {
2041 case NVPTXISD::StoreV2:
2042 VecType = NVPTX::PTXLdStInstCode::V2;
2043 StOps.push_back(N->getOperand(1));
2044 StOps.push_back(N->getOperand(2));
2045 N2 = N->getOperand(3);
2047 case NVPTXISD::StoreV4:
2048 VecType = NVPTX::PTXLdStInstCode::V4;
2049 StOps.push_back(N->getOperand(1));
2050 StOps.push_back(N->getOperand(2));
2051 StOps.push_back(N->getOperand(3));
2052 StOps.push_back(N->getOperand(4));
2053 N2 = N->getOperand(5);
// Common immediate operands appended after the element values.
2059 StOps.push_back(getI32Imm(IsVolatile));
2060 StOps.push_back(getI32Imm(CodeAddrSpace));
2061 StOps.push_back(getI32Imm(VecType));
2062 StOps.push_back(getI32Imm(ToType));
2063 StOps.push_back(getI32Imm(ToTypeWidth));
// Case 1: direct address — STV_*_avar forms.
2065 if (SelectDirectAddr(N2, Addr)) {
2066 switch (N->getOpcode()) {
2069 case NVPTXISD::StoreV2:
2070 switch (EltVT.getSimpleVT().SimpleTy) {
2074 Opcode = NVPTX::STV_i8_v2_avar;
2077 Opcode = NVPTX::STV_i16_v2_avar;
2080 Opcode = NVPTX::STV_i32_v2_avar;
2083 Opcode = NVPTX::STV_i64_v2_avar;
2086 Opcode = NVPTX::STV_f32_v2_avar;
2089 Opcode = NVPTX::STV_f64_v2_avar;
2093 case NVPTXISD::StoreV4:
2094 switch (EltVT.getSimpleVT().SimpleTy) {
2098 Opcode = NVPTX::STV_i8_v4_avar;
2101 Opcode = NVPTX::STV_i16_v4_avar;
2104 Opcode = NVPTX::STV_i32_v4_avar;
2107 Opcode = NVPTX::STV_f32_v4_avar;
2112 StOps.push_back(Addr);
// Case 2: symbol + immediate offset — STV_*_asi forms.
2113 } else if (Subtarget.is64Bit()
2114 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2115 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2116 switch (N->getOpcode()) {
2119 case NVPTXISD::StoreV2:
2120 switch (EltVT.getSimpleVT().SimpleTy) {
2124 Opcode = NVPTX::STV_i8_v2_asi;
2127 Opcode = NVPTX::STV_i16_v2_asi;
2130 Opcode = NVPTX::STV_i32_v2_asi;
2133 Opcode = NVPTX::STV_i64_v2_asi;
2136 Opcode = NVPTX::STV_f32_v2_asi;
2139 Opcode = NVPTX::STV_f64_v2_asi;
2143 case NVPTXISD::StoreV4:
2144 switch (EltVT.getSimpleVT().SimpleTy) {
2148 Opcode = NVPTX::STV_i8_v4_asi;
2151 Opcode = NVPTX::STV_i16_v4_asi;
2154 Opcode = NVPTX::STV_i32_v4_asi;
2157 Opcode = NVPTX::STV_f32_v4_asi;
2162 StOps.push_back(Base);
2163 StOps.push_back(Offset);
// Case 3: register + immediate offset — STV_*_ari / ari_64 forms.
2164 } else if (Subtarget.is64Bit()
2165 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2166 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2167 if (Subtarget.is64Bit()) {
2168 switch (N->getOpcode()) {
2171 case NVPTXISD::StoreV2:
2172 switch (EltVT.getSimpleVT().SimpleTy) {
2176 Opcode = NVPTX::STV_i8_v2_ari_64;
2179 Opcode = NVPTX::STV_i16_v2_ari_64;
2182 Opcode = NVPTX::STV_i32_v2_ari_64;
2185 Opcode = NVPTX::STV_i64_v2_ari_64;
2188 Opcode = NVPTX::STV_f32_v2_ari_64;
2191 Opcode = NVPTX::STV_f64_v2_ari_64;
2195 case NVPTXISD::StoreV4:
2196 switch (EltVT.getSimpleVT().SimpleTy) {
2200 Opcode = NVPTX::STV_i8_v4_ari_64;
2203 Opcode = NVPTX::STV_i16_v4_ari_64;
2206 Opcode = NVPTX::STV_i32_v4_ari_64;
2209 Opcode = NVPTX::STV_f32_v4_ari_64;
2215 switch (N->getOpcode()) {
2218 case NVPTXISD::StoreV2:
2219 switch (EltVT.getSimpleVT().SimpleTy) {
2223 Opcode = NVPTX::STV_i8_v2_ari;
2226 Opcode = NVPTX::STV_i16_v2_ari;
2229 Opcode = NVPTX::STV_i32_v2_ari;
2232 Opcode = NVPTX::STV_i64_v2_ari;
2235 Opcode = NVPTX::STV_f32_v2_ari;
2238 Opcode = NVPTX::STV_f64_v2_ari;
2242 case NVPTXISD::StoreV4:
2243 switch (EltVT.getSimpleVT().SimpleTy) {
2247 Opcode = NVPTX::STV_i8_v4_ari;
2250 Opcode = NVPTX::STV_i16_v4_ari;
2253 Opcode = NVPTX::STV_i32_v4_ari;
2256 Opcode = NVPTX::STV_f32_v4_ari;
2262 StOps.push_back(Base);
2263 StOps.push_back(Offset);
// Case 4 (fallback): plain register address — STV_*_areg / areg_64 forms.
2265 if (Subtarget.is64Bit()) {
2266 switch (N->getOpcode()) {
2269 case NVPTXISD::StoreV2:
2270 switch (EltVT.getSimpleVT().SimpleTy) {
2274 Opcode = NVPTX::STV_i8_v2_areg_64;
2277 Opcode = NVPTX::STV_i16_v2_areg_64;
2280 Opcode = NVPTX::STV_i32_v2_areg_64;
2283 Opcode = NVPTX::STV_i64_v2_areg_64;
2286 Opcode = NVPTX::STV_f32_v2_areg_64;
2289 Opcode = NVPTX::STV_f64_v2_areg_64;
2293 case NVPTXISD::StoreV4:
2294 switch (EltVT.getSimpleVT().SimpleTy) {
2298 Opcode = NVPTX::STV_i8_v4_areg_64;
2301 Opcode = NVPTX::STV_i16_v4_areg_64;
2304 Opcode = NVPTX::STV_i32_v4_areg_64;
2307 Opcode = NVPTX::STV_f32_v4_areg_64;
2313 switch (N->getOpcode()) {
2316 case NVPTXISD::StoreV2:
2317 switch (EltVT.getSimpleVT().SimpleTy) {
2321 Opcode = NVPTX::STV_i8_v2_areg;
2324 Opcode = NVPTX::STV_i16_v2_areg;
2327 Opcode = NVPTX::STV_i32_v2_areg;
2330 Opcode = NVPTX::STV_i64_v2_areg;
2333 Opcode = NVPTX::STV_f32_v2_areg;
2336 Opcode = NVPTX::STV_f64_v2_areg;
2340 case NVPTXISD::StoreV4:
2341 switch (EltVT.getSimpleVT().SimpleTy) {
2345 Opcode = NVPTX::STV_i8_v4_areg;
2348 Opcode = NVPTX::STV_i16_v4_areg;
2351 Opcode = NVPTX::STV_i32_v4_areg;
2354 Opcode = NVPTX::STV_f32_v4_areg;
2360 StOps.push_back(N2);
2363 StOps.push_back(Chain);
2365 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
// Re-attach the source node's MachineMemOperand to the machine node.
2367 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2368 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2369 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select a machine node for NVPTXISD::LoadParam / LoadParamV2 / LoadParamV4:
// a load of an incoming call parameter at a constant byte offset.
// Chooses a LoadParamMem* opcode from the memory VT (note both i1 and i8 map
// to the I8 variants — the scalar/v2/v4 switches each list I8 twice), builds a
// value-type list of 1/2/4 results plus chain and glue, and emits the machine
// node with { offset-imm, chain, glue } as operands.
// NOTE(review): some case labels / break / return lines are elided in this
// excerpt; comments describe only the statements shown.
2374 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2375 SDValue Chain = Node->getOperand(0);
2376 SDValue Offset = Node->getOperand(2);
2377 SDValue Flag = Node->getOperand(3);
2379 MemSDNode *Mem = cast<MemSDNode>(Node);
// Determine how many elements this param load produces (1, 2 or 4).
2382 switch (Node->getOpcode()) {
2385 case NVPTXISD::LoadParam:
2388 case NVPTXISD::LoadParamV2:
2391 case NVPTXISD::LoadParamV4:
2396 EVT EltVT = Node->getValueType(0);
2397 EVT MemVT = Mem->getMemoryVT();
// Scalar param load.
2405 switch (MemVT.getSimpleVT().SimpleTy) {
2409 Opc = NVPTX::LoadParamMemI8;
2412 Opc = NVPTX::LoadParamMemI8;
2415 Opc = NVPTX::LoadParamMemI16;
2418 Opc = NVPTX::LoadParamMemI32;
2421 Opc = NVPTX::LoadParamMemI64;
2424 Opc = NVPTX::LoadParamMemF32;
2427 Opc = NVPTX::LoadParamMemF64;
// Two-element param load.
2432 switch (MemVT.getSimpleVT().SimpleTy) {
2436 Opc = NVPTX::LoadParamMemV2I8;
2439 Opc = NVPTX::LoadParamMemV2I8;
2442 Opc = NVPTX::LoadParamMemV2I16;
2445 Opc = NVPTX::LoadParamMemV2I32;
2448 Opc = NVPTX::LoadParamMemV2I64;
2451 Opc = NVPTX::LoadParamMemV2F32;
2454 Opc = NVPTX::LoadParamMemV2F64;
// Four-element param load (no 64-bit element variant here).
2459 switch (MemVT.getSimpleVT().SimpleTy) {
2463 Opc = NVPTX::LoadParamMemV4I8;
2466 Opc = NVPTX::LoadParamMemV4I8;
2469 Opc = NVPTX::LoadParamMemV4I16;
2472 Opc = NVPTX::LoadParamMemV4I32;
2475 Opc = NVPTX::LoadParamMemV4F32;
// Result VT list: one EltVT per element, then chain (Other) and glue.
2483 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2484 } else if (VecSize == 2) {
2485 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2487 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2488 VTs = CurDAG->getVTList(EVTs);
// The byte offset must be a compile-time constant; it becomes an i32 imm.
2491 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2493 SmallVector<SDValue, 2> Ops;
2494 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2495 Ops.push_back(Chain);
2496 Ops.push_back(Flag);
2499 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
// Select a machine node for NVPTXISD::StoreRetval / StoreRetvalV2 /
// StoreRetvalV4: a store of 1/2/4 return-value elements at a constant byte
// offset into the return-value space.  Operands are the element values, the
// i32 offset immediate, and the chain.  i1 results use the 8-bit store
// variants (lowering has already widened them — see comment below).  The
// source node's MachineMemOperand is re-attached at the end.
// NOTE(review): some case labels / break / return lines are elided in this
// excerpt; comments describe only the statements shown.
2503 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2505 SDValue Chain = N->getOperand(0);
2506 SDValue Offset = N->getOperand(1);
2507 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2508 MemSDNode *Mem = cast<MemSDNode>(N);
2510 // How many elements do we have?
2511 unsigned NumElts = 1;
2512 switch (N->getOpcode()) {
2515 case NVPTXISD::StoreRetval:
2518 case NVPTXISD::StoreRetvalV2:
2521 case NVPTXISD::StoreRetvalV4:
// Element values start at operand 2; offset imm and chain follow them.
2526 // Build vector of operands
2527 SmallVector<SDValue, 6> Ops;
2528 for (unsigned i = 0; i < NumElts; ++i)
2529 Ops.push_back(N->getOperand(i + 2));
2530 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2531 Ops.push_back(Chain);
2533 // Determine target opcode
2534 // If we have an i1, use an 8-bit store. The lowering code in
2535 // NVPTXISelLowering will have already emitted an upcast.
2536 unsigned Opcode = 0;
// Scalar retval store (i1 and i8 both select StoreRetvalI8).
2541 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2545 Opcode = NVPTX::StoreRetvalI8;
2548 Opcode = NVPTX::StoreRetvalI8;
2551 Opcode = NVPTX::StoreRetvalI16;
2554 Opcode = NVPTX::StoreRetvalI32;
2557 Opcode = NVPTX::StoreRetvalI64;
2560 Opcode = NVPTX::StoreRetvalF32;
2563 Opcode = NVPTX::StoreRetvalF64;
// Two-element retval store.
2568 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2572 Opcode = NVPTX::StoreRetvalV2I8;
2575 Opcode = NVPTX::StoreRetvalV2I8;
2578 Opcode = NVPTX::StoreRetvalV2I16;
2581 Opcode = NVPTX::StoreRetvalV2I32;
2584 Opcode = NVPTX::StoreRetvalV2I64;
2587 Opcode = NVPTX::StoreRetvalV2F32;
2590 Opcode = NVPTX::StoreRetvalV2F64;
// Four-element retval store (no 64-bit element variant).
2595 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2599 Opcode = NVPTX::StoreRetvalV4I8;
2602 Opcode = NVPTX::StoreRetvalV4I8;
2605 Opcode = NVPTX::StoreRetvalV4I16;
2608 Opcode = NVPTX::StoreRetvalV4I32;
2611 Opcode = NVPTX::StoreRetvalV4F32;
// Emit the machine node and re-attach the memory operand.
2618 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2619 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2620 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2621 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select a machine node for the StoreParam family: storing 1/2/4 values into
// an outgoing call-parameter slot (param index + constant byte offset).
// Operands are the element values, the i32 param index, the i32 offset,
// the chain, and the inbound glue.  The StoreParamU32/StoreParamS32 variants
// first emit an explicit zero-/sign-extending CVT from i16 to i32 and store
// the converted value with StoreParamI32.  The source node's
// MachineMemOperand is re-attached at the end.
// NOTE(review): some case labels / break / return lines are elided in this
// excerpt; comments describe only the statements shown.
2626 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2628 SDValue Chain = N->getOperand(0);
2629 SDValue Param = N->getOperand(1);
2630 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2631 SDValue Offset = N->getOperand(2);
2632 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2633 MemSDNode *Mem = cast<MemSDNode>(N);
// Glue is always the last operand, regardless of element count.
2634 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2636 // How many elements do we have?
2637 unsigned NumElts = 1;
2638 switch (N->getOpcode()) {
2641 case NVPTXISD::StoreParamU32:
2642 case NVPTXISD::StoreParamS32:
2643 case NVPTXISD::StoreParam:
2646 case NVPTXISD::StoreParamV2:
2649 case NVPTXISD::StoreParamV4:
// Element values start at operand 3; param/offset imms, chain, glue follow.
2654 // Build vector of operands
2655 SmallVector<SDValue, 8> Ops;
2656 for (unsigned i = 0; i < NumElts; ++i)
2657 Ops.push_back(N->getOperand(i + 3));
2658 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2659 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2660 Ops.push_back(Chain);
2661 Ops.push_back(Flag);
2663 // Determine target opcode
2664 // If we have an i1, use an 8-bit store. The lowering code in
2665 // NVPTXISelLowering will have already emitted an upcast.
2666 unsigned Opcode = 0;
2667 switch (N->getOpcode()) {
// Scalar param store (i1 and i8 both select StoreParamI8).
2673 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2677 Opcode = NVPTX::StoreParamI8;
2680 Opcode = NVPTX::StoreParamI8;
2683 Opcode = NVPTX::StoreParamI16;
2686 Opcode = NVPTX::StoreParamI32;
2689 Opcode = NVPTX::StoreParamI64;
2692 Opcode = NVPTX::StoreParamF32;
2695 Opcode = NVPTX::StoreParamF64;
// Two-element param store.
2700 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2704 Opcode = NVPTX::StoreParamV2I8;
2707 Opcode = NVPTX::StoreParamV2I8;
2710 Opcode = NVPTX::StoreParamV2I16;
2713 Opcode = NVPTX::StoreParamV2I32;
2716 Opcode = NVPTX::StoreParamV2I64;
2719 Opcode = NVPTX::StoreParamV2F32;
2722 Opcode = NVPTX::StoreParamV2F64;
// Four-element param store (no 64-bit element variant).
2727 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2731 Opcode = NVPTX::StoreParamV4I8;
2734 Opcode = NVPTX::StoreParamV4I8;
2737 Opcode = NVPTX::StoreParamV4I16;
2740 Opcode = NVPTX::StoreParamV4I32;
2743 Opcode = NVPTX::StoreParamV4F32;
2749 // Special case: if we have a sign-extend/zero-extend node, insert the
2750 // conversion instruction first, and use that as the value operand to
2751 // the selected StoreParam node.
// Zero-extend i16 -> i32 via CVT_u32_u16, then store as i32.
2752 case NVPTXISD::StoreParamU32: {
2753 Opcode = NVPTX::StoreParamI32;
2754 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2756 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2757 MVT::i32, Ops[0], CvtNone);
2758 Ops[0] = SDValue(Cvt, 0);
// Sign-extend i16 -> i32 via CVT_s32_s16, then store as i32.
2761 case NVPTXISD::StoreParamS32: {
2762 Opcode = NVPTX::StoreParamI32;
2763 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2765 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2766 MVT::i32, Ops[0], CvtNone);
2767 Ops[0] = SDValue(Cvt, 0);
// Emit with chain+glue results and re-attach the memory operand.
2772 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2774 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
2775 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2776 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2777 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select a machine node for an NVPTX texture-fetch node (NVPTXISD::Tex*).
// This is a pure 1:1 opcode mapping: each ISD variant encodes
//   dimensionality (1D / 1D array / 2D / 2D array / 3D),
//   result type (F32 or I32),
//   coordinate type (I32 or F32),
//   and optional LOD/gradient sampling (_LEVEL / _GRAD suffixes).
// Operands of the machine node are: texture handle, sampler handle, the
// coordinate operands copied from index 3 onward, then the chain.
// NOTE(review): break statements between cases are elided in this excerpt;
// comments describe only the statements shown.
2782 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
2783 SDValue Chain = N->getOperand(0);
2784 SDValue TexRef = N->getOperand(1);
2785 SDValue SampRef = N->getOperand(2);
2786 SDNode *Ret = nullptr;
2788 SmallVector<SDValue, 8> Ops;
// Unhandled opcodes bail out so other selectors can try.
2790 switch (N->getOpcode()) {
2791 default: return nullptr;
// 1D textures.
2792 case NVPTXISD::Tex1DFloatI32:
2793 Opc = NVPTX::TEX_1D_F32_I32;
2795 case NVPTXISD::Tex1DFloatFloat:
2796 Opc = NVPTX::TEX_1D_F32_F32;
2798 case NVPTXISD::Tex1DFloatFloatLevel:
2799 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2801 case NVPTXISD::Tex1DFloatFloatGrad:
2802 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2804 case NVPTXISD::Tex1DI32I32:
2805 Opc = NVPTX::TEX_1D_I32_I32;
2807 case NVPTXISD::Tex1DI32Float:
2808 Opc = NVPTX::TEX_1D_I32_F32;
2810 case NVPTXISD::Tex1DI32FloatLevel:
2811 Opc = NVPTX::TEX_1D_I32_F32_LEVEL;
2813 case NVPTXISD::Tex1DI32FloatGrad:
2814 Opc = NVPTX::TEX_1D_I32_F32_GRAD;
// 1D array textures.
2816 case NVPTXISD::Tex1DArrayFloatI32:
2817 Opc = NVPTX::TEX_1D_ARRAY_F32_I32;
2819 case NVPTXISD::Tex1DArrayFloatFloat:
2820 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2822 case NVPTXISD::Tex1DArrayFloatFloatLevel:
2823 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2825 case NVPTXISD::Tex1DArrayFloatFloatGrad:
2826 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2828 case NVPTXISD::Tex1DArrayI32I32:
2829 Opc = NVPTX::TEX_1D_ARRAY_I32_I32;
2831 case NVPTXISD::Tex1DArrayI32Float:
2832 Opc = NVPTX::TEX_1D_ARRAY_I32_F32;
2834 case NVPTXISD::Tex1DArrayI32FloatLevel:
2835 Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL;
2837 case NVPTXISD::Tex1DArrayI32FloatGrad:
2838 Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD;
// 2D textures.
2840 case NVPTXISD::Tex2DFloatI32:
2841 Opc = NVPTX::TEX_2D_F32_I32;
2843 case NVPTXISD::Tex2DFloatFloat:
2844 Opc = NVPTX::TEX_2D_F32_F32;
2846 case NVPTXISD::Tex2DFloatFloatLevel:
2847 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2849 case NVPTXISD::Tex2DFloatFloatGrad:
2850 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2852 case NVPTXISD::Tex2DI32I32:
2853 Opc = NVPTX::TEX_2D_I32_I32;
2855 case NVPTXISD::Tex2DI32Float:
2856 Opc = NVPTX::TEX_2D_I32_F32;
2858 case NVPTXISD::Tex2DI32FloatLevel:
2859 Opc = NVPTX::TEX_2D_I32_F32_LEVEL;
2861 case NVPTXISD::Tex2DI32FloatGrad:
2862 Opc = NVPTX::TEX_2D_I32_F32_GRAD;
// 2D array textures.
2864 case NVPTXISD::Tex2DArrayFloatI32:
2865 Opc = NVPTX::TEX_2D_ARRAY_F32_I32;
2867 case NVPTXISD::Tex2DArrayFloatFloat:
2868 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2870 case NVPTXISD::Tex2DArrayFloatFloatLevel:
2871 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2873 case NVPTXISD::Tex2DArrayFloatFloatGrad:
2874 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2876 case NVPTXISD::Tex2DArrayI32I32:
2877 Opc = NVPTX::TEX_2D_ARRAY_I32_I32;
2879 case NVPTXISD::Tex2DArrayI32Float:
2880 Opc = NVPTX::TEX_2D_ARRAY_I32_F32;
2882 case NVPTXISD::Tex2DArrayI32FloatLevel:
2883 Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL;
2885 case NVPTXISD::Tex2DArrayI32FloatGrad:
2886 Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD;
// 3D textures.
2888 case NVPTXISD::Tex3DFloatI32:
2889 Opc = NVPTX::TEX_3D_F32_I32;
2891 case NVPTXISD::Tex3DFloatFloat:
2892 Opc = NVPTX::TEX_3D_F32_F32;
2894 case NVPTXISD::Tex3DFloatFloatLevel:
2895 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2897 case NVPTXISD::Tex3DFloatFloatGrad:
2898 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2900 case NVPTXISD::Tex3DI32I32:
2901 Opc = NVPTX::TEX_3D_I32_I32;
2903 case NVPTXISD::Tex3DI32Float:
2904 Opc = NVPTX::TEX_3D_I32_F32;
2906 case NVPTXISD::Tex3DI32FloatLevel:
2907 Opc = NVPTX::TEX_3D_I32_F32_LEVEL;
2909 case NVPTXISD::Tex3DI32FloatGrad:
2910 Opc = NVPTX::TEX_3D_I32_F32_GRAD;
// Operand order: tex handle, sampler handle, coordinates, chain.
2914 Ops.push_back(TexRef);
2915 Ops.push_back(SampRef);
2917 // Copy over indices
2918 for (unsigned i = 3; i < N->getNumOperands(); ++i) {
2919 Ops.push_back(N->getOperand(i));
2922 Ops.push_back(Chain);
// Keep the intrinsic node's own result-type list on the machine node.
2923 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
2927 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
2928 SDValue Chain = N->getOperand(0);
2929 SDValue TexHandle = N->getOperand(1);
2930 SDNode *Ret = nullptr;
2932 SmallVector<SDValue, 8> Ops;
2933 switch (N->getOpcode()) {
2934 default: return nullptr;
2935 case NVPTXISD::Suld1DI8Trap:
2936 Opc = NVPTX::SULD_1D_I8_TRAP;
2937 Ops.push_back(TexHandle);
2938 Ops.push_back(N->getOperand(2));
2939 Ops.push_back(Chain);
2941 case NVPTXISD::Suld1DI16Trap:
2942 Opc = NVPTX::SULD_1D_I16_TRAP;
2943 Ops.push_back(TexHandle);
2944 Ops.push_back(N->getOperand(2));
2945 Ops.push_back(Chain);
2947 case NVPTXISD::Suld1DI32Trap:
2948 Opc = NVPTX::SULD_1D_I32_TRAP;
2949 Ops.push_back(TexHandle);
2950 Ops.push_back(N->getOperand(2));
2951 Ops.push_back(Chain);
2953 case NVPTXISD::Suld1DV2I8Trap:
2954 Opc = NVPTX::SULD_1D_V2I8_TRAP;
2955 Ops.push_back(TexHandle);
2956 Ops.push_back(N->getOperand(2));
2957 Ops.push_back(Chain);
2959 case NVPTXISD::Suld1DV2I16Trap:
2960 Opc = NVPTX::SULD_1D_V2I16_TRAP;
2961 Ops.push_back(TexHandle);
2962 Ops.push_back(N->getOperand(2));
2963 Ops.push_back(Chain);
2965 case NVPTXISD::Suld1DV2I32Trap:
2966 Opc = NVPTX::SULD_1D_V2I32_TRAP;
2967 Ops.push_back(TexHandle);
2968 Ops.push_back(N->getOperand(2));
2969 Ops.push_back(Chain);
2971 case NVPTXISD::Suld1DV4I8Trap:
2972 Opc = NVPTX::SULD_1D_V4I8_TRAP;
2973 Ops.push_back(TexHandle);
2974 Ops.push_back(N->getOperand(2));
2975 Ops.push_back(Chain);
2977 case NVPTXISD::Suld1DV4I16Trap:
2978 Opc = NVPTX::SULD_1D_V4I16_TRAP;
2979 Ops.push_back(TexHandle);
2980 Ops.push_back(N->getOperand(2));
2981 Ops.push_back(Chain);
2983 case NVPTXISD::Suld1DV4I32Trap:
2984 Opc = NVPTX::SULD_1D_V4I32_TRAP;
2985 Ops.push_back(TexHandle);
2986 Ops.push_back(N->getOperand(2));
2987 Ops.push_back(Chain);
2989 case NVPTXISD::Suld1DArrayI8Trap:
2990 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
2991 Ops.push_back(TexHandle);
2992 Ops.push_back(N->getOperand(2));
2993 Ops.push_back(N->getOperand(3));
2994 Ops.push_back(Chain);
2996 case NVPTXISD::Suld1DArrayI16Trap:
2997 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
2998 Ops.push_back(TexHandle);
2999 Ops.push_back(N->getOperand(2));
3000 Ops.push_back(N->getOperand(3));
3001 Ops.push_back(Chain);
3003 case NVPTXISD::Suld1DArrayI32Trap:
3004 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
3005 Ops.push_back(TexHandle);
3006 Ops.push_back(N->getOperand(2));
3007 Ops.push_back(N->getOperand(3));
3008 Ops.push_back(Chain);
3010 case NVPTXISD::Suld1DArrayV2I8Trap:
3011 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
3012 Ops.push_back(TexHandle);
3013 Ops.push_back(N->getOperand(2));
3014 Ops.push_back(N->getOperand(3));
3015 Ops.push_back(Chain);
3017 case NVPTXISD::Suld1DArrayV2I16Trap:
3018 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
3019 Ops.push_back(TexHandle);
3020 Ops.push_back(N->getOperand(2));
3021 Ops.push_back(N->getOperand(3));
3022 Ops.push_back(Chain);
3024 case NVPTXISD::Suld1DArrayV2I32Trap:
3025 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
3026 Ops.push_back(TexHandle);
3027 Ops.push_back(N->getOperand(2));
3028 Ops.push_back(N->getOperand(3));
3029 Ops.push_back(Chain);
3031 case NVPTXISD::Suld1DArrayV4I8Trap:
3032 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
3033 Ops.push_back(TexHandle);
3034 Ops.push_back(N->getOperand(2));
3035 Ops.push_back(N->getOperand(3));
3036 Ops.push_back(Chain);
3038 case NVPTXISD::Suld1DArrayV4I16Trap:
3039 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
3040 Ops.push_back(TexHandle);
3041 Ops.push_back(N->getOperand(2));
3042 Ops.push_back(N->getOperand(3));
3043 Ops.push_back(Chain);
3045 case NVPTXISD::Suld1DArrayV4I32Trap:
3046 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
3047 Ops.push_back(TexHandle);
3048 Ops.push_back(N->getOperand(2));
3049 Ops.push_back(N->getOperand(3));
3050 Ops.push_back(Chain);
3052 case NVPTXISD::Suld2DI8Trap:
3053 Opc = NVPTX::SULD_2D_I8_TRAP;
3054 Ops.push_back(TexHandle);
3055 Ops.push_back(N->getOperand(2));
3056 Ops.push_back(N->getOperand(3));
3057 Ops.push_back(Chain);
3059 case NVPTXISD::Suld2DI16Trap:
3060 Opc = NVPTX::SULD_2D_I16_TRAP;
3061 Ops.push_back(TexHandle);
3062 Ops.push_back(N->getOperand(2));
3063 Ops.push_back(N->getOperand(3));
3064 Ops.push_back(Chain);
3066 case NVPTXISD::Suld2DI32Trap:
3067 Opc = NVPTX::SULD_2D_I32_TRAP;
3068 Ops.push_back(TexHandle);
3069 Ops.push_back(N->getOperand(2));
3070 Ops.push_back(N->getOperand(3));
3071 Ops.push_back(Chain);
3073 case NVPTXISD::Suld2DV2I8Trap:
3074 Opc = NVPTX::SULD_2D_V2I8_TRAP;
3075 Ops.push_back(TexHandle);
3076 Ops.push_back(N->getOperand(2));
3077 Ops.push_back(N->getOperand(3));
3078 Ops.push_back(Chain);
3080 case NVPTXISD::Suld2DV2I16Trap:
3081 Opc = NVPTX::SULD_2D_V2I16_TRAP;
3082 Ops.push_back(TexHandle);
3083 Ops.push_back(N->getOperand(2));
3084 Ops.push_back(N->getOperand(3));
3085 Ops.push_back(Chain);
3087 case NVPTXISD::Suld2DV2I32Trap:
3088 Opc = NVPTX::SULD_2D_V2I32_TRAP;
3089 Ops.push_back(TexHandle);
3090 Ops.push_back(N->getOperand(2));
3091 Ops.push_back(N->getOperand(3));
3092 Ops.push_back(Chain);
3094 case NVPTXISD::Suld2DV4I8Trap:
3095 Opc = NVPTX::SULD_2D_V4I8_TRAP;
3096 Ops.push_back(TexHandle);
3097 Ops.push_back(N->getOperand(2));
3098 Ops.push_back(N->getOperand(3));
3099 Ops.push_back(Chain);
3101 case NVPTXISD::Suld2DV4I16Trap:
3102 Opc = NVPTX::SULD_2D_V4I16_TRAP;
3103 Ops.push_back(TexHandle);
3104 Ops.push_back(N->getOperand(2));
3105 Ops.push_back(N->getOperand(3));
3106 Ops.push_back(Chain);
3108 case NVPTXISD::Suld2DV4I32Trap:
3109 Opc = NVPTX::SULD_2D_V4I32_TRAP;
3110 Ops.push_back(TexHandle);
3111 Ops.push_back(N->getOperand(2));
3112 Ops.push_back(N->getOperand(3));
3113 Ops.push_back(Chain);
3115 case NVPTXISD::Suld2DArrayI8Trap:
3116 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
3117 Ops.push_back(TexHandle);
3118 Ops.push_back(N->getOperand(2));
3119 Ops.push_back(N->getOperand(3));
3120 Ops.push_back(N->getOperand(4));
3121 Ops.push_back(Chain);
3123 case NVPTXISD::Suld2DArrayI16Trap:
3124 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
3125 Ops.push_back(TexHandle);
3126 Ops.push_back(N->getOperand(2));
3127 Ops.push_back(N->getOperand(3));
3128 Ops.push_back(N->getOperand(4));
3129 Ops.push_back(Chain);
3131 case NVPTXISD::Suld2DArrayI32Trap:
3132 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
3133 Ops.push_back(TexHandle);
3134 Ops.push_back(N->getOperand(2));
3135 Ops.push_back(N->getOperand(3));
3136 Ops.push_back(N->getOperand(4));
3137 Ops.push_back(Chain);
3139 case NVPTXISD::Suld2DArrayV2I8Trap:
3140 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
3141 Ops.push_back(TexHandle);
3142 Ops.push_back(N->getOperand(2));
3143 Ops.push_back(N->getOperand(3));
3144 Ops.push_back(N->getOperand(4));
3145 Ops.push_back(Chain);
3147 case NVPTXISD::Suld2DArrayV2I16Trap:
3148 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
3149 Ops.push_back(TexHandle);
3150 Ops.push_back(N->getOperand(2));
3151 Ops.push_back(N->getOperand(3));
3152 Ops.push_back(N->getOperand(4));
3153 Ops.push_back(Chain);
3155 case NVPTXISD::Suld2DArrayV2I32Trap:
3156 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
3157 Ops.push_back(TexHandle);
3158 Ops.push_back(N->getOperand(2));
3159 Ops.push_back(N->getOperand(3));
3160 Ops.push_back(N->getOperand(4));
3161 Ops.push_back(Chain);
3163 case NVPTXISD::Suld2DArrayV4I8Trap:
3164 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
3165 Ops.push_back(TexHandle);
3166 Ops.push_back(N->getOperand(2));
3167 Ops.push_back(N->getOperand(3));
3168 Ops.push_back(N->getOperand(4));
3169 Ops.push_back(Chain);
3171 case NVPTXISD::Suld2DArrayV4I16Trap:
3172 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
3173 Ops.push_back(TexHandle);
3174 Ops.push_back(N->getOperand(2));
3175 Ops.push_back(N->getOperand(3));
3176 Ops.push_back(N->getOperand(4));
3177 Ops.push_back(Chain);
3179 case NVPTXISD::Suld2DArrayV4I32Trap:
3180 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
3181 Ops.push_back(TexHandle);
3182 Ops.push_back(N->getOperand(2));
3183 Ops.push_back(N->getOperand(3));
3184 Ops.push_back(N->getOperand(4));
3185 Ops.push_back(Chain);
3187 case NVPTXISD::Suld3DI8Trap:
3188 Opc = NVPTX::SULD_3D_I8_TRAP;
3189 Ops.push_back(TexHandle);
3190 Ops.push_back(N->getOperand(2));
3191 Ops.push_back(N->getOperand(3));
3192 Ops.push_back(N->getOperand(4));
3193 Ops.push_back(Chain);
3195 case NVPTXISD::Suld3DI16Trap:
3196 Opc = NVPTX::SULD_3D_I16_TRAP;
3197 Ops.push_back(TexHandle);
3198 Ops.push_back(N->getOperand(2));
3199 Ops.push_back(N->getOperand(3));
3200 Ops.push_back(N->getOperand(4));
3201 Ops.push_back(Chain);
3203 case NVPTXISD::Suld3DI32Trap:
3204 Opc = NVPTX::SULD_3D_I32_TRAP;
3205 Ops.push_back(TexHandle);
3206 Ops.push_back(N->getOperand(2));
3207 Ops.push_back(N->getOperand(3));
3208 Ops.push_back(N->getOperand(4));
3209 Ops.push_back(Chain);
3211 case NVPTXISD::Suld3DV2I8Trap:
3212 Opc = NVPTX::SULD_3D_V2I8_TRAP;
3213 Ops.push_back(TexHandle);
3214 Ops.push_back(N->getOperand(2));
3215 Ops.push_back(N->getOperand(3));
3216 Ops.push_back(N->getOperand(4));
3217 Ops.push_back(Chain);
3219 case NVPTXISD::Suld3DV2I16Trap:
3220 Opc = NVPTX::SULD_3D_V2I16_TRAP;
3221 Ops.push_back(TexHandle);
3222 Ops.push_back(N->getOperand(2));
3223 Ops.push_back(N->getOperand(3));
3224 Ops.push_back(N->getOperand(4));
3225 Ops.push_back(Chain);
3227 case NVPTXISD::Suld3DV2I32Trap:
3228 Opc = NVPTX::SULD_3D_V2I32_TRAP;
3229 Ops.push_back(TexHandle);
3230 Ops.push_back(N->getOperand(2));
3231 Ops.push_back(N->getOperand(3));
3232 Ops.push_back(N->getOperand(4));
3233 Ops.push_back(Chain);
3235 case NVPTXISD::Suld3DV4I8Trap:
3236 Opc = NVPTX::SULD_3D_V4I8_TRAP;
3237 Ops.push_back(TexHandle);
3238 Ops.push_back(N->getOperand(2));
3239 Ops.push_back(N->getOperand(3));
3240 Ops.push_back(N->getOperand(4));
3241 Ops.push_back(Chain);
3243 case NVPTXISD::Suld3DV4I16Trap:
3244 Opc = NVPTX::SULD_3D_V4I16_TRAP;
3245 Ops.push_back(TexHandle);
3246 Ops.push_back(N->getOperand(2));
3247 Ops.push_back(N->getOperand(3));
3248 Ops.push_back(N->getOperand(4));
3249 Ops.push_back(Chain);
3251 case NVPTXISD::Suld3DV4I32Trap:
3252 Opc = NVPTX::SULD_3D_V4I32_TRAP;
3253 Ops.push_back(TexHandle);
3254 Ops.push_back(N->getOperand(2));
3255 Ops.push_back(N->getOperand(3));
3256 Ops.push_back(N->getOperand(4));
3257 Ops.push_back(Chain);
3260 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3264 /// SelectBFE - Look for instruction sequences that can be made more efficient
3265 /// by using the 'bfe' (bit-field extract) PTX instruction
// NOTE(review): this listing elides several original lines (among them the
// declarations of Val/Start/Len/NumBits/NumZeros, the "return nullptr"
// bail-out paths, and some closing braces). Comments below only describe what
// the visible lines establish; verify elided control flow against the full
// file before relying on it.
//
// Two shapes are recognized and turned into a single BFE machine node:
//   1. (and (srl val, start), mask)  -- mask must be a low-bits run
//   2. (srl/sra (and val, mask), amt) and (srl/sra (shl val, NN), MM)
3266 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
3267 SDValue LHS = N->getOperand(0);
3268 SDValue RHS = N->getOperand(1);
3272 bool IsSigned = false;
// --- Case 1: the root node is an AND -------------------------------------
3274 if (N->getOpcode() == ISD::AND) {
3275 // Canonicalize the operands
3276 // We want 'and %val, %mask'
3277 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3278 std::swap(LHS, RHS);
3281 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3283 // We need a constant mask on the RHS of the AND
3287 // Extract the mask bits
3288 uint64_t MaskVal = Mask->getZExtValue();
3289 if (!isMask_64(MaskVal)) {
3290 // We *could* handle shifted masks here, but doing so would require an
3291 // 'and' operation to fix up the low-order bits so we would trade
3292 // shr+and for bfe+and, which has the same throughput
3296 // How many bits are in our mask?
3297 uint64_t NumBits = CountTrailingOnes_64(MaskVal);
3298 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
3300 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3301 // We have a 'srl/and' pair, extract the effective start bit and length
3302 Val = LHS.getNode()->getOperand(0);
3303 Start = LHS.getNode()->getOperand(1);
3304 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3306 uint64_t StartVal = StartConst->getZExtValue();
3307 // How many "good" bits do we have left? "good" is defined here as bits
3308 // that exist in the original value, not shifted in.
3309 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
3310 if (NumBits > GoodBits) {
3311 // Do not handle the case where bits have been shifted in. In theory
3312 // we could handle this, but the cost is likely higher than just
3313 // emitting the srl/and pair.
3316 Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
3318 // Do not handle the case where the shift amount (can be zero if no srl
3319 // was found) is not constant. We could handle this case, but it would
3320 // require run-time logic that would be more expensive than just
3321 // emitting the srl/and pair.
3325 // Do not handle the case where the LHS of the and is not a shift. While
3326 // it would be trivial to handle this case, it would just transform
3327 // 'and' -> 'bfe', but 'and' has higher-throughput.
// --- Case 2: the root node is a right shift (logical or arithmetic) ------
3330 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
// Sub-case 2a: shift of an AND -- (srl/sra (and val, mask), amt)
3331 if (LHS->getOpcode() == ISD::AND) {
3332 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3334 // Shift amount must be constant
3338 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3340 SDValue AndLHS = LHS->getOperand(0);
3341 SDValue AndRHS = LHS->getOperand(1);
3343 // Canonicalize the AND to have the mask on the RHS
3344 if (isa<ConstantSDNode>(AndLHS)) {
3345 std::swap(AndLHS, AndRHS);
3348 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3350 // Mask must be constant
3354 uint64_t MaskVal = MaskCnst->getZExtValue();
3357 if (isMask_64(MaskVal)) {
3359 // The number of bits in the result bitfield will be the number of
3360 // trailing ones (the AND) minus the number of bits we shift off
3361 NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
3362 } else if (isShiftedMask_64(MaskVal)) {
3363 NumZeros = countTrailingZeros(MaskVal);
3364 unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
3365 // The number of bits in the result bitfield will be the number of
3366 // trailing zeros plus the number of set bits in the mask minus the
3367 // number of bits we shift off
3368 NumBits = NumZeros + NumOnes - ShiftAmt;
3370 // This is not a mask we can handle
3374 if (ShiftAmt < NumZeros) {
3375 // Handling this case would require extra logic that would make this
3376 // transformation non-profitable
3381 Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
3382 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
// Sub-case 2b: shift of a SHL -- sign/zero extension of a bitfield
3383 } else if (LHS->getOpcode() == ISD::SHL) {
3384 // Here, we have a pattern like:
3386 // (sra (shl val, NN), MM)
3388 // (srl (shl val, NN), MM)
3390 // If MM >= NN, we can efficiently optimize this with bfe
3391 Val = LHS->getOperand(0);
3393 SDValue ShlRHS = LHS->getOperand(1);
3394 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3396 // Shift amount must be constant
3399 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3401 SDValue ShrRHS = RHS;
3402 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3404 // Shift amount must be constant
3407 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3409 // To avoid extra codegen and be profitable, we need Outer >= Inner
3410 if (OuterShiftAmt < InnerShiftAmt) {
3414 // If the outer shift is more than the type size, we have no bitfield to
3415 // extract (since we also check that the inner shift is <= the outer shift
3416 // then this also implies that the inner shift is < the type size)
3417 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
// Start = distance the field sits above bit 0 after undoing the inner shift;
// Len = remaining width of the value after the outer shift.
3422 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
3424 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
3425 OuterShiftAmt, MVT::i32);
3427 if (N->getOpcode() == ISD::SRA) {
3428 // If we have a arithmetic right shift, we need to use the signed bfe
// --- Opcode selection ----------------------------------------------------
3443 // For the BFE operations we form here from "and" and "srl", always use the
3444 // unsigned variants.
3445 if (Val.getValueType() == MVT::i32) {
// (the IsSigned test guarding the signed/unsigned choice is elided from
// this listing -- confirm against the full file)
3447 Opc = NVPTX::BFE_S32rii;
3449 Opc = NVPTX::BFE_U32rii;
3451 } else if (Val.getValueType() == MVT::i64) {
3453 Opc = NVPTX::BFE_S64rii;
3455 Opc = NVPTX::BFE_U64rii;
3458 // We cannot handle this type
// NOTE(review): the construction of Ops and the final return statement are
// elided from this listing.
3467 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3472 // SelectDirectAddr - Match a direct address for DAG.
3473 // A direct address could be a globaladdress or externalsymbol.
// Three forms are matched: a TargetGlobalAddress/TargetExternalSymbol node,
// an NVPTXISD::Wrapper around an address, or nvvm.ptr.gen.to.param applied
// to a MoveParam (handled by recursing on the MoveParam's operand).
// NOTE(review): the success-path assignments/returns for the first two forms
// are elided from this listing -- verify against the full file.
3474 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3475 // Return true if TGA or ES.
3476 if (N.getOpcode() == ISD::TargetGlobalAddress ||
3477 N.getOpcode() == ISD::TargetExternalSymbol) {
3481 if (N.getOpcode() == NVPTXISD::Wrapper) {
3482 Address = N.getOperand(0);
// Peek through nvvm.ptr.gen.to.param(MoveParam(x)) and try to match x.
3485 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3486 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
3487 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
3488 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
3489 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
// Match a symbol+immediate address of the form (add directaddr, const).
// On success Base is the direct address (via SelectDirectAddr) and Offset is
// the constant rematerialized as a target constant of type 'mvt'.
// NOTE(review): the "return true" / fall-through "return false" lines are
// elided from this listing.
3495 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3496 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3497 if (Addr.getOpcode() == ISD::ADD) {
3498 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3499 SDValue base = Addr.getOperand(0);
3500 if (SelectDirectAddr(base, Base)) {
3501 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// symbol+offset addressing mode, 32-bit pointer width.
3510 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3511 SDValue &Base, SDValue &Offset) {
3512 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
// symbol+offset addressing mode, 64-bit pointer width.
3516 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3517 SDValue &Base, SDValue &Offset) {
3518 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
// Match a register+immediate address. Handles: a bare frame index (offset 0);
// rejects direct symbols (those are direct calls, not reg+imm); and
// (add base, const) where base may itself be a frame index.
// NOTE(review): several "return true/false" lines and an "else" branch are
// elided from this listing -- e.g. line 3543 is presumably the else-arm of
// the frame-index test at 3538; confirm against the full file.
3522 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3523 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3524 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3525 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3526 Offset = CurDAG->getTargetConstant(0, mvt);
3529 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3530 Addr.getOpcode() == ISD::TargetGlobalAddress)
3531 return false; // direct calls.
3533 if (Addr.getOpcode() == ISD::ADD) {
// A (add symbol, x) form belongs to the symbol+imm selector, not reg+imm.
3534 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3537 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3538 if (FrameIndexSDNode *FIN =
3539 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3540 // Constant offset from frame ref.
3541 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3543 Base = Addr.getOperand(0);
3544 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// register+offset addressing mode, 32-bit pointer width.
3552 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3553 SDValue &Base, SDValue &Offset) {
3554 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
// register+offset addressing mode, 64-bit pointer width.
3558 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3559 SDValue &Base, SDValue &Offset) {
3560 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
// Return true if memory node N accesses LLVM IR address space 'spN', judged
// from the pointer operand recorded in its MachineMemOperand.
// NOTE(review): the early-return bodies after the getPseudoValue() checks and
// the null-Src guard before line 3580 are elided from this listing.
3563 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3564 unsigned int spN) const {
3565 const Value *Src = nullptr;
3566 // Even though MemIntrinsicSDNode is a subclass of MemSDNode,
3567 // the classof() for MemSDNode does not include MemIntrinsicSDNode
3568 // (See SelectionDAGNodes.h). So we need to check for both.
3569 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3570 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3572 Src = mN->getMemOperand()->getValue();
3573 } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
3574 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3576 Src = mN->getMemOperand()->getValue();
// Compare the pointer's declared address space with the requested one.
3580 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
3581 return (PT->getAddressSpace() == spN);
3585 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3586 /// inline asm expressions.
3587 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3588 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
3590 switch (ConstraintCode) {
3594 if (SelectDirectAddr(Op, Op0)) {
3595 OutOps.push_back(Op0);
3596 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
3599 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3600 OutOps.push_back(Op0);
3601 OutOps.push_back(Op1);