#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
"nvptx-sched4reg",
cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
+static cl::opt<unsigned>
+FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
+ cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
+ " 1: do it 2: do it aggressively"),
+ cl::init(2));
+
static bool IsPTXVectorType(MVT VT) {
switch (VT.SimpleTy) {
default:
}
}
-static uint64_t GCD( int a, int b)
-{
- if (a < b) std::swap(a,b);
- while (b > 0) {
- uint64_t c = b;
- b = a % b;
- a = c;
- }
- return a;
-}
-
/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors
/// into their primitive components.
}
// NVPTXTargetLowering Constructor.
-NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
- : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
+NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
+ : TargetLowering(TM), nvTM(&TM),
nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
// always lower memset, memcpy, and memmove intrinsics to load/store
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
// Turn FP extload into load/fextend
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
// Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PTX does not support load / store predicate registers
setOperationAction(ISD::LOAD, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setTruncStoreAction(MVT::i64, MVT::i1, Expand);
- setTruncStoreAction(MVT::i32, MVT::i1, Expand);
- setTruncStoreAction(MVT::i16, MVT::i1, Expand);
- setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setTruncStoreAction(VT, MVT::i1, Expand);
+ }
// This is legal in NVPTX
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
setOperationAction(ISD::ADDE, MVT::i64, Expand);
// Register custom handling for vector loads/stores
- for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE;
- ++i) {
- MVT VT = (MVT::SimpleValueType) i;
+ for (MVT VT : MVT::vector_valuetypes()) {
if (IsPTXVectorType(VT)) {
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Legal);
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+ // PTX does not directly support SELP of i1, so promote to i32 first
+ setOperationAction(ISD::SELECT, MVT::i1, Custom);
+
// We have some custom DAG combine patterns for these nodes
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::AND);
return "NVPTXISD::MUL_WIDE_SIGNED";
case NVPTXISD::MUL_WIDE_UNSIGNED:
return "NVPTXISD::MUL_WIDE_UNSIGNED";
- case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32";
+ case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32";
case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
case NVPTXISD::Tex1DFloatFloatLevel:
return "NVPTXISD::Tex1DFloatFloatLevel";
case NVPTXISD::Tex1DFloatFloatGrad:
return "NVPTXISD::Tex1DFloatFloatGrad";
- case NVPTXISD::Tex1DI32I32: return "NVPTXISD::Tex1DI32I32";
- case NVPTXISD::Tex1DI32Float: return "NVPTXISD::Tex1DI32Float";
- case NVPTXISD::Tex1DI32FloatLevel:
- return "NVPTXISD::Tex1DI32FloatLevel";
- case NVPTXISD::Tex1DI32FloatGrad:
- return "NVPTXISD::Tex1DI32FloatGrad";
- case NVPTXISD::Tex1DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32";
- case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
+ case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32";
+ case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float";
+ case NVPTXISD::Tex1DS32FloatLevel:
+ return "NVPTXISD::Tex1DS32FloatLevel";
+ case NVPTXISD::Tex1DS32FloatGrad:
+ return "NVPTXISD::Tex1DS32FloatGrad";
+ case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32";
+ case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float";
+ case NVPTXISD::Tex1DU32FloatLevel:
+ return "NVPTXISD::Tex1DU32FloatLevel";
+ case NVPTXISD::Tex1DU32FloatGrad:
+ return "NVPTXISD::Tex1DU32FloatGrad";
+ case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32";
+ case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
case NVPTXISD::Tex1DArrayFloatFloatLevel:
- return "NVPTXISD::Tex2DArrayFloatFloatLevel";
+ return "NVPTXISD::Tex1DArrayFloatFloatLevel";
case NVPTXISD::Tex1DArrayFloatFloatGrad:
- return "NVPTXISD::Tex2DArrayFloatFloatGrad";
- case NVPTXISD::Tex1DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32";
- case NVPTXISD::Tex1DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float";
- case NVPTXISD::Tex1DArrayI32FloatLevel:
- return "NVPTXISD::Tex2DArrayI32FloatLevel";
- case NVPTXISD::Tex1DArrayI32FloatGrad:
- return "NVPTXISD::Tex2DArrayI32FloatGrad";
- case NVPTXISD::Tex2DFloatI32: return "NVPTXISD::Tex2DFloatI32";
+ return "NVPTXISD::Tex1DArrayFloatFloatGrad";
+ case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32";
+ case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float";
+ case NVPTXISD::Tex1DArrayS32FloatLevel:
+ return "NVPTXISD::Tex1DArrayS32FloatLevel";
+ case NVPTXISD::Tex1DArrayS32FloatGrad:
+ return "NVPTXISD::Tex1DArrayS32FloatGrad";
+ case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32";
+ case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float";
+ case NVPTXISD::Tex1DArrayU32FloatLevel:
+ return "NVPTXISD::Tex1DArrayU32FloatLevel";
+ case NVPTXISD::Tex1DArrayU32FloatGrad:
+ return "NVPTXISD::Tex1DArrayU32FloatGrad";
+ case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32";
case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
case NVPTXISD::Tex2DFloatFloatLevel:
return "NVPTXISD::Tex2DFloatFloatLevel";
case NVPTXISD::Tex2DFloatFloatGrad:
return "NVPTXISD::Tex2DFloatFloatGrad";
- case NVPTXISD::Tex2DI32I32: return "NVPTXISD::Tex2DI32I32";
- case NVPTXISD::Tex2DI32Float: return "NVPTXISD::Tex2DI32Float";
- case NVPTXISD::Tex2DI32FloatLevel:
- return "NVPTXISD::Tex2DI32FloatLevel";
- case NVPTXISD::Tex2DI32FloatGrad:
- return "NVPTXISD::Tex2DI32FloatGrad";
- case NVPTXISD::Tex2DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32";
+ case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32";
+ case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float";
+ case NVPTXISD::Tex2DS32FloatLevel:
+ return "NVPTXISD::Tex2DS32FloatLevel";
+ case NVPTXISD::Tex2DS32FloatGrad:
+ return "NVPTXISD::Tex2DS32FloatGrad";
+ case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32";
+ case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float";
+ case NVPTXISD::Tex2DU32FloatLevel:
+ return "NVPTXISD::Tex2DU32FloatLevel";
+ case NVPTXISD::Tex2DU32FloatGrad:
+ return "NVPTXISD::Tex2DU32FloatGrad";
+ case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32";
case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
case NVPTXISD::Tex2DArrayFloatFloatLevel:
return "NVPTXISD::Tex2DArrayFloatFloatLevel";
case NVPTXISD::Tex2DArrayFloatFloatGrad:
return "NVPTXISD::Tex2DArrayFloatFloatGrad";
- case NVPTXISD::Tex2DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32";
- case NVPTXISD::Tex2DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float";
- case NVPTXISD::Tex2DArrayI32FloatLevel:
- return "NVPTXISD::Tex2DArrayI32FloatLevel";
- case NVPTXISD::Tex2DArrayI32FloatGrad:
- return "NVPTXISD::Tex2DArrayI32FloatGrad";
- case NVPTXISD::Tex3DFloatI32: return "NVPTXISD::Tex3DFloatI32";
+ case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32";
+ case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float";
+ case NVPTXISD::Tex2DArrayS32FloatLevel:
+ return "NVPTXISD::Tex2DArrayS32FloatLevel";
+ case NVPTXISD::Tex2DArrayS32FloatGrad:
+ return "NVPTXISD::Tex2DArrayS32FloatGrad";
+ case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32";
+ case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float";
+ case NVPTXISD::Tex2DArrayU32FloatLevel:
+ return "NVPTXISD::Tex2DArrayU32FloatLevel";
+ case NVPTXISD::Tex2DArrayU32FloatGrad:
+ return "NVPTXISD::Tex2DArrayU32FloatGrad";
+ case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32";
case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat";
case NVPTXISD::Tex3DFloatFloatLevel:
return "NVPTXISD::Tex3DFloatFloatLevel";
case NVPTXISD::Tex3DFloatFloatGrad:
return "NVPTXISD::Tex3DFloatFloatGrad";
- case NVPTXISD::Tex3DI32I32: return "NVPTXISD::Tex3DI32I32";
- case NVPTXISD::Tex3DI32Float: return "NVPTXISD::Tex3DI32Float";
- case NVPTXISD::Tex3DI32FloatLevel:
- return "NVPTXISD::Tex3DI32FloatLevel";
- case NVPTXISD::Tex3DI32FloatGrad:
- return "NVPTXISD::Tex3DI32FloatGrad";
+ case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32";
+ case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float";
+ case NVPTXISD::Tex3DS32FloatLevel:
+ return "NVPTXISD::Tex3DS32FloatLevel";
+ case NVPTXISD::Tex3DS32FloatGrad:
+ return "NVPTXISD::Tex3DS32FloatGrad";
+ case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32";
+ case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float";
+ case NVPTXISD::Tex3DU32FloatLevel:
+ return "NVPTXISD::Tex3DU32FloatLevel";
+ case NVPTXISD::Tex3DU32FloatGrad:
+ return "NVPTXISD::Tex3DU32FloatGrad";
+ case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat";
+ case NVPTXISD::TexCubeFloatFloatLevel:
+ return "NVPTXISD::TexCubeFloatFloatLevel";
+ case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float";
+ case NVPTXISD::TexCubeS32FloatLevel:
+ return "NVPTXISD::TexCubeS32FloatLevel";
+ case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float";
+ case NVPTXISD::TexCubeU32FloatLevel:
+ return "NVPTXISD::TexCubeU32FloatLevel";
+ case NVPTXISD::TexCubeArrayFloatFloat:
+ return "NVPTXISD::TexCubeArrayFloatFloat";
+ case NVPTXISD::TexCubeArrayFloatFloatLevel:
+ return "NVPTXISD::TexCubeArrayFloatFloatLevel";
+ case NVPTXISD::TexCubeArrayS32Float:
+ return "NVPTXISD::TexCubeArrayS32Float";
+ case NVPTXISD::TexCubeArrayS32FloatLevel:
+ return "NVPTXISD::TexCubeArrayS32FloatLevel";
+ case NVPTXISD::TexCubeArrayU32Float:
+ return "NVPTXISD::TexCubeArrayU32Float";
+ case NVPTXISD::TexCubeArrayU32FloatLevel:
+ return "NVPTXISD::TexCubeArrayU32FloatLevel";
+ case NVPTXISD::Tld4R2DFloatFloat:
+ return "NVPTXISD::Tld4R2DFloatFloat";
+ case NVPTXISD::Tld4G2DFloatFloat:
+ return "NVPTXISD::Tld4G2DFloatFloat";
+ case NVPTXISD::Tld4B2DFloatFloat:
+ return "NVPTXISD::Tld4B2DFloatFloat";
+ case NVPTXISD::Tld4A2DFloatFloat:
+ return "NVPTXISD::Tld4A2DFloatFloat";
+ case NVPTXISD::Tld4R2DS64Float:
+ return "NVPTXISD::Tld4R2DS64Float";
+ case NVPTXISD::Tld4G2DS64Float:
+ return "NVPTXISD::Tld4G2DS64Float";
+ case NVPTXISD::Tld4B2DS64Float:
+ return "NVPTXISD::Tld4B2DS64Float";
+ case NVPTXISD::Tld4A2DS64Float:
+ return "NVPTXISD::Tld4A2DS64Float";
+ case NVPTXISD::Tld4R2DU64Float:
+ return "NVPTXISD::Tld4R2DU64Float";
+ case NVPTXISD::Tld4G2DU64Float:
+ return "NVPTXISD::Tld4G2DU64Float";
+ case NVPTXISD::Tld4B2DU64Float:
+ return "NVPTXISD::Tld4B2DU64Float";
+ case NVPTXISD::Tld4A2DU64Float:
+ return "NVPTXISD::Tld4A2DU64Float";
+
+ case NVPTXISD::TexUnified1DFloatS32:
+ return "NVPTXISD::TexUnified1DFloatS32";
+ case NVPTXISD::TexUnified1DFloatFloat:
+ return "NVPTXISD::TexUnified1DFloatFloat";
+ case NVPTXISD::TexUnified1DFloatFloatLevel:
+ return "NVPTXISD::TexUnified1DFloatFloatLevel";
+ case NVPTXISD::TexUnified1DFloatFloatGrad:
+ return "NVPTXISD::TexUnified1DFloatFloatGrad";
+ case NVPTXISD::TexUnified1DS32S32:
+ return "NVPTXISD::TexUnified1DS32S32";
+ case NVPTXISD::TexUnified1DS32Float:
+ return "NVPTXISD::TexUnified1DS32Float";
+ case NVPTXISD::TexUnified1DS32FloatLevel:
+ return "NVPTXISD::TexUnified1DS32FloatLevel";
+ case NVPTXISD::TexUnified1DS32FloatGrad:
+ return "NVPTXISD::TexUnified1DS32FloatGrad";
+ case NVPTXISD::TexUnified1DU32S32:
+ return "NVPTXISD::TexUnified1DU32S32";
+ case NVPTXISD::TexUnified1DU32Float:
+ return "NVPTXISD::TexUnified1DU32Float";
+ case NVPTXISD::TexUnified1DU32FloatLevel:
+ return "NVPTXISD::TexUnified1DU32FloatLevel";
+ case NVPTXISD::TexUnified1DU32FloatGrad:
+ return "NVPTXISD::TexUnified1DU32FloatGrad";
+ case NVPTXISD::TexUnified1DArrayFloatS32:
+ return "NVPTXISD::TexUnified1DArrayFloatS32";
+ case NVPTXISD::TexUnified1DArrayFloatFloat:
+ return "NVPTXISD::TexUnified1DArrayFloatFloat";
+ case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
+ return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
+ case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
+ return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
+ case NVPTXISD::TexUnified1DArrayS32S32:
+ return "NVPTXISD::TexUnified1DArrayS32S32";
+ case NVPTXISD::TexUnified1DArrayS32Float:
+ return "NVPTXISD::TexUnified1DArrayS32Float";
+ case NVPTXISD::TexUnified1DArrayS32FloatLevel:
+ return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
+ case NVPTXISD::TexUnified1DArrayS32FloatGrad:
+ return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
+ case NVPTXISD::TexUnified1DArrayU32S32:
+ return "NVPTXISD::TexUnified1DArrayU32S32";
+ case NVPTXISD::TexUnified1DArrayU32Float:
+ return "NVPTXISD::TexUnified1DArrayU32Float";
+ case NVPTXISD::TexUnified1DArrayU32FloatLevel:
+ return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
+ case NVPTXISD::TexUnified1DArrayU32FloatGrad:
+ return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
+ case NVPTXISD::TexUnified2DFloatS32:
+ return "NVPTXISD::TexUnified2DFloatS32";
+ case NVPTXISD::TexUnified2DFloatFloat:
+ return "NVPTXISD::TexUnified2DFloatFloat";
+ case NVPTXISD::TexUnified2DFloatFloatLevel:
+ return "NVPTXISD::TexUnified2DFloatFloatLevel";
+ case NVPTXISD::TexUnified2DFloatFloatGrad:
+ return "NVPTXISD::TexUnified2DFloatFloatGrad";
+ case NVPTXISD::TexUnified2DS32S32:
+ return "NVPTXISD::TexUnified2DS32S32";
+ case NVPTXISD::TexUnified2DS32Float:
+ return "NVPTXISD::TexUnified2DS32Float";
+ case NVPTXISD::TexUnified2DS32FloatLevel:
+ return "NVPTXISD::TexUnified2DS32FloatLevel";
+ case NVPTXISD::TexUnified2DS32FloatGrad:
+ return "NVPTXISD::TexUnified2DS32FloatGrad";
+ case NVPTXISD::TexUnified2DU32S32:
+ return "NVPTXISD::TexUnified2DU32S32";
+ case NVPTXISD::TexUnified2DU32Float:
+ return "NVPTXISD::TexUnified2DU32Float";
+ case NVPTXISD::TexUnified2DU32FloatLevel:
+ return "NVPTXISD::TexUnified2DU32FloatLevel";
+ case NVPTXISD::TexUnified2DU32FloatGrad:
+ return "NVPTXISD::TexUnified2DU32FloatGrad";
+ case NVPTXISD::TexUnified2DArrayFloatS32:
+ return "NVPTXISD::TexUnified2DArrayFloatS32";
+ case NVPTXISD::TexUnified2DArrayFloatFloat:
+ return "NVPTXISD::TexUnified2DArrayFloatFloat";
+ case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
+ return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
+ case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
+ return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
+ case NVPTXISD::TexUnified2DArrayS32S32:
+ return "NVPTXISD::TexUnified2DArrayS32S32";
+ case NVPTXISD::TexUnified2DArrayS32Float:
+ return "NVPTXISD::TexUnified2DArrayS32Float";
+ case NVPTXISD::TexUnified2DArrayS32FloatLevel:
+ return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
+ case NVPTXISD::TexUnified2DArrayS32FloatGrad:
+ return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
+ case NVPTXISD::TexUnified2DArrayU32S32:
+ return "NVPTXISD::TexUnified2DArrayU32S32";
+ case NVPTXISD::TexUnified2DArrayU32Float:
+ return "NVPTXISD::TexUnified2DArrayU32Float";
+ case NVPTXISD::TexUnified2DArrayU32FloatLevel:
+ return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
+ case NVPTXISD::TexUnified2DArrayU32FloatGrad:
+ return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
+ case NVPTXISD::TexUnified3DFloatS32:
+ return "NVPTXISD::TexUnified3DFloatS32";
+ case NVPTXISD::TexUnified3DFloatFloat:
+ return "NVPTXISD::TexUnified3DFloatFloat";
+ case NVPTXISD::TexUnified3DFloatFloatLevel:
+ return "NVPTXISD::TexUnified3DFloatFloatLevel";
+ case NVPTXISD::TexUnified3DFloatFloatGrad:
+ return "NVPTXISD::TexUnified3DFloatFloatGrad";
+ case NVPTXISD::TexUnified3DS32S32:
+ return "NVPTXISD::TexUnified3DS32S32";
+ case NVPTXISD::TexUnified3DS32Float:
+ return "NVPTXISD::TexUnified3DS32Float";
+ case NVPTXISD::TexUnified3DS32FloatLevel:
+ return "NVPTXISD::TexUnified3DS32FloatLevel";
+ case NVPTXISD::TexUnified3DS32FloatGrad:
+ return "NVPTXISD::TexUnified3DS32FloatGrad";
+ case NVPTXISD::TexUnified3DU32S32:
+ return "NVPTXISD::TexUnified3DU32S32";
+ case NVPTXISD::TexUnified3DU32Float:
+ return "NVPTXISD::TexUnified3DU32Float";
+ case NVPTXISD::TexUnified3DU32FloatLevel:
+ return "NVPTXISD::TexUnified3DU32FloatLevel";
+ case NVPTXISD::TexUnified3DU32FloatGrad:
+ return "NVPTXISD::TexUnified3DU32FloatGrad";
+ case NVPTXISD::TexUnifiedCubeFloatFloat:
+ return "NVPTXISD::TexUnifiedCubeFloatFloat";
+ case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
+ return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
+ case NVPTXISD::TexUnifiedCubeS32Float:
+ return "NVPTXISD::TexUnifiedCubeS32Float";
+ case NVPTXISD::TexUnifiedCubeS32FloatLevel:
+ return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
+ case NVPTXISD::TexUnifiedCubeU32Float:
+ return "NVPTXISD::TexUnifiedCubeU32Float";
+ case NVPTXISD::TexUnifiedCubeU32FloatLevel:
+ return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
+ return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
+ return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
+ case NVPTXISD::TexUnifiedCubeArrayS32Float:
+ return "NVPTXISD::TexUnifiedCubeArrayS32Float";
+ case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
+ return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
+ case NVPTXISD::TexUnifiedCubeArrayU32Float:
+ return "NVPTXISD::TexUnifiedCubeArrayU32Float";
+ case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
+ return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
+ case NVPTXISD::Tld4UnifiedR2DFloatFloat:
+ return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
+ case NVPTXISD::Tld4UnifiedG2DFloatFloat:
+ return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
+ case NVPTXISD::Tld4UnifiedB2DFloatFloat:
+ return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
+ case NVPTXISD::Tld4UnifiedA2DFloatFloat:
+ return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
+ case NVPTXISD::Tld4UnifiedR2DS64Float:
+ return "NVPTXISD::Tld4UnifiedR2DS64Float";
+ case NVPTXISD::Tld4UnifiedG2DS64Float:
+ return "NVPTXISD::Tld4UnifiedG2DS64Float";
+ case NVPTXISD::Tld4UnifiedB2DS64Float:
+ return "NVPTXISD::Tld4UnifiedB2DS64Float";
+ case NVPTXISD::Tld4UnifiedA2DS64Float:
+ return "NVPTXISD::Tld4UnifiedA2DS64Float";
+ case NVPTXISD::Tld4UnifiedR2DU64Float:
+ return "NVPTXISD::Tld4UnifiedR2DU64Float";
+ case NVPTXISD::Tld4UnifiedG2DU64Float:
+ return "NVPTXISD::Tld4UnifiedG2DU64Float";
+ case NVPTXISD::Tld4UnifiedB2DU64Float:
+ return "NVPTXISD::Tld4UnifiedB2DU64Float";
+ case NVPTXISD::Tld4UnifiedA2DU64Float:
+ return "NVPTXISD::Tld4UnifiedA2DU64Float";
+
+ case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp";
+ case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp";
+ case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp";
+ case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp";
+ case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp";
+ case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp";
+ case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp";
+ case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp";
+ case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp";
+ case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp";
+ case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp";
+
+ case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp";
+ case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp";
+ case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp";
+ case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp";
+ case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
+ case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp";
+ case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp";
+ case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp";
+ case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
+ case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp";
+ case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp";
+
+ case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp";
+ case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp";
+ case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp";
+ case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp";
+ case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp";
+ case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp";
+ case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp";
+ case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp";
+ case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp";
+ case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp";
+ case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp";
+
+ case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp";
+ case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp";
+ case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp";
+ case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp";
+ case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
+ case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp";
+ case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp";
+ case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp";
+ case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
+ case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp";
+ case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp";
+
+ case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp";
+ case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp";
+ case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp";
+ case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp";
+ case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp";
+ case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp";
+ case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp";
+ case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp";
+ case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp";
+ case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp";
+ case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp";
case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap";
case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap";
case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap";
+ case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap";
case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap";
case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap";
case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap";
+ case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap";
case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap";
case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap";
case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap";
case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap";
case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap";
case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap";
+ case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap";
case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap";
case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap";
case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap";
+ case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap";
case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap";
case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap";
case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap";
case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap";
case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap";
case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap";
+ case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap";
case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap";
case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap";
case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap";
+ case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap";
case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap";
case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap";
case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap";
case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap";
case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap";
case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap";
+ case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap";
case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap";
case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap";
case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap";
+ case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap";
case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap";
case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap";
case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap";
case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap";
case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap";
case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap";
+ case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap";
case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap";
case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap";
case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap";
+ case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap";
case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap";
case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap";
case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap";
+
+ case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero";
+ case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero";
+ case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero";
+ case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero";
+ case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero";
+ case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero";
+ case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero";
+ case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero";
+ case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero";
+ case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero";
+ case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero";
+
+ case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero";
+ case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero";
+ case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero";
+ case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero";
+ case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero";
+ case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero";
+ case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero";
+ case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero";
+ case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero";
+ case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero";
+ case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero";
+
+ case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero";
+ case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero";
+ case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero";
+ case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero";
+ case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero";
+ case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero";
+ case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero";
+ case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero";
+ case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero";
+ case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero";
+ case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero";
+
+ case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero";
+ case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero";
+ case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero";
+ case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero";
+ case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero";
+ case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero";
+ case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero";
+ case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero";
+ case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero";
+ case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero";
+ case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero";
+
+ case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero";
+ case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero";
+ case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero";
+ case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero";
+ case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero";
+ case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero";
+ case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero";
+ case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero";
+ case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero";
+ case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero";
+ case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero";
}
}
-bool NVPTXTargetLowering::shouldSplitVectorType(EVT VT) const {
- return VT.getScalarType() == MVT::i1;
+TargetLoweringBase::LegalizeTypeAction
+NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
+ if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
+ return TypeSplitVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
}
SDValue
O << ".param .b" << size << " _";
} else if (isa<PointerType>(retTy)) {
O << ".param .b" << getPointerTy().getSizeInBits() << " _";
+ } else if ((retTy->getTypeID() == Type::StructTyID) ||
+ isa<VectorType>(retTy)) {
+ O << ".param .align "
+ << retAlignment
+ << " .b8 _["
+ << getDataLayout()->getTypeAllocSize(retTy) << "]";
} else {
- if((retTy->getTypeID() == Type::StructTyID) ||
- isa<VectorType>(retTy)) {
- O << ".param .align "
- << retAlignment
- << " .b8 _["
- << getDataLayout()->getTypeAllocSize(retTy) << "]";
- } else {
- assert(false && "Unknown return type");
- }
+ llvm_unreachable("Unknown return type");
}
O << ") ";
}
InFlag = Chain.getValue(1);
for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
EVT elemtype = vtparts[j];
- unsigned ArgAlign = GCD(align, Offsets[j]);
+ unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
if (elemtype.isInteger() && (sz < 8))
sz = 8;
SDValue StVal = OutVals[OIdx];
for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
EVT elemtype = vtparts[j];
int curOffset = Offsets[j];
- unsigned PartAlign = GCD(ArgAlign, curOffset);
+ unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
SDValue srcAddr =
DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
DAG.getConstant(curOffset, getPointerTy()));
// .param .align 16 .b8 retval0[<size-in-bytes>], or
// .param .b<size-in-bits> retval0
unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
- if (retTy->isSingleValueType()) {
+ // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
+ // these three types to match the logic in
+ // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
+ // Plus, this behavior is consistent with nvcc's.
+ if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
+ retTy->isPointerTy()) {
// Scalar needs to be at least 32bit wide
if (resultsz < 32)
resultsz = 32;
EVT ObjectVT = getValueType(retTy);
unsigned NumElts = ObjectVT.getVectorNumElements();
EVT EltVT = ObjectVT.getVectorElementType();
- assert(nvTM->getTargetLowering()->getNumRegisters(F->getContext(),
- ObjectVT) == NumElts &&
+ assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters(
+ F->getContext(), ObjectVT) == NumElts &&
"Vector was not scalarized");
unsigned sz = EltVT.getSizeInBits();
bool needTruncate = sz < 8 ? true : false;
unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
unsigned sz = VTs[i].getSizeInBits();
- unsigned AlignI = GCD(RetAlign, Offsets[i]);
+ unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
bool needTruncate = sz < 8 ? true : false;
if (VTs[i].isInteger() && (sz < 8))
sz = 8;
case ISD::SRA_PARTS:
case ISD::SRL_PARTS:
return LowerShiftRightParts(Op, DAG);
+ case ISD::SELECT:
+ return LowerSelect(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
}
+SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Op0 = Op->getOperand(0);
+ SDValue Op1 = Op->getOperand(1);
+ SDValue Op2 = Op->getOperand(2);
+ SDLoc DL(Op.getNode());
+
+ assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
+
+ Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
+ Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
+ SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
+
+ return Trunc;
+}
+
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1)
return LowerLOADi1(Op, DAG);
break;
}
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ const DataLayout *TD = getDataLayout();
+
+ unsigned Align = MemSD->getAlignment();
+ unsigned PrefAlign =
+ TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
+ if (Align < PrefAlign) {
+ // This store is not sufficiently aligned, so bail out and let this vector
+ // store be scalarized. Note that we may still be able to emit smaller
+ // vector stores. For example, if we are storing a <4 x float> with an
+ // alignment of 8, this check will fail but the legalizer will try again
+ // with 2 x <2 x float>, which will succeed with an alignment of 8.
+ return SDValue();
+ }
+
unsigned Opcode = 0;
EVT EltVT = ValVT.getVectorElementType();
unsigned NumElts = ValVT.getVectorNumElements();
Ops.push_back(N->getOperand(i));
}
- MemSDNode *MemSD = cast<MemSDNode>(N);
-
SDValue NewSt = DAG.getMemIntrinsicNode(
Opcode, DL, DAG.getVTList(MVT::Other), Ops,
MemSD->getMemoryVT(), MemSD->getMemOperand());
const Function *F = MF.getFunction();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = DAG.getTarget().getTargetLowering();
+ const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering();
SDValue Root = DAG.getRoot();
std::vector<SDValue> OutChains;
assert(vtparts.size() > 0 && "empty aggregate type not expected");
for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
++parti) {
- EVT partVT = vtparts[parti];
- InVals.push_back(DAG.getNode(ISD::UNDEF, dl, partVT));
+ InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
++InsIdx;
}
if (vtparts.size() > 0)
ISD::SEXTLOAD : ISD::ZEXTLOAD;
p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
MachinePointerInfo(srcValue), partVT, false,
- false, partAlign);
+ false, false, partAlign);
} else {
p = DAG.getLoad(partVT, dl, Root, srcAddr,
MachinePointerInfo(srcValue), false, false, false,
unsigned NumElts = ObjectVT.getVectorNumElements();
assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
"Vector was not scalarized");
- unsigned Ofst = 0;
EVT EltVT = ObjectVT.getVectorElementType();
// V1 load
// We only have one element, so just directly load it
Value *SrcValue = Constant::getNullValue(PointerType::get(
EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
- SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(Ofst, getPointerTy()));
SDValue P = DAG.getLoad(
- EltVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
+ EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
false, true,
TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
if (P.getNode())
if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
InVals.push_back(P);
- Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext()));
++InsIdx;
} else if (NumElts == 2) {
// V2 load
EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
Value *SrcValue = Constant::getNullValue(PointerType::get(
VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
- SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(Ofst, getPointerTy()));
SDValue P = DAG.getLoad(
- VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
+ VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
false, true,
TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
if (P.getNode())
InVals.push_back(Elt0);
InVals.push_back(Elt1);
- Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
InsIdx += 2;
} else {
// V4 loads
VecSize = 2;
}
EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
+ unsigned Ofst = 0;
for (unsigned i = 0; i < NumElts; i += VecSize) {
Value *SrcValue = Constant::getNullValue(
PointerType::get(VecVT.getTypeForEVT(F->getContext()),
ISD::SEXTLOAD : ISD::ZEXTLOAD;
p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
MachinePointerInfo(srcValue), ObjectVT, false, false,
+ false,
TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
} else {
p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
unsigned Offset = 0;
EVT VecVT =
- EVT::getVectorVT(F->getContext(), OutVals[0].getValueType(), VecSize);
+ EVT::getVectorVT(F->getContext(), EltVT, VecSize);
unsigned PerStoreOffset =
TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
}
} else {
SmallVector<EVT, 16> ValVTs;
- // const_cast is necessary since we are still using an LLVM version from
- // before the type system re-write.
- ComputePTXValueVTs(*this, RetTy, ValVTs);
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0);
assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
- unsigned SizeSoFar = 0;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
SDValue theVal = OutVals[i];
EVT TheValType = theVal.getValueType();
else if (TmpVal.getValueType().getSizeInBits() < 16)
TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);
- SDValue Ops[] = { Chain, DAG.getConstant(SizeSoFar, MVT::i32), TmpVal };
+ SDValue Ops[] = {
+ Chain,
+ DAG.getConstant(Offsets[i], MVT::i32),
+ TmpVal };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
DAG.getVTList(MVT::Other), Ops,
TheStoreType,
MachinePointerInfo());
- if(TheValType.isVector())
- SizeSoFar +=
- TheStoreType.getVectorElementType().getStoreSizeInBits() / 8;
- else
- SizeSoFar += TheStoreType.getStoreSizeInBits()/8;
}
}
}
default:
return 0;
- case Intrinsic::nvvm_tex_1d_v4f32_i32:
- return NVPTXISD::Tex1DFloatI32;
+ case Intrinsic::nvvm_tex_1d_v4f32_s32:
+ return NVPTXISD::Tex1DFloatS32;
case Intrinsic::nvvm_tex_1d_v4f32_f32:
return NVPTXISD::Tex1DFloatFloat;
case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
return NVPTXISD::Tex1DFloatFloatLevel;
case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
return NVPTXISD::Tex1DFloatFloatGrad;
- case Intrinsic::nvvm_tex_1d_v4i32_i32:
- return NVPTXISD::Tex1DI32I32;
- case Intrinsic::nvvm_tex_1d_v4i32_f32:
- return NVPTXISD::Tex1DI32Float;
- case Intrinsic::nvvm_tex_1d_level_v4i32_f32:
- return NVPTXISD::Tex1DI32FloatLevel;
- case Intrinsic::nvvm_tex_1d_grad_v4i32_f32:
- return NVPTXISD::Tex1DI32FloatGrad;
-
- case Intrinsic::nvvm_tex_1d_array_v4f32_i32:
- return NVPTXISD::Tex1DArrayFloatI32;
+ case Intrinsic::nvvm_tex_1d_v4s32_s32:
+ return NVPTXISD::Tex1DS32S32;
+ case Intrinsic::nvvm_tex_1d_v4s32_f32:
+ return NVPTXISD::Tex1DS32Float;
+ case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
+ return NVPTXISD::Tex1DS32FloatLevel;
+ case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
+ return NVPTXISD::Tex1DS32FloatGrad;
+ case Intrinsic::nvvm_tex_1d_v4u32_s32:
+ return NVPTXISD::Tex1DU32S32;
+ case Intrinsic::nvvm_tex_1d_v4u32_f32:
+ return NVPTXISD::Tex1DU32Float;
+ case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
+ return NVPTXISD::Tex1DU32FloatLevel;
+ case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
+ return NVPTXISD::Tex1DU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
+ return NVPTXISD::Tex1DArrayFloatS32;
case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
return NVPTXISD::Tex1DArrayFloatFloat;
case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
return NVPTXISD::Tex1DArrayFloatFloatLevel;
case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
return NVPTXISD::Tex1DArrayFloatFloatGrad;
- case Intrinsic::nvvm_tex_1d_array_v4i32_i32:
- return NVPTXISD::Tex1DArrayI32I32;
- case Intrinsic::nvvm_tex_1d_array_v4i32_f32:
- return NVPTXISD::Tex1DArrayI32Float;
- case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32:
- return NVPTXISD::Tex1DArrayI32FloatLevel;
- case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32:
- return NVPTXISD::Tex1DArrayI32FloatGrad;
-
- case Intrinsic::nvvm_tex_2d_v4f32_i32:
- return NVPTXISD::Tex2DFloatI32;
+ case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
+ return NVPTXISD::Tex1DArrayS32S32;
+ case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
+ return NVPTXISD::Tex1DArrayS32Float;
+ case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
+ return NVPTXISD::Tex1DArrayS32FloatLevel;
+ case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
+ return NVPTXISD::Tex1DArrayS32FloatGrad;
+ case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
+ return NVPTXISD::Tex1DArrayU32S32;
+ case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
+ return NVPTXISD::Tex1DArrayU32Float;
+ case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
+ return NVPTXISD::Tex1DArrayU32FloatLevel;
+ case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
+ return NVPTXISD::Tex1DArrayU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_2d_v4f32_s32:
+ return NVPTXISD::Tex2DFloatS32;
case Intrinsic::nvvm_tex_2d_v4f32_f32:
return NVPTXISD::Tex2DFloatFloat;
case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
return NVPTXISD::Tex2DFloatFloatLevel;
case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
return NVPTXISD::Tex2DFloatFloatGrad;
- case Intrinsic::nvvm_tex_2d_v4i32_i32:
- return NVPTXISD::Tex2DI32I32;
- case Intrinsic::nvvm_tex_2d_v4i32_f32:
- return NVPTXISD::Tex2DI32Float;
- case Intrinsic::nvvm_tex_2d_level_v4i32_f32:
- return NVPTXISD::Tex2DI32FloatLevel;
- case Intrinsic::nvvm_tex_2d_grad_v4i32_f32:
- return NVPTXISD::Tex2DI32FloatGrad;
-
- case Intrinsic::nvvm_tex_2d_array_v4f32_i32:
- return NVPTXISD::Tex2DArrayFloatI32;
+ case Intrinsic::nvvm_tex_2d_v4s32_s32:
+ return NVPTXISD::Tex2DS32S32;
+ case Intrinsic::nvvm_tex_2d_v4s32_f32:
+ return NVPTXISD::Tex2DS32Float;
+ case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
+ return NVPTXISD::Tex2DS32FloatLevel;
+ case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
+ return NVPTXISD::Tex2DS32FloatGrad;
+ case Intrinsic::nvvm_tex_2d_v4u32_s32:
+ return NVPTXISD::Tex2DU32S32;
+ case Intrinsic::nvvm_tex_2d_v4u32_f32:
+ return NVPTXISD::Tex2DU32Float;
+ case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
+ return NVPTXISD::Tex2DU32FloatLevel;
+ case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
+ return NVPTXISD::Tex2DU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
+ return NVPTXISD::Tex2DArrayFloatS32;
case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
return NVPTXISD::Tex2DArrayFloatFloat;
case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
return NVPTXISD::Tex2DArrayFloatFloatLevel;
case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
return NVPTXISD::Tex2DArrayFloatFloatGrad;
- case Intrinsic::nvvm_tex_2d_array_v4i32_i32:
- return NVPTXISD::Tex2DArrayI32I32;
- case Intrinsic::nvvm_tex_2d_array_v4i32_f32:
- return NVPTXISD::Tex2DArrayI32Float;
- case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32:
- return NVPTXISD::Tex2DArrayI32FloatLevel;
- case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32:
- return NVPTXISD::Tex2DArrayI32FloatGrad;
-
- case Intrinsic::nvvm_tex_3d_v4f32_i32:
- return NVPTXISD::Tex3DFloatI32;
+ case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
+ return NVPTXISD::Tex2DArrayS32S32;
+ case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
+ return NVPTXISD::Tex2DArrayS32Float;
+ case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
+ return NVPTXISD::Tex2DArrayS32FloatLevel;
+ case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
+ return NVPTXISD::Tex2DArrayS32FloatGrad;
+ case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
+ return NVPTXISD::Tex2DArrayU32S32;
+ case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
+ return NVPTXISD::Tex2DArrayU32Float;
+ case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
+ return NVPTXISD::Tex2DArrayU32FloatLevel;
+ case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
+ return NVPTXISD::Tex2DArrayU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_3d_v4f32_s32:
+ return NVPTXISD::Tex3DFloatS32;
case Intrinsic::nvvm_tex_3d_v4f32_f32:
return NVPTXISD::Tex3DFloatFloat;
case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
return NVPTXISD::Tex3DFloatFloatLevel;
case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
return NVPTXISD::Tex3DFloatFloatGrad;
- case Intrinsic::nvvm_tex_3d_v4i32_i32:
- return NVPTXISD::Tex3DI32I32;
- case Intrinsic::nvvm_tex_3d_v4i32_f32:
- return NVPTXISD::Tex3DI32Float;
- case Intrinsic::nvvm_tex_3d_level_v4i32_f32:
- return NVPTXISD::Tex3DI32FloatLevel;
- case Intrinsic::nvvm_tex_3d_grad_v4i32_f32:
- return NVPTXISD::Tex3DI32FloatGrad;
+ case Intrinsic::nvvm_tex_3d_v4s32_s32:
+ return NVPTXISD::Tex3DS32S32;
+ case Intrinsic::nvvm_tex_3d_v4s32_f32:
+ return NVPTXISD::Tex3DS32Float;
+ case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
+ return NVPTXISD::Tex3DS32FloatLevel;
+ case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
+ return NVPTXISD::Tex3DS32FloatGrad;
+ case Intrinsic::nvvm_tex_3d_v4u32_s32:
+ return NVPTXISD::Tex3DU32S32;
+ case Intrinsic::nvvm_tex_3d_v4u32_f32:
+ return NVPTXISD::Tex3DU32Float;
+ case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
+ return NVPTXISD::Tex3DU32FloatLevel;
+ case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
+ return NVPTXISD::Tex3DU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_cube_v4f32_f32:
+ return NVPTXISD::TexCubeFloatFloat;
+ case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
+ return NVPTXISD::TexCubeFloatFloatLevel;
+ case Intrinsic::nvvm_tex_cube_v4s32_f32:
+ return NVPTXISD::TexCubeS32Float;
+ case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
+ return NVPTXISD::TexCubeS32FloatLevel;
+ case Intrinsic::nvvm_tex_cube_v4u32_f32:
+ return NVPTXISD::TexCubeU32Float;
+ case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
+ return NVPTXISD::TexCubeU32FloatLevel;
+
+ case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
+ return NVPTXISD::TexCubeArrayFloatFloat;
+ case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
+ return NVPTXISD::TexCubeArrayFloatFloatLevel;
+ case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
+ return NVPTXISD::TexCubeArrayS32Float;
+ case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
+ return NVPTXISD::TexCubeArrayS32FloatLevel;
+ case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
+ return NVPTXISD::TexCubeArrayU32Float;
+ case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
+ return NVPTXISD::TexCubeArrayU32FloatLevel;
+
+ case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
+ return NVPTXISD::Tld4R2DFloatFloat;
+ case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
+ return NVPTXISD::Tld4G2DFloatFloat;
+ case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
+ return NVPTXISD::Tld4B2DFloatFloat;
+ case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
+ return NVPTXISD::Tld4A2DFloatFloat;
+ case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
+ return NVPTXISD::Tld4R2DS64Float;
+ case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
+ return NVPTXISD::Tld4G2DS64Float;
+ case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
+ return NVPTXISD::Tld4B2DS64Float;
+ case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
+ return NVPTXISD::Tld4A2DS64Float;
+ case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
+ return NVPTXISD::Tld4R2DU64Float;
+ case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
+ return NVPTXISD::Tld4G2DU64Float;
+ case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
+ return NVPTXISD::Tld4B2DU64Float;
+ case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
+ return NVPTXISD::Tld4A2DU64Float;
+
+ case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
+ return NVPTXISD::TexUnified1DFloatS32;
+ case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
+ return NVPTXISD::TexUnified1DFloatFloat;
+ case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
+ return NVPTXISD::TexUnified1DFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
+ return NVPTXISD::TexUnified1DFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
+ return NVPTXISD::TexUnified1DS32S32;
+ case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
+ return NVPTXISD::TexUnified1DS32Float;
+ case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
+ return NVPTXISD::TexUnified1DS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
+ return NVPTXISD::TexUnified1DS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
+ return NVPTXISD::TexUnified1DU32S32;
+ case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
+ return NVPTXISD::TexUnified1DU32Float;
+ case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
+ return NVPTXISD::TexUnified1DU32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
+ return NVPTXISD::TexUnified1DU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
+ return NVPTXISD::TexUnified1DArrayFloatS32;
+ case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
+ return NVPTXISD::TexUnified1DArrayFloatFloat;
+ case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
+ return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
+ return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
+ return NVPTXISD::TexUnified1DArrayS32S32;
+ case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
+ return NVPTXISD::TexUnified1DArrayS32Float;
+ case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
+ return NVPTXISD::TexUnified1DArrayS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
+ return NVPTXISD::TexUnified1DArrayS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
+ return NVPTXISD::TexUnified1DArrayU32S32;
+ case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
+ return NVPTXISD::TexUnified1DArrayU32Float;
+ case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
+ return NVPTXISD::TexUnified1DArrayU32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
+ return NVPTXISD::TexUnified1DArrayU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
+ return NVPTXISD::TexUnified2DFloatS32;
+ case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
+ return NVPTXISD::TexUnified2DFloatFloat;
+ case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
+ return NVPTXISD::TexUnified2DFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
+ return NVPTXISD::TexUnified2DFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
+ return NVPTXISD::TexUnified2DS32S32;
+ case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
+ return NVPTXISD::TexUnified2DS32Float;
+ case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
+ return NVPTXISD::TexUnified2DS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
+ return NVPTXISD::TexUnified2DS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
+ return NVPTXISD::TexUnified2DU32S32;
+ case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
+ return NVPTXISD::TexUnified2DU32Float;
+ case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
+ return NVPTXISD::TexUnified2DU32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
+ return NVPTXISD::TexUnified2DU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
+ return NVPTXISD::TexUnified2DArrayFloatS32;
+ case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
+ return NVPTXISD::TexUnified2DArrayFloatFloat;
+ case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
+ return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
+ return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
+ return NVPTXISD::TexUnified2DArrayS32S32;
+ case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
+ return NVPTXISD::TexUnified2DArrayS32Float;
+ case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
+ return NVPTXISD::TexUnified2DArrayS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
+ return NVPTXISD::TexUnified2DArrayS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
+ return NVPTXISD::TexUnified2DArrayU32S32;
+ case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
+ return NVPTXISD::TexUnified2DArrayU32Float;
+ case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
+ return NVPTXISD::TexUnified2DArrayU32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
+ return NVPTXISD::TexUnified2DArrayU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
+ return NVPTXISD::TexUnified3DFloatS32;
+ case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
+ return NVPTXISD::TexUnified3DFloatFloat;
+ case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
+ return NVPTXISD::TexUnified3DFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
+ return NVPTXISD::TexUnified3DFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
+ return NVPTXISD::TexUnified3DS32S32;
+ case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
+ return NVPTXISD::TexUnified3DS32Float;
+ case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
+ return NVPTXISD::TexUnified3DS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
+ return NVPTXISD::TexUnified3DS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
+ return NVPTXISD::TexUnified3DU32S32;
+ case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
+ return NVPTXISD::TexUnified3DU32Float;
+ case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
+ return NVPTXISD::TexUnified3DU32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
+ return NVPTXISD::TexUnified3DU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
+ return NVPTXISD::TexUnifiedCubeFloatFloat;
+ case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
+ return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
+ return NVPTXISD::TexUnifiedCubeS32Float;
+ case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
+ return NVPTXISD::TexUnifiedCubeS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
+ return NVPTXISD::TexUnifiedCubeU32Float;
+ case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
+ return NVPTXISD::TexUnifiedCubeU32FloatLevel;
+
+ case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
+ case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayS32Float;
+ case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayU32Float;
+ case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
+
+ case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
+ return NVPTXISD::Tld4UnifiedR2DFloatFloat;
+ case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
+ return NVPTXISD::Tld4UnifiedG2DFloatFloat;
+ case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
+ return NVPTXISD::Tld4UnifiedB2DFloatFloat;
+ case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
+ return NVPTXISD::Tld4UnifiedA2DFloatFloat;
+ case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
+ return NVPTXISD::Tld4UnifiedR2DS64Float;
+ case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
+ return NVPTXISD::Tld4UnifiedG2DS64Float;
+ case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
+ return NVPTXISD::Tld4UnifiedB2DS64Float;
+ case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
+ return NVPTXISD::Tld4UnifiedA2DS64Float;
+ case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
+ return NVPTXISD::Tld4UnifiedR2DU64Float;
+ case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
+ return NVPTXISD::Tld4UnifiedG2DU64Float;
+ case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
+ return NVPTXISD::Tld4UnifiedB2DU64Float;
+ case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
+ return NVPTXISD::Tld4UnifiedA2DU64Float;
}
}
switch (Intrinsic) {
default:
return 0;
+ case Intrinsic::nvvm_suld_1d_i8_clamp:
+ return NVPTXISD::Suld1DI8Clamp;
+ case Intrinsic::nvvm_suld_1d_i16_clamp:
+ return NVPTXISD::Suld1DI16Clamp;
+ case Intrinsic::nvvm_suld_1d_i32_clamp:
+ return NVPTXISD::Suld1DI32Clamp;
+ case Intrinsic::nvvm_suld_1d_i64_clamp:
+ return NVPTXISD::Suld1DI64Clamp;
+ case Intrinsic::nvvm_suld_1d_v2i8_clamp:
+ return NVPTXISD::Suld1DV2I8Clamp;
+ case Intrinsic::nvvm_suld_1d_v2i16_clamp:
+ return NVPTXISD::Suld1DV2I16Clamp;
+ case Intrinsic::nvvm_suld_1d_v2i32_clamp:
+ return NVPTXISD::Suld1DV2I32Clamp;
+ case Intrinsic::nvvm_suld_1d_v2i64_clamp:
+ return NVPTXISD::Suld1DV2I64Clamp;
+ case Intrinsic::nvvm_suld_1d_v4i8_clamp:
+ return NVPTXISD::Suld1DV4I8Clamp;
+ case Intrinsic::nvvm_suld_1d_v4i16_clamp:
+ return NVPTXISD::Suld1DV4I16Clamp;
+ case Intrinsic::nvvm_suld_1d_v4i32_clamp:
+ return NVPTXISD::Suld1DV4I32Clamp;
+ case Intrinsic::nvvm_suld_1d_array_i8_clamp:
+ return NVPTXISD::Suld1DArrayI8Clamp;
+ case Intrinsic::nvvm_suld_1d_array_i16_clamp:
+ return NVPTXISD::Suld1DArrayI16Clamp;
+ case Intrinsic::nvvm_suld_1d_array_i32_clamp:
+ return NVPTXISD::Suld1DArrayI32Clamp;
+ case Intrinsic::nvvm_suld_1d_array_i64_clamp:
+ return NVPTXISD::Suld1DArrayI64Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
+ return NVPTXISD::Suld1DArrayV2I8Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
+ return NVPTXISD::Suld1DArrayV2I16Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
+ return NVPTXISD::Suld1DArrayV2I32Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
+ return NVPTXISD::Suld1DArrayV2I64Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
+ return NVPTXISD::Suld1DArrayV4I8Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
+ return NVPTXISD::Suld1DArrayV4I16Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
+ return NVPTXISD::Suld1DArrayV4I32Clamp;
+ case Intrinsic::nvvm_suld_2d_i8_clamp:
+ return NVPTXISD::Suld2DI8Clamp;
+ case Intrinsic::nvvm_suld_2d_i16_clamp:
+ return NVPTXISD::Suld2DI16Clamp;
+ case Intrinsic::nvvm_suld_2d_i32_clamp:
+ return NVPTXISD::Suld2DI32Clamp;
+ case Intrinsic::nvvm_suld_2d_i64_clamp:
+ return NVPTXISD::Suld2DI64Clamp;
+ case Intrinsic::nvvm_suld_2d_v2i8_clamp:
+ return NVPTXISD::Suld2DV2I8Clamp;
+ case Intrinsic::nvvm_suld_2d_v2i16_clamp:
+ return NVPTXISD::Suld2DV2I16Clamp;
+ case Intrinsic::nvvm_suld_2d_v2i32_clamp:
+ return NVPTXISD::Suld2DV2I32Clamp;
+ case Intrinsic::nvvm_suld_2d_v2i64_clamp:
+ return NVPTXISD::Suld2DV2I64Clamp;
+ case Intrinsic::nvvm_suld_2d_v4i8_clamp:
+ return NVPTXISD::Suld2DV4I8Clamp;
+ case Intrinsic::nvvm_suld_2d_v4i16_clamp:
+ return NVPTXISD::Suld2DV4I16Clamp;
+ case Intrinsic::nvvm_suld_2d_v4i32_clamp:
+ return NVPTXISD::Suld2DV4I32Clamp;
+ case Intrinsic::nvvm_suld_2d_array_i8_clamp:
+ return NVPTXISD::Suld2DArrayI8Clamp;
+ case Intrinsic::nvvm_suld_2d_array_i16_clamp:
+ return NVPTXISD::Suld2DArrayI16Clamp;
+ case Intrinsic::nvvm_suld_2d_array_i32_clamp:
+ return NVPTXISD::Suld2DArrayI32Clamp;
+ case Intrinsic::nvvm_suld_2d_array_i64_clamp:
+ return NVPTXISD::Suld2DArrayI64Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
+ return NVPTXISD::Suld2DArrayV2I8Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
+ return NVPTXISD::Suld2DArrayV2I16Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
+ return NVPTXISD::Suld2DArrayV2I32Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
+ return NVPTXISD::Suld2DArrayV2I64Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
+ return NVPTXISD::Suld2DArrayV4I8Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
+ return NVPTXISD::Suld2DArrayV4I16Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
+ return NVPTXISD::Suld2DArrayV4I32Clamp;
+ case Intrinsic::nvvm_suld_3d_i8_clamp:
+ return NVPTXISD::Suld3DI8Clamp;
+ case Intrinsic::nvvm_suld_3d_i16_clamp:
+ return NVPTXISD::Suld3DI16Clamp;
+ case Intrinsic::nvvm_suld_3d_i32_clamp:
+ return NVPTXISD::Suld3DI32Clamp;
+ case Intrinsic::nvvm_suld_3d_i64_clamp:
+ return NVPTXISD::Suld3DI64Clamp;
+ case Intrinsic::nvvm_suld_3d_v2i8_clamp:
+ return NVPTXISD::Suld3DV2I8Clamp;
+ case Intrinsic::nvvm_suld_3d_v2i16_clamp:
+ return NVPTXISD::Suld3DV2I16Clamp;
+ case Intrinsic::nvvm_suld_3d_v2i32_clamp:
+ return NVPTXISD::Suld3DV2I32Clamp;
+ case Intrinsic::nvvm_suld_3d_v2i64_clamp:
+ return NVPTXISD::Suld3DV2I64Clamp;
+ case Intrinsic::nvvm_suld_3d_v4i8_clamp:
+ return NVPTXISD::Suld3DV4I8Clamp;
+ case Intrinsic::nvvm_suld_3d_v4i16_clamp:
+ return NVPTXISD::Suld3DV4I16Clamp;
+ case Intrinsic::nvvm_suld_3d_v4i32_clamp:
+ return NVPTXISD::Suld3DV4I32Clamp;
case Intrinsic::nvvm_suld_1d_i8_trap:
return NVPTXISD::Suld1DI8Trap;
case Intrinsic::nvvm_suld_1d_i16_trap:
return NVPTXISD::Suld1DI16Trap;
case Intrinsic::nvvm_suld_1d_i32_trap:
return NVPTXISD::Suld1DI32Trap;
+ case Intrinsic::nvvm_suld_1d_i64_trap:
+ return NVPTXISD::Suld1DI64Trap;
case Intrinsic::nvvm_suld_1d_v2i8_trap:
return NVPTXISD::Suld1DV2I8Trap;
case Intrinsic::nvvm_suld_1d_v2i16_trap:
return NVPTXISD::Suld1DV2I16Trap;
case Intrinsic::nvvm_suld_1d_v2i32_trap:
return NVPTXISD::Suld1DV2I32Trap;
+ case Intrinsic::nvvm_suld_1d_v2i64_trap:
+ return NVPTXISD::Suld1DV2I64Trap;
case Intrinsic::nvvm_suld_1d_v4i8_trap:
return NVPTXISD::Suld1DV4I8Trap;
case Intrinsic::nvvm_suld_1d_v4i16_trap:
return NVPTXISD::Suld1DArrayI16Trap;
case Intrinsic::nvvm_suld_1d_array_i32_trap:
return NVPTXISD::Suld1DArrayI32Trap;
+ case Intrinsic::nvvm_suld_1d_array_i64_trap:
+ return NVPTXISD::Suld1DArrayI64Trap;
case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
return NVPTXISD::Suld1DArrayV2I8Trap;
case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
return NVPTXISD::Suld1DArrayV2I16Trap;
case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
return NVPTXISD::Suld1DArrayV2I32Trap;
+ case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
+ return NVPTXISD::Suld1DArrayV2I64Trap;
case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
return NVPTXISD::Suld1DArrayV4I8Trap;
case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
return NVPTXISD::Suld2DI16Trap;
case Intrinsic::nvvm_suld_2d_i32_trap:
return NVPTXISD::Suld2DI32Trap;
+ case Intrinsic::nvvm_suld_2d_i64_trap:
+ return NVPTXISD::Suld2DI64Trap;
case Intrinsic::nvvm_suld_2d_v2i8_trap:
return NVPTXISD::Suld2DV2I8Trap;
case Intrinsic::nvvm_suld_2d_v2i16_trap:
return NVPTXISD::Suld2DV2I16Trap;
case Intrinsic::nvvm_suld_2d_v2i32_trap:
return NVPTXISD::Suld2DV2I32Trap;
+ case Intrinsic::nvvm_suld_2d_v2i64_trap:
+ return NVPTXISD::Suld2DV2I64Trap;
case Intrinsic::nvvm_suld_2d_v4i8_trap:
return NVPTXISD::Suld2DV4I8Trap;
case Intrinsic::nvvm_suld_2d_v4i16_trap:
return NVPTXISD::Suld2DArrayI16Trap;
case Intrinsic::nvvm_suld_2d_array_i32_trap:
return NVPTXISD::Suld2DArrayI32Trap;
+ case Intrinsic::nvvm_suld_2d_array_i64_trap:
+ return NVPTXISD::Suld2DArrayI64Trap;
case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
return NVPTXISD::Suld2DArrayV2I8Trap;
case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
return NVPTXISD::Suld2DArrayV2I16Trap;
case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
return NVPTXISD::Suld2DArrayV2I32Trap;
+ case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
+ return NVPTXISD::Suld2DArrayV2I64Trap;
case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
return NVPTXISD::Suld2DArrayV4I8Trap;
case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
return NVPTXISD::Suld3DI16Trap;
case Intrinsic::nvvm_suld_3d_i32_trap:
return NVPTXISD::Suld3DI32Trap;
+ case Intrinsic::nvvm_suld_3d_i64_trap:
+ return NVPTXISD::Suld3DI64Trap;
case Intrinsic::nvvm_suld_3d_v2i8_trap:
return NVPTXISD::Suld3DV2I8Trap;
case Intrinsic::nvvm_suld_3d_v2i16_trap:
return NVPTXISD::Suld3DV2I16Trap;
case Intrinsic::nvvm_suld_3d_v2i32_trap:
return NVPTXISD::Suld3DV2I32Trap;
+ case Intrinsic::nvvm_suld_3d_v2i64_trap:
+ return NVPTXISD::Suld3DV2I64Trap;
case Intrinsic::nvvm_suld_3d_v4i8_trap:
return NVPTXISD::Suld3DV4I8Trap;
case Intrinsic::nvvm_suld_3d_v4i16_trap:
return NVPTXISD::Suld3DV4I16Trap;
case Intrinsic::nvvm_suld_3d_v4i32_trap:
return NVPTXISD::Suld3DV4I32Trap;
+ case Intrinsic::nvvm_suld_1d_i8_zero:
+ return NVPTXISD::Suld1DI8Zero;
+ case Intrinsic::nvvm_suld_1d_i16_zero:
+ return NVPTXISD::Suld1DI16Zero;
+ case Intrinsic::nvvm_suld_1d_i32_zero:
+ return NVPTXISD::Suld1DI32Zero;
+ case Intrinsic::nvvm_suld_1d_i64_zero:
+ return NVPTXISD::Suld1DI64Zero;
+ case Intrinsic::nvvm_suld_1d_v2i8_zero:
+ return NVPTXISD::Suld1DV2I8Zero;
+ case Intrinsic::nvvm_suld_1d_v2i16_zero:
+ return NVPTXISD::Suld1DV2I16Zero;
+ case Intrinsic::nvvm_suld_1d_v2i32_zero:
+ return NVPTXISD::Suld1DV2I32Zero;
+ case Intrinsic::nvvm_suld_1d_v2i64_zero:
+ return NVPTXISD::Suld1DV2I64Zero;
+ case Intrinsic::nvvm_suld_1d_v4i8_zero:
+ return NVPTXISD::Suld1DV4I8Zero;
+ case Intrinsic::nvvm_suld_1d_v4i16_zero:
+ return NVPTXISD::Suld1DV4I16Zero;
+ case Intrinsic::nvvm_suld_1d_v4i32_zero:
+ return NVPTXISD::Suld1DV4I32Zero;
+ case Intrinsic::nvvm_suld_1d_array_i8_zero:
+ return NVPTXISD::Suld1DArrayI8Zero;
+ case Intrinsic::nvvm_suld_1d_array_i16_zero:
+ return NVPTXISD::Suld1DArrayI16Zero;
+ case Intrinsic::nvvm_suld_1d_array_i32_zero:
+ return NVPTXISD::Suld1DArrayI32Zero;
+ case Intrinsic::nvvm_suld_1d_array_i64_zero:
+ return NVPTXISD::Suld1DArrayI64Zero;
+ case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
+ return NVPTXISD::Suld1DArrayV2I8Zero;
+ case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
+ return NVPTXISD::Suld1DArrayV2I16Zero;
+ case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
+ return NVPTXISD::Suld1DArrayV2I32Zero;
+ case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
+ return NVPTXISD::Suld1DArrayV2I64Zero;
+ case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
+ return NVPTXISD::Suld1DArrayV4I8Zero;
+ case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
+ return NVPTXISD::Suld1DArrayV4I16Zero;
+ case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
+ return NVPTXISD::Suld1DArrayV4I32Zero;
+ case Intrinsic::nvvm_suld_2d_i8_zero:
+ return NVPTXISD::Suld2DI8Zero;
+ case Intrinsic::nvvm_suld_2d_i16_zero:
+ return NVPTXISD::Suld2DI16Zero;
+ case Intrinsic::nvvm_suld_2d_i32_zero:
+ return NVPTXISD::Suld2DI32Zero;
+ case Intrinsic::nvvm_suld_2d_i64_zero:
+ return NVPTXISD::Suld2DI64Zero;
+ case Intrinsic::nvvm_suld_2d_v2i8_zero:
+ return NVPTXISD::Suld2DV2I8Zero;
+ case Intrinsic::nvvm_suld_2d_v2i16_zero:
+ return NVPTXISD::Suld2DV2I16Zero;
+ case Intrinsic::nvvm_suld_2d_v2i32_zero:
+ return NVPTXISD::Suld2DV2I32Zero;
+ case Intrinsic::nvvm_suld_2d_v2i64_zero:
+ return NVPTXISD::Suld2DV2I64Zero;
+ case Intrinsic::nvvm_suld_2d_v4i8_zero:
+ return NVPTXISD::Suld2DV4I8Zero;
+ case Intrinsic::nvvm_suld_2d_v4i16_zero:
+ return NVPTXISD::Suld2DV4I16Zero;
+ case Intrinsic::nvvm_suld_2d_v4i32_zero:
+ return NVPTXISD::Suld2DV4I32Zero;
+ case Intrinsic::nvvm_suld_2d_array_i8_zero:
+ return NVPTXISD::Suld2DArrayI8Zero;
+ case Intrinsic::nvvm_suld_2d_array_i16_zero:
+ return NVPTXISD::Suld2DArrayI16Zero;
+ case Intrinsic::nvvm_suld_2d_array_i32_zero:
+ return NVPTXISD::Suld2DArrayI32Zero;
+ case Intrinsic::nvvm_suld_2d_array_i64_zero:
+ return NVPTXISD::Suld2DArrayI64Zero;
+ case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
+ return NVPTXISD::Suld2DArrayV2I8Zero;
+ case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
+ return NVPTXISD::Suld2DArrayV2I16Zero;
+ case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
+ return NVPTXISD::Suld2DArrayV2I32Zero;
+ case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
+ return NVPTXISD::Suld2DArrayV2I64Zero;
+ case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
+ return NVPTXISD::Suld2DArrayV4I8Zero;
+ case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
+ return NVPTXISD::Suld2DArrayV4I16Zero;
+ case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
+ return NVPTXISD::Suld2DArrayV4I32Zero;
+ case Intrinsic::nvvm_suld_3d_i8_zero:
+ return NVPTXISD::Suld3DI8Zero;
+ case Intrinsic::nvvm_suld_3d_i16_zero:
+ return NVPTXISD::Suld3DI16Zero;
+ case Intrinsic::nvvm_suld_3d_i32_zero:
+ return NVPTXISD::Suld3DI32Zero;
+ case Intrinsic::nvvm_suld_3d_i64_zero:
+ return NVPTXISD::Suld3DI64Zero;
+ case Intrinsic::nvvm_suld_3d_v2i8_zero:
+ return NVPTXISD::Suld3DV2I8Zero;
+ case Intrinsic::nvvm_suld_3d_v2i16_zero:
+ return NVPTXISD::Suld3DV2I16Zero;
+ case Intrinsic::nvvm_suld_3d_v2i32_zero:
+ return NVPTXISD::Suld3DV2I32Zero;
+ case Intrinsic::nvvm_suld_3d_v2i64_zero:
+ return NVPTXISD::Suld3DV2I64Zero;
+ case Intrinsic::nvvm_suld_3d_v4i8_zero:
+ return NVPTXISD::Suld3DV4I8Zero;
+ case Intrinsic::nvvm_suld_3d_v4i16_zero:
+ return NVPTXISD::Suld3DV4I16Zero;
+ case Intrinsic::nvvm_suld_3d_v4i32_zero:
+ return NVPTXISD::Suld3DV4I32Zero;
}
}
case Intrinsic::nvvm_ldu_global_i:
case Intrinsic::nvvm_ldu_global_f:
- case Intrinsic::nvvm_ldu_global_p:
+ case Intrinsic::nvvm_ldu_global_p: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
Info.memVT = getValueType(I.getType());
- else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
+ else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
+ Info.memVT = getPointerTy();
+ else
Info.memVT = getValueType(I.getType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+
+ return true;
+ }
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p: {
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
+ Info.memVT = getValueType(I.getType());
+ else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
+ Info.memVT = getPointerTy();
else
- Info.memVT = MVT::f32;
+ Info.memVT = getValueType(I.getType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
- Info.align = 0;
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+
return true;
+ }
- case Intrinsic::nvvm_tex_1d_v4f32_i32:
+ case Intrinsic::nvvm_tex_1d_v4f32_s32:
case Intrinsic::nvvm_tex_1d_v4f32_f32:
case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
- case Intrinsic::nvvm_tex_1d_array_v4f32_i32:
+ case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
- case Intrinsic::nvvm_tex_2d_v4f32_i32:
+ case Intrinsic::nvvm_tex_2d_v4f32_s32:
case Intrinsic::nvvm_tex_2d_v4f32_f32:
case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
- case Intrinsic::nvvm_tex_2d_array_v4f32_i32:
+ case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
- case Intrinsic::nvvm_tex_3d_v4f32_i32:
+ case Intrinsic::nvvm_tex_3d_v4f32_s32:
case Intrinsic::nvvm_tex_3d_v4f32_f32:
case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
- case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: {
+ case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_cube_v4f32_f32:
+ case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
+ case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
+ case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
+ case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
+ case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
+ case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
+ case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
+ case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
+ case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
+ case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
+ case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
+ case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
+ case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: {
Info.opc = getOpcForTextureInstr(Intrinsic);
- Info.memVT = MVT::f32;
+ Info.memVT = MVT::v4f32;
Info.ptrVal = nullptr;
Info.offset = 0;
Info.vol = 0;
Info.align = 16;
return true;
}
- case Intrinsic::nvvm_tex_1d_v4i32_i32:
- case Intrinsic::nvvm_tex_1d_v4i32_f32:
- case Intrinsic::nvvm_tex_1d_level_v4i32_f32:
- case Intrinsic::nvvm_tex_1d_grad_v4i32_f32:
- case Intrinsic::nvvm_tex_1d_array_v4i32_i32:
- case Intrinsic::nvvm_tex_1d_array_v4i32_f32:
- case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32:
- case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32:
- case Intrinsic::nvvm_tex_2d_v4i32_i32:
- case Intrinsic::nvvm_tex_2d_v4i32_f32:
- case Intrinsic::nvvm_tex_2d_level_v4i32_f32:
- case Intrinsic::nvvm_tex_2d_grad_v4i32_f32:
- case Intrinsic::nvvm_tex_2d_array_v4i32_i32:
- case Intrinsic::nvvm_tex_2d_array_v4i32_f32:
- case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32:
- case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32:
- case Intrinsic::nvvm_tex_3d_v4i32_i32:
- case Intrinsic::nvvm_tex_3d_v4i32_f32:
- case Intrinsic::nvvm_tex_3d_level_v4i32_f32:
- case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: {
+ case Intrinsic::nvvm_tex_1d_v4s32_s32:
+ case Intrinsic::nvvm_tex_1d_v4s32_f32:
+ case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
+ case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
+ case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_2d_v4s32_s32:
+ case Intrinsic::nvvm_tex_2d_v4s32_f32:
+ case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
+ case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
+ case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_3d_v4s32_s32:
+ case Intrinsic::nvvm_tex_3d_v4s32_f32:
+ case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_cube_v4s32_f32:
+ case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
+ case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_cube_v4u32_f32:
+ case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
+ case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_1d_v4u32_s32:
+ case Intrinsic::nvvm_tex_1d_v4u32_f32:
+ case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
+ case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
+ case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_2d_v4u32_s32:
+ case Intrinsic::nvvm_tex_2d_v4u32_f32:
+ case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
+ case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
+ case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_3d_v4u32_s32:
+ case Intrinsic::nvvm_tex_3d_v4u32_f32:
+ case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
+ case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
+ case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
+ case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
+ case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
+ case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
+ case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
+ case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
+ case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
+ case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
+ case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
+ case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
+ case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
+ case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
+ case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
+ case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
+ case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
+ case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: {
Info.opc = getOpcForTextureInstr(Intrinsic);
- Info.memVT = MVT::i32;
+ Info.memVT = MVT::v4i32;
Info.ptrVal = nullptr;
Info.offset = 0;
Info.vol = 0;
Info.align = 16;
return true;
}
+ case Intrinsic::nvvm_suld_1d_i8_clamp:
+ case Intrinsic::nvvm_suld_1d_v2i8_clamp:
+ case Intrinsic::nvvm_suld_1d_v4i8_clamp:
+ case Intrinsic::nvvm_suld_1d_array_i8_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
+ case Intrinsic::nvvm_suld_2d_i8_clamp:
+ case Intrinsic::nvvm_suld_2d_v2i8_clamp:
+ case Intrinsic::nvvm_suld_2d_v4i8_clamp:
+ case Intrinsic::nvvm_suld_2d_array_i8_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
+ case Intrinsic::nvvm_suld_3d_i8_clamp:
+ case Intrinsic::nvvm_suld_3d_v2i8_clamp:
+ case Intrinsic::nvvm_suld_3d_v4i8_clamp:
case Intrinsic::nvvm_suld_1d_i8_trap:
case Intrinsic::nvvm_suld_1d_v2i8_trap:
case Intrinsic::nvvm_suld_1d_v4i8_trap:
case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
case Intrinsic::nvvm_suld_3d_i8_trap:
case Intrinsic::nvvm_suld_3d_v2i8_trap:
- case Intrinsic::nvvm_suld_3d_v4i8_trap: {
+ case Intrinsic::nvvm_suld_3d_v4i8_trap:
+ case Intrinsic::nvvm_suld_1d_i8_zero:
+ case Intrinsic::nvvm_suld_1d_v2i8_zero:
+ case Intrinsic::nvvm_suld_1d_v4i8_zero:
+ case Intrinsic::nvvm_suld_1d_array_i8_zero:
+ case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
+ case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
+ case Intrinsic::nvvm_suld_2d_i8_zero:
+ case Intrinsic::nvvm_suld_2d_v2i8_zero:
+ case Intrinsic::nvvm_suld_2d_v4i8_zero:
+ case Intrinsic::nvvm_suld_2d_array_i8_zero:
+ case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
+ case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
+ case Intrinsic::nvvm_suld_3d_i8_zero:
+ case Intrinsic::nvvm_suld_3d_v2i8_zero:
+ case Intrinsic::nvvm_suld_3d_v4i8_zero: {
Info.opc = getOpcForSurfaceInstr(Intrinsic);
Info.memVT = MVT::i8;
Info.ptrVal = nullptr;
Info.align = 16;
return true;
}
+ case Intrinsic::nvvm_suld_1d_i16_clamp:
+ case Intrinsic::nvvm_suld_1d_v2i16_clamp:
+ case Intrinsic::nvvm_suld_1d_v4i16_clamp:
+ case Intrinsic::nvvm_suld_1d_array_i16_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
+ case Intrinsic::nvvm_suld_2d_i16_clamp:
+ case Intrinsic::nvvm_suld_2d_v2i16_clamp:
+ case Intrinsic::nvvm_suld_2d_v4i16_clamp:
+ case Intrinsic::nvvm_suld_2d_array_i16_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
+ case Intrinsic::nvvm_suld_3d_i16_clamp:
+ case Intrinsic::nvvm_suld_3d_v2i16_clamp:
+ case Intrinsic::nvvm_suld_3d_v4i16_clamp:
case Intrinsic::nvvm_suld_1d_i16_trap:
case Intrinsic::nvvm_suld_1d_v2i16_trap:
case Intrinsic::nvvm_suld_1d_v4i16_trap:
case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
case Intrinsic::nvvm_suld_3d_i16_trap:
case Intrinsic::nvvm_suld_3d_v2i16_trap:
- case Intrinsic::nvvm_suld_3d_v4i16_trap: {
+ case Intrinsic::nvvm_suld_3d_v4i16_trap:
+ case Intrinsic::nvvm_suld_1d_i16_zero:
+ case Intrinsic::nvvm_suld_1d_v2i16_zero:
+ case Intrinsic::nvvm_suld_1d_v4i16_zero:
+ case Intrinsic::nvvm_suld_1d_array_i16_zero:
+ case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
+ case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
+ case Intrinsic::nvvm_suld_2d_i16_zero:
+ case Intrinsic::nvvm_suld_2d_v2i16_zero:
+ case Intrinsic::nvvm_suld_2d_v4i16_zero:
+ case Intrinsic::nvvm_suld_2d_array_i16_zero:
+ case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
+ case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
+ case Intrinsic::nvvm_suld_3d_i16_zero:
+ case Intrinsic::nvvm_suld_3d_v2i16_zero:
+ case Intrinsic::nvvm_suld_3d_v4i16_zero: {
Info.opc = getOpcForSurfaceInstr(Intrinsic);
Info.memVT = MVT::i16;
Info.ptrVal = nullptr;
Info.align = 16;
return true;
}
+ case Intrinsic::nvvm_suld_1d_i32_clamp:
+ case Intrinsic::nvvm_suld_1d_v2i32_clamp:
+ case Intrinsic::nvvm_suld_1d_v4i32_clamp:
+ case Intrinsic::nvvm_suld_1d_array_i32_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
+ case Intrinsic::nvvm_suld_2d_i32_clamp:
+ case Intrinsic::nvvm_suld_2d_v2i32_clamp:
+ case Intrinsic::nvvm_suld_2d_v4i32_clamp:
+ case Intrinsic::nvvm_suld_2d_array_i32_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
+ case Intrinsic::nvvm_suld_3d_i32_clamp:
+ case Intrinsic::nvvm_suld_3d_v2i32_clamp:
+ case Intrinsic::nvvm_suld_3d_v4i32_clamp:
case Intrinsic::nvvm_suld_1d_i32_trap:
case Intrinsic::nvvm_suld_1d_v2i32_trap:
case Intrinsic::nvvm_suld_1d_v4i32_trap:
case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
case Intrinsic::nvvm_suld_3d_i32_trap:
case Intrinsic::nvvm_suld_3d_v2i32_trap:
- case Intrinsic::nvvm_suld_3d_v4i32_trap: {
+ case Intrinsic::nvvm_suld_3d_v4i32_trap:
+ case Intrinsic::nvvm_suld_1d_i32_zero:
+ case Intrinsic::nvvm_suld_1d_v2i32_zero:
+ case Intrinsic::nvvm_suld_1d_v4i32_zero:
+ case Intrinsic::nvvm_suld_1d_array_i32_zero:
+ case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
+ case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
+ case Intrinsic::nvvm_suld_2d_i32_zero:
+ case Intrinsic::nvvm_suld_2d_v2i32_zero:
+ case Intrinsic::nvvm_suld_2d_v4i32_zero:
+ case Intrinsic::nvvm_suld_2d_array_i32_zero:
+ case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
+ case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
+ case Intrinsic::nvvm_suld_3d_i32_zero:
+ case Intrinsic::nvvm_suld_3d_v2i32_zero:
+ case Intrinsic::nvvm_suld_3d_v4i32_zero: {
Info.opc = getOpcForSurfaceInstr(Intrinsic);
Info.memVT = MVT::i32;
Info.ptrVal = nullptr;
Info.align = 16;
return true;
}
-
+ case Intrinsic::nvvm_suld_1d_i64_clamp:
+ case Intrinsic::nvvm_suld_1d_v2i64_clamp:
+ case Intrinsic::nvvm_suld_1d_array_i64_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
+ case Intrinsic::nvvm_suld_2d_i64_clamp:
+ case Intrinsic::nvvm_suld_2d_v2i64_clamp:
+ case Intrinsic::nvvm_suld_2d_array_i64_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
+ case Intrinsic::nvvm_suld_3d_i64_clamp:
+ case Intrinsic::nvvm_suld_3d_v2i64_clamp:
+ case Intrinsic::nvvm_suld_1d_i64_trap:
+ case Intrinsic::nvvm_suld_1d_v2i64_trap:
+ case Intrinsic::nvvm_suld_1d_array_i64_trap:
+ case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
+ case Intrinsic::nvvm_suld_2d_i64_trap:
+ case Intrinsic::nvvm_suld_2d_v2i64_trap:
+ case Intrinsic::nvvm_suld_2d_array_i64_trap:
+ case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
+ case Intrinsic::nvvm_suld_3d_i64_trap:
+ case Intrinsic::nvvm_suld_3d_v2i64_trap:
+ case Intrinsic::nvvm_suld_1d_i64_zero:
+ case Intrinsic::nvvm_suld_1d_v2i64_zero:
+ case Intrinsic::nvvm_suld_1d_array_i64_zero:
+ case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
+ case Intrinsic::nvvm_suld_2d_i64_zero:
+ case Intrinsic::nvvm_suld_2d_v2i64_zero:
+ case Intrinsic::nvvm_suld_2d_array_i64_zero:
+ case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
+ case Intrinsic::nvvm_suld_3d_i64_zero:
+ case Intrinsic::nvvm_suld_3d_v2i64_zero: {
+ Info.opc = getOpcForSurfaceInstr(Intrinsic);
+ Info.memVT = MVT::i64;
+ Info.ptrVal = nullptr;
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 16;
+ return true;
+ }
}
return false;
}
switch (Constraint[0]) {
default:
break;
+ case 'b':
case 'r':
case 'h':
case 'c':
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
+ case 'b':
+ return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
case 'c':
return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
case 'h':
// NVPTX DAG Combining
//===----------------------------------------------------------------------===//
-extern unsigned FMAContractLevel;
+bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
+ CodeGenOpt::Level OptLevel) const {
+ const Function *F = MF.getFunction();
+ const TargetOptions &TO = MF.getTarget().Options;
+
+ // Always honor command-line argument
+ if (FMAContractLevelOpt.getNumOccurrences() > 0) {
+ return FMAContractLevelOpt > 0;
+ } else if (OptLevel == 0) {
+ // Do not contract if we're not optimizing the code
+ return false;
+ } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
+ // Honor TargetOptions flags that explicitly say fusion is okay
+ return true;
+ } else if (F->hasFnAttribute("unsafe-fp-math")) {
+ // Check for unsafe-fp-math=true coming from Clang
+ Attribute Attr = F->getFnAttribute("unsafe-fp-math");
+ StringRef Val = Attr.getValueAsString();
+ if (Val == "true")
+ return true;
+ }
+
+ // We did not have a clear indication that fusion is allowed, so assume not
+ return false;
+}
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
}
else if (N0.getOpcode() == ISD::FMUL) {
if (VT == MVT::f32 || VT == MVT::f64) {
- if (FMAContractLevel == 0)
+ const auto *TLI = static_cast<const NVPTXTargetLowering *>(
+ &DAG.getTargetLoweringInfo());
+ if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
return SDValue();
// For floating point:
if (Op.getOpcode() == ISD::SIGN_EXTEND ||
Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getSizeInBits() == OptSize) {
+ if (OrigVT.getSizeInBits() <= OptSize) {
S = Signed;
return true;
}
} else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getSizeInBits() == OptSize) {
+ if (OrigVT.getSizeInBits() <= OptSize) {
S = Unsigned;
return true;
}
SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
- // FIXME: Get this from the DAG somehow
- CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
+ CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
switch (N->getOpcode()) {
default: break;
case ISD::ADD:
/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
+ const DataLayout *TD,
SmallVectorImpl<SDValue> &Results) {
EVT ResVT = N->getValueType(0);
SDLoc DL(N);
break;
}
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+
+ unsigned Align = LD->getAlignment();
+ unsigned PrefAlign =
+ TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
+ if (Align < PrefAlign) {
+ // This load is not sufficiently aligned, so bail out and let this vector
+ // load be scalarized. Note that we may still be able to emit smaller
+ // vector loads. For example, if we are loading a <4 x float> with an
+ // alignment of 8, this check will fail but the legalizer will try again
+ // with 2 x <2 x float>, which will succeed with an alignment of 8.
+ return;
+ }
+
EVT EltVT = ResVT.getVectorElementType();
unsigned NumElts = ResVT.getVectorNumElements();
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
OtherOps.push_back(N->getOperand(i));
- LoadSDNode *LD = cast<LoadSDNode>(N);
-
// The select routine does not have access to the LoadSDNode instance, so
// pass along the extension information
OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
default:
report_fatal_error("Unhandled custom legalization");
case ISD::LOAD:
- ReplaceLoadVector(N, DAG, Results);
+ ReplaceLoadVector(N, DAG, getDataLayout(), Results);
return;
case ISD::INTRINSIC_W_CHAIN:
ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
delete DwarfRangesSection;
delete DwarfMacroInfoSection;
}
+
+const MCSection *
+NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ return getDataSection();
+}