Mips
MBlaze
MSP430
+ NVPTX
PowerPC
PTX
Sparc
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
+ nvptx-*) llvm_cv_target_arch="NVPTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
esac])
Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ NVPTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
*) AC_SUBST(TARGET_HAS_JIT,0) ;;
esac
fi
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
[Build specific host targets: all or target1,target2,... Valid targets are:
host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
- xcore, msp430, ptx, and cpp (default=all)]),,
+ xcore, msp430, ptx, nvptx, and cpp (default=all)]),,
enableval=all)
if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX Hexagon" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
+ nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
+ NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
*) AC_MSG_ERROR([Can not set target to build]) ;;
esac ;;
*) AC_MSG_ERROR([Unrecognized target $a_target]) ;;
--enable-targets Build specific host targets: all or
target1,target2,... Valid targets are: host, x86,
x86_64, sparc, powerpc, arm, mips, spu, hexagon,
- xcore, msp430, ptx, and cpp (default=all)
+ xcore, msp430, ptx, nvptx, and cpp (default=all)
--enable-bindings Build specific language bindings:
all,auto,none,{binding-name} (default=auto)
--enable-libffi Check for the presence of libffi (default is NO)
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
+ nvptx-*) llvm_cv_target_arch="NVPTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
esac
fi
MBlaze) TARGET_HAS_JIT=0
;;
PTX) TARGET_HAS_JIT=0
+ ;;
+ NVPTX) TARGET_HAS_JIT=0
;;
*) TARGET_HAS_JIT=0
;;
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX Hexagon" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
+ nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
+ NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
*) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
echo "$as_me: error: Can not set target to build" >&2;}
{ (exit 1); exit 1; }; } ;;
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10404 "configure"
+#line 10409 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
mblaze, // MBlaze: mblaze
ptx32, // PTX: ptx (32-bit)
ptx64, // PTX: ptx (64-bit)
+ nvptx, // NVPTX: 32-bit
+ nvptx64, // NVPTX: 64-bit
le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten)
amdil // amdil: amd IL
};
include "llvm/IntrinsicsXCore.td"
include "llvm/IntrinsicsPTX.td"
include "llvm/IntrinsicsHexagon.td"
+include "llvm/IntrinsicsNVVM.td"
--- /dev/null
+//===- IntrinsicsNVVM.td - Defines NVVM intrinsics ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the NVVM-specific intrinsics for use with NVPTX.
+//
+//===----------------------------------------------------------------------===//
+
+def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
+
+//
+// MISC
+//
+
+ def int_nvvm_clz_i : GCCBuiltin<"__nvvm_clz_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_clz_ll : GCCBuiltin<"__nvvm_clz_ll">,
+ Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+ def int_nvvm_popc_i : GCCBuiltin<"__nvvm_popc_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_popc_ll : GCCBuiltin<"__nvvm_popc_ll">,
+ Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+ def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Min Max
+//
+
+ def int_nvvm_min_i : GCCBuiltin<"__nvvm_min_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_min_ui : GCCBuiltin<"__nvvm_min_ui">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_min_ll : GCCBuiltin<"__nvvm_min_ll">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_min_ull : GCCBuiltin<"__nvvm_min_ull">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_max_i : GCCBuiltin<"__nvvm_max_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_max_ui : GCCBuiltin<"__nvvm_max_ui">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_max_ll : GCCBuiltin<"__nvvm_max_ll">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_max_ull : GCCBuiltin<"__nvvm_max_ull">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fmin_ftz_f : GCCBuiltin<"__nvvm_fmin_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_fmax_f : GCCBuiltin<"__nvvm_fmax_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]
+ , [IntrNoMem, Commutative]>;
+ def int_nvvm_fmax_ftz_f : GCCBuiltin<"__nvvm_fmax_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_fmin_d : GCCBuiltin<"__nvvm_fmin_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fmax_d : GCCBuiltin<"__nvvm_fmax_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Multiplication
+//
+
+ def int_nvvm_mulhi_i : GCCBuiltin<"__nvvm_mulhi_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mulhi_ui : GCCBuiltin<"__nvvm_mulhi_ui">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_mulhi_ll : GCCBuiltin<"__nvvm_mulhi_ll">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mulhi_ull : GCCBuiltin<"__nvvm_mulhi_ull">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_mul_rn_ftz_f : GCCBuiltin<"__nvvm_mul_rn_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rn_f : GCCBuiltin<"__nvvm_mul_rn_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rz_ftz_f : GCCBuiltin<"__nvvm_mul_rz_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rz_f : GCCBuiltin<"__nvvm_mul_rz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rm_ftz_f : GCCBuiltin<"__nvvm_mul_rm_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rm_f : GCCBuiltin<"__nvvm_mul_rm_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rp_ftz_f : GCCBuiltin<"__nvvm_mul_rp_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rp_f : GCCBuiltin<"__nvvm_mul_rp_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_mul_rn_d : GCCBuiltin<"__nvvm_mul_rn_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rz_d : GCCBuiltin<"__nvvm_mul_rz_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rm_d : GCCBuiltin<"__nvvm_mul_rm_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rp_d : GCCBuiltin<"__nvvm_mul_rp_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_mul24_i : GCCBuiltin<"__nvvm_mul24_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul24_ui : GCCBuiltin<"__nvvm_mul24_ui">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Div
+//
+
+ def int_nvvm_div_approx_ftz_f : GCCBuiltin<"__nvvm_div_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_approx_f : GCCBuiltin<"__nvvm_div_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_div_rn_ftz_f : GCCBuiltin<"__nvvm_div_rn_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rn_f : GCCBuiltin<"__nvvm_div_rn_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_div_rz_ftz_f : GCCBuiltin<"__nvvm_div_rz_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rz_f : GCCBuiltin<"__nvvm_div_rz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_div_rm_ftz_f : GCCBuiltin<"__nvvm_div_rm_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rm_f : GCCBuiltin<"__nvvm_div_rm_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_div_rp_ftz_f : GCCBuiltin<"__nvvm_div_rp_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rp_f : GCCBuiltin<"__nvvm_div_rp_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_div_rn_d : GCCBuiltin<"__nvvm_div_rn_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rz_d : GCCBuiltin<"__nvvm_div_rz_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rm_d : GCCBuiltin<"__nvvm_div_rm_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rp_d : GCCBuiltin<"__nvvm_div_rp_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Brev
+//
+
+ def int_nvvm_brev32 : GCCBuiltin<"__nvvm_brev32">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_brev64 : GCCBuiltin<"__nvvm_brev64">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+//
+// Sad
+//
+
+ def int_nvvm_sad_i : GCCBuiltin<"__nvvm_sad_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_sad_ui : GCCBuiltin<"__nvvm_sad_ui">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Floor Ceil
+//
+
+ def int_nvvm_floor_ftz_f : GCCBuiltin<"__nvvm_floor_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_floor_f : GCCBuiltin<"__nvvm_floor_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_floor_d : GCCBuiltin<"__nvvm_floor_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_ceil_ftz_f : GCCBuiltin<"__nvvm_ceil_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_ceil_f : GCCBuiltin<"__nvvm_ceil_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_ceil_d : GCCBuiltin<"__nvvm_ceil_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Abs
+//
+
+ def int_nvvm_abs_i : GCCBuiltin<"__nvvm_abs_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_abs_ll : GCCBuiltin<"__nvvm_abs_ll">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+ def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Round
+//
+
+ def int_nvvm_round_ftz_f : GCCBuiltin<"__nvvm_round_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_round_f : GCCBuiltin<"__nvvm_round_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_round_d : GCCBuiltin<"__nvvm_round_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Trunc
+//
+
+ def int_nvvm_trunc_ftz_f : GCCBuiltin<"__nvvm_trunc_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_trunc_f : GCCBuiltin<"__nvvm_trunc_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_trunc_d : GCCBuiltin<"__nvvm_trunc_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Saturate
+//
+
+ def int_nvvm_saturate_ftz_f : GCCBuiltin<"__nvvm_saturate_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_saturate_f : GCCBuiltin<"__nvvm_saturate_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_saturate_d : GCCBuiltin<"__nvvm_saturate_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Exp2 Log2
+//
+
+ def int_nvvm_ex2_approx_ftz_f : GCCBuiltin<"__nvvm_ex2_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_ex2_approx_f : GCCBuiltin<"__nvvm_ex2_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_lg2_approx_f : GCCBuiltin<"__nvvm_lg2_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_lg2_approx_d : GCCBuiltin<"__nvvm_lg2_approx_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Sin Cos
+//
+
+ def int_nvvm_sin_approx_ftz_f : GCCBuiltin<"__nvvm_sin_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sin_approx_f : GCCBuiltin<"__nvvm_sin_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_cos_approx_ftz_f : GCCBuiltin<"__nvvm_cos_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+//
+// Fma
+//
+
+ def int_nvvm_fma_rn_ftz_f : GCCBuiltin<"__nvvm_fma_rn_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rn_f : GCCBuiltin<"__nvvm_fma_rn_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rz_ftz_f : GCCBuiltin<"__nvvm_fma_rz_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rz_f : GCCBuiltin<"__nvvm_fma_rz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rm_ftz_f : GCCBuiltin<"__nvvm_fma_rm_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rm_f : GCCBuiltin<"__nvvm_fma_rm_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rp_ftz_f : GCCBuiltin<"__nvvm_fma_rp_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rp_f : GCCBuiltin<"__nvvm_fma_rp_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_fma_rn_d : GCCBuiltin<"__nvvm_fma_rn_d">,
+ Intrinsic<[llvm_double_ty],
+ [llvm_double_ty, llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rz_d : GCCBuiltin<"__nvvm_fma_rz_d">,
+ Intrinsic<[llvm_double_ty],
+ [llvm_double_ty, llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rm_d : GCCBuiltin<"__nvvm_fma_rm_d">,
+ Intrinsic<[llvm_double_ty],
+ [llvm_double_ty, llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rp_d : GCCBuiltin<"__nvvm_fma_rp_d">,
+ Intrinsic<[llvm_double_ty],
+ [llvm_double_ty, llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Rcp
+//
+
+ def int_nvvm_rcp_rn_ftz_f : GCCBuiltin<"__nvvm_rcp_rn_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rn_f : GCCBuiltin<"__nvvm_rcp_rn_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rz_ftz_f : GCCBuiltin<"__nvvm_rcp_rz_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rz_f : GCCBuiltin<"__nvvm_rcp_rz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rm_ftz_f : GCCBuiltin<"__nvvm_rcp_rm_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rm_f : GCCBuiltin<"__nvvm_rcp_rm_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rp_ftz_f : GCCBuiltin<"__nvvm_rcp_rp_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rp_f : GCCBuiltin<"__nvvm_rcp_rp_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_rcp_rn_d : GCCBuiltin<"__nvvm_rcp_rn_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rz_d : GCCBuiltin<"__nvvm_rcp_rz_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rm_d : GCCBuiltin<"__nvvm_rcp_rm_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Sqrt
+//
+
+ def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rz_ftz_f : GCCBuiltin<"__nvvm_sqrt_rz_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rz_f : GCCBuiltin<"__nvvm_sqrt_rz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rm_ftz_f : GCCBuiltin<"__nvvm_sqrt_rm_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rm_f : GCCBuiltin<"__nvvm_sqrt_rm_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rp_ftz_f : GCCBuiltin<"__nvvm_sqrt_rp_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rp_f : GCCBuiltin<"__nvvm_sqrt_rp_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_approx_ftz_f : GCCBuiltin<"__nvvm_sqrt_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_approx_f : GCCBuiltin<"__nvvm_sqrt_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_sqrt_rn_d : GCCBuiltin<"__nvvm_sqrt_rn_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rz_d : GCCBuiltin<"__nvvm_sqrt_rz_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rm_d : GCCBuiltin<"__nvvm_sqrt_rm_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rp_d : GCCBuiltin<"__nvvm_sqrt_rp_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Rsqrt
+//
+
+ def int_nvvm_rsqrt_approx_ftz_f : GCCBuiltin<"__nvvm_rsqrt_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rsqrt_approx_f : GCCBuiltin<"__nvvm_rsqrt_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rsqrt_approx_d : GCCBuiltin<"__nvvm_rsqrt_approx_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Add
+//
+
+ def int_nvvm_add_rn_ftz_f : GCCBuiltin<"__nvvm_add_rn_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rn_f : GCCBuiltin<"__nvvm_add_rn_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rz_ftz_f : GCCBuiltin<"__nvvm_add_rz_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rz_f : GCCBuiltin<"__nvvm_add_rz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rm_ftz_f : GCCBuiltin<"__nvvm_add_rm_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rm_f : GCCBuiltin<"__nvvm_add_rm_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rp_ftz_f : GCCBuiltin<"__nvvm_add_rp_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rp_f : GCCBuiltin<"__nvvm_add_rp_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_add_rn_d : GCCBuiltin<"__nvvm_add_rn_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rz_d : GCCBuiltin<"__nvvm_add_rz_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rm_d : GCCBuiltin<"__nvvm_add_rm_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rp_d : GCCBuiltin<"__nvvm_add_rp_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Convert
+//
+
+ def int_nvvm_d2f_rn_ftz : GCCBuiltin<"__nvvm_d2f_rn_ftz">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rn : GCCBuiltin<"__nvvm_d2f_rn">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rz_ftz : GCCBuiltin<"__nvvm_d2f_rz_ftz">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rz : GCCBuiltin<"__nvvm_d2f_rz">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rm_ftz : GCCBuiltin<"__nvvm_d2f_rm_ftz">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rm : GCCBuiltin<"__nvvm_d2f_rm">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rp_ftz : GCCBuiltin<"__nvvm_d2f_rp_ftz">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rp : GCCBuiltin<"__nvvm_d2f_rp">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_d2i_rn : GCCBuiltin<"__nvvm_d2i_rn">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2i_rz : GCCBuiltin<"__nvvm_d2i_rz">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2i_rm : GCCBuiltin<"__nvvm_d2i_rm">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2i_rp : GCCBuiltin<"__nvvm_d2i_rp">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_d2ui_rn : GCCBuiltin<"__nvvm_d2ui_rn">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ui_rz : GCCBuiltin<"__nvvm_d2ui_rz">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ui_rm : GCCBuiltin<"__nvvm_d2ui_rm">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ui_rp : GCCBuiltin<"__nvvm_d2ui_rp">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_i2d_rn : GCCBuiltin<"__nvvm_i2d_rn">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_i2d_rz : GCCBuiltin<"__nvvm_i2d_rz">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_i2d_rm : GCCBuiltin<"__nvvm_i2d_rm">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_i2d_rp : GCCBuiltin<"__nvvm_i2d_rp">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ def int_nvvm_ui2d_rn : GCCBuiltin<"__nvvm_ui2d_rn">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_ui2d_rz : GCCBuiltin<"__nvvm_ui2d_rz">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_ui2d_rm : GCCBuiltin<"__nvvm_ui2d_rm">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_ui2d_rp : GCCBuiltin<"__nvvm_ui2d_rp">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ def int_nvvm_f2i_rn_ftz : GCCBuiltin<"__nvvm_f2i_rn_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rn : GCCBuiltin<"__nvvm_f2i_rn">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rz_ftz : GCCBuiltin<"__nvvm_f2i_rz_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rz : GCCBuiltin<"__nvvm_f2i_rz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rm_ftz : GCCBuiltin<"__nvvm_f2i_rm_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rm : GCCBuiltin<"__nvvm_f2i_rm">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rp_ftz : GCCBuiltin<"__nvvm_f2i_rp_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rp : GCCBuiltin<"__nvvm_f2i_rp">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_f2ui_rn_ftz : GCCBuiltin<"__nvvm_f2ui_rn_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rn : GCCBuiltin<"__nvvm_f2ui_rn">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rz_ftz : GCCBuiltin<"__nvvm_f2ui_rz_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rz : GCCBuiltin<"__nvvm_f2ui_rz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rm_ftz : GCCBuiltin<"__nvvm_f2ui_rm_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rm : GCCBuiltin<"__nvvm_f2ui_rm">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rp_ftz : GCCBuiltin<"__nvvm_f2ui_rp_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rp : GCCBuiltin<"__nvvm_f2ui_rp">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_i2f_rn : GCCBuiltin<"__nvvm_i2f_rn">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_i2f_rz : GCCBuiltin<"__nvvm_i2f_rz">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_i2f_rm : GCCBuiltin<"__nvvm_i2f_rm">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_i2f_rp : GCCBuiltin<"__nvvm_i2f_rp">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ def int_nvvm_ui2f_rn : GCCBuiltin<"__nvvm_ui2f_rn">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_ui2f_rz : GCCBuiltin<"__nvvm_ui2f_rz">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_ui2f_rm : GCCBuiltin<"__nvvm_ui2f_rm">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_ui2f_rp : GCCBuiltin<"__nvvm_ui2f_rp">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ def int_nvvm_lohi_i2d : GCCBuiltin<"__nvvm_lohi_i2d">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_d2i_lo : GCCBuiltin<"__nvvm_d2i_lo">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2i_hi : GCCBuiltin<"__nvvm_d2i_hi">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_f2ll_rn_ftz : GCCBuiltin<"__nvvm_f2ll_rn_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rn : GCCBuiltin<"__nvvm_f2ll_rn">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rz_ftz : GCCBuiltin<"__nvvm_f2ll_rz_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rz : GCCBuiltin<"__nvvm_f2ll_rz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rm_ftz : GCCBuiltin<"__nvvm_f2ll_rm_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rm : GCCBuiltin<"__nvvm_f2ll_rm">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rp_ftz : GCCBuiltin<"__nvvm_f2ll_rp_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rp : GCCBuiltin<"__nvvm_f2ll_rp">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_f2ull_rn_ftz : GCCBuiltin<"__nvvm_f2ull_rn_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rn : GCCBuiltin<"__nvvm_f2ull_rn">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rz_ftz : GCCBuiltin<"__nvvm_f2ull_rz_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rz : GCCBuiltin<"__nvvm_f2ull_rz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rm_ftz : GCCBuiltin<"__nvvm_f2ull_rm_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rm : GCCBuiltin<"__nvvm_f2ull_rm">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rp_ftz : GCCBuiltin<"__nvvm_f2ull_rp_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rp : GCCBuiltin<"__nvvm_f2ull_rp">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_d2ll_rn : GCCBuiltin<"__nvvm_d2ll_rn">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ll_rz : GCCBuiltin<"__nvvm_d2ll_rz">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ll_rm : GCCBuiltin<"__nvvm_d2ll_rm">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ll_rp : GCCBuiltin<"__nvvm_d2ll_rp">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_d2ull_rn : GCCBuiltin<"__nvvm_d2ull_rn">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ull_rz : GCCBuiltin<"__nvvm_d2ull_rz">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ull_rm : GCCBuiltin<"__nvvm_d2ull_rm">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ull_rp : GCCBuiltin<"__nvvm_d2ull_rp">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_ll2f_rn : GCCBuiltin<"__nvvm_ll2f_rn">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ll2f_rz : GCCBuiltin<"__nvvm_ll2f_rz">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ll2f_rm : GCCBuiltin<"__nvvm_ll2f_rm">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ll2f_rp : GCCBuiltin<"__nvvm_ll2f_rp">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2f_rn : GCCBuiltin<"__nvvm_ull2f_rn">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2f_rz : GCCBuiltin<"__nvvm_ull2f_rz">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2f_rm : GCCBuiltin<"__nvvm_ull2f_rm">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2f_rp : GCCBuiltin<"__nvvm_ull2f_rp">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+ def int_nvvm_ll2d_rn : GCCBuiltin<"__nvvm_ll2d_rn">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ll2d_rz : GCCBuiltin<"__nvvm_ll2d_rz">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ll2d_rm : GCCBuiltin<"__nvvm_ll2d_rm">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ll2d_rp : GCCBuiltin<"__nvvm_ll2d_rp">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2d_rn : GCCBuiltin<"__nvvm_ull2d_rn">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2d_rz : GCCBuiltin<"__nvvm_ull2d_rz">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2d_rm : GCCBuiltin<"__nvvm_ull2d_rm">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2d_rp : GCCBuiltin<"__nvvm_ull2d_rp">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+ def int_nvvm_f2h_rn_ftz : GCCBuiltin<"__nvvm_f2h_rn_ftz">,
+ Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">,
+ Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_h2f : GCCBuiltin<"__nvvm_h2f">,
+ Intrinsic<[llvm_float_ty], [llvm_i16_ty], [IntrNoMem]>;
+
+//
+// Bitcast
+//
+
+ def int_nvvm_bitcast_f2i : GCCBuiltin<"__nvvm_bitcast_f2i">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_bitcast_i2f : GCCBuiltin<"__nvvm_bitcast_i2f">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ def int_nvvm_bitcast_ll2d : GCCBuiltin<"__nvvm_bitcast_ll2d">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+
+
+// Atomic not available as an llvm intrinsic.
+ def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
+ [LLVMAnyPointerType<llvm_float_ty>, llvm_float_ty],
+ [IntrReadWriteArgMem, NoCapture<0>]>;
+ def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
+ [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
+ [IntrReadWriteArgMem, NoCapture<0>]>;
+ def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty],
+ [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
+ [IntrReadWriteArgMem, NoCapture<0>]>;
+
+// Bar.Sync
+ def int_cuda_syncthreads : GCCBuiltin<"__syncthreads">,
+ Intrinsic<[], [], []>;
+ def int_nvvm_barrier0 : GCCBuiltin<"__nvvm_bar0">,
+ Intrinsic<[], [], []>;
+ def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+ def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+ def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+
+ // Membar
+ def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
+ Intrinsic<[], [], []>;
+ def int_nvvm_membar_gl : GCCBuiltin<"__nvvm_membar_gl">,
+ Intrinsic<[], [], []>;
+ def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
+ Intrinsic<[], [], []>;
+
+
+// Accessing special registers
+ def int_nvvm_read_ptx_sreg_tid_x :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_tid_x">;
+ def int_nvvm_read_ptx_sreg_tid_y :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_tid_y">;
+ def int_nvvm_read_ptx_sreg_tid_z :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_tid_z">;
+
+ def int_nvvm_read_ptx_sreg_ntid_x :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_x">;
+ def int_nvvm_read_ptx_sreg_ntid_y :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_y">;
+ def int_nvvm_read_ptx_sreg_ntid_z :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_z">;
+
+ def int_nvvm_read_ptx_sreg_ctaid_x :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_x">;
+ def int_nvvm_read_ptx_sreg_ctaid_y :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_y">;
+ def int_nvvm_read_ptx_sreg_ctaid_z :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_z">;
+
+ def int_nvvm_read_ptx_sreg_nctaid_x :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_x">;
+ def int_nvvm_read_ptx_sreg_nctaid_y :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_y">;
+ def int_nvvm_read_ptx_sreg_nctaid_z :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_z">;
+
+ def int_nvvm_read_ptx_sreg_warpsize :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_warpsize">;
+
+
+// Generated within nvvm. Use for ldu on sm_20 or later
+// @TODO: Revisit this, Changed LLVMAnyPointerType to LLVMPointerType
+def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
+ [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+ "llvm.nvvm.ldu.global.i">;
+def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+ "llvm.nvvm.ldu.global.f">;
+def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
+ [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+ "llvm.nvvm.ldu.global.p">;
+
+
+// Use for generic pointers
+// - These intrinsics are used to convert address spaces.
+// - The input pointer and output pointer must have the same type, except for
+// the address-space. (This restriction is not enforced here as there is
+// currently no way to describe it).
+// - This complements the llvm bitcast, which can be used to cast one type
+// of pointer to another type of pointer, while the address space remains
+// the same.
+def int_nvvm_ptr_local_to_gen: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
+ "llvm.nvvm.ptr.local.to.gen">;
+def int_nvvm_ptr_shared_to_gen: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
+ "llvm.nvvm.ptr.shared.to.gen">;
+def int_nvvm_ptr_global_to_gen: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
+ "llvm.nvvm.ptr.global.to.gen">;
+def int_nvvm_ptr_constant_to_gen: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
+ "llvm.nvvm.ptr.constant.to.gen">;
+
+def int_nvvm_ptr_gen_to_global: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
+ "llvm.nvvm.ptr.gen.to.global">;
+def int_nvvm_ptr_gen_to_shared: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
+ "llvm.nvvm.ptr.gen.to.shared">;
+def int_nvvm_ptr_gen_to_local: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
+ "llvm.nvvm.ptr.gen.to.local">;
+def int_nvvm_ptr_gen_to_constant: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
+ "llvm.nvvm.ptr.gen.to.constant">;
+
+// Used in nvvm internally to help address space opt and ptx code generation
+// This is for params that are passed to kernel functions by pointer by-val.
+def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty],
+ [IntrNoMem, NoCapture<0>],
+ "llvm.nvvm.ptr.gen.to.param">;
+
+// Move intrinsics, used in nvvm internally
+
+def int_nvvm_move_i8 : Intrinsic<[llvm_i8_ty], [llvm_i8_ty], [IntrNoMem],
+ "llvm.nvvm.move.i8">;
+def int_nvvm_move_i16 : Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem],
+ "llvm.nvvm.move.i16">;
+def int_nvvm_move_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem],
+ "llvm.nvvm.move.i32">;
+def int_nvvm_move_i64 : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem],
+ "llvm.nvvm.move.i64">;
+def int_nvvm_move_float : Intrinsic<[llvm_float_ty], [llvm_float_ty],
+ [IntrNoMem], "llvm.nvvm.move.float">;
+def int_nvvm_move_double : Intrinsic<[llvm_double_ty], [llvm_double_ty],
+ [IntrNoMem], "llvm.nvvm.move.double">;
+def int_nvvm_move_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
+ [IntrNoMem, NoCapture<0>], "llvm.nvvm.move.ptr">;
+
+
+/// Error / Warn
+def int_nvvm_compiler_error :
+ Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">;
+def int_nvvm_compiler_warn :
+ Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
case mblaze: return "mblaze";
case ptx32: return "ptx32";
case ptx64: return "ptx64";
+ case nvptx: return "nvptx";
+ case nvptx64: return "nvptx64";
case le32: return "le32";
case amdil: return "amdil";
}
case ptx32: return "ptx";
case ptx64: return "ptx";
+ case nvptx: return "nvptx";
+ case nvptx64: return "nvptx";
case le32: return "le32";
case amdil: return "amdil";
}
.Case("xcore", xcore)
.Case("ptx32", ptx32)
.Case("ptx64", ptx64)
+ .Case("nvptx", nvptx)
+ .Case("nvptx64", nvptx64)
.Case("le32", le32)
.Case("amdil", amdil)
.Default(UnknownArch);
.Case("r600", Triple::r600)
.Case("ptx32", Triple::ptx32)
.Case("ptx64", Triple::ptx64)
+ .Case("nvptx", Triple::nvptx)
+ .Case("nvptx64", Triple::nvptx64)
.Case("amdil", Triple::amdil)
.Default(Triple::UnknownArch);
}
.Case("r600", "r600")
.Case("ptx32", "ptx32")
.Case("ptx64", "ptx64")
+ .Case("nvptx", "nvptx")
+ .Case("nvptx64", "nvptx64")
.Case("le32", "le32")
.Case("amdil", "amdil")
.Default(NULL);
.Case("xcore", Triple::xcore)
.Case("ptx32", Triple::ptx32)
.Case("ptx64", Triple::ptx64)
+ .Case("nvptx", Triple::nvptx)
+ .Case("nvptx64", Triple::nvptx64)
.Case("le32", Triple::le32)
.Case("amdil", Triple::amdil)
.Default(Triple::UnknownArch);
case llvm::Triple::mblaze:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
+ case llvm::Triple::nvptx:
case llvm::Triple::ppc:
case llvm::Triple::ptx32:
case llvm::Triple::r600:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
+ case llvm::Triple::nvptx64:
case llvm::Triple::ppc64:
case llvm::Triple::ptx64:
case llvm::Triple::sparcv9:
case Triple::mblaze:
case Triple::mips:
case Triple::mipsel:
+ case Triple::nvptx:
case Triple::ppc:
case Triple::ptx32:
case Triple::r600:
case Triple::mips64: T.setArch(Triple::mips); break;
case Triple::mips64el: T.setArch(Triple::mipsel); break;
+ case Triple::nvptx64: T.setArch(Triple::nvptx); break;
case Triple::ppc64: T.setArch(Triple::ppc); break;
case Triple::ptx64: T.setArch(Triple::ptx32); break;
case Triple::sparcv9: T.setArch(Triple::sparc); break;
case Triple::mips64:
case Triple::mips64el:
+ case Triple::nvptx64:
case Triple::ppc64:
case Triple::ptx64:
case Triple::sparcv9:
case Triple::mips: T.setArch(Triple::mips64); break;
case Triple::mipsel: T.setArch(Triple::mips64el); break;
+ case Triple::nvptx: T.setArch(Triple::nvptx64); break;
case Triple::ppc: T.setArch(Triple::ppc64); break;
case Triple::ptx32: T.setArch(Triple::ptx64); break;
case Triple::sparc: T.setArch(Triple::sparcv9); break;
;===------------------------------------------------------------------------===;
[common]
-subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore
+subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PTX PowerPC Sparc X86 XCore
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
--- /dev/null
+set(LLVM_TARGET_DEFINITIONS NVPTX.td)
+
+
+tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(NVPTXCommonTableGen)
+
+set(NVPTXCodeGen_sources
+ NVPTXFrameLowering.cpp
+ NVPTXInstrInfo.cpp
+ NVPTXISelDAGToDAG.cpp
+ NVPTXISelLowering.cpp
+ NVPTXRegisterInfo.cpp
+ NVPTXSubtarget.cpp
+ NVPTXTargetMachine.cpp
+ NVPTXSplitBBatBar.cpp
+ NVPTXLowerAggrCopies.cpp
+ NVPTXutil.cpp
+ NVPTXAllocaHoisting.cpp
+ NVPTXAsmPrinter.cpp
+ NVPTXUtilities.cpp
+ VectorElementize.cpp
+ )
+
+add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
+
+
+add_subdirectory(TargetInfo)
+add_subdirectory(InstPrinter)
+add_subdirectory(MCTargetDesc)
--- /dev/null
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMNVPTXAsmPrinter
+ NVPTXInstPrinter.cpp
+ )
+
+add_dependencies(LLVMNVPTXAsmPrinter NVPTXCommonTableGen)
--- /dev/null
+;===- ./lib/Target/NVPTX/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = NVPTXAsmPrinter
+parent = NVPTX
+required_libraries = MC Support
+add_to_library_groups = NVPTX
--- /dev/null
+##===- lib/Target/NVPTX/AsmPrinter/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMNVPTXAsmPrinter
+
+# Hack: we need to include 'main' ptx target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
--- /dev/null
+// Placeholder
--- /dev/null
+;===- ./lib/Target/NVPTX/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = NVPTX
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = NVPTXCodeGen
+parent = NVPTX
+required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXDesc NVPTXInfo SelectionDAG Support Target TransformUtils
+add_to_library_groups = NVPTX
--- /dev/null
+add_llvm_library(LLVMNVPTXDesc
+ NVPTXMCAsmInfo.cpp
+ NVPTXMCTargetDesc.cpp
+ )
+
+add_dependencies(LLVMNVPTXDesc NVPTXCommonTableGen)
+
+# Hack: we need to include 'main' target directory to grab private headers
+#include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
--- /dev/null
+;===- ./lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = NVPTXDesc
+parent = NVPTX
+required_libraries = MC NVPTXAsmPrinter NVPTXInfo Support
+add_to_library_groups = NVPTX
--- /dev/null
+##===- lib/Target/NVPTX/TargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMNVPTXDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
--- /dev/null
+//===-- NVPTXBaseInfo.h - Top-level definitions for NVPTX -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the NVPTX target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXBASEINFO_H
+#define NVPTXBASEINFO_H
+
+namespace llvm {
+
+enum AddressSpace {
+ ADDRESS_SPACE_GENERIC = 0,
+ ADDRESS_SPACE_GLOBAL = 1,
+ ADDRESS_SPACE_CONST_NOT_GEN = 2, // Not part of generic space
+ ADDRESS_SPACE_SHARED = 3,
+ ADDRESS_SPACE_CONST = 4,
+ ADDRESS_SPACE_LOCAL = 5,
+
+ // NVVM Internal
+ ADDRESS_SPACE_PARAM = 101
+};
+
+enum PropertyAnnotation {
+ PROPERTY_MAXNTID_X = 0,
+ PROPERTY_MAXNTID_Y,
+ PROPERTY_MAXNTID_Z,
+ PROPERTY_REQNTID_X,
+ PROPERTY_REQNTID_Y,
+ PROPERTY_REQNTID_Z,
+ PROPERTY_MINNCTAPERSM,
+ PROPERTY_ISTEXTURE,
+ PROPERTY_ISSURFACE,
+ PROPERTY_ISSAMPLER,
+ PROPERTY_ISREADONLY_IMAGE_PARAM,
+ PROPERTY_ISWRITEONLY_IMAGE_PARAM,
+ PROPERTY_ISKERNEL_FUNCTION,
+ PROPERTY_ALIGN,
+
+ // last property
+ PROPERTY_LAST
+};
+
+const unsigned AnnotationNameLen = 8; // length of each annotation name
+const char
+PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
+ "maxntidx", // PROPERTY_MAXNTID_X
+ "maxntidy", // PROPERTY_MAXNTID_Y
+ "maxntidz", // PROPERTY_MAXNTID_Z
+ "reqntidx", // PROPERTY_REQNTID_X
+ "reqntidy", // PROPERTY_REQNTID_Y
+ "reqntidz", // PROPERTY_REQNTID_Z
+ "minctasm", // PROPERTY_MINNCTAPERSM
+ "texture", // PROPERTY_ISTEXTURE
+ "surface", // PROPERTY_ISSURFACE
+ "sampler", // PROPERTY_ISSAMPLER
+ "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
+ "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
+ "kernel", // PROPERTY_ISKERNEL_FUNCTION
+ "align", // PROPERTY_ALIGN
+
+ // last property
+ "proplast", // PROPERTY_LAST
+};
+
+// name of named metadata used for global annotations
+#if defined(__GNUC__)
+// As this is declared to be static but some of the .cpp files that
+// include NVVM.h do not use this array, gcc gives a warning when
+// compiling those .cpp files, hence __attribute__((unused)).
+__attribute__((unused))
+#endif
+static const char* NamedMDForAnnotations = "nvvm.annotations";
+
+}
+
+
+#endif
--- /dev/null
+//===-- NVPTXMCAsmInfo.cpp - NVPTX asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the NVPTXMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+bool CompileForDebugging;
+
+// -debug-compile - Command line option to inform opt and llc passes to
+// compile for debugging
+static cl::opt<bool, true>
+Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
+ cl::location(CompileForDebugging),
+ cl::init(false));
+
+void NVPTXMCAsmInfo::anchor() { }
+
+NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::nvptx64)
+ PointerSize = 8;
+
+ CommentString = "//";
+
+ PrivateGlobalPrefix = "$L__";
+
+ AllowPeriodsInName = false;
+
+ HasSetDirective = false;
+
+ HasSingleParameterDotFile = false;
+
+ InlineAsmStart = " inline asm";
+ InlineAsmEnd = " inline asm";
+
+ SupportsDebugInformation = CompileForDebugging;
+ HasDotTypeDotSizeDirective = false;
+
+ Data8bitsDirective = " .b8 ";
+ Data16bitsDirective = " .b16 ";
+ Data32bitsDirective = " .b32 ";
+ Data64bitsDirective = " .b64 ";
+ PrivateGlobalPrefix = "";
+ ZeroDirective = " .b8";
+ AsciiDirective = " .b8";
+ AscizDirective = " .b8";
+
+ // @TODO: Can we just disable this?
+ GlobalDirective = "\t// .globl\t";
+}
--- /dev/null
+//===-- NVPTXMCAsmInfo.h - NVPTX asm properties ----------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the NVPTXMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_MCASM_INFO_H
+#define NVPTX_MCASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+class Target;
+class StringRef;
+
+class NVPTXMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+public:
+ explicit NVPTXMCAsmInfo(const Target &T, const StringRef &TT);
+};
+} // namespace llvm
+
+#endif // NVPTX_MCASM_INFO_H
--- /dev/null
+//===-- NVPTXMCTargetDesc.cpp - NVPTX Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides NVPTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXMCTargetDesc.h"
+#include "NVPTXMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "NVPTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "NVPTXGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "NVPTXGenRegisterInfo.inc"
+
+
+using namespace llvm;
+
+static MCInstrInfo *createNVPTXMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitNVPTXMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ // PTX does not have a return address register.
+ InitNVPTXMCRegisterInfo(X, 0);
+ return X;
+}
+
+static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitNVPTXMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+
+// Force static initialization.
+extern "C" void LLVMInitializeNVPTXTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32);
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32,
+ createNVPTXMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64,
+ createNVPTXMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32,
+ createNVPTXMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64,
+ createNVPTXMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32,
+ createNVPTXMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64,
+ createNVPTXMCSubtargetInfo);
+
+}
--- /dev/null
+//===-- NVPTXMCTargetDesc.h - NVPTX Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides NVPTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXMCTARGETDESC_H
+#define NVPTXMCTARGETDESC_H
+
+namespace llvm {
+class Target;
+
+extern Target TheNVPTXTarget32;
+extern Target TheNVPTXTarget64;
+
+} // End llvm namespace
+
+// Defines symbolic names for PTX registers.
+#define GET_REGINFO_ENUM
+#include "NVPTXGenRegisterInfo.inc"
+
+// Defines symbolic names for the PTX instructions.
+#define GET_INSTRINFO_ENUM
+#include "NVPTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "NVPTXGenSubtargetInfo.inc"
+
+#endif
--- /dev/null
+##===- lib/Target/NVPTX/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMNVPTXCodeGen
+TARGET = NVPTX
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = NVPTXGenAsmWriter.inc \
+ NVPTXGenDAGISel.inc \
+ NVPTXGenInstrInfo.inc \
+ NVPTXGenRegisterInfo.inc \
+ NVPTXGenSubtargetInfo.inc
+
+DIRS = InstPrinter TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
--- /dev/null
+//===-- ManagedStringPool.h - Managed String Pool ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The strings allocated from a managed string pool are owned by the string
+// pool and will be deleted together with the managed string pool.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_SUPPORT_MANAGED_STRING_H
+#define LLVM_SUPPORT_MANAGED_STRING_H
+
+#include "llvm/ADT/SmallVector.h"
+#include <string>
+
+namespace llvm {
+
+/// ManagedStringPool - The strings allocated from a managed string pool are
+/// owned by the string pool and will be deleted together with the managed
+/// string pool.
+class ManagedStringPool {
+ SmallVector<std::string *, 8> Pool;
+
+public:
+ ManagedStringPool() {}
+ ~ManagedStringPool() {
+ SmallVector<std::string *, 8>::iterator Current = Pool.begin();
+ while (Current != Pool.end()) {
+ delete *Current;
+ Current++;
+ }
+ }
+
+ std::string *getManagedString(const char *S) {
+ std::string *Str = new std::string(S);
+ Pool.push_back(Str);
+ return Str;
+ }
+};
+
+}
+
+#endif
--- /dev/null
+//===-- NVPTX.h - Top-level interface for NVPTX representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM NVPTX back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_NVPTX_H
+#define LLVM_TARGET_NVPTX_H
+
+#include <cassert>
+#include <iosfwd>
+#include "llvm/Value.h"
+#include "llvm/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+
+namespace llvm {
+class NVPTXTargetMachine;
+class FunctionPass;
+class formatted_raw_ostream;
+
+namespace NVPTXCC {
+enum CondCodes {
+ EQ,
+ NE,
+ LT,
+ LE,
+ GT,
+ GE
+};
+}
+
+inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case NVPTXCC::NE: return "ne";
+ case NVPTXCC::EQ: return "eq";
+ case NVPTXCC::LT: return "lt";
+ case NVPTXCC::LE: return "le";
+ case NVPTXCC::GT: return "gt";
+ case NVPTXCC::GE: return "ge";
+ }
+}
+
+FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
+ llvm::CodeGenOpt::Level OptLevel);
+FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
+FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
+FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
+FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
+
+bool isImageOrSamplerVal(const Value *, const Module *);
+
+extern Target TheNVPTXTarget32;
+extern Target TheNVPTXTarget64;
+
+namespace NVPTX
+{
+enum DrvInterface {
+ NVCL,
+ CUDA,
+ TEST
+};
+
+// A field inside TSFlags needs a shift and a mask. The usage is
+// always as follows :
+// ((TSFlags & fieldMask) >> fieldShift)
+// The enum keeps the mask, the shift, and all valid values of the
+// field in one place.
+enum VecInstType {
+ VecInstTypeShift = 0,
+ VecInstTypeMask = 0xF,
+
+ VecNOP = 0,
+ VecLoad = 1,
+ VecStore = 2,
+ VecBuild = 3,
+ VecShuffle = 4,
+ VecExtract = 5,
+ VecInsert = 6,
+ VecDest = 7,
+ VecOther = 15
+};
+
+enum SimpleMove {
+ SimpleMoveMask = 0x10,
+ SimpleMoveShift = 4
+};
+enum LoadStore {
+ isLoadMask = 0x20,
+ isLoadShift = 5,
+ isStoreMask = 0x40,
+ isStoreShift = 6
+};
+
+namespace PTXLdStInstCode {
+enum AddressSpace{
+ GENERIC = 0,
+ GLOBAL = 1,
+ CONSTANT = 2,
+ SHARED = 3,
+ PARAM = 4,
+ LOCAL = 5
+};
+enum FromType {
+ Unsigned = 0,
+ Signed,
+ Float
+};
+enum VecType {
+ Scalar = 1,
+ V2 = 2,
+ V4 = 4
+};
+}
+}
+} // end namespace llvm;
+
+// Defines symbolic names for NVPTX registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "NVPTXGenRegisterInfo.inc"
+
+// Defines symbolic names for the NVPTX instructions.
+#define GET_INSTRINFO_ENUM
+#include "NVPTXGenInstrInfo.inc"
+
+#endif
--- /dev/null
+//===- NVPTX.td - Describe the NVPTX Target Machine -----------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the NVPTX target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+include "NVPTXRegisterInfo.td"
+include "NVPTXInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+// - We use the SM version number instead of explicit feature table.
+// - Need at least one feature to avoid generating zero sized array by
+// TableGen in NVPTXGenSubtarget.inc.
+//===----------------------------------------------------------------------===//
+def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">;
+
+//===----------------------------------------------------------------------===//
+// NVPTX supported processors.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"sm_10", [FeatureDummy]>;
+
+
+def NVPTXInstrInfo : InstrInfo {
+}
+
+def NVPTX : Target {
+ let InstructionSet = NVPTXInstrInfo;
+}
--- /dev/null
+//===-- AllocaHoisting.cpp - Hosist allocas to the entry block --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hoist the alloca instructions in the non-entry blocks to the entry blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "NVPTXAllocaHoisting.h"
+
+namespace llvm {
+
+bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
+ bool functionModified = false;
+ Function::iterator I = function.begin();
+ TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
+
+ for (Function::iterator E = function.end(); I != E; ++I) {
+ for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
+ AllocaInst *allocaInst = dyn_cast<AllocaInst>(BI++);
+ if (allocaInst && isa<ConstantInt>(allocaInst->getArraySize())) {
+ allocaInst->moveBefore(firstTerminatorInst);
+ functionModified = true;
+ }
+ }
+ }
+
+ return functionModified;
+}
+
+char NVPTXAllocaHoisting::ID = 1;
+RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting",
+ "Hoisting alloca instructsion in non-entry "
+ "blocks to the entry block");
+
+FunctionPass *createAllocaHoisting() {
+ return new NVPTXAllocaHoisting();
+}
+
+} // end namespace llvm
--- /dev/null
+//===-- AllocaHoisting.h - Hosist allocas to the entry block ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hoist the alloca instructions in the non-entry blocks to the entry blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_ALLOCA_HOISTING_H_
+#define NVPTX_ALLOCA_HOISTING_H_
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+class FunctionPass;
+class Function;
+
+// Hoisting the alloca instructions in the non-entry blocks to the entry
+// block.
+class NVPTXAllocaHoisting : public FunctionPass {
+public:
+ static char ID; // Pass ID
+ NVPTXAllocaHoisting() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetData>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ }
+
+ virtual const char *getPassName() const {
+ return "NVPTX specific alloca hoisting";
+ }
+
+ virtual bool runOnFunction(Function &function);
+};
+
+extern FunctionPass *createAllocaHoisting();
+
+} // end namespace llvm
+
+#endif // NVPTX_ALLOCA_HOISTING_H_
--- /dev/null
+//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to NVPTX assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXInstrInfo.h"
+#include "NVPTXTargetMachine.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXAsmPrinter.h"
+#include "MCTargetDesc/NVPTXMCAsmInfo.h"
+#include "NVPTXNumRegisters.h"
+#include "../lib/CodeGen/AsmPrinter/DwarfDebug.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/DerivedTypes.h"
+#include "NVPTXUtilities.h"
+#include "llvm/Support/TimeValue.h"
+#include <sstream>
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Assembly/Writer.h"
+#include "cl_common_defines.h"
+
+
+using namespace llvm;
+
+
+#include "NVPTXGenAsmWriter.inc"
+
+bool RegAllocNilUsed = true;
+
+#define DEPOTNAME "__local_depot"
+
+static cl::opt<bool>
+EmitLineNumbers("nvptx-emit-line-numbers",
+ cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
+ cl::init(true));
+
+namespace llvm {
+bool InterleaveSrcInPtx = false;
+}
+
+static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src",
+ cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: Emit source line in ptx file"),
+ cl::location(llvm::InterleaveSrcInPtx));
+
+
+
+
+// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we
+// cannot just link to the existing version.
+/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
+///
+using namespace nvptx;
+const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
+ MCContext &Ctx = AP.OutContext;
+
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ return MCConstantExpr::Create(0, Ctx);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+ return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
+ return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (CE == 0)
+ llvm_unreachable("Unknown constant value to lower!");
+
+
+ switch (CE->getOpcode()) {
+ default:
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using TargetData as a
+ // last resort before giving up.
+ if (Constant *C =
+ ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
+ if (C != CE)
+ return LowerConstant(C, AP);
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+ case Instruction::GetElementPtr: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Generate a symbolic expression for the byte address
+ const Constant *PtrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
+ int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec);
+
+ const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
+ if (Offset == 0)
+ return Base;
+
+ // Truncate/sext the offset to the pointer size.
+ if (TD.getPointerSizeInBits() != 64) {
+ int SExtAmount = 64-TD.getPointerSizeInBits();
+ Offset = (Offset << SExtAmount) >> SExtAmount;
+ }
+
+ return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ Ctx);
+ }
+
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ // FALL THROUGH.
+ case Instruction::BitCast:
+ return LowerConstant(CE->getOperand(0), AP);
+
+ case Instruction::IntToPtr: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return LowerConstant(Op, AP);
+ }
+
+ case Instruction::PtrToInt: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ Type *Ty = CE->getType();
+
+ const MCExpr *OpExpr = LowerConstant(Op, AP);
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot equal to the size of the pointer.
+ if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
+ return OpExpr;
+
+ // Otherwise the pointer is smaller than the resultant integer, mask off
+ // the high bits so we are sure to get a proper truncation if the input is
+ // a constant expr.
+ unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
+ const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ }
+
+ // The MC library also has a right-shift operator, but it isn't consistently
+ // signed or unsigned between different targets.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
+ const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+ case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+ case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+ case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+ case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+ case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
+ case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+ }
+ }
+ }
+}
+
+
+void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
+{
+ if (!EmitLineNumbers)
+ return;
+ if (ignoreLoc(MI))
+ return;
+
+ DebugLoc curLoc = MI.getDebugLoc();
+
+ if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
+ return;
+
+ if (prevDebugLoc == curLoc)
+ return;
+
+ prevDebugLoc = curLoc;
+
+ if (curLoc.isUnknown())
+ return;
+
+
+ const MachineFunction *MF = MI.getParent()->getParent();
+ //const TargetMachine &TM = MF->getTarget();
+
+ const LLVMContext &ctx = MF->getFunction()->getContext();
+ DIScope Scope(curLoc.getScope(ctx));
+
+ if (!Scope.Verify())
+ return;
+
+ StringRef fileName(Scope.getFilename());
+ StringRef dirName(Scope.getDirectory());
+ SmallString<128> FullPathName = dirName;
+ if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
+ sys::path::append(FullPathName, fileName);
+ fileName = FullPathName.str();
+ }
+
+ if (filenameMap.find(fileName.str()) == filenameMap.end())
+ return;
+
+
+ // Emit the line from the source file.
+ if (llvm::InterleaveSrcInPtx)
+ this->emitSrcInText(fileName.str(), curLoc.getLine());
+
+ std::stringstream temp;
+ temp << "\t.loc " << filenameMap[fileName.str()]
+ << " " << curLoc.getLine() << " " << curLoc.getCol();
+ OutStreamer.EmitRawText(Twine(temp.str().c_str()));
+}
+
+void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+ emitLineNumberAsDotLoc(*MI);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+}
+
+void NVPTXAsmPrinter::printReturnValStr(const Function *F,
+ raw_ostream &O)
+{
+ const TargetData *TD = TM.getTargetData();
+ const TargetLowering *TLI = TM.getTargetLowering();
+
+ Type *Ty = F->getReturnType();
+
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ if (Ty->getTypeID() == Type::VoidTyID)
+ return;
+
+ O << " (";
+
+ if (isABI) {
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
+ unsigned size = 0;
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
+ size = ITy->getBitWidth();
+ if (size < 32) size = 32;
+ } else {
+ assert(Ty->isFloatingPointTy() &&
+ "Floating point type expected here");
+ size = Ty->getPrimitiveSizeInBits();
+ }
+
+ O << ".param .b" << size << " func_retval0";
+ }
+ else if (isa<PointerType>(Ty)) {
+ O << ".param .b" << TLI->getPointerTy().getSizeInBits()
+ << " func_retval0";
+ } else {
+ if ((Ty->getTypeID() == Type::StructTyID) ||
+ isa<VectorType>(Ty)) {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*TLI, Ty, vtparts);
+ unsigned totalsz = 0;
+ for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+ for (unsigned j=0, je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 8)) sz = 8;
+ totalsz += sz/8;
+ }
+ }
+ unsigned retAlignment = 0;
+ if (!llvm::getAlign(*F, 0, retAlignment))
+ retAlignment = TD->getABITypeAlignment(Ty);
+ O << ".param .align "
+ << retAlignment
+ << " .b8 func_retval0["
+ << totalsz << "]";
+ } else
+ assert(false &&
+ "Unknown return type");
+ }
+ } else {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*TLI, Ty, vtparts);
+ unsigned idx = 0;
+ for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+
+ for (unsigned j=0, je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ O << ".reg .b" << sz << " func_retval" << idx;
+ if (j<je-1) O << ", ";
+ ++idx;
+ }
+ if (i < e-1)
+ O << ", ";
+ }
+ }
+ O << ") ";
+ return;
+}
+
+void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
+ raw_ostream &O) {
+ const Function *F = MF.getFunction();
+ printReturnValStr(F, O);
+}
+
+void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+
+ // Set up
+ MRI = &MF->getRegInfo();
+ F = MF->getFunction();
+ emitLinkageDirective(F,O);
+ if (llvm::isKernelFunction(*F))
+ O << ".entry ";
+ else {
+ O << ".func ";
+ printReturnValStr(*MF, O);
+ }
+
+ O << *CurrentFnSym;
+
+ emitFunctionParamList(*MF, O);
+
+ if (llvm::isKernelFunction(*F))
+ emitKernelFunctionDirectives(*F, O);
+
+ OutStreamer.EmitRawText(O.str());
+
+ prevDebugLoc = DebugLoc();
+}
+
+void NVPTXAsmPrinter::EmitFunctionBodyStart() {
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ unsigned numRegClasses = TRI.getNumRegClasses();
+ VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1];
+ OutStreamer.EmitRawText(StringRef("{\n"));
+ setAndEmitFunctionVirtualRegisters(*MF);
+
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+ emitDemotedVars(MF->getFunction(), O);
+ OutStreamer.EmitRawText(O.str());
+}
+
+void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
+ OutStreamer.EmitRawText(StringRef("}\n"));
+ delete []VRidGlobal2LocalMap;
+}
+
+
+void
+NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F,
+ raw_ostream &O) const {
+ // If the NVVM IR has some of reqntid* specified, then output
+ // the reqntid directive, and set the unspecified ones to 1.
+ // If none of reqntid* is specified, don't output reqntid directive.
+ unsigned reqntidx, reqntidy, reqntidz;
+ bool specified = false;
+ if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1;
+ else specified = true;
+ if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1;
+ else specified = true;
+ if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1;
+ else specified = true;
+
+ if (specified)
+ O << ".reqntid " << reqntidx << ", "
+ << reqntidy << ", " << reqntidz << "\n";
+
+ // If the NVVM IR has some of maxntid* specified, then output
+ // the maxntid directive, and set the unspecified ones to 1.
+ // If none of maxntid* is specified, don't output maxntid directive.
+ unsigned maxntidx, maxntidy, maxntidz;
+ specified = false;
+ if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1;
+ else specified = true;
+ if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1;
+ else specified = true;
+ if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1;
+ else specified = true;
+
+ if (specified)
+ O << ".maxntid " << maxntidx << ", "
+ << maxntidy << ", " << maxntidz << "\n";
+
+ unsigned mincta;
+ if (llvm::getMinCTASm(F, mincta))
+ O << ".minnctapersm " << mincta << "\n";
+}
+
+void
+NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
+ raw_ostream &O) {
+ const TargetRegisterClass * RC = MRI->getRegClass(vr);
+ unsigned id = RC->getID();
+
+ std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[id];
+ unsigned mapped_vr = regmap[vr];
+
+ if (!isVec) {
+ O << getNVPTXRegClassStr(RC) << mapped_vr;
+ return;
+ }
+ // Vector virtual register
+ if (getNVPTXVectorSize(RC) == 4)
+ O << "{"
+ << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
+ << getNVPTXRegClassStr(RC) << mapped_vr << "_1, "
+ << getNVPTXRegClassStr(RC) << mapped_vr << "_2, "
+ << getNVPTXRegClassStr(RC) << mapped_vr << "_3"
+ << "}";
+ else if (getNVPTXVectorSize(RC) == 2)
+ O << "{"
+ << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
+ << getNVPTXRegClassStr(RC) << mapped_vr << "_1"
+ << "}";
+ else
+ assert(0 && "Unsupported vector size");
+}
+
+void
+NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
+ raw_ostream &O) {
+ getVirtualRegisterName(vr, isVec, O);
+}
+
+void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO,
+ const char *Modifier,
+ raw_ostream &O) {
+char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'};
+ int Imm = (int)MO.getImm();
+ if(0 == strcmp(Modifier, "vecelem"))
+ O << "_" << vecelem[Imm];
+ else if(0 == strcmp(Modifier, "vecv4comm1")) {
+ if((Imm < 0) || (Imm > 3))
+ O << "//";
+ }
+ else if(0 == strcmp(Modifier, "vecv4comm2")) {
+ if((Imm < 4) || (Imm > 7))
+ O << "//";
+ }
+ else if(0 == strcmp(Modifier, "vecv4pos")) {
+ if(Imm < 0) Imm = 0;
+ O << "_" << vecelem[Imm%4];
+ }
+ else if(0 == strcmp(Modifier, "vecv2comm1")) {
+ if((Imm < 0) || (Imm > 1))
+ O << "//";
+ }
+ else if(0 == strcmp(Modifier, "vecv2comm2")) {
+ if((Imm < 2) || (Imm > 3))
+ O << "//";
+ }
+ else if(0 == strcmp(Modifier, "vecv2pos")) {
+ if(Imm < 0) Imm = 0;
+ O << "_" << vecelem[Imm%2];
+ }
+ else
+ assert(0 && "Unknown Modifier on immediate operand");
+}
+
+void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (MO.getReg() == NVPTX::VRDepot)
+ O << DEPOTNAME << getFunctionNumber();
+ else
+ O << getRegisterName(MO.getReg());
+ } else {
+ if (!Modifier)
+ emitVirtualRegister(MO.getReg(), false, O);
+ else {
+ if (strcmp(Modifier, "vecfull") == 0)
+ emitVirtualRegister(MO.getReg(), true, O);
+ else
+ assert(0 &&
+ "Don't know how to handle the modifier on virtual register.");
+ }
+ }
+ return;
+
+ case MachineOperand::MO_Immediate:
+ if (!Modifier)
+ O << MO.getImm();
+ else if (strstr(Modifier, "vec") == Modifier)
+ printVecModifiedImmediate(MO, Modifier, O);
+ else
+ assert(0 && "Don't know how to handle modifier on immediate operand");
+ return;
+
+ case MachineOperand::MO_FPImmediate:
+ printFPConstant(MO.getFPImm(), O);
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol: {
+ const char * symbname = MO.getSymbolName();
+ if (strstr(symbname, ".PARAM") == symbname) {
+ unsigned index;
+ sscanf(symbname+6, "%u[];", &index);
+ printParamName(index, O);
+ }
+ else if (strstr(symbname, ".HLPPARAM") == symbname) {
+ unsigned index;
+ sscanf(symbname+9, "%u[];", &index);
+ O << *CurrentFnSym << "_param_" << index << "_offset";
+ }
+ else
+ O << symbname;
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ default:
+ assert(0 && " Operand type not supported.");
+ }
+}
+
+void NVPTXAsmPrinter::
+printImplicitDef(const MachineInstr *MI, raw_ostream &O) const {
+#ifndef __OPTIMIZE__
+ O << "\t// Implicit def :";
+ //printOperand(MI, 0);
+ O << "\n";
+#endif
+}
+
+void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, opNum, O);
+
+ if (Modifier && !strcmp(Modifier, "add")) {
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ } else {
+ if (MI->getOperand(opNum+1).isImm() &&
+ MI->getOperand(opNum+1).getImm() == 0)
+ return; // don't print ',0' or '+0'
+ O << "+";
+ printOperand(MI, opNum+1, O);
+ }
+}
+
+void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier)
+{
+ if (Modifier) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ int Imm = (int)MO.getImm();
+ if (!strcmp(Modifier, "volatile")) {
+ if (Imm)
+ O << ".volatile";
+ } else if (!strcmp(Modifier, "addsp")) {
+ switch (Imm) {
+ case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break;
+ case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break;
+ case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break;
+ case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break;
+ case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break;
+ case NVPTX::PTXLdStInstCode::GENERIC:
+ if (!nvptxSubtarget.hasGenericLdSt())
+ O << ".global";
+ break;
+ default:
+ assert("wrong value");
+ }
+ }
+ else if (!strcmp(Modifier, "sign")) {
+ if (Imm==NVPTX::PTXLdStInstCode::Signed)
+ O << "s";
+ else if (Imm==NVPTX::PTXLdStInstCode::Unsigned)
+ O << "u";
+ else
+ O << "f";
+ }
+ else if (!strcmp(Modifier, "vec")) {
+ if (Imm==NVPTX::PTXLdStInstCode::V2)
+ O << ".v2";
+ else if (Imm==NVPTX::PTXLdStInstCode::V4)
+ O << ".v4";
+ }
+ else
+ assert("unknown modifier");
+ }
+ else
+ assert("unknown modifier");
+}
+
+void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
+
+ emitLinkageDirective(F,O);
+ if (llvm::isKernelFunction(*F))
+ O << ".entry ";
+ else
+ O << ".func ";
+ printReturnValStr(F, O);
+ O << *CurrentFnSym << "\n";
+ emitFunctionParamList(F, O);
+ O << ";\n";
+}
+
+static bool usedInGlobalVarDef(const Constant *C)
+{
+ if (!C)
+ return false;
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->getName().str() == "llvm.used")
+ return false;
+ return true;
+ }
+
+ for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
+ ui!=ue; ++ui) {
+ const Constant *C = dyn_cast<Constant>(*ui);
+ if (usedInGlobalVarDef(C))
+ return true;
+ }
+ return false;
+}
+
+static bool usedInOneFunc(const User *U, Function const *&oneFunc)
+{
+ if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
+ if (othergv->getName().str() == "llvm.used")
+ return true;
+ }
+
+ if (const Instruction *instr = dyn_cast<Instruction>(U)) {
+ if (instr->getParent() && instr->getParent()->getParent()) {
+ const Function *curFunc = instr->getParent()->getParent();
+ if (oneFunc && (curFunc != oneFunc))
+ return false;
+ oneFunc = curFunc;
+ return true;
+ }
+ else
+ return false;
+ }
+
+ if (const MDNode *md = dyn_cast<MDNode>(U))
+ if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
+ (md->getName().str() == "llvm.dbg.sp")))
+ return true;
+
+
+ for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end();
+ ui!=ue; ++ui) {
+ if (usedInOneFunc(*ui, oneFunc) == false)
+ return false;
+ }
+ return true;
+}
+
+/* Find out if a global variable can be demoted to local scope.
+ * Currently, this is valid for CUDA shared variables, which have local
+ * scope and global lifetime. So the conditions to check are :
+ * 1. Is the global variable in shared address space?
+ * 2. Does it have internal linkage?
+ * 3. Is the global variable referenced only in one function?
+ */
+static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
+ if (gv->hasInternalLinkage() == false)
+ return false;
+ const PointerType *Pty = gv->getType();
+ if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
+ return false;
+
+ const Function *oneFunc = 0;
+
+ bool flag = usedInOneFunc(gv, oneFunc);
+ if (flag == false)
+ return false;
+ if (!oneFunc)
+ return false;
+ f = oneFunc;
+ return true;
+}
+
+static bool useFuncSeen(const Constant *C,
+ llvm::DenseMap<const Function *, bool> &seenMap) {
+ for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
+ ui!=ue; ++ui) {
+ if (const Constant *cu = dyn_cast<Constant>(*ui)) {
+ if (useFuncSeen(cu, seenMap))
+ return true;
+ } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
+ const BasicBlock *bb = I->getParent();
+ if (!bb) continue;
+ const Function *caller = bb->getParent();
+ if (!caller) continue;
+ if (seenMap.find(caller) != seenMap.end())
+ return true;
+ }
+ }
+ return false;
+}
+
+void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
+ llvm::DenseMap<const Function *, bool> seenMap;
+ for (Module::const_iterator FI=M.begin(), FE=M.end();
+ FI!=FE; ++FI) {
+ const Function *F = FI;
+
+ if (F->isDeclaration()) {
+ if (F->use_empty())
+ continue;
+ if (F->getIntrinsicID())
+ continue;
+ CurrentFnSym = Mang->getSymbol(F);
+ emitDeclaration(F, O);
+ continue;
+ }
+ for (Value::const_use_iterator iter=F->use_begin(),
+ iterEnd=F->use_end(); iter!=iterEnd; ++iter) {
+ if (const Constant *C = dyn_cast<Constant>(*iter)) {
+ if (usedInGlobalVarDef(C)) {
+ // The use is in the initialization of a global variable
+ // that is a function pointer, so print a declaration
+ // for the original function
+ CurrentFnSym = Mang->getSymbol(F);
+ emitDeclaration(F, O);
+ break;
+ }
+ // Emit a declaration of this function if the function that
+ // uses this constant expr has already been seen.
+ if (useFuncSeen(C, seenMap)) {
+ CurrentFnSym = Mang->getSymbol(F);
+ emitDeclaration(F, O);
+ break;
+ }
+ }
+
+ if (!isa<Instruction>(*iter)) continue;
+ const Instruction *instr = cast<Instruction>(*iter);
+ const BasicBlock *bb = instr->getParent();
+ if (!bb) continue;
+ const Function *caller = bb->getParent();
+ if (!caller) continue;
+
+ // If a caller has already been seen, then the caller is
+ // appearing in the module before the callee. so print out
+ // a declaration for the callee.
+ if (seenMap.find(caller) != seenMap.end()) {
+ CurrentFnSym = Mang->getSymbol(F);
+ emitDeclaration(F, O);
+ break;
+ }
+ }
+ seenMap[F] = true;
+ }
+}
+
+void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(M);
+
+ unsigned i=1;
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ DICompileUnit DIUnit(*I);
+ StringRef Filename(DIUnit.getFilename());
+ StringRef Dirname(DIUnit.getDirectory());
+ SmallString<128> FullPathName = Dirname;
+ if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
+ sys::path::append(FullPathName, Filename);
+ Filename = FullPathName.str();
+ }
+ if (filenameMap.find(Filename.str()) != filenameMap.end())
+ continue;
+ filenameMap[Filename.str()] = i;
+ OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
+ ++i;
+ }
+
+ for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
+ E = DbgFinder.subprogram_end(); I != E; ++I) {
+ DISubprogram SP(*I);
+ StringRef Filename(SP.getFilename());
+ StringRef Dirname(SP.getDirectory());
+ SmallString<128> FullPathName = Dirname;
+ if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
+ sys::path::append(FullPathName, Filename);
+ Filename = FullPathName.str();
+ }
+ if (filenameMap.find(Filename.str()) != filenameMap.end())
+ continue;
+ filenameMap[Filename.str()] = i;
+ ++i;
+ }
+}
+
+bool NVPTXAsmPrinter::doInitialization (Module &M) {
+
+ SmallString<128> Str1;
+ raw_svector_ostream OS1(Str1);
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MMI->AnalyzeModule(M);
+
+ // We need to call the parent's one explicitly.
+ //bool Result = AsmPrinter::doInitialization(M);
+
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+ .Initialize(OutContext, TM);
+
+ Mang = new Mangler(OutContext, *TM.getTargetData());
+
+ // Emit header before any dwarf directives are emitted below.
+ emitHeader(M, OS1);
+ OutStreamer.EmitRawText(OS1.str());
+
+
+ // Already commented out
+ //bool Result = AsmPrinter::doInitialization(M);
+
+
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+ recordAndEmitFilenames(M);
+
+ SmallString<128> Str2;
+ raw_svector_ostream OS2(Str2);
+
+ emitDeclarations(M, OS2);
+
+ // Print out module-level global variables here.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I, OS2);
+
+ OS2 << '\n';
+
+ OutStreamer.EmitRawText(OS2.str());
+ return false; // success
+}
+
+void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) {
+ O << "//\n";
+ O << "// Generated by LLVM NVPTX Back-End\n";
+ O << "//\n";
+ O << "\n";
+
+ O << ".version 3.0\n";
+
+ O << ".target ";
+ O << nvptxSubtarget.getTargetName();
+
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
+ O << ", texmode_independent";
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
+ if (!nvptxSubtarget.hasDouble())
+ O << ", map_f64_to_f32";
+ }
+
+ if (MAI->doesSupportDebugInformation())
+ O << ", debug";
+
+ O << "\n";
+
+ O << ".address_size ";
+ if (nvptxSubtarget.is64Bit())
+ O << "64";
+ else
+ O << "32";
+ O << "\n";
+
+ O << "\n";
+}
+
+bool NVPTXAsmPrinter::doFinalization(Module &M) {
+ // XXX Temproarily remove global variables so that doFinalization() will not
+ // emit them again (global variables are emitted at beginning).
+
+ Module::GlobalListType &global_list = M.getGlobalList();
+ int i, n = global_list.size();
+ GlobalVariable **gv_array = new GlobalVariable* [n];
+
+ // first, back-up GlobalVariable in gv_array
+ i = 0;
+ for (Module::global_iterator I = global_list.begin(), E = global_list.end();
+ I != E; ++I)
+ gv_array[i++] = &*I;
+
+ // second, empty global_list
+ while (!global_list.empty())
+ global_list.remove(global_list.begin());
+
+ // call doFinalization
+ bool ret = AsmPrinter::doFinalization(M);
+
+ // now we restore global variables
+ for (i = 0; i < n; i ++)
+ global_list.insert(global_list.end(), gv_array[i]);
+
+ delete[] gv_array;
+ return ret;
+
+
+ //bool Result = AsmPrinter::doFinalization(M);
+ // Instead of calling the parents doFinalization, we may
+ // clone parents doFinalization and customize here.
+ // Currently, we if NVISA out the EmitGlobals() in
+ // parent's doFinalization, which is too intrusive.
+ //
+ // Same for the doInitialization.
+ //return Result;
+}
+
+// This function emits appropriate linkage directives for
+// functions and global variables.
+//
+// extern function declaration -> .extern
+// extern function definition -> .visible
+// external global variable with init -> .visible
+// external without init -> .extern
+// appending -> not allowed, assert.
+
+void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
+{
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
+ if (V->hasExternalLinkage()) {
+ if (isa<GlobalVariable>(V)) {
+ const GlobalVariable *GVar = cast<GlobalVariable>(V);
+ if (GVar) {
+ if (GVar->hasInitializer())
+ O << ".visible ";
+ else
+ O << ".extern ";
+ }
+ } else if (V->isDeclaration())
+ O << ".extern ";
+ else
+ O << ".visible ";
+ } else if (V->hasAppendingLinkage()) {
+ std::string msg;
+ msg.append("Error: ");
+ msg.append("Symbol ");
+ if (V->hasName())
+ msg.append(V->getName().str());
+ msg.append("has unsupported appending linkage type");
+ llvm_unreachable(msg.c_str());
+ }
+ }
+}
+
+
+void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
+ bool processDemoted) {
+
+ // Skip meta data
+ if (GVar->hasSection()) {
+ if (GVar->getSection() == "llvm.metadata")
+ return;
+ }
+
+ const TargetData *TD = TM.getTargetData();
+
+ // GlobalVariables are always constant pointers themselves.
+ const PointerType *PTy = GVar->getType();
+ Type *ETy = PTy->getElementType();
+
+ if (GVar->hasExternalLinkage()) {
+ if (GVar->hasInitializer())
+ O << ".visible ";
+ else
+ O << ".extern ";
+ }
+
+ if (llvm::isTexture(*GVar)) {
+ O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
+ return;
+ }
+
+ if (llvm::isSurface(*GVar)) {
+ O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
+ return;
+ }
+
+ if (GVar->isDeclaration()) {
+ // (extern) declarations, no definition or initializer
+ // Currently the only known declaration is for an automatic __local
+ // (.shared) promoted to global.
+ emitPTXGlobalVariable(GVar, O);
+ O << ";\n";
+ return;
+ }
+
+ if (llvm::isSampler(*GVar)) {
+ O << ".global .samplerref " << llvm::getSamplerName(*GVar);
+
+ Constant *Initializer = NULL;
+ if (GVar->hasInitializer())
+ Initializer = GVar->getInitializer();
+ ConstantInt *CI = NULL;
+ if (Initializer)
+ CI = dyn_cast<ConstantInt>(Initializer);
+ if (CI) {
+ unsigned sample=CI->getZExtValue();
+
+ O << " = { ";
+
+ for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >>
+ __CLK_ADDRESS_BASE) ; i < 3 ; i++) {
+ O << "addr_mode_" << i << " = ";
+ switch (addr) {
+ case 0: O << "wrap"; break;
+ case 1: O << "clamp_to_border"; break;
+ case 2: O << "clamp_to_edge"; break;
+ case 3: O << "wrap"; break;
+ case 4: O << "mirror"; break;
+ }
+ O <<", ";
+ }
+ O << "filter_mode = ";
+ switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) {
+ case 0: O << "nearest"; break;
+ case 1: O << "linear"; break;
+ case 2: assert ( 0 && "Anisotropic filtering is not supported");
+ default: O << "nearest"; break;
+ }
+ if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) {
+ O << ", force_unnormalized_coords = 1";
+ }
+ O << " }";
+ }
+
+ O << ";\n";
+ return;
+ }
+
+ if (GVar->hasPrivateLinkage()) {
+
+ if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
+ return;
+
+ // FIXME - need better way (e.g. Metadata) to avoid generating this global
+ if (!strncmp(GVar->getName().data(), "filename", 8))
+ return;
+ if (GVar->use_empty())
+ return;
+ }
+
+ const Function *demotedFunc = 0;
+ if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
+ O << "// " << GVar->getName().str() << " has been demoted\n";
+ if (localDecls.find(demotedFunc) != localDecls.end())
+ localDecls[demotedFunc].push_back(GVar);
+ else {
+ std::vector<GlobalVariable *> temp;
+ temp.push_back(GVar);
+ localDecls[demotedFunc] = temp;
+ }
+ return;
+ }
+
+ O << ".";
+ emitPTXAddressSpace(PTy->getAddressSpace(), O);
+ if (GVar->getAlignment() == 0)
+ O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
+ else
+ O << " .align " << GVar->getAlignment();
+
+
+ if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
+ O << " .";
+ O << getPTXFundamentalTypeStr(ETy, false);
+ O << " ";
+ O << *Mang->getSymbol(GVar);
+
+ // Ptx allows variable initilization only for constant and global state
+ // spaces.
+ if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
+ && GVar->hasInitializer()) {
+ Constant *Initializer = GVar->getInitializer();
+ if (!Initializer->isNullValue()) {
+ O << " = " ;
+ printScalarConstant(Initializer, O);
+ }
+ }
+ } else {
+ unsigned int ElementSize =0;
+
+ // Although PTX has direct support for struct type and array type and
+ // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
+ // targets that support these high level field accesses. Structs, arrays
+ // and vectors are lowered into arrays of bytes.
+ switch (ETy->getTypeID()) {
+ case Type::StructTyID:
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ ElementSize = TD->getTypeStoreSize(ETy);
+ // Ptx allows variable initilization only for constant and
+ // global state spaces.
+ if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
+ && GVar->hasInitializer()) {
+ Constant *Initializer = GVar->getInitializer();
+ if (!isa<UndefValue>(Initializer) &&
+ !Initializer->isNullValue()) {
+ AggBuffer aggBuffer(ElementSize, O, *this);
+ bufferAggregateConstant(Initializer, &aggBuffer);
+ if (aggBuffer.numSymbols) {
+ if (nvptxSubtarget.is64Bit()) {
+ O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ;
+ O << ElementSize/8;
+ }
+ else {
+ O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ;
+ O << ElementSize/4;
+ }
+ O << "]";
+ }
+ else {
+ O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+ O << ElementSize;
+ O << "]";
+ }
+ O << " = {" ;
+ aggBuffer.print();
+ O << "}";
+ }
+ else {
+ O << " .b8 " << *Mang->getSymbol(GVar) ;
+ if (ElementSize) {
+ O <<"[" ;
+ O << ElementSize;
+ O << "]";
+ }
+ }
+ }
+ else {
+ O << " .b8 " << *Mang->getSymbol(GVar);
+ if (ElementSize) {
+ O <<"[" ;
+ O << ElementSize;
+ O << "]";
+ }
+ }
+ break;
+ default:
+ assert( 0 && "type not supported yet");
+ }
+
+ }
+ O << ";\n";
+}
+
+void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
+ if (localDecls.find(f) == localDecls.end())
+ return;
+
+ std::vector<GlobalVariable *> &gvars = localDecls[f];
+
+ for (unsigned i=0, e=gvars.size(); i!=e; ++i) {
+ O << "\t// demoted variable\n\t";
+ printModuleLevelGV(gvars[i], O, true);
+ }
+}
+
+void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
+ raw_ostream &O) const {
+ switch (AddressSpace) {
+ case llvm::ADDRESS_SPACE_LOCAL:
+ O << "local" ;
+ break;
+ case llvm::ADDRESS_SPACE_GLOBAL:
+ O << "global" ;
+ break;
+ case llvm::ADDRESS_SPACE_CONST:
+ // This logic should be consistent with that in
+ // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp)
+ if (nvptxSubtarget.hasGenericLdSt())
+ O << "global" ;
+ else
+ O << "const" ;
+ break;
+ case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
+ O << "const" ;
+ break;
+ case llvm::ADDRESS_SPACE_SHARED:
+ O << "shared" ;
+ break;
+ default:
+ assert(0 && "unexpected address space");
+ }
+}
+
+std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
+ bool useB4PTR) const {
+ switch (Ty->getTypeID()) {
+ default:
+ llvm_unreachable("unexpected type");
+ break;
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits == 1)
+ return "pred";
+ else if (NumBits <= 64) {
+ std::string name = "u";
+ return name + utostr(NumBits);
+ } else {
+ llvm_unreachable("Integer too large");
+ break;
+ }
+ break;
+ }
+ case Type::FloatTyID:
+ return "f32";
+ case Type::DoubleTyID:
+ return "f64";
+ case Type::PointerTyID:
+ if (nvptxSubtarget.is64Bit())
+ if (useB4PTR) return "b64";
+ else return "u64";
+ else
+ if (useB4PTR) return "b32";
+ else return "u32";
+ }
+ llvm_unreachable("unexpected type");
+ return NULL;
+}
+
+void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
+ raw_ostream &O) {
+
+ const TargetData *TD = TM.getTargetData();
+
+ // GlobalVariables are always constant pointers themselves.
+ const PointerType *PTy = GVar->getType();
+ Type *ETy = PTy->getElementType();
+
+ O << ".";
+ emitPTXAddressSpace(PTy->getAddressSpace(), O);
+ if (GVar->getAlignment() == 0)
+ O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
+ else
+ O << " .align " << GVar->getAlignment();
+
+ if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
+ O << " .";
+ O << getPTXFundamentalTypeStr(ETy);
+ O << " ";
+ O << *Mang->getSymbol(GVar);
+ return;
+ }
+
+ int64_t ElementSize =0;
+
+ // Although PTX has direct support for struct type and array type and LLVM IR
+ // is very similar to PTX, the LLVM CodeGen does not support for targets that
+ // support these high level field accesses. Structs and arrays are lowered
+ // into arrays of bytes.
+ switch (ETy->getTypeID()) {
+ case Type::StructTyID:
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ ElementSize = TD->getTypeStoreSize(ETy);
+ O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+ if (ElementSize) {
+ O << itostr(ElementSize) ;
+ }
+ O << "]";
+ break;
+ default:
+ assert( 0 && "type not supported yet");
+ }
+ return ;
+}
+
+
+static unsigned int
+getOpenCLAlignment(const TargetData *TD,
+ Type *Ty) {
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
+ return TD->getPrefTypeAlignment(Ty);
+
+ const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ if (ATy)
+ return getOpenCLAlignment(TD, ATy->getElementType());
+
+ const VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (VTy) {
+ Type *ETy = VTy->getElementType();
+ unsigned int numE = VTy->getNumElements();
+ unsigned int alignE = TD->getPrefTypeAlignment(ETy);
+ if (numE == 3)
+ return 4*alignE;
+ else
+ return numE*alignE;
+ }
+
+ const StructType *STy = dyn_cast<StructType>(Ty);
+ if (STy) {
+ unsigned int alignStruct = 1;
+ // Go through each element of the struct and find the
+ // largest alignment.
+ for (unsigned i=0, e=STy->getNumElements(); i != e; i++) {
+ Type *ETy = STy->getElementType(i);
+ unsigned int align = getOpenCLAlignment(TD, ETy);
+ if (align > alignStruct)
+ alignStruct = align;
+ }
+ return alignStruct;
+ }
+
+ const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
+ if (FTy)
+ return TD->getPointerPrefAlignment();
+ return TD->getPrefTypeAlignment(Ty);
+}
+
+void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
+ int paramIndex, raw_ostream &O) {
+ if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
+ (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
+ O << *CurrentFnSym << "_param_" << paramIndex;
+ else {
+ std::string argName = I->getName();
+ const char *p = argName.c_str();
+ while (*p) {
+ if (*p == '.')
+ O << "_";
+ else
+ O << *p;
+ p++;
+ }
+ }
+}
+
+void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
+ Function::const_arg_iterator I, E;
+ int i = 0;
+
+ if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
+ (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
+ O << *CurrentFnSym << "_param_" << paramIndex;
+ return;
+ }
+
+ for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
+ if (i==paramIndex) {
+ printParamName(I, paramIndex, O);
+ return;
+ }
+ }
+ llvm_unreachable("paramIndex out of bound");
+}
+
+void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
+ raw_ostream &O) {
+ const TargetData *TD = TM.getTargetData();
+ const AttrListPtr &PAL = F->getAttributes();
+ const TargetLowering *TLI = TM.getTargetLowering();
+ Function::const_arg_iterator I, E;
+ unsigned paramIndex = 0;
+ bool first = true;
+ bool isKernelFunc = llvm::isKernelFunction(*F);
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ MVT thePointerTy = TLI->getPointerTy();
+
+ O << "(\n";
+
+ for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
+ const Type *Ty = I->getType();
+
+ if (!first)
+ O << ",\n";
+
+ first = false;
+
+ // Handle image/sampler parameters
+ if (llvm::isSampler(*I) || llvm::isImage(*I)) {
+ if (llvm::isImage(*I)) {
+ std::string sname = I->getName();
+ if (llvm::isImageWriteOnly(*I))
+ O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
+ else // Default image is read_only
+ O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
+ }
+ else // Should be llvm::isSampler(*I)
+ O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
+ << paramIndex;
+ continue;
+ }
+
+ if (PAL.paramHasAttr(paramIndex+1, Attribute::ByVal) == false) {
+ // Just a scalar
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ if (isKernelFunc) {
+ if (PTy) {
+ // Special handling for pointer arguments to kernel
+ O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
+
+ if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
+ Type *ETy = PTy->getElementType();
+ int addrSpace = PTy->getAddressSpace();
+ switch(addrSpace) {
+ default:
+ O << ".ptr ";
+ break;
+ case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
+ O << ".ptr .const ";
+ break;
+ case llvm::ADDRESS_SPACE_SHARED:
+ O << ".ptr .shared ";
+ break;
+ case llvm::ADDRESS_SPACE_GLOBAL:
+ case llvm::ADDRESS_SPACE_CONST:
+ O << ".ptr .global ";
+ break;
+ }
+ O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " ";
+ }
+ printParamName(I, paramIndex, O);
+ continue;
+ }
+
+ // non-pointer scalar to kernel func
+ O << "\t.param ."
+ << getPTXFundamentalTypeStr(Ty) << " ";
+ printParamName(I, paramIndex, O);
+ continue;
+ }
+ // Non-kernel function, just print .param .b<size> for ABI
+ // and .reg .b<size> for non ABY
+ unsigned sz = 0;
+ if (isa<IntegerType>(Ty)) {
+ sz = cast<IntegerType>(Ty)->getBitWidth();
+ if (sz < 32) sz = 32;
+ }
+ else if (isa<PointerType>(Ty))
+ sz = thePointerTy.getSizeInBits();
+ else
+ sz = Ty->getPrimitiveSizeInBits();
+ if (isABI)
+ O << "\t.param .b" << sz << " ";
+ else
+ O << "\t.reg .b" << sz << " ";
+ printParamName(I, paramIndex, O);
+ continue;
+ }
+
+ // param has byVal attribute. So should be a pointer
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ assert(PTy &&
+ "Param with byval attribute should be a pointer type");
+ Type *ETy = PTy->getElementType();
+
+ if (isABI || isKernelFunc) {
+ // Just print .param .b8 .align <a> .param[size];
+ // <a> = PAL.getparamalignment
+ // size = typeallocsize of element type
+ unsigned align = PAL.getParamAlignment(paramIndex+1);
+ unsigned sz = TD->getTypeAllocSize(ETy);
+ O << "\t.param .align " << align
+ << " .b8 ";
+ printParamName(I, paramIndex, O);
+ O << "[" << sz << "]";
+ continue;
+ } else {
+ // Split the ETy into constituent parts and
+ // print .param .b<size> <name> for each part.
+ // Further, if a part is vector, print the above for
+ // each vector element.
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*TLI, ETy, vtparts);
+ for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+
+ for (unsigned j=0,je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ O << "\t.reg .b" << sz << " ";
+ printParamName(I, paramIndex, O);
+ if (j<je-1) O << ",\n";
+ ++paramIndex;
+ }
+ if (i<e-1)
+ O << ",\n";
+ }
+ --paramIndex;
+ continue;
+ }
+ }
+
+ O << "\n)\n";
+}
+
+void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
+ raw_ostream &O) {
+ const Function *F = MF.getFunction();
+ emitFunctionParamList(F, O);
+}
+
+
+void NVPTXAsmPrinter::
+setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+
+ // Map the global virtual register number to a register class specific
+ // virtual register number starting from 1 with that class.
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ //unsigned numRegClasses = TRI->getNumRegClasses();
+
+ // Emit the Fake Stack Object
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int NumBytes = (int) MFI->getStackSize();
+ if (NumBytes) {
+ O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t"
+ << DEPOTNAME
+ << getFunctionNumber() << "[" << NumBytes << "];\n";
+ if (nvptxSubtarget.is64Bit()) {
+ O << "\t.reg .b64 \t%SP;\n";
+ O << "\t.reg .b64 \t%SPL;\n";
+ }
+ else {
+ O << "\t.reg .b32 \t%SP;\n";
+ O << "\t.reg .b32 \t%SPL;\n";
+ }
+ }
+
+ // Go through all virtual registers to establish the mapping between the
+ // global virtual
+ // register number and the per class virtual register number.
+ // We use the per class virtual register number in the ptx output.
+ unsigned int numVRs = MRI->getNumVirtRegs();
+ for (unsigned i=0; i< numVRs; i++) {
+ unsigned int vr = TRI->index2VirtReg(i);
+ const TargetRegisterClass *RC = MRI->getRegClass(vr);
+ std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[RC->getID()];
+ int n = regmap.size();
+ regmap.insert(std::make_pair(vr, n+1));
+ }
+
+ // Emit register declarations
+ // @TODO: Extract out the real register usage
+ O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
+
+ // Emit declaration of the virtual registers or 'physical' registers for
+ // each register class
+ //for (unsigned i=0; i< numRegClasses; i++) {
+ // std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[i];
+ // const TargetRegisterClass *RC = TRI->getRegClass(i);
+ // std::string rcname = getNVPTXRegClassName(RC);
+ // std::string rcStr = getNVPTXRegClassStr(RC);
+ // //int n = regmap.size();
+ // if (!isNVPTXVectorRegClass(RC)) {
+ // O << "\t.reg " << rcname << " \t" << rcStr << "<"
+ // << NVPTXNumRegisters << ">;\n";
+ // }
+
+ // Only declare those registers that may be used. And do not emit vector
+ // registers as
+ // they are all elementized to scalar registers.
+ //if (n && !isNVPTXVectorRegClass(RC)) {
+ // if (RegAllocNilUsed) {
+ // O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
+ // << ">;\n";
+ // }
+ // else {
+ // O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr)
+ // << "<" << 32 << ">;\n";
+ // }
+ //}
+ //}
+
+ OutStreamer.EmitRawText(O.str());
+}
+
+
+void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
+ APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
+ bool ignored;
+ unsigned int numHex;
+ const char *lead;
+
+ if (Fp->getType()->getTypeID()==Type::FloatTyID) {
+ numHex = 8;
+ lead = "0f";
+ APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+ &ignored);
+ } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
+ numHex = 16;
+ lead = "0d";
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ } else
+ llvm_unreachable("unsupported fp type");
+
+ APInt API = APF.bitcastToAPInt();
+ std::string hexstr(utohexstr(API.getZExtValue()));
+ O << lead;
+ if (hexstr.length() < numHex)
+ O << std::string(numHex - hexstr.length(), '0');
+ O << utohexstr(API.getZExtValue());
+}
+
+void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+ O << CI->getValue();
+ return;
+ }
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
+ printFPConstant(CFP, O);
+ return;
+ }
+ if (isa<ConstantPointerNull>(CPV)) {
+ O << "0";
+ return;
+ }
+ if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+ O << *Mang->getSymbol(GVar);
+ return;
+ }
+ if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ Value *v = Cexpr->stripPointerCasts();
+ if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+ O << *Mang->getSymbol(GVar);
+ return;
+ } else {
+ O << *LowerConstant(CPV, *this);
+ return;
+ }
+ }
+ llvm_unreachable("Not scalar type found in printScalarConstant()");
+}
+
+
+void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
+ AggBuffer *aggBuffer) {
+
+ const TargetData *TD = TM.getTargetData();
+
+ if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
+ int s = TD->getTypeAllocSize(CPV->getType());
+ if (s<Bytes)
+ s = Bytes;
+ aggBuffer->addZeros(s);
+ return;
+ }
+
+ unsigned char *ptr;
+ switch (CPV->getType()->getTypeID()) {
+
+ case Type::IntegerTyID: {
+ const Type *ETy = CPV->getType();
+ if ( ETy == Type::getInt8Ty(CPV->getContext()) ){
+ unsigned char c =
+ (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ ptr = &c;
+ aggBuffer->addBytes(ptr, 1, Bytes);
+ } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) {
+ short int16 =
+ (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ ptr = (unsigned char*)&int16;
+ aggBuffer->addBytes(ptr, 2, Bytes);
+ } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) {
+ if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+ int int32 =(int)(constInt->getZExtValue());
+ ptr = (unsigned char*)&int32;
+ aggBuffer->addBytes(ptr, 4, Bytes);
+ break;
+ }
+ else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (ConstantInt *constInt =
+ dyn_cast<ConstantInt>(ConstantFoldConstantExpression(
+ Cexpr, TD))) {
+ int int32 =(int)(constInt->getZExtValue());
+ ptr = (unsigned char*)&int32;
+ aggBuffer->addBytes(ptr, 4, Bytes);
+ break;
+ }
+ if (Cexpr->getOpcode() == Instruction::PtrToInt) {
+ Value *v = Cexpr->getOperand(0)->stripPointerCasts();
+ aggBuffer->addSymbol(v);
+ aggBuffer->addZeros(4);
+ break;
+ }
+ }
+ assert(0 && "unsupported integer const type");
+ } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) {
+ if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+ long long int64 =(long long)(constInt->getZExtValue());
+ ptr = (unsigned char*)&int64;
+ aggBuffer->addBytes(ptr, 8, Bytes);
+ break;
+ }
+ else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+ ConstantFoldConstantExpression(Cexpr, TD))) {
+ long long int64 =(long long)(constInt->getZExtValue());
+ ptr = (unsigned char*)&int64;
+ aggBuffer->addBytes(ptr, 8, Bytes);
+ break;
+ }
+ if (Cexpr->getOpcode() == Instruction::PtrToInt) {
+ Value *v = Cexpr->getOperand(0)->stripPointerCasts();
+ aggBuffer->addSymbol(v);
+ aggBuffer->addZeros(8);
+ break;
+ }
+ }
+ llvm_unreachable("unsupported integer const type");
+ }
+ else
+ llvm_unreachable("unsupported integer const type");
+ break;
+ }
+ case Type::FloatTyID:
+ case Type::DoubleTyID: {
+ ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
+ const Type* Ty = CFP->getType();
+ if (Ty == Type::getFloatTy(CPV->getContext())) {
+ float float32 = (float)CFP->getValueAPF().convertToFloat();
+ ptr = (unsigned char*)&float32;
+ aggBuffer->addBytes(ptr, 4, Bytes);
+ } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
+ double float64 = CFP->getValueAPF().convertToDouble();
+ ptr = (unsigned char*)&float64;
+ aggBuffer->addBytes(ptr, 8, Bytes);
+ }
+ else {
+ llvm_unreachable("unsupported fp const type");
+ }
+ break;
+ }
+ case Type::PointerTyID: {
+ if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+ aggBuffer->addSymbol(GVar);
+ }
+ else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ Value *v = Cexpr->stripPointerCasts();
+ aggBuffer->addSymbol(v);
+ }
+ unsigned int s = TD->getTypeAllocSize(CPV->getType());
+ aggBuffer->addZeros(s);
+ break;
+ }
+
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ case Type::StructTyID: {
+ if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
+ isa<ConstantStruct>(CPV)) {
+ int ElementSize = TD->getTypeAllocSize(CPV->getType());
+ bufferAggregateConstant(CPV, aggBuffer);
+ if ( Bytes > ElementSize )
+ aggBuffer->addZeros(Bytes-ElementSize);
+ }
+ else if (isa<ConstantAggregateZero>(CPV))
+ aggBuffer->addZeros(Bytes);
+ else
+ llvm_unreachable("Unexpected Constant type");
+ break;
+ }
+
+ default:
+ llvm_unreachable("unsupported type");
+ }
+}
+
+void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
+ AggBuffer *aggBuffer) {
+ const TargetData *TD = TM.getTargetData();
+ int Bytes;
+
+ // Old constants
+ if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
+ if (CPV->getNumOperands())
+ for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
+ bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
+ return;
+ }
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(CPV)) {
+ if (CDS->getNumElements())
+ for (unsigned i = 0; i < CDS->getNumElements(); ++i)
+ bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
+ aggBuffer);
+ return;
+ }
+
+
+ if (isa<ConstantStruct>(CPV)) {
+ if (CPV->getNumOperands()) {
+ StructType *ST = cast<StructType>(CPV->getType());
+ for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
+ if ( i == (e - 1))
+ Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
+ TD->getTypeAllocSize(ST)
+ - TD->getStructLayout(ST)->getElementOffset(i);
+ else
+ Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) -
+ TD->getStructLayout(ST)->getElementOffset(i);
+ bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes,
+ aggBuffer);
+ }
+ }
+ return;
+ }
+ assert(0 && "unsupported constant type in printAggregateConstant()");
+}
+
+// buildTypeNameMap - Run through symbol table looking for type names.
+//
+
+
+bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
+
+ std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
+
+ if (PI != TypeNameMap.end() &&
+ (!PI->second.compare("struct._image1d_t") ||
+ !PI->second.compare("struct._image2d_t") ||
+ !PI->second.compare("struct._image3d_t")))
+ return true;
+
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
+bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printMemOperand(MI, OpNo, O);
+ O << ']';
+
+ return false;
+}
+
+bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
+{
+ switch(MI.getOpcode()) {
+ default:
+ return false;
+ case NVPTX::CallArgBeginInst: case NVPTX::CallArgEndInst0:
+ case NVPTX::CallArgEndInst1: case NVPTX::CallArgF32:
+ case NVPTX::CallArgF64: case NVPTX::CallArgI16:
+ case NVPTX::CallArgI32: case NVPTX::CallArgI32imm:
+ case NVPTX::CallArgI64: case NVPTX::CallArgI8:
+ case NVPTX::CallArgParam: case NVPTX::CallVoidInst:
+ case NVPTX::CallVoidInstReg: case NVPTX::Callseq_End:
+ case NVPTX::CallVoidInstReg64:
+ case NVPTX::DeclareParamInst: case NVPTX::DeclareRetMemInst:
+ case NVPTX::DeclareRetRegInst: case NVPTX::DeclareRetScalarInst:
+ case NVPTX::DeclareScalarParamInst: case NVPTX::DeclareScalarRegInst:
+ case NVPTX::StoreParamF32: case NVPTX::StoreParamF64:
+ case NVPTX::StoreParamI16: case NVPTX::StoreParamI32:
+ case NVPTX::StoreParamI64: case NVPTX::StoreParamI8:
+ case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8:
+ case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16:
+ case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64:
+ case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32:
+ case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8:
+ case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16:
+ case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8:
+ case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64:
+ case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32:
+ case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8:
+ case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16:
+ case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8:
+ case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64:
+ case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32:
+ case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8:
+ case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64:
+ case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32:
+ case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8:
+ case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16:
+ case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8:
+ case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64:
+ case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32:
+ case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8:
+ case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16:
+ case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8:
+ case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64:
+ case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32:
+ case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64:
+ case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam:
+ case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64:
+ case NVPTX::LoadParamMemI16: case NVPTX::LoadParamMemI32:
+ case NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8:
+ case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64:
+ case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32:
+ case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8:
+ case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64:
+ case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32:
+ case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8:
+ case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16:
+ case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8:
+ case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64:
+ case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32:
+ case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8:
+ case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16:
+ case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8:
+ case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE:
+ return true;
+ }
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
+ RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
+ RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
+}
+
+
+void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
+ std::stringstream temp;
+ LineReader * reader = this->getReader(filename.str());
+ temp << "\n//";
+ temp << filename.str();
+ temp << ":";
+ temp << line;
+ temp << " ";
+ temp << reader->readLine(line);
+ temp << "\n";
+ this->OutStreamer.EmitRawText(Twine(temp.str()));
+}
+
+
+LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
+ if (reader == NULL) {
+ reader = new LineReader(filename);
+ }
+
+ if (reader->fileName() != filename) {
+ delete reader;
+ reader = new LineReader(filename);
+ }
+
+ return reader;
+}
+
+
+std::string
+LineReader::readLine(unsigned lineNum) {
+ if (lineNum < theCurLine) {
+ theCurLine = 0;
+ fstr.seekg(0,std::ios::beg);
+ }
+ while (theCurLine < lineNum) {
+ fstr.getline(buff,500);
+ theCurLine++;
+ }
+ return buff;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeNVPTXAsmPrinter() {
+ RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
+ RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
+}
--- /dev/null
+//===-- NVPTXAsmPrinter.h - NVPTX LLVM assembly writer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to NVPTX assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXASMPRINTER_H
+#define NVPTXASMPRINTER_H
+
+#include "NVPTX.h"
+#include "NVPTXTargetMachine.h"
+#include "NVPTXSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include <fstream>
+
+// The ptx syntax and format is very different from that usually seem in a .s
+// file,
+// therefore we are not able to use the MCAsmStreamer interface here.
+//
+// We are handcrafting the output method here.
+//
+// A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer
+// (subclass of MCStreamer).
+
+// This is defined in AsmPrinter.cpp.
+// Used to process the constant expressions in initializers.
+namespace nvptx {
+const llvm::MCExpr *LowerConstant(const llvm::Constant *CV,
+ llvm::AsmPrinter &AP) ;
+}
+
+namespace llvm {
+
+class LineReader {
+private:
+ unsigned theCurLine ;
+ std::ifstream fstr;
+ char buff[512];
+ std::string theFileName;
+ SmallVector<unsigned, 32> lineOffset;
+public:
+ LineReader(std::string filename) {
+ theCurLine = 0;
+ fstr.open(filename.c_str());
+ theFileName = filename;
+ }
+ std::string fileName() { return theFileName; }
+ ~LineReader() {
+ fstr.close();
+ }
+ std::string readLine(unsigned line);
+};
+
+
+
+class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
+
+
+ class AggBuffer {
+ // Used to buffer the emitted string for initializing global
+ // aggregates.
+ //
+ // Normally an aggregate (array, vector or structure) is emitted
+ // as a u8[]. However, if one element/field of the aggregate
+ // is a non-NULL address, then the aggregate is emitted as u32[]
+ // or u64[].
+ //
+ // We first layout the aggregate in 'buffer' in bytes, except for
+ // those symbol addresses. For the i-th symbol address in the
+ //aggregate, its corresponding 4-byte or 8-byte elements in 'buffer'
+ // are filled with 0s. symbolPosInBuffer[i-1] records its position
+ // in 'buffer', and Symbols[i-1] records the Value*.
+ //
+ // Once we have this AggBuffer setup, we can choose how to print
+ // it out.
+ public:
+ unsigned size; // size of the buffer in bytes
+ unsigned char *buffer; // the buffer
+ unsigned numSymbols; // number of symbol addresses
+ SmallVector<unsigned, 4> symbolPosInBuffer;
+ SmallVector<Value *, 4> Symbols;
+
+ private:
+ unsigned curpos;
+ raw_ostream &O;
+ NVPTXAsmPrinter &AP;
+
+ public:
+ AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
+ :O(_O),AP(_AP) {
+ buffer = new unsigned char[_size];
+ size = _size;
+ curpos = 0;
+ numSymbols = 0;
+ }
+ ~AggBuffer() {
+ delete [] buffer;
+ }
+ unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
+ assert((curpos+Num) <= size);
+ assert((curpos+Bytes) <= size);
+ for ( int i= 0; i < Num; ++i) {
+ buffer[curpos] = Ptr[i];
+ curpos ++;
+ }
+ for ( int i=Num; i < Bytes ; ++i) {
+ buffer[curpos] = 0;
+ curpos ++;
+ }
+ return curpos;
+ }
+ unsigned addZeros(int Num) {
+ assert((curpos+Num) <= size);
+ for ( int i= 0; i < Num; ++i) {
+ buffer[curpos] = 0;
+ curpos ++;
+ }
+ return curpos;
+ }
+ void addSymbol(Value *GVar) {
+ symbolPosInBuffer.push_back(curpos);
+ Symbols.push_back(GVar);
+ numSymbols++;
+ }
+ void print() {
+ if (numSymbols == 0) {
+ // print out in bytes
+ for (unsigned i=0; i<size; i++) {
+ if (i)
+ O << ", ";
+ O << (unsigned int)buffer[i];
+ }
+ }
+ else {
+ // print out in 4-bytes or 8-bytes
+ unsigned int pos = 0;
+ unsigned int nSym = 0;
+ unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
+ unsigned int nBytes = 4;
+ if (AP.nvptxSubtarget.is64Bit())
+ nBytes = 8;
+ for (pos=0; pos<size; pos+=nBytes) {
+ if (pos)
+ O << ", ";
+ if (pos == nextSymbolPos) {
+ Value *v = Symbols[nSym];
+ if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+ MCSymbol *Name = AP.Mang->getSymbol(GVar);
+ O << *Name;
+ }
+ else if (ConstantExpr *Cexpr =
+ dyn_cast<ConstantExpr>(v)) {
+ O << *nvptx::LowerConstant(Cexpr, AP);
+ }
+ else
+ assert(0 && "symbol type unknown");
+ nSym++;
+ if (nSym >= numSymbols)
+ nextSymbolPos = size+1;
+ else
+ nextSymbolPos = symbolPosInBuffer[nSym];
+ }
+ else
+ if (nBytes == 4)
+ O << *(unsigned int*)(buffer+pos);
+ else
+ O << *(unsigned long long*)(buffer+pos);
+ }
+ }
+ }
+ };
+
+ friend class AggBuffer;
+
+ virtual void emitSrcInText(StringRef filename, unsigned line);
+
+private :
+ virtual const char *getPassName() const {
+ return "NVPTX Assembly Printer";
+ }
+
+ const Function *F;
+ std::string CurrentFnName;
+
+ void EmitFunctionEntryLabel();
+ void EmitFunctionBodyStart();
+ void EmitFunctionBodyEnd();
+
+ void EmitInstruction(const MachineInstr *);
+
+ void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {}
+
+ void printGlobalVariable(const GlobalVariable *GVar);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier=0);
+ void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier=0);
+ void printVecModifiedImmediate(const MachineOperand &MO,
+ const char *Modifier, raw_ostream &O);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier=0);
+ void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
+ // definition autogenerated.
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);
+ void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
+ bool=false);
+ void printParamName(int paramIndex, raw_ostream &O);
+ void printParamName(Function::const_arg_iterator I, int paramIndex,
+ raw_ostream &O);
+ void emitHeader(Module &M, raw_ostream &O);
+ void emitKernelFunctionDirectives(const Function& F,
+ raw_ostream &O) const;
+ void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
+ void emitFunctionExternParamList(const MachineFunction &MF);
+ void emitFunctionParamList(const Function *, raw_ostream &O);
+ void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
+ void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
+ void emitFunctionTempData(const MachineFunction &MF,
+ unsigned &FrameSize);
+ bool isImageType(const Type *Ty);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &);
+ void printReturnValStr(const Function *, raw_ostream &O);
+ void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
+
+protected:
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+private:
+ std::string CurrentBankselLabelInBasicBlock;
+
+ // This is specific per MachineFunction.
+ const MachineRegisterInfo *MRI;
+ // The contents are specific for each
+ // MachineFunction. But the size of the
+ // array is not.
+ std::map<unsigned, unsigned> *VRidGlobal2LocalMap;
+ // cache the subtarget here.
+ const NVPTXSubtarget &nvptxSubtarget;
+ // Build the map between type name and ID based on module's type
+ // symbol table.
+ std::map<const Type *, std::string> TypeNameMap;
+
+ // List of variables demoted to a function scope.
+ std::map<const Function *, std::vector<GlobalVariable *> > localDecls;
+
+ // To record filename to ID mapping
+ std::map<std::string, unsigned> filenameMap;
+ void recordAndEmitFilenames(Module &);
+
+ void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
+ void emitPTXAddressSpace(unsigned int AddressSpace,
+ raw_ostream &O) const;
+ std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ;
+ void printScalarConstant(Constant *CPV, raw_ostream &O) ;
+ void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ;
+ void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ;
+ void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ;
+
+ void printOperandProper(const MachineOperand &MO);
+
+ void emitLinkageDirective(const GlobalValue* V, raw_ostream &O);
+ void emitDeclarations(Module &, raw_ostream &O);
+ void emitDeclaration(const Function *, raw_ostream &O);
+
+ static const char *getRegisterName(unsigned RegNo);
+ void emitDemotedVars(const Function *, raw_ostream &);
+
+ LineReader *reader;
+ LineReader *getReader(std::string);
+public:
+ NVPTXAsmPrinter(TargetMachine &TM,
+ MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer),
+ nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+ CurrentBankselLabelInBasicBlock = "";
+ VRidGlobal2LocalMap = NULL;
+ reader = NULL;
+ }
+
+ ~NVPTXAsmPrinter() {
+ if (!reader)
+ delete reader;
+ }
+
+ bool ignoreLoc(const MachineInstr &);
+
+ virtual void getVirtualRegisterName(unsigned, bool, raw_ostream &);
+
+ DebugLoc prevDebugLoc;
+ void emitLineNumberAsDotLoc(const MachineInstr &);
+};
+} // end of namespace
+
+#endif
--- /dev/null
+//=======- NVPTXFrameLowering.cpp - NVPTX Frame Information ---*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXFrameLowering.h"
+#include "NVPTX.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const {
+ return true;
+}
+
+void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
+ if (MF.getFrameInfo()->hasStackObjects()) {
+ MachineBasicBlock &MBB = MF.front();
+ // Insert "mov.u32 %SP, %Depot"
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ // This instruction really occurs before first instruction
+ // in the BB, so giving it no debug location.
+ DebugLoc dl = DebugLoc();
+
+ if (tm.getSubtargetImpl()->hasGenericLdSt()) {
+ // mov %SPL, %depot;
+ // cvta.local %SP, %SPL;
+ if (is64bit) {
+ MachineInstr *MI = BuildMI(MBB, MBBI, dl,
+ tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+ NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+ BuildMI(MBB, MI, dl,
+ tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal)
+ .addReg(NVPTX::VRDepot);
+ } else {
+ MachineInstr *MI = BuildMI(MBB, MBBI, dl,
+ tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
+ NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+ BuildMI(MBB, MI, dl,
+ tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal)
+ .addReg(NVPTX::VRDepot);
+ }
+ }
+ else {
+ // mov %SP, %depot;
+ if (is64bit)
+ BuildMI(MBB, MBBI, dl,
+ tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame)
+ .addReg(NVPTX::VRDepot);
+ else
+ BuildMI(MBB, MBBI, dl,
+ tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame)
+ .addReg(NVPTX::VRDepot);
+ }
+ }
+}
+
+void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+}
--- /dev/null
+//===--- NVPTXFrameLowering.h - Define frame lowering for NVPTX -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_FRAMELOWERING_H
+#define NVPTX_FRAMELOWERING_H
+
+#include "llvm/Target/TargetFrameLowering.h"
+
+
+namespace llvm {
+class NVPTXTargetMachine;
+
+class NVPTXFrameLowering : public TargetFrameLowering {
+ NVPTXTargetMachine &tm;
+ bool is64bit;
+
+public:
+ explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
+ tm(_tm), is64bit(_is64bit) {}
+
+ virtual bool hasFP(const MachineFunction &MF) const;
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const;
+};
+
+} // End llvm namespace
+
+#endif
--- /dev/null
+//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the NVPTX target.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/Instructions.h"
+#include "llvm/Support/raw_ostream.h"
+#include "NVPTXISelDAGToDAG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/GlobalValue.h"
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "nvptx-isel"
+
+using namespace llvm;
+
+
+static cl::opt<bool>
+UseFMADInstruction("nvptx-mad-enable",
+ cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
+ cl::init(false));
+
+static cl::opt<int>
+FMAContractLevel("nvptx-fma-level",
+ cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
+ " 1: do it 2: do it aggressively"),
+ cl::init(2));
+
+
+static cl::opt<int>
+UsePrecDivF32("nvptx-prec-divf32",
+ cl::ZeroOrMore,
+ cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
+ " IEEE Compliant F32 div.rnd if avaiable."),
+ cl::init(2));
+
+/// createNVPTXISelDag - This pass converts a legalized DAG into a
+/// NVPTX-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
+ llvm::CodeGenOpt::Level OptLevel) {
+ return new NVPTXDAGToDAGISel(TM, OptLevel);
+}
+
+
+NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+: SelectionDAGISel(tm, OptLevel),
+ Subtarget(tm.getSubtarget<NVPTXSubtarget>())
+{
+ // Always do fma.f32 fpcontract if the target supports the instruction.
+ // Always do fma.f64 fpcontract if the target supports the instruction.
+ // Do mad.f32 is nvptx-mad-enable is specified and the target does not
+ // support fma.f32.
+
+ doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
+ doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() &&
+ (FMAContractLevel>=1);
+ doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() &&
+ (FMAContractLevel>=1);
+ doFMAF32AGG = (OptLevel > 0) && Subtarget.hasFMAF32() &&
+ (FMAContractLevel==2);
+ doFMAF64AGG = (OptLevel > 0) && Subtarget.hasFMAF64() &&
+ (FMAContractLevel==2);
+
+ allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
+
+ doMulWide = (OptLevel > 0);
+
+ // Decide how to translate f32 div
+ do_DIVF32_PREC = UsePrecDivF32;
+ // sm less than sm_20 does not support div.rnd. Use div.full.
+ if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
+ do_DIVF32_PREC = 1;
+
+}
+
+/// Select - Select instructions not customized! Used for
+/// expanded, promoted and normal instructions.
+SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ SDNode *ResNode = NULL;
+ switch (N->getOpcode()) {
+ case ISD::LOAD:
+ ResNode = SelectLoad(N);
+ break;
+ case ISD::STORE:
+ ResNode = SelectStore(N);
+ break;
+ }
+ if (ResNode)
+ return ResNode;
+ return SelectCode(N);
+}
+
+
+static unsigned int
+getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
+{
+ const Value *Src = N->getSrcValue();
+ if (!Src)
+ return NVPTX::PTXLdStInstCode::LOCAL;
+
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
+ switch (PT->getAddressSpace()) {
+ case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
+ case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
+ case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
+ case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
+ return NVPTX::PTXLdStInstCode::CONSTANT;
+ case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
+ case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
+ case llvm::ADDRESS_SPACE_CONST:
+ // If the arch supports generic address space, translate it to GLOBAL
+ // for correctness.
+ // If the arch does not support generic address space, then the arch
+ // does not really support ADDRESS_SPACE_CONST, translate it to
+ // to CONSTANT for better performance.
+ if (Subtarget.hasGenericLdSt())
+ return NVPTX::PTXLdStInstCode::GLOBAL;
+ else
+ return NVPTX::PTXLdStInstCode::CONSTANT;
+ default: break;
+ }
+ }
+ return NVPTX::PTXLdStInstCode::LOCAL;
+}
+
+
+SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT LoadedVT = LD->getMemoryVT();
+ SDNode *NVPTXLD= NULL;
+
+ // do not support pre/post inc/dec
+ if (LD->isIndexed())
+ return NULL;
+
+ if (!LoadedVT.isSimple())
+ return NULL;
+
+ // Address Space Setting
+ unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
+
+ // Volatile Setting
+ // - .volatile is only availalble for .global and .shared
+ bool isVolatile = LD->isVolatile();
+ if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ isVolatile = false;
+
+ // Vector Setting
+ MVT SimpleVT = LoadedVT.getSimpleVT();
+ unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
+ if (SimpleVT.isVector()) {
+ unsigned num = SimpleVT.getVectorNumElements();
+ if (num == 2)
+ vecType = NVPTX::PTXLdStInstCode::V2;
+ else if (num == 4)
+ vecType = NVPTX::PTXLdStInstCode::V4;
+ else
+ return NULL;
+ }
+
+ // Type Setting: fromType + fromTypeWidth
+ //
+ // Sign : ISD::SEXTLOAD
+ // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
+ // type is integer
+ // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
+ MVT ScalarVT = SimpleVT.getScalarType();
+ unsigned fromTypeWidth = ScalarVT.getSizeInBits();
+ unsigned int fromType;
+ if ((LD->getExtensionType() == ISD::SEXTLOAD))
+ fromType = NVPTX::PTXLdStInstCode::Signed;
+ else if (ScalarVT.isFloatingPoint())
+ fromType = NVPTX::PTXLdStInstCode::Float;
+ else
+ fromType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ // Create the machine instruction DAG
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Addr;
+ SDValue Offset, Base;
+ unsigned Opcode;
+ MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
+
+ if (SelectDirectAddr(N1, Addr)) {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_avar; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_avar; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_avar; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_avar; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_avar; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_avar; break;
+ case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break;
+ case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
+ case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
+ case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
+ case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
+ case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
+ case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break;
+ case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
+ case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
+ case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(fromType),
+ getI32Imm(fromTypeWidth),
+ Addr, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
+ MVT::Other, Ops, 7);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRsi64(N1.getNode(), N1, Base, Offset):
+ SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_asi; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_asi; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_asi; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_asi; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_asi; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_asi; break;
+ case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break;
+ case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
+ case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
+ case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
+ case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
+ case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
+ case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break;
+ case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
+ case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
+ case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(fromType),
+ getI32Imm(fromTypeWidth),
+ Base, Offset, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
+ MVT::Other, Ops, 8);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRri64(N1.getNode(), N1, Base, Offset):
+ SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
+ case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break;
+ case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
+ case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
+ case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
+ case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
+ case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
+ case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break;
+ case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
+ case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
+ case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(fromType),
+ getI32Imm(fromTypeWidth),
+ Base, Offset, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
+ MVT::Other, Ops, 8);
+ }
+ else {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
+ case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break;
+ case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
+ case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
+ case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
+ case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
+ case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
+ case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break;
+ case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
+ case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
+ case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(fromType),
+ getI32Imm(fromTypeWidth),
+ N1, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
+ MVT::Other, Ops, 7);
+ }
+
+ if (NVPTXLD != NULL) {
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ }
+
+ return NVPTXLD;
+}
+
+SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ EVT StoreVT = ST->getMemoryVT();
+ SDNode *NVPTXST = NULL;
+
+ // do not support pre/post inc/dec
+ if (ST->isIndexed())
+ return NULL;
+
+ if (!StoreVT.isSimple())
+ return NULL;
+
+ // Address Space Setting
+ unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
+
+ // Volatile Setting
+ // - .volatile is only availalble for .global and .shared
+ bool isVolatile = ST->isVolatile();
+ if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ isVolatile = false;
+
+ // Vector Setting
+ MVT SimpleVT = StoreVT.getSimpleVT();
+ unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
+ if (SimpleVT.isVector()) {
+ unsigned num = SimpleVT.getVectorNumElements();
+ if (num == 2)
+ vecType = NVPTX::PTXLdStInstCode::V2;
+ else if (num == 4)
+ vecType = NVPTX::PTXLdStInstCode::V4;
+ else
+ return NULL;
+ }
+
+ // Type Setting: toType + toTypeWidth
+ // - for integer type, always use 'u'
+ //
+ MVT ScalarVT = SimpleVT.getScalarType();
+ unsigned toTypeWidth = ScalarVT.getSizeInBits();
+ unsigned int toType;
+ if (ScalarVT.isFloatingPoint())
+ toType = NVPTX::PTXLdStInstCode::Float;
+ else
+ toType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ // Create the machine instruction DAG
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue Addr;
+ SDValue Offset, Base;
+ unsigned Opcode;
+ MVT::SimpleValueType SourceVT =
+ N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
+
+ if (SelectDirectAddr(N2, Addr)) {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_avar; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_avar; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_avar; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_avar; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_avar; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_avar; break;
+ case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break;
+ case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
+ case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
+ case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
+ case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
+ case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
+ case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break;
+ case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
+ case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
+ case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { N1,
+ getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(toType),
+ getI32Imm(toTypeWidth),
+ Addr, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 8);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRsi64(N2.getNode(), N2, Base, Offset):
+ SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_asi; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_asi; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_asi; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_asi; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_asi; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_asi; break;
+ case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break;
+ case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
+ case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
+ case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
+ case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
+ case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
+ case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break;
+ case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
+ case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
+ case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { N1,
+ getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(toType),
+ getI32Imm(toTypeWidth),
+ Base, Offset, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 9);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRri64(N2.getNode(), N2, Base, Offset):
+ SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
+ case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break;
+ case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
+ case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
+ case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
+ case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
+ case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
+ case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break;
+ case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
+ case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
+ case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { N1,
+ getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(toType),
+ getI32Imm(toTypeWidth),
+ Base, Offset, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 9);
+ } else {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
+ case MVT::v2i8: Opcode = NVPTX::ST_v2i8_areg; break;
+ case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
+ case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
+ case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
+ case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
+ case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
+ case MVT::v4i8: Opcode = NVPTX::ST_v4i8_areg; break;
+ case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
+ case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
+ case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { N1,
+ getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(toType),
+ getI32Imm(toTypeWidth),
+ N2, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 8);
+ }
+
+ if (NVPTXST != NULL) {
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ }
+
+ return NVPTXST;
+}
+
+// SelectDirectAddr - Match a direct address for DAG.
+// A direct address could be a globaladdress or externalsymbol.
+bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
+ // Return true if TGA or ES.
+ if (N.getOpcode() == ISD::TargetGlobalAddress
+ || N.getOpcode() == ISD::TargetExternalSymbol) {
+ Address = N;
+ return true;
+ }
+ if (N.getOpcode() == NVPTXISD::Wrapper) {
+ Address = N.getOperand(0);
+ return true;
+ }
+ if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
+ if (IID == Intrinsic::nvvm_ptr_gen_to_param)
+ if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
+ return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
+ }
+ return false;
+}
+
+// symbol+offset
+bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset,
+ MVT mvt) {
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ SDValue base=Addr.getOperand(0);
+ if (SelectDirectAddr(base, Base)) {
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+// symbol+offset
+bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
+}
+
+// symbol+offset
+bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
+}
+
+// register+offset
+bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset,
+ MVT mvt) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
+ Offset = CurDAG->getTargetConstant(0, mvt);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
+ return false;
+ }
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (FrameIndexSDNode *FIN =
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ // Constant offset from frame ref.
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
+ else
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+ return true;
+ }
+ }
+ return false;
+}
+
+// register+offset
+bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
+}
+
+// register+offset
+bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
+}
+
+bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
+ unsigned int spN) const {
+ const Value *Src = NULL;
+ // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
+ // the classof() for MemSDNode does not include MemIntrinsicSDNode
+ // (See SelectionDAGNodes.h). So we need to check for both.
+ if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
+ Src = mN->getSrcValue();
+ }
+ else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
+ Src = mN->getSrcValue();
+ }
+ if (!Src)
+ return false;
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ return (PT->getAddressSpace() == spN);
+ return false;
+}
+
+/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+/// inline asm expressions.
+bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (SelectDirectAddr(Op, Op0)) {
+ OutOps.push_back(Op0);
+ OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ return false;
+ }
+ if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+ }
+ break;
+ }
+ return true;
+}
+
+// Return true if N is a undef or a constant.
+// If N was undef, return a (i8imm 0) in Retval
+// If N was imm, convert it to i8imm and return in Retval
+// Note: The convert to i8imm is required, otherwise the
+// pattern matcher inserts a bunch of IMOVi8rr to convert
+// the imm to i8imm, and this causes instruction selection
+// to fail.
+bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
+ SDValue &Retval) {
+ if (!(N.getOpcode() == ISD::UNDEF) &&
+ !(N.getOpcode() == ISD::Constant))
+ return false;
+
+ if (N.getOpcode() == ISD::UNDEF)
+ Retval = CurDAG->getTargetConstant(0, MVT::i8);
+ else {
+ ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
+ unsigned retval = cn->getZExtValue();
+ Retval = CurDAG->getTargetConstant(retval, MVT::i8);
+ }
+ return true;
+}
--- /dev/null
+//===-- NVPTXISelDAGToDAG.h - A dag to dag inst selector for NVPTX --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the NVPTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "nvptx-isel"
+
+#include "NVPTX.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Intrinsics.h"
+using namespace llvm;
+
+namespace {
+
+class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
+
+ // If true, generate corresponding FPCONTRACT. This is
+ // language dependent (i.e. CUDA and OpenCL works differently).
+ bool doFMADF32;
+ bool doFMAF64;
+ bool doFMAF32;
+ bool doFMAF64AGG;
+ bool doFMAF32AGG;
+ bool allowFMA;
+
+ // 0: use div.approx
+ // 1: use div.full
+ // 2: For sm_20 and later, ieee-compliant div.rnd.f32 can be generated;
+ // Otherwise, use div.full
+ int do_DIVF32_PREC;
+
+ // If true, add .ftz to f32 instructions.
+ // This is only meaningful for sm_20 and later, as the default
+ // is not ftz.
+ // For sm earlier than sm_20, f32 denorms are always ftz by the
+ // hardware.
+ // We always add the .ftz modifier regardless of the sm value
+ // when Use32FTZ is true.
+ bool UseF32FTZ;
+
+ // If true, generate mul.wide from sext and mul
+ bool doMulWide;
+
+public:
+ explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
+ CodeGenOpt::Level OptLevel);
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "NVPTX DAG->DAG Pattern Instruction Selection";
+ }
+
+ const NVPTXSubtarget &Subtarget;
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+private:
+ // Include the pieces autogenerated from the target description.
+#include "NVPTXGenDAGISel.inc"
+
+ SDNode *Select(SDNode *N);
+ SDNode* SelectLoad(SDNode *N);
+ SDNode* SelectStore(SDNode *N);
+
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ // Match direct address complex pattern.
+ bool SelectDirectAddr(SDValue N, SDValue &Address);
+
+ bool SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset, MVT mvt);
+ bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+ bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+
+ bool SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset, MVT mvt);
+ bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+ bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+
+
+ bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
+
+ bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval);
+
+};
+}
--- /dev/null
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that NVPTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "NVPTX.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXTargetMachine.h"
+#include "NVPTXTargetObjectFile.h"
+#include "NVPTXUtilities.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Module.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionELF.h"
+#include <sstream>
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "nvptx-lower"
+
+using namespace llvm;
+
+static unsigned int uniqueCallSite = 0;
+
+static cl::opt<bool>
+RetainVectorOperands("nvptx-codegen-vectors",
+ cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
+ cl::init(true));
+
+static cl::opt<bool>
+sched4reg("nvptx-sched4reg",
+ cl::desc("NVPTX Specific: schedule for register pressue"),
+ cl::init(false));
+
+// NVPTXTargetLowering Constructor.
+NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
+: TargetLowering(TM, new NVPTXTargetObjectFile()),
+ nvTM(&TM),
+ nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+
+ // always lower memset, memcpy, and memmove intrinsics to load/store
+ // instructions, rather
+ // then generating calls to memset, mempcy or memmove.
+ maxStoresPerMemset = (unsigned)0xFFFFFFFF;
+ maxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
+ maxStoresPerMemmove = (unsigned)0xFFFFFFFF;
+
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+
+ // Jump is Expensive. Don't create extra control flow for 'and', 'or'
+ // condition branches.
+ setJumpIsExpensive(true);
+
+ // By default, use the Source scheduling
+ if (sched4reg)
+ setSchedulingPreference(Sched::RegPressure);
+ else
+ setSchedulingPreference(Sched::Source);
+
+ addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
+ addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
+ addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
+ addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
+ addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
+ addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
+ addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
+
+ if (RetainVectorOperands) {
+ addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
+ addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
+ addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
+ addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
+ addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
+ addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
+ addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
+ addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
+ addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
+ addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8 , Custom);
+
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8 , Custom);
+ }
+
+ // Operations not directly supported by NVPTX.
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ if (nvptxSubtarget.hasROT64()) {
+ setOperationAction(ISD::ROTL , MVT::i64, Legal);
+ setOperationAction(ISD::ROTR , MVT::i64, Legal);
+ }
+ else {
+ setOperationAction(ISD::ROTL , MVT::i64, Expand);
+ setOperationAction(ISD::ROTR , MVT::i64, Expand);
+ }
+ if (nvptxSubtarget.hasROT32()) {
+ setOperationAction(ISD::ROTL , MVT::i32, Legal);
+ setOperationAction(ISD::ROTR , MVT::i32, Legal);
+ }
+ else {
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ }
+
+ setOperationAction(ISD::ROTL , MVT::i16, Expand);
+ setOperationAction(ISD::ROTR , MVT::i16, Expand);
+ setOperationAction(ISD::ROTL , MVT::i8, Expand);
+ setOperationAction(ISD::ROTR , MVT::i8, Expand);
+ setOperationAction(ISD::BSWAP , MVT::i16, Expand);
+ setOperationAction(ISD::BSWAP , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP , MVT::i64, Expand);
+
+ // Indirect branch is not supported.
+ // This also disables Jump Table creation.
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+
+ setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
+
+ // We want to legalize constant related memmove and memcopy
+ // intrinsics.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+
+ // Turn FP extload into load/fextend
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // PTX does not support load / store predicate registers
+ setOperationAction(ISD::LOAD, MVT::i1, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setOperationAction(ISD::STORE, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i16, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+
+ // This is legal in NVPTX
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+
+ // TRAP can be lowered to PTX trap
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // By default, CONCAT_VECTORS is implemented via store/load
+ // through stack. It is slow and uses local memory. We need
+ // to custom-lowering them.
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8 , Custom);
+
+ // Expand vector int to float and float to int conversions
+ // - For SINT_TO_FP and UINT_TO_FP, the src type
+ // (Node->getOperand(0).getValueType())
+ // is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
+ // the dest type (Node->getValueType(0)) is used.
+ //
+ // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
+ // case, and
+ // SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case.
+ //
+ // That is why v4i32 or v2i32 are used here.
+ //
+ // The expansion for vectors happens in VectorLegalizer::LegalizeOp()
+ // (LegalizeVectorOps.cpp).
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+
+ // Now deduce the information based on the above mentioned
+ // actions
+ computeRegisterProperties();
+}
+
+
+const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case NVPTXISD::CALL: return "NVPTXISD::CALL";
+ case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG";
+ case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper";
+ case NVPTXISD::NVBuiltin: return "NVPTXISD::NVBuiltin";
+ case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam";
+ case NVPTXISD::DeclareScalarParam:
+ return "NVPTXISD::DeclareScalarParam";
+ case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet";
+ case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
+ case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall";
+ case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam";
+ case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam";
+ case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32";
+ case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32";
+ case NVPTXISD::MoveToParam: return "NVPTXISD::MoveToParam";
+ case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin";
+ case NVPTXISD::CallArg: return "NVPTXISD::CallArg";
+ case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg";
+ case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd";
+ case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid";
+ case NVPTXISD::CallVal: return "NVPTXISD::CallVal";
+ case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol";
+ case NVPTXISD::Prototype: return "NVPTXISD::Prototype";
+ case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam";
+ case NVPTXISD::MoveRetval: return "NVPTXISD::MoveRetval";
+ case NVPTXISD::MoveToRetval: return "NVPTXISD::MoveToRetval";
+ case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval";
+ case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam";
+ case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
+ case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
+ case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
+ }
+}
+
+
+SDValue
+NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
+}
+
+std::string NVPTXTargetLowering::getPrototype(Type *retTy,
+ const ArgListTy &Args,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ unsigned retAlignment) const {
+
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ std::stringstream O;
+ O << "prototype_" << uniqueCallSite << " : .callprototype ";
+
+ if (retTy->getTypeID() == Type::VoidTyID)
+ O << "()";
+ else {
+ O << "(";
+ if (isABI) {
+ if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
+ unsigned size = 0;
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
+ size = ITy->getBitWidth();
+ if (size < 32) size = 32;
+ }
+ else {
+ assert(retTy->isFloatingPointTy() &&
+ "Floating point type expected here");
+ size = retTy->getPrimitiveSizeInBits();
+ }
+
+ O << ".param .b" << size << " _";
+ }
+ else if (isa<PointerType>(retTy))
+ O << ".param .b" << getPointerTy().getSizeInBits()
+ << " _";
+ else {
+ if ((retTy->getTypeID() == Type::StructTyID) ||
+ isa<VectorType>(retTy)) {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*this, retTy, vtparts);
+ unsigned totalsz = 0;
+ for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+ for (unsigned j=0, je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 8)) sz = 8;
+ totalsz += sz/8;
+ }
+ }
+ O << ".param .align "
+ << retAlignment
+ << " .b8 _["
+ << totalsz << "]";
+ }
+ else {
+ assert(false &&
+ "Unknown return type");
+ }
+ }
+ }
+ else {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*this, retTy, vtparts);
+ unsigned idx = 0;
+ for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+
+ for (unsigned j=0, je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ O << ".reg .b" << sz << " _";
+ if (j<je-1) O << ", ";
+ ++idx;
+ }
+ if (i < e-1)
+ O << ", ";
+ }
+ }
+ O << ") ";
+ }
+ O << "_ (";
+
+ bool first = true;
+ MVT thePointerTy = getPointerTy();
+
+ for (unsigned i=0,e=Args.size(); i!=e; ++i) {
+ const Type *Ty = Args[i].Ty;
+ if (!first) {
+ O << ", ";
+ }
+ first = false;
+
+ if (Outs[i].Flags.isByVal() == false) {
+ unsigned sz = 0;
+ if (isa<IntegerType>(Ty)) {
+ sz = cast<IntegerType>(Ty)->getBitWidth();
+ if (sz < 32) sz = 32;
+ }
+ else if (isa<PointerType>(Ty))
+ sz = thePointerTy.getSizeInBits();
+ else
+ sz = Ty->getPrimitiveSizeInBits();
+ if (isABI)
+ O << ".param .b" << sz << " ";
+ else
+ O << ".reg .b" << sz << " ";
+ O << "_";
+ continue;
+ }
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ assert(PTy &&
+ "Param with byval attribute should be a pointer type");
+ Type *ETy = PTy->getElementType();
+
+ if (isABI) {
+ unsigned align = Outs[i].Flags.getByValAlign();
+ unsigned sz = getTargetData()->getTypeAllocSize(ETy);
+ O << ".param .align " << align
+ << " .b8 ";
+ O << "_";
+ O << "[" << sz << "]";
+ continue;
+ }
+ else {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*this, ETy, vtparts);
+ for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+
+ for (unsigned j=0,je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ O << ".reg .b" << sz << " ";
+ O << "_";
+ if (j<je-1) O << ", ";
+ }
+ if (i<e-1)
+ O << ", ";
+ }
+ continue;
+ }
+ }
+ O << ");";
+ return O.str();
+}
+
+
+#if 0
+SDValue
+NVPTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool doesNotRet, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals, Type *retTy,
+ const ArgListTy &Args) const {
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ SDValue tempChain = Chain;
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getIntPtrConstant(uniqueCallSite, true));
+ SDValue InFlag = Chain.getValue(1);
+
+ assert((Outs.size() == Args.size()) &&
+ "Unexpected number of arguments to function call");
+ unsigned paramCount = 0;
+ // Declare the .params or .reg need to pass values
+ // to the function
+ for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+ EVT VT = Outs[i].VT;
+
+ if (Outs[i].Flags.isByVal() == false) {
+ // Plain scalar
+ // for ABI, declare .param .b<size> .param<n>;
+ // for nonABI, declare .reg .b<size> .param<n>;
+ unsigned isReg = 1;
+ if (isABI)
+ isReg = 0;
+ unsigned sz = VT.getSizeInBits();
+ if (VT.isInteger() && (sz < 32)) sz = 32;
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ DAG.getConstant(isReg, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
+ DeclareParamOps, 5);
+ InFlag = Chain.getValue(1);
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(0, MVT::i32), OutVals[i], InFlag };
+
+ unsigned opcode = NVPTXISD::StoreParam;
+ if (isReg)
+ opcode = NVPTXISD::MoveToParam;
+ else {
+ if (Outs[i].Flags.isZExt())
+ opcode = NVPTXISD::StoreParamU32;
+ else if (Outs[i].Flags.isSExt())
+ opcode = NVPTXISD::StoreParamS32;
+ }
+ Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);
+
+ InFlag = Chain.getValue(1);
+ ++paramCount;
+ continue;
+ }
+ // struct or vector
+ SmallVector<EVT, 16> vtparts;
+ const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
+ assert(PTy &&
+ "Type of a byval parameter should be pointer");
+ ComputeValueVTs(*this, PTy->getElementType(), vtparts);
+
+ if (isABI) {
+ // declare .param .align 16 .b8 .param<n>[<size>];
+ unsigned sz = Outs[i].Flags.getByValSize();
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ // The ByValAlign in the Outs[i].Flags is alway set at this point, so we
+ // don't need to
+ // worry about natural alignment or not. See TargetLowering::LowerCallTo()
+ SDValue DeclareParamOps[] = { Chain,
+ DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
+ DeclareParamOps, 5);
+ InFlag = Chain.getValue(1);
+ unsigned curOffset = 0;
+ for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[j];
+ if (vtparts[j].isVector()) {
+ elems = vtparts[j].getVectorNumElements();
+ elemtype = vtparts[j].getVectorElementType();
+ }
+ for (unsigned k=0,ke=elems; k!=ke; ++k) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 8)) sz = 8;
+ SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ OutVals[i],
+ DAG.getConstant(curOffset,
+ getPointerTy()));
+ SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
+ MachinePointerInfo(), false, false, false, 0);
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount,
+ MVT::i32),
+ DAG.getConstant(curOffset, MVT::i32),
+ theVal, InFlag };
+ Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
+ CopyParamOps, 5);
+ InFlag = Chain.getValue(1);
+ curOffset += sz/8;
+ }
+ }
+ ++paramCount;
+ continue;
+ }
+ // Non-abi, struct or vector
+ // Declare a bunch or .reg .b<size> .param<n>
+ unsigned curOffset = 0;
+ for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[j];
+ if (vtparts[j].isVector()) {
+ elems = vtparts[j].getVectorNumElements();
+ elemtype = vtparts[j].getVectorElementType();
+ }
+ for (unsigned k=0,ke=elems; k!=ke; ++k) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount,
+ MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ DAG.getConstant(1, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
+ DeclareParamOps, 5);
+ InFlag = Chain.getValue(1);
+ SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+ DAG.getConstant(curOffset,
+ getPointerTy()));
+ SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
+ MachinePointerInfo(), false, false, false, 0);
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(0, MVT::i32), theVal,
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
+ CopyParamOps, 5);
+ InFlag = Chain.getValue(1);
+ ++paramCount;
+ }
+ }
+ }
+
+ GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
+ unsigned retAlignment = 0;
+
+ // Handle Result
+ unsigned retCount = 0;
+ if (Ins.size() > 0) {
+ SmallVector<EVT, 16> resvtparts;
+ ComputeValueVTs(*this, retTy, resvtparts);
+
+ // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
+ // individual .reg .b<size> func_retval<0..> for non ABI
+ unsigned resultsz = 0;
+ for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = resvtparts[i];
+ if (resvtparts[i].isVector()) {
+ elems = resvtparts[i].getVectorNumElements();
+ elemtype = resvtparts[i].getVectorElementType();
+ }
+ for (unsigned j=0,je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (isABI == false) {
+ if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ }
+ else {
+ if (elemtype.isInteger() && (sz < 8)) sz = 8;
+ }
+ if (isABI == false) {
+ SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ DAG.getConstant(retCount, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
+ DeclareRetOps, 5);
+ InFlag = Chain.getValue(1);
+ ++retCount;
+ }
+ resultsz += sz;
+ }
+ }
+ if (isABI) {
+ if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
+ retTy->isPointerTy() ) {
+ // Scalar needs to be at least 32bit wide
+ if (resultsz < 32)
+ resultsz = 32;
+ SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(resultsz, MVT::i32),
+ DAG.getConstant(0, MVT::i32), InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
+ DeclareRetOps, 5);
+ InFlag = Chain.getValue(1);
+ }
+ else {
+ // @TODO: Re-enable getAlign calls. We do not have the
+ // ImmutableCallSite object here anymore.
+ //if (Func) { // direct call
+ //if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
+ //retAlignment = TD->getABITypeAlignment(retTy);
+ //}
+ //else { // indirect call
+ //const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
+ //if (!llvm::getAlign(*CallI, 0, retAlignment))
+ //retAlignment = TD->getABITypeAlignment(retTy);
+ //}
+ // @TODO: Remove this hack!
+ // Functions with explicit alignment metadata will be broken, for now.
+ retAlignment = 16;
+ SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment,
+ MVT::i32),
+ DAG.getConstant(resultsz/8, MVT::i32),
+ DAG.getConstant(0, MVT::i32), InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
+ DeclareRetOps, 5);
+ InFlag = Chain.getValue(1);
+ }
+ }
+ }
+
+ if (!Func) {
+ // This is indirect function call case : PTX requires a prototype of the
+ // form
+ // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
+ // to be emitted, and the label has to used as the last arg of call
+ // instruction.
+ // The prototype is embedded in a string and put as the operand for an
+ // INLINEASM SDNode.
+ SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
+ const char *asmstr = nvTM->getManagedStrPool()->
+ getManagedString(proto_string.c_str())->c_str();
+ SDValue InlineAsmOps[] = { Chain,
+ DAG.getTargetExternalSymbol(asmstr,
+ getPointerTy()),
+ DAG.getMDNode(0),
+ DAG.getTargetConstant(0, MVT::i32), InFlag };
+ Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
+ InFlag = Chain.getValue(1);
+ }
+ // Op to just print "call"
+ SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue PrintCallOps[] = { Chain,
+ DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1)
+ : retCount, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl,
+ PrintCallVTs, PrintCallOps, 3);
+ InFlag = Chain.getValue(1);
+
+ // Ops to print out the function name
+ SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CallVoidOps[] = { Chain, Callee, InFlag };
+ Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
+ InFlag = Chain.getValue(1);
+
+ // Ops to print out the param list
+ SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CallArgBeginOps[] = { Chain, InFlag };
+ Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
+ CallArgBeginOps, 2);
+ InFlag = Chain.getValue(1);
+
+ for (unsigned i=0, e=paramCount; i!=e; ++i) {
+ unsigned opcode;
+ if (i==(e-1))
+ opcode = NVPTXISD::LastCallArg;
+ else
+ opcode = NVPTXISD::CallArg;
+ SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(i, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
+ InFlag = Chain.getValue(1);
+ }
+ SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CallArgEndOps[] = { Chain,
+ DAG.getConstant(Func ? 1 : 0, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
+ 3);
+ InFlag = Chain.getValue(1);
+
+ if (!Func) {
+ SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue PrototypeOps[] = { Chain,
+ DAG.getConstant(uniqueCallSite, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Generate loads from param memory/moves from registers for result
+ if (Ins.size() > 0) {
+ if (isABI) {
+ unsigned resoffset = 0;
+ for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+ unsigned sz = Ins[i].VT.getSizeInBits();
+ if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
+ std::vector<EVT> LoadRetVTs;
+ LoadRetVTs.push_back(Ins[i].VT);
+ LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue);
+ std::vector<SDValue> LoadRetOps;
+ LoadRetOps.push_back(Chain);
+ LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
+ LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
+ LoadRetOps.push_back(InFlag);
+ SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
+ &LoadRetOps[0], LoadRetOps.size());
+ Chain = retval.getValue(1);
+ InFlag = retval.getValue(2);
+ InVals.push_back(retval);
+ resoffset += sz/8;
+ }
+ }
+ else {
+ SmallVector<EVT, 16> resvtparts;
+ ComputeValueVTs(*this, retTy, resvtparts);
+
+ assert(Ins.size() == resvtparts.size() &&
+ "Unexpected number of return values in non-ABI case");
+ unsigned paramNum = 0;
+ for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+ assert(EVT(Ins[i].VT) == resvtparts[i] &&
+ "Unexpected EVT type in non-ABI case");
+ unsigned numelems = 1;
+ EVT elemtype = Ins[i].VT;
+ if (Ins[i].VT.isVector()) {
+ numelems = Ins[i].VT.getVectorNumElements();
+ elemtype = Ins[i].VT.getVectorElementType();
+ }
+ std::vector<SDValue> tempRetVals;
+ for (unsigned j=0; j<numelems; ++j) {
+ std::vector<EVT> MoveRetVTs;
+ MoveRetVTs.push_back(elemtype);
+ MoveRetVTs.push_back(MVT::Other); MoveRetVTs.push_back(MVT::Glue);
+ std::vector<SDValue> MoveRetOps;
+ MoveRetOps.push_back(Chain);
+ MoveRetOps.push_back(DAG.getConstant(0, MVT::i32));
+ MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32));
+ MoveRetOps.push_back(InFlag);
+ SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
+ &MoveRetOps[0], MoveRetOps.size());
+ Chain = retval.getValue(1);
+ InFlag = retval.getValue(2);
+ tempRetVals.push_back(retval);
+ ++paramNum;
+ }
+ if (Ins[i].VT.isVector())
+ InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
+ &tempRetVals[0], tempRetVals.size()));
+ else
+ InVals.push_back(tempRetVals[0]);
+ }
+ }
+ }
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(uniqueCallSite, true),
+ DAG.getIntPtrConstant(uniqueCallSite+1, true),
+ InFlag);
+ uniqueCallSite++;
+
+ // set isTailCall to false for now, until we figure out how to express
+ // tail call optimization in PTX
+ isTailCall = false;
+ return Chain;
+}
+#endif
+
+// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
+// (see LegalizeDAG.cpp). This is slow and uses local memory.
+// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
+SDValue NVPTXTargetLowering::
+LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ SmallVector<SDValue, 8> Ops;
+ unsigned NumOperands = Node->getNumOperands();
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue SubOp = Node->getOperand(i);
+ EVT VVT = SubOp.getNode()->getValueType(0);
+ EVT EltVT = VVT.getVectorElementType();
+ unsigned NumSubElem = VVT.getVectorNumElements();
+ for (unsigned j=0; j < NumSubElem; ++j) {
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
+ DAG.getIntPtrConstant(j)));
+ }
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
+ &Ops[0], Ops.size());
+}
+
+SDValue NVPTXTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::RETURNADDR: return SDValue();
+ case ISD::FRAMEADDR: return SDValue();
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN: return Op;
+ case ISD::BUILD_VECTOR:
+ case ISD::EXTRACT_SUBVECTOR:
+ return Op;
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ default:
+ assert(0 && "Custom lowering not defined for operation");
+ }
+}
+
+SDValue
+NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
+ EVT v) const {
+ std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
+ std::stringstream suffix;
+ suffix << idx;
+ *name += suffix.str();
+ return DAG.getTargetExternalSymbol(name->c_str(), v);
+}
+
+SDValue
+NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
+ return getExtSymb(DAG, ".PARAM", idx, v);
+}
+
+SDValue
+NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
+ return getExtSymb(DAG, ".HLPPARAM", idx);
+}
+
+// Check to see if the kernel argument is image*_t or sampler_t
+
+bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
+ const char *specialTypes[] = {
+ "struct._image2d_t",
+ "struct._image3d_t",
+ "struct._sampler_t"
+ };
+
+ const Type *Ty = arg->getType();
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+
+ if (!PTy)
+ return false;
+
+ if (!context)
+ return false;
+
+ const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
+ const std::string TypeName = STy ? STy->getName() : "";
+
+ for (int i=0, e=sizeof(specialTypes)/sizeof(specialTypes[0]); i!=e; ++i)
+ if (TypeName == specialTypes[i])
+ return true;
+
+ return false;
+}
+
+SDValue
+NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetData *TD = getTargetData();
+
+ const Function *F = MF.getFunction();
+ const AttrListPtr &PAL = F->getAttributes();
+
+ SDValue Root = DAG.getRoot();
+ std::vector<SDValue> OutChains;
+
+ bool isKernel = llvm::isKernelFunction(*F);
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ std::vector<Type *> argTypes;
+ std::vector<const Argument *> theArgs;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I) {
+ theArgs.push_back(I);
+ argTypes.push_back(I->getType());
+ }
+ assert(argTypes.size() == Ins.size() &&
+ "Ins types and function types did not match");
+
+ int idx = 0;
+ for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) {
+ Type *Ty = argTypes[i];
+ EVT ObjectVT = getValueType(Ty);
+ assert(ObjectVT == Ins[i].VT &&
+ "Ins type did not match function type");
+
+ // If the kernel argument is image*_t or sampler_t, convert it to
+ // a i32 constant holding the parameter position. This can later
+ // matched in the AsmPrinter to output the correct mangled name.
+ if (isImageOrSamplerVal(theArgs[i],
+ (theArgs[i]->getParent() ?
+ theArgs[i]->getParent()->getParent() : 0))) {
+ assert(isKernel && "Only kernels can have image/sampler params");
+ InVals.push_back(DAG.getConstant(i+1, MVT::i32));
+ continue;
+ }
+
+ if (theArgs[i]->use_empty()) {
+ // argument is dead
+ InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
+ continue;
+ }
+
+ // In the following cases, assign a node order of "idx+1"
+ // to newly created nodes. The SDNOdes for params have to
+ // appear in the same order as their order of appearance
+ // in the original function. "idx+1" holds that order.
+ if (PAL.paramHasAttr(i+1, Attribute::ByVal) == false) {
+ // A plain scalar.
+ if (isABI || isKernel) {
+ // If ABI, load from the param symbol
+ SDValue Arg = getParamSymbol(DAG, idx);
+ Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT(
+ F->getContext()),
+ llvm::ADDRESS_SPACE_PARAM));
+ SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
+ MachinePointerInfo(srcValue), false, false,
+ false,
+ TD->getABITypeAlignment(ObjectVT.getTypeForEVT(
+ F->getContext())));
+ if (p.getNode())
+ DAG.AssignOrdering(p.getNode(), idx+1);
+ InVals.push_back(p);
+ }
+ else {
+ // If no ABI, just move the param symbol
+ SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
+ SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
+ if (p.getNode())
+ DAG.AssignOrdering(p.getNode(), idx+1);
+ InVals.push_back(p);
+ }
+ continue;
+ }
+
+ // Param has ByVal attribute
+ if (isABI || isKernel) {
+ // Return MoveParam(param symbol).
+ // Ideally, the param symbol can be returned directly,
+ // but when SDNode builder decides to use it in a CopyToReg(),
+ // machine instruction fails because TargetExternalSymbol
+ // (not lowered) is target dependent, and CopyToReg assumes
+ // the source is lowered.
+ SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
+ if (p.getNode())
+ DAG.AssignOrdering(p.getNode(), idx+1);
+ if (isKernel)
+ InVals.push_back(p);
+ else {
+ SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
+ DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
+ p);
+ InVals.push_back(p2);
+ }
+ } else {
+ // Have to move a set of param symbols to registers and
+ // store them locally and return the local pointer in InVals
+ const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
+ assert(elemPtrType &&
+ "Byval parameter should be a pointer type");
+ Type *elemType = elemPtrType->getElementType();
+ // Compute the constituent parts
+ SmallVector<EVT, 16> vtparts;
+ SmallVector<uint64_t, 16> offsets;
+ ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
+ unsigned totalsize = 0;
+ for (unsigned j=0, je=vtparts.size(); j!=je; ++j)
+ totalsize += vtparts[j].getStoreSizeInBits();
+ SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()->
+ CreateStackObject(totalsize/8, 16, false),
+ getPointerTy());
+ unsigned sizesofar = 0;
+ std::vector<SDValue> theChains;
+ for (unsigned j=0, je=vtparts.size(); j!=je; ++j) {
+ unsigned numElems = 1;
+ if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements();
+ for (unsigned k=0, ke=numElems; k!=ke; ++k) {
+ EVT tmpvt = vtparts[j];
+ if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
+ SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
+ getParamSymbol(DAG, idx, tmpvt));
+ SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
+ DAG.getConstant(sizesofar, getPointerTy()));
+ theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
+ MachinePointerInfo(), false, false, 0));
+ sizesofar += tmpvt.getStoreSizeInBits()/8;
+ ++idx;
+ }
+ }
+ --idx;
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0],
+ theChains.size());
+ InVals.push_back(localcopy);
+ }
+ }
+
+ // Clang will check explicit VarArg and issue error if any. However, Clang
+ // will let code with
+ // implicit var arg like f() pass.
+ // We treat this case as if the arg list is empty.
+ //if (F.isVarArg()) {
+ // assert(0 && "VarArg not supported yet!");
+ //}
+
+ if (!OutChains.empty())
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size()));
+
+ return Chain;
+}
+
+SDValue
+NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ unsigned sizesofar = 0;
+ unsigned idx = 0;
+ for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+ SDValue theVal = OutVals[i];
+ EVT theValType = theVal.getValueType();
+ unsigned numElems = 1;
+ if (theValType.isVector()) numElems = theValType.getVectorNumElements();
+ for (unsigned j=0,je=numElems; j!=je; ++j) {
+ SDValue tmpval = theVal;
+ if (theValType.isVector())
+ tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ theValType.getVectorElementType(),
+ tmpval, DAG.getIntPtrConstant(j));
+ Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval,
+ dl, MVT::Other,
+ Chain,
+ DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
+ tmpval);
+ if (theValType.isVector())
+ sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8;
+ else
+ sizesofar += theValType.getStoreSizeInBits()/8;
+ ++idx;
+ }
+ }
+
+ return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+void
+NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const
+{
+ if (Constraint.length() > 1)
+ return;
+ else
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+// NVPTX suuport vector of legal types of any length in Intrinsics because the
+// NVPTX specific type legalizer
+// will legalize them to the PTX supported length.
+bool
+NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
+ if (isTypeLegal(VT))
+ return true;
+ if (VT.isVector()) {
+ MVT eVT = VT.getVectorElementType();
+ if (isTypeLegal(eVT))
+ return true;
+ }
+ return false;
+}
+
+
+// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
+// TgtMemIntrinsic
+// because we need the information that is only available in the "Value" type
+// of destination
+// pointer. In particular, the address space information.
+bool
+NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ default:
+ return false;
+
+ case Intrinsic::nvvm_atomic_load_add_f32:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::f32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = true;
+ Info.align = 0;
+ return true;
+
+ case Intrinsic::nvvm_atomic_load_inc_32:
+ case Intrinsic::nvvm_atomic_load_dec_32:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = true;
+ Info.align = 0;
+ return true;
+
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p:
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
+ Info.memVT = MVT::i32;
+ else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
+ Info.memVT = getPointerTy();
+ else
+ Info.memVT = MVT::f32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 0;
+ return true;
+
+ }
+ return false;
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+/// Used to guide target specific optimizations, like loop strength reduction
+/// (LoopStrengthReduce.cpp) and memory optimization for address mode
+/// (CodeGenPrepare.cpp)
+bool
+NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+
+ // AddrMode - This represents an addressing mode of:
+ // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
+ //
+ // The legal address modes are
+ // - [avar]
+ // - [areg]
+ // - [areg+immoff]
+ // - [immAddr]
+
+ if (AM.BaseGV) {
+ if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
+ return false;
+ return true;
+ }
+
+ switch (AM.Scale) {
+ case 0: // "r", "r+i" or "i" is allowed
+ break;
+ case 1:
+ if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
+ return false;
+ // Otherwise we have r+i.
+ break;
+ default:
+ // No scale > 1 is allowed
+ return false;
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// NVPTX Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+NVPTXTargetLowering::ConstraintType
+NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default:
+ break;
+ case 'r':
+ case 'h':
+ case 'c':
+ case 'l':
+ case 'f':
+ case 'd':
+ case '0':
+ case 'N':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*>
+NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'c':
+ return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
+ case 'h':
+ return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
+ case 'r':
+ return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
+ case 'l':
+ case 'N':
+ return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
+ case 'f':
+ return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
+ case 'd':
+ return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+
+
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
+ return 4;
+}
--- /dev/null
+//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that NVPTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXISELLOWERING_H
+#define NVPTXISELLOWERING_H
+
+#include "NVPTX.h"
+#include "NVPTXSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+namespace NVPTXISD {
+enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ Wrapper,
+ CALL,
+ RET_FLAG,
+ LOAD_PARAM,
+ NVBuiltin,
+ DeclareParam,
+ DeclareScalarParam,
+ DeclareRetParam,
+ DeclareRet,
+ DeclareScalarRet,
+ LoadParam,
+ StoreParam,
+ StoreParamS32, // to sext and store a <32bit value, not used currently
+ StoreParamU32, // to zext and store a <32bit value, not used currently
+ MoveToParam,
+ PrintCall,
+ PrintCallUni,
+ CallArgBegin,
+ CallArg,
+ LastCallArg,
+ CallArgEnd,
+ CallVoid,
+ CallVal,
+ CallSymbol,
+ Prototype,
+ MoveParam,
+ MoveRetval,
+ MoveToRetval,
+ StoreRetval,
+ PseudoUseParam,
+ RETURN,
+ CallSeqBegin,
+ CallSeqEnd,
+ Dummy
+};
+}
+
+//===--------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===--------------------------------------------------------------------===//
+class NVPTXTargetLowering : public TargetLowering {
+public:
+ explicit NVPTXTargetLowering(NVPTXTargetMachine &TM);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
+ SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ bool isTypeSupportedInIntrinsic(MVT VT) const;
+
+ bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
+ unsigned Intrinsic) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type
+ /// Used to guide target specific optimizations, like loop strength
+ /// reduction (LoopStrengthReduce.cpp) and memory optimization for
+ /// address mode (CodeGenPrepare.cpp)
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
+ /// getFunctionAlignment - Return the Log2 alignment of this function.
+ virtual unsigned getFunctionAlignment(const Function *F) const;
+
+ virtual EVT getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+ }
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ // This will be re-added once the necessary changes to LowerCallTo are
+ // upstreamed.
+ // virtual SDValue
+ // LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ // bool isVarArg, bool doesNotRet, bool &isTailCall,
+ // const SmallVectorImpl<ISD::OutputArg> &Outs,
+ // const SmallVectorImpl<SDValue> &OutVals,
+ // const SmallVectorImpl<ISD::InputArg> &Ins,
+ // DebugLoc dl, SelectionDAG &DAG,
+ // SmallVectorImpl<SDValue> &InVals,
+ // Type *retTy, const ArgListTy &Args) const;
+
+ std::string getPrototype(Type *, const ArgListTy &,
+ const SmallVectorImpl<ISD::OutputArg> &,
+ unsigned retAlignment) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
+ SelectionDAG &DAG) const;
+
+ virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ NVPTXTargetMachine *nvTM;
+
+ // PTX always uses 32-bit shift amounts
+ virtual MVT getShiftAmountTy(EVT LHSTy) const {
+ return MVT::i32;
+ }
+
+private:
+ const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
+
+ SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT =
+ MVT::i32) const;
+ SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
+ SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
+
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+};
+} // namespace llvm
+
+#endif // NVPTXISELLOWERING_H
--- /dev/null
+//===- NVPTXInstrFormats.td - NVPTX Instruction Formats-------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe NVPTX instructions format
+//
+//===----------------------------------------------------------------------===//
+
+// Vector instruction type enum
+class VecInstTypeEnum<bits<4> val> {
+ bits<4> Value=val;
+}
+def VecNOP : VecInstTypeEnum<0>;
+
+// Generic NVPTX Format
+
+class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<14> Inst;
+
+ let Namespace = "NVPTX";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+
+ // TSFlagFields
+ bits<4> VecInstType = VecNOP.Value;
+ bit IsSimpleMove = 0;
+ bit IsLoad = 0;
+ bit IsStore = 0;
+
+ let TSFlags{3-0} = VecInstType;
+ let TSFlags{4-4} = IsSimpleMove;
+ let TSFlags{5-5} = IsLoad;
+ let TSFlags{6-6} = IsStore;
+}
--- /dev/null
+//===- NVPTXInstrInfo.cpp - NVPTX Instruction Information -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXInstrInfo.h"
+#include "NVPTXTargetMachine.h"
+#define GET_INSTRINFO_CTOR
+#include "NVPTXGenInstrInfo.inc"
+#include "llvm/Function.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include <cstdio>
+
+
+using namespace llvm;
+
+// FIXME: Add the subtarget support on this constructor.
+NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
+: NVPTXGenInstrInfo(),
+ TM(tm),
+ RegInfo(*this, *TM.getSubtargetImpl()) {}
+
+
+void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
+ NVPTX::Int32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
+ NVPTX::Int8RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
+ NVPTX::Int1RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
+ NVPTX::Float32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
+ NVPTX::Int16RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
+ NVPTX::Int64RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
+ NVPTX::Float64RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V4F32RegsRegClass.contains(DestReg) &&
+ NVPTX::V4F32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V4I32RegsRegClass.contains(DestReg) &&
+ NVPTX::V4I32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V2F32RegsRegClass.contains(DestReg) &&
+ NVPTX::V2F32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V2I32RegsRegClass.contains(DestReg) &&
+ NVPTX::V2I32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V4I8RegsRegClass.contains(DestReg) &&
+ NVPTX::V4I8RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V2I8RegsRegClass.contains(DestReg) &&
+ NVPTX::V2I8RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V4I16RegsRegClass.contains(DestReg) &&
+ NVPTX::V4I16RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V2I16RegsRegClass.contains(DestReg) &&
+ NVPTX::V2I16RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V2I64RegsRegClass.contains(DestReg) &&
+ NVPTX::V2I64RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V2F64RegsRegClass.contains(DestReg) &&
+ NVPTX::V2F64RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else {
+ assert(0 && "Don't know how to copy a register");
+ }
+}
+
+bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg,
+ unsigned &DestReg) const {
+ // Look for the appropriate part of TSFlags
+ bool isMove = false;
+
+ unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >>
+ NVPTX::SimpleMoveShift;
+ isMove = (TSFlags == 1);
+
+ if (isMove) {
+ MachineOperand dest = MI.getOperand(0);
+ MachineOperand src = MI.getOperand(1);
+ assert(dest.isReg() && "dest of a movrr is not a reg");
+ assert(src.isReg() && "src of a movrr is not a reg");
+
+ SrcReg = src.getReg();
+ DestReg = dest.getReg();
+ return true;
+ }
+
+ return false;
+}
+
+bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
+{
+ switch (MI.getOpcode()) {
+ default: return false;
+ case NVPTX::INT_PTX_SREG_NTID_X:
+ case NVPTX::INT_PTX_SREG_NTID_Y:
+ case NVPTX::INT_PTX_SREG_NTID_Z:
+ case NVPTX::INT_PTX_SREG_TID_X:
+ case NVPTX::INT_PTX_SREG_TID_Y:
+ case NVPTX::INT_PTX_SREG_TID_Z:
+ case NVPTX::INT_PTX_SREG_CTAID_X:
+ case NVPTX::INT_PTX_SREG_CTAID_Y:
+ case NVPTX::INT_PTX_SREG_CTAID_Z:
+ case NVPTX::INT_PTX_SREG_NCTAID_X:
+ case NVPTX::INT_PTX_SREG_NCTAID_Y:
+ case NVPTX::INT_PTX_SREG_NCTAID_Z:
+ case NVPTX::INT_PTX_SREG_WARPSIZE:
+ return true;
+ }
+}
+
+
+bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
+ unsigned &AddrSpace) const {
+ bool isLoad = false;
+ unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >>
+ NVPTX::isLoadShift;
+ isLoad = (TSFlags == 1);
+ if (isLoad)
+ AddrSpace = getLdStCodeAddrSpace(MI);
+ return isLoad;
+}
+
+bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
+ unsigned &AddrSpace) const {
+ bool isStore = false;
+ unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >>
+ NVPTX::isStoreShift;
+ isStore = (TSFlags == 1);
+ if (isStore)
+ AddrSpace = getLdStCodeAddrSpace(MI);
+ return isStore;
+}
+
+
+bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
+ unsigned addrspace = 0;
+ if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS)
+ return false;
+ if (isLoadInstr(*MI, addrspace))
+ if (addrspace == NVPTX::PTXLdStInstCode::SHARED)
+ return false;
+ if (isStoreInstr(*MI, addrspace))
+ if (addrspace == NVPTX::PTXLdStInstCode::SHARED)
+ return false;
+ return true;
+}
+
+
+/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+/// implemented for a target). Upon success, this returns false and returns
+/// with the following information in various cases:
+///
+/// 1. If this block ends with no branches (it just falls through to its succ)
+/// just return false, leaving TBB/FBB null.
+/// 2. If this block ends with only an unconditional branch, it sets TBB to be
+/// the destination block.
+/// 3. If this block ends with an conditional branch and it falls through to
+/// an successor block, it sets TBB to be the branch destination block and a
+/// list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+/// 4. If this block ends with an conditional branch and an unconditional
+/// block, it returns the 'true' destination in TBB, the 'false' destination
+/// in FBB, and a list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+///
+/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// cases where this method returns success.
+///
+bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == NVPTX::GOTO) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == NVPTX::CBranch) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with NVPTX::GOTO and NVPTX:CBranch, handle it.
+ if (SecondLastInst->getOpcode() == NVPTX::CBranch &&
+ LastInst->getOpcode() == NVPTX::GOTO) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two NVPTX:GOTOs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == NVPTX::GOTO &&
+ LastInst->getOpcode() == NVPTX::GOTO) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != NVPTX::CBranch)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "NVPTX branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(NVPTX::CBranch))
+ .addReg(Cond[0].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ BuildMI(&MBB, DL, get(NVPTX::CBranch))
+ .addReg(Cond[0].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
+ return 2;
+}
--- /dev/null
+//===- NVPTXInstrInfo.h - NVPTX Instruction Information----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the niversity of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXINSTRUCTIONINFO_H
+#define NVPTXINSTRUCTIONINFO_H
+
+#include "NVPTX.h"
+#include "NVPTXRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "NVPTXGenInstrInfo.inc"
+
+namespace llvm {
+
+class NVPTXInstrInfo : public NVPTXGenInstrInfo
+{
+ NVPTXTargetMachine &TM;
+ const NVPTXRegisterInfo RegInfo;
+public:
+ explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);
+
+ virtual const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
+
+ /* The following virtual functions are used in register allocation.
+ * They are not implemented because the existing interface and the logic
+ * at the caller side do not work for the elementized vector load and store.
+ *
+ * virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ * int &FrameIndex) const;
+ * virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ * int &FrameIndex) const;
+ * virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ * MachineBasicBlock::iterator MBBI,
+ * unsigned SrcReg, bool isKill, int FrameIndex,
+ * const TargetRegisterClass *RC) const;
+ * virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ * MachineBasicBlock::iterator MBBI,
+ * unsigned DestReg, int FrameIndex,
+ * const TargetRegisterClass *RC) const;
+ */
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const ;
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg,
+ unsigned &DestReg) const;
+ bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
+ bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
+ bool isReadSpecialReg(MachineInstr &MI) const;
+
+ virtual bool CanTailMerge(const MachineInstr *MI) const ;
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
+ return MI.getOperand(2).getImm();
+ }
+
+};
+
+} // namespace llvm
+
+#endif
--- /dev/null
+//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "NVPTXInstrFormats.td"
+
+// A NOP instruction
+def NOP : NVPTXInst<(outs), (ins), "", []>;
+
+// List of vector specific properties
+def isVecLD : VecInstTypeEnum<1>;
+def isVecST : VecInstTypeEnum<2>;
+def isVecBuild : VecInstTypeEnum<3>;
+def isVecShuffle : VecInstTypeEnum<4>;
+def isVecExtract : VecInstTypeEnum<5>;
+def isVecInsert : VecInstTypeEnum<6>;
+def isVecDest : VecInstTypeEnum<7>;
+def isVecOther : VecInstTypeEnum<15>;
+
+//===----------------------------------------------------------------------===//
+// NVPTX Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+def brtarget : Operand<OtherVT>;
+
+//===----------------------------------------------------------------------===//
+// NVPTX Instruction Predicate Definitions
+//===----------------------------------------------------------------------===//
+
+
+def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
+def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
+def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
+def useAtomRedG32forGen32 :
+ Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
+def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
+def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
+def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
+def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
+def useAtomRedG64forGen64 :
+ Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
+def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
+def hasVote : Predicate<"Subtarget.hasVote()">;
+def hasDouble : Predicate<"Subtarget.hasDouble()">;
+def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
+def hasLDU : Predicate<"Subtarget.hasLDU()">;
+def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
+
+def doF32FTZ : Predicate<"UseF32FTZ">;
+
+def doFMAF32 : Predicate<"doFMAF32">;
+def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">;
+def doFMAF32AGG : Predicate<"doFMAF32AGG">;
+def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
+def doFMAF64 : Predicate<"doFMAF64">;
+def doFMAF64AGG : Predicate<"doFMAF64AGG">;
+def doFMADF32 : Predicate<"doFMADF32">;
+def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;
+
+def doMulWide : Predicate<"doMulWide">;
+
+def allowFMA : Predicate<"allowFMA">;
+def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
+
+def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
+def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
+
+def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
+
+def true : Predicate<"1">;
+
+//===----------------------------------------------------------------------===//
+// Special Handling for 8-bit Operands and Operations
+//
+// PTX supports 8-bit signed and unsigned types, but does not support 8-bit
+// operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
+// 8-bit registers.
+//
+// PTX ld, st and cvt instructions permit source and destination data operands
+// to be wider than the instruction-type size, so that narrow values may be
+// loaded, stored, and converted using regular-width registers.
+//
+// So in PTX generation, we
+// - always use 16-bit registers in place in 8-bit registers.
+// (8-bit variables should stay as 8-bit as they represent memory layout.)
+// - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
+// before operation
+// . div
+// . rem
+// . neg (sign)
+// . set, setp
+// . shr
+//
+// We are patching the operations by inserting the cvt instructions in the
+// asm strings of the affected instructions.
+//
+// Since vector operations, except for ld/st, are eventually elementized. We
+// do not need to special-hand the vector 8-bit operations.
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Generate string block like
+// {
+// .reg .s16 %temp1;
+// .reg .s16 %temp2;
+// cvt.s16.s8 %temp1, %a;
+// cvt.s16.s8 %temp2, %b;
+// opc.s16 %dst, %temp1, %temp2;
+// }
+// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
+class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
+ string s = !strconcat("{{\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp1;\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp2;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
+}
+
+// Generate string block like
+// {
+// .reg .s16 %temp1;
+// .reg .s16 %temp2;
+// cvt.s16.s8 %temp1, %a;
+// mov.b16 %temp2, %b;
+// cvt.s16.s8 %temp2, %temp2;
+// opc.s16 %dst, %temp1, %temp2;
+// }
+// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
+class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
+ string s = !strconcat("{{\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp1;\n\t",
+ !strconcat(".reg .",
+ !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
+ !strconcat("mov.b16 \t%temp2, $b;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
+}
+
+// Generate string block like
+// {
+// .reg .s16 %temp1;
+// .reg .s16 %temp2;
+// mov.b16 %temp1, %b;
+// cvt.s16.s8 %temp1, %temp1;
+// cvt.s16.s8 %temp2, %a;
+// opc.s16 %dst, %temp1, %temp2;
+// }
+// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
+class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
+ string s = !strconcat("{{\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp1;\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp2;\n\t",
+ !strconcat("mov.b16 \t%temp1, $a;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Some Common Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+multiclass I3<string OpcStr, SDNode OpNode> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int64Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int16Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
+ def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
+}
+
+multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int64Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int16Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
+ def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
+}
+
+multiclass I3_noi8<string OpcStr, SDNode OpNode> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int64Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int16Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
+}
+
+multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+}
+
+multiclass F3<string OpcStr, SDNode OpNode> {
+ def f64rr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst,
+ (OpNode Float64Regs:$a, Float64Regs:$b))]>,
+ Requires<[allowFMA]>;
+ def f64ri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst,
+ (OpNode Float64Regs:$a, fpimm:$b))]>,
+ Requires<[allowFMA]>;
+ def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[allowFMA_ftz]>;
+ def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[allowFMA_ftz]>;
+ def f32rr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[allowFMA]>;
+ def f32ri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[allowFMA]>;
+}
+
+multiclass F3_rn<string OpcStr, SDNode OpNode> {
+ def f64rr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst,
+ (OpNode Float64Regs:$a, Float64Regs:$b))]>;
+ def f64ri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst,
+ (OpNode Float64Regs:$a, fpimm:$b))]>;
+ def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32rr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, Float32Regs:$b))]>;
+ def f32ri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, fpimm:$b))]>;
+}
+
+multiclass F2<string OpcStr, SDNode OpNode> {
+ def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
+ !strconcat(OpcStr, ".f64 \t$dst, $a;"),
+ [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
+ def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
+ Requires<[doF32FTZ]>;
+ def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
+ !strconcat(OpcStr, ".f32 \t$dst, $a;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// NVPTX Instructions.
+//===----------------------------------------------------------------------===//
+
+//-----------------------------------
+// Integer Arithmetic
+//-----------------------------------
+
+multiclass ADD_SUB_i1<SDNode OpNode> {
+ def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
+ "xor.pred \t$dst, $a, $b;",
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
+ def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
+ "xor.pred \t$dst, $a, $b;",
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
+}
+
+defm ADD_i1 : ADD_SUB_i1<add>;
+defm SUB_i1 : ADD_SUB_i1<sub>;
+
+
+defm ADD : I3<"add.s", add>;
+defm SUB : I3<"sub.s", sub>;
+
+defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
+defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
+
+defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
+defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
+
+//mul.wide PTX instruction
+def SInt32Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ if (v.isSignedIntN(32))
+ return true;
+ return false;
+}]>;
+
+def UInt32Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ if (v.isIntN(32))
+ return true;
+ return false;
+}]>;
+
+def SInt16Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ if (v.isSignedIntN(16))
+ return true;
+ return false;
+}]>;
+
+def UInt16Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ if (v.isIntN(16))
+ return true;
+ return false;
+}]>;
+
+def Int5Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ // Check if 0 <= v < 32
+ // Only then the result from (x << v) will be i32
+ if (v.sge(0) && v.slt(32))
+ return true;
+ return false;
+}]>;
+
+def Int4Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ // Check if 0 <= v < 16
+ // Only then the result from (x << v) will be i16
+ if (v.sge(0) && v.slt(16))
+ return true;
+ return false;
+}]>;
+
+def SHL2MUL32 : SDNodeXForm<imm, [{
+ const APInt &v = N->getAPIntValue();
+ APInt temp(32, 1);
+ return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
+}]>;
+
+def SHL2MUL16 : SDNodeXForm<imm, [{
+ const APInt &v = N->getAPIntValue();
+ APInt temp(16, 1);
+ return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
+}]>;
+
+def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ "mul.wide.s32 \t$dst, $a, $b;", []>;
+def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int32Regs:$a, i64imm:$b),
+ "mul.wide.s32 \t$dst, $a, $b;", []>;
+
+def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ "mul.wide.u32 \t$dst, $a, $b;", []>;
+def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int32Regs:$a, i64imm:$b),
+ "mul.wide.u32 \t$dst, $a, $b;", []>;
+
+def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ "mul.wide.s16 \t$dst, $a, $b;", []>;
+def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int16Regs:$a, i32imm:$b),
+ "mul.wide.s16 \t$dst, $a, $b;", []>;
+
+def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ "mul.wide.u16 \t$dst, $a, $b;", []>;
+def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int16Regs:$a, i32imm:$b),
+ "mul.wide.u16 \t$dst, $a, $b;", []>;
+
+def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
+ (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
+ Requires<[doMulWide]>;
+def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
+ (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
+ (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
+ Requires<[doMulWide]>;
+def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
+ (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
+ (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
+ (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
+ (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
+def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
+ (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
+ (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
+def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
+ (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
+ (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
+def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
+ (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
+ Requires<[doMulWide]>;
+
+defm MULT : I3<"mul.lo.s", mul>;
+
+defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;
+defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;
+def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.s16 temp1; \n\t",
+ !strconcat(".reg \t.s16 temp2; \n\t",
+ !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
+ !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
+ !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
+ !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
+ !strconcat("}}", "")))))))),
+ [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;
+def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.s16 temp1; \n\t",
+ !strconcat(".reg \t.s16 temp2; \n\t",
+ !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
+ !strconcat("mov.b16 \ttemp2, $b; \n\t",
+ !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",
+ !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
+ !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;
+def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.u16 temp1; \n\t",
+ !strconcat(".reg \t.u16 temp2; \n\t",
+ !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
+ !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
+ !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
+ !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
+ !strconcat("}}", "")))))))),
+ [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;
+def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.u16 temp1; \n\t",
+ !strconcat(".reg \t.u16 temp2; \n\t",
+ !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
+ !strconcat("mov.b16 \ttemp2, $b; \n\t",
+ !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",
+ !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
+ !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;
+
+
+defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;
+defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;
+
+defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
+// The ri version will not be selected as DAGCombiner::visitSREM will lower it.
+defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
+// The ri version will not be selected as DAGCombiner::visitUREM will lower it.
+
+def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
+ Int8Regs:$c))]>;
+def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
+ imm:$c))]>;
+def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
+ Int8Regs:$c))]>;
+def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
+ imm:$c))]>;
+
+def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (add
+ (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
+def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (add
+ (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
+def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (add
+ (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
+def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
+ imm:$c))]>;
+
+def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (add
+ (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
+def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (add
+ (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
+def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (add
+ (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
+def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (add
+ (mul Int32Regs:$a, imm:$b), imm:$c))]>;
+
+def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (add
+ (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
+def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (add
+ (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
+def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (add
+ (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
+def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (add
+ (mul Int64Regs:$a, imm:$b), imm:$c))]>;
+
+
+def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
+ !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
+ "neg.s16 \t$dst, $dst;"),
+ [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
+def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+ "neg.s16 \t$dst, $src;",
+ [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
+def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+ "neg.s32 \t$dst, $src;",
+ [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
+def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ "neg.s64 \t$dst, $src;",
+ [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
+
+//-----------------------------------
+// Floating Point Arithmetic
+//-----------------------------------
+
+// Constant 1.0f
+def FloatConst1 : PatLeaf<(fpimm), [{
+ if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
+ return false;
+ float f = (float)N->getValueAPF().convertToFloat();
+ return (f==1.0f);
+}]>;
+// Constand (double)1.0
+def DoubleConst1 : PatLeaf<(fpimm), [{
+ if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
+ return false;
+ double d = (double)N->getValueAPF().convertToDouble();
+ return (d==1.0);
+}]>;
+
+defm FADD : F3<"add", fadd>;
+defm FSUB : F3<"sub", fsub>;
+defm FMUL : F3<"mul", fmul>;
+
+defm FADD_rn : F3_rn<"add", fadd>;
+defm FSUB_rn : F3_rn<"sub", fsub>;
+defm FMUL_rn : F3_rn<"mul", fmul>;
+
+defm FABS : F2<"abs", fabs>;
+defm FNEG : F2<"neg", fneg>;
+defm FSQRT : F2<"sqrt.rn", fsqrt>;
+
+//
+// F64 division
+//
+def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
+ (ins f64imm:$a, Float64Regs:$b),
+ "rcp.rn.f64 \t$dst, $b;",
+ [(set Float64Regs:$dst,
+ (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
+def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ "div.rn.f64 \t$dst, $a, $b;",
+ [(set Float64Regs:$dst,
+ (fdiv Float64Regs:$a, Float64Regs:$b))]>;
+def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ "div.rn.f64 \t$dst, $a, $b;",
+ [(set Float64Regs:$dst,
+ (fdiv Float64Regs:$a, fpimm:$b))]>;
+
+//
+// F32 Approximate reciprocal
+//
+def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.ftz.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX, doF32FTZ]>;
+def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX]>;
+//
+// F32 Approximate division
+//
+def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.approx.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX, doF32FTZ]>;
+def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.approx.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX]>;
+//
+// F32 Semi-accurate reciprocal
+//
+// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
+//
+def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.ftz.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL]>;
+//
+// F32 Semi-accurate division
+//
+def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.full.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.full.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.full.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL]>;
+def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.full.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[do_DIVF32_FULL]>;
+//
+// F32 Accurate reciprocal
+//
+def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.rn.ftz.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[reqPTX20, doF32FTZ]>;
+def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.rn.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[reqPTX20]>;
+//
+// F32 Accurate division
+//
+def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.rn.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[doF32FTZ, reqPTX20]>;
+def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.rn.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[doF32FTZ, reqPTX20]>;
+def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.rn.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[reqPTX20]>;
+def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.rn.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[reqPTX20]>;
+
+
+multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
+ def rrr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd
+ (fmul Float32Regs:$a, Float32Regs:$b),
+ Float32Regs:$c))]>, Requires<[Pred]>;
+ // This is to WAR a wierd bug in Tablegen that does not automatically
+ // generate the following permutated rule rrr2 from the above rrr.
+ // So we explicitly add it here. This happens to FMA32 only.
+ // See the comments at FMAD32 and FMA32 for more information.
+ def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd Float32Regs:$c,
+ (fmul Float32Regs:$a, Float32Regs:$b)))]>,
+ Requires<[Pred]>;
+ def rri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd
+ (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
+ Requires<[Pred]>;
+ def rir : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd
+ (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
+ Requires<[Pred]>;
+ def rii : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd
+ (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
+ Requires<[Pred]>;
+}
+
+multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
+ def rrr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float64Regs:$dst, (fadd
+ (fmul Float64Regs:$a, Float64Regs:$b),
+ Float64Regs:$c))]>, Requires<[Pred]>;
+ def rri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
+ Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
+ def rir : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float64Regs:$dst, (fadd
+ (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
+ Requires<[Pred]>;
+ def rii : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float64Regs:$dst, (fadd
+ (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
+ Requires<[Pred]>;
+}
+
+// Due to a unknown reason (most likely a bug in tablegen), tablegen does not
+// automatically generate the rrr2 rule from
+// the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
+// If we reverse the order of the following two lines, then rrr2 rule will be
+// generated for FMA32, but not for rrr.
+// Therefore, we manually write the rrr2 rule in FPCONTRACT32.
+defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
+defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
+defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
+defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
+defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
+
+// b*c-a => fmad(b, c, -a)
+multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
+ (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
+ Requires<[Pred]>;
+}
+
+// a-b*c => fmad(-b,c, a)
+// - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
+// b*c-a => fmad(b, c, -a)
+// - legal because b*c-a <=> b*c+(-a)
+multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
+ (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
+ Requires<[Pred]>;
+ def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
+ (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
+ Requires<[Pred]>;
+}
+
+// a-b*c => fmad(-b,c, a)
+// b*c-a => fmad(b, c, -a)
+multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
+ (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
+ Requires<[Pred]>;
+
+ def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
+ (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
+ Requires<[Pred]>;
+}
+
+defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
+defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
+defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
+defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
+defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
+
+def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+ "sin.approx.f32 \t$dst, $src;",
+ [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
+def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+ "cos.approx.f32 \t$dst, $src;",
+ [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
+
+//-----------------------------------
+// Logical Arithmetic
+//-----------------------------------
+
+multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
+ def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
+ !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
+ def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
+ !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
+ def b8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def b8ri: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
+ def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int16Regs:$b))]>;
+ def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
+ def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int64Regs:$b))]>;
+ def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+}
+
+defm OR : LOG_FORMAT<"or", or>;
+defm AND : LOG_FORMAT<"and", and>;
+defm XOR : LOG_FORMAT<"xor", xor>;
+
+def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
+ "not.pred \t$dst, $src;",
+ [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
+def NOT8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
+ "not.b16 \t$dst, $src;",
+ [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
+def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+ "not.b16 \t$dst, $src;",
+ [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
+def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+ "not.b32 \t$dst, $src;",
+ [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
+def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ "not.b64 \t$dst, $src;",
+ [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
+
+// For shifts, the second src operand must be 32-bit value
+multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int32Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ (i32 imm:$b)))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ (i32 imm:$b)))]>;
+ def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
+ (i32 imm:$b)))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int32Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ (i32 imm:$b)))]>;
+ def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+ Int32Regs:$b))]>;
+ def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+ (i32 imm:$b)))]>;
+}
+
+defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
+
+// For shifts, the second src operand must be 32-bit value
+// Need to add cvt for the 8-bits.
+multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int32Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ (i32 imm:$b)))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ (i32 imm:$b)))]>;
+ def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
+ (i32 imm:$b)))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int32Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ (i32 imm:$b)))]>;
+ def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
+ !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+ Int32Regs:$b))]>;
+ def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
+ !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+ (i32 imm:$b)))]>;
+}
+
+defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
+defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;
+
+// 32bit
+def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b32 %lhs;\n\t",
+ !strconcat(".reg .b32 %rhs;\n\t",
+ !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
+ !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
+ !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))),
+ []>;
+
+def SUB_FRM_32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
+}]>;
+
+def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
+ (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
+def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
+ (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
+
+def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
+ Int32Regs:$amt),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b32 %lhs;\n\t",
+ !strconcat(".reg .b32 %rhs;\n\t",
+ !strconcat(".reg .b32 %amt2;\n\t",
+ !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
+ !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
+ !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
+ !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
+
+def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
+ Int32Regs:$amt),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b32 %lhs;\n\t",
+ !strconcat(".reg .b32 %rhs;\n\t",
+ !strconcat(".reg .b32 %amt2;\n\t",
+ !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
+ !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
+ !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
+ !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
+
+// 64bit
+def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
+ i32imm:$amt1, i32imm:$amt2),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b64 %lhs;\n\t",
+ !strconcat(".reg .b64 %rhs;\n\t",
+ !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
+ !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
+ !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))),
+ []>;
+
+def SUB_FRM_64 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
+}]>;
+
+def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
+ (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
+def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
+ (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
+
+def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
+ Int32Regs:$amt),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b64 %lhs;\n\t",
+ !strconcat(".reg .b64 %rhs;\n\t",
+ !strconcat(".reg .u32 %amt2;\n\t",
+ !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
+ !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
+ !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
+ !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
+
+def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
+ Int32Regs:$amt),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b64 %lhs;\n\t",
+ !strconcat(".reg .b64 %rhs;\n\t",
+ !strconcat(".reg .u32 %amt2;\n\t",
+ !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
+ !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
+ !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
+ !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
+
+
+//-----------------------------------
+// Data Movement (Load / Store, Move)
+//-----------------------------------
+
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
+ [SDNPWantRoot]>;
+def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
+ [SDNPWantRoot]>;
+
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops Int32Regs, i32imm);
+}
+def MEMri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops Int64Regs, i64imm);
+}
+
+def imem : Operand<iPTR> {
+ let PrintMethod = "printOperand";
+}
+
+def imemAny : Operand<iPTRAny> {
+ let PrintMethod = "printOperand";
+}
+
+def LdStCode : Operand<i32> {
+ let PrintMethod = "printLdStCode";
+}
+
+def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
+
+def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
+ "mov.u32 \t$dst, $a;",
+ [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
+
+def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
+ "mov.u64 \t$dst, $a;",
+ [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
+
+// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
+let IsSimpleMove=1 in {
+def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
+ "mov.pred \t$dst, $sss;", []>;
+def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
+ "mov.u16 \t$dst, $sss;", []>;
+def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
+ "mov.u16 \t$dst, $sss;", []>;
+def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
+ "mov.u32 \t$dst, $sss;", []>;
+def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
+ "mov.u64 \t$dst, $sss;", []>;
+
+def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+ "mov.f32 \t$dst, $src;", []>;
+def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
+ "mov.f64 \t$dst, $src;", []>;
+}
+def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
+ "mov.pred \t$dst, $src;",
+ [(set Int1Regs:$dst, imm:$src)]>;
+def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
+ "mov.u16 \t$dst, $src;",
+ [(set Int8Regs:$dst, imm:$src)]>;
+def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
+ "mov.u16 \t$dst, $src;",
+ [(set Int16Regs:$dst, imm:$src)]>;
+def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
+ "mov.u32 \t$dst, $src;",
+ [(set Int32Regs:$dst, imm:$src)]>;
+def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
+ "mov.u64 \t$dst, $src;",
+ [(set Int64Regs:$dst, imm:$src)]>;
+
+def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
+ "mov.f32 \t$dst, $src;",
+ [(set Float32Regs:$dst, fpimm:$src)]>;
+def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
+ "mov.f64 \t$dst, $src;",
+ [(set Float64Regs:$dst, fpimm:$src)]>;
+
+def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
+
+//---- Copy Frame Index ----
+def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
+ "add.u32 \t$dst, ${addr:add};",
+ [(set Int32Regs:$dst, ADDRri:$addr)]>;
+def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
+ "add.u64 \t$dst, ${addr:add};",
+ [(set Int64Regs:$dst, ADDRri64:$addr)]>;
+
+//-----------------------------------
+// Comparison and Selection
+//-----------------------------------
+
+// Generate string block like
+// {
+// .reg .pred p;
+// setp.gt.s16 p, %a, %b;
+// selp.s16 %dst, -1, 0, p;
+// }
+// when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b
+class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
+ string b> {
+ string t1 = "{{\n\t.reg .pred p;\n\t";
+ string t2 = !strconcat(t1 , OpcStr);
+ string t3 = !strconcat(t2 , sz1);
+ string t4 = !strconcat(t3 , " \tp, ");
+ string t5 = !strconcat(t4 , a);
+ string t6 = !strconcat(t5 , ", ");
+ string t7 = !strconcat(t6 , b);
+ string t8 = !strconcat(t7 , ";\n\tselp.s");
+ string t9 = !strconcat(t8 , sz2);
+ string t10 = !strconcat(t9, " \t");
+ string t11 = !strconcat(t10, d);
+ string s = !strconcat(t11, ", -1, 0, p;\n\t}}");
+}
+
+// Generate string block like
+// {
+// .reg .pred p;
+// .reg .s16 %temp1;
+// .reg .s16 %temp2;
+// cvt.s16.s8 %temp1, %a;
+// cvt s16.s8 %temp1, %b;
+// setp.gt.s16 p, %temp1, %temp2;
+// selp.s16 %dst, -1, 0, p;
+// }
+// when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
+class Set_Stri8<string OpcStr, string d, string a, string b, string type,
+ string cvt> {
+ string t1 = "{{\n\t.reg .pred p;\n\t";
+ string t2 = !strconcat(t1, ".reg .");
+ string t3 = !strconcat(t2, type);
+ string t4 = !strconcat(t3, " %temp1;\n\t");
+ string t5 = !strconcat(t4, ".reg .");
+ string t6 = !strconcat(t5, type);
+ string t7 = !strconcat(t6, " %temp2;\n\t");
+ string t8 = !strconcat(t7, cvt);
+ string t9 = !strconcat(t8, " \t%temp1, ");
+ string t10 = !strconcat(t9, a);
+ string t11 = !strconcat(t10, ";\n\t");
+ string t12 = !strconcat(t11, cvt);
+ string t13 = !strconcat(t12, " \t%temp2, ");
+ string t14 = !strconcat(t13, b);
+ string t15 = !strconcat(t14, ";\n\t");
+ string t16 = !strconcat(t15, OpcStr);
+ string t17 = !strconcat(t16, "16");
+ string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t");
+ string t19 = !strconcat(t18, "selp.s16 \t");
+ string t20 = !strconcat(t19, d);
+ string s = !strconcat(t20, ", -1, 0, p;\n\t}}");
+}
+
+multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
+ string TypeStr, string CVTStr> {
+ def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
+ []>;
+ def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
+ Int16Regs:$b),
+ Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
+ []>;
+ def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s,
+ []>;
+ def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
+ Int64Regs:$b),
+ Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
+ []>;
+
+ def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
+ def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
+ Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
+ def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
+ def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins&nbs