def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
"Do not demote .f64 to .f32">;
+// "no-fma" sets the subtarget's SupportsFMA attribute to false.
+def FeatureNoFMA : SubtargetFeature<"no-fma", "SupportsFMA", "false",
+                                    "Disable Fused-Multiply Add">;
+
//===- PTX Version --------------------------------------------------------===//
def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
def SupportsPTX23 : Predicate<"getSubtarget().supportsPTX23()">;
def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;
+// Fused-Multiply Add: predicates that query the subtarget's supportsFMA().
+def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">;
+def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">;
+
//===----------------------------------------------------------------------===//
// Instruction Pattern Stuff
//===----------------------------------------------------------------------===//
// In the short term, mad is supported on all PTX versions and we use a
// default rounding mode no matter what shader model or PTX version.
// TODO: Allow the rounding mode to be selectable through llc.
-defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13]>;
-defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13]>;
+// Both mad patterns are additionally gated on FMA support in the subtarget.
+defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>,
+                Requires<[SupportsSM13, SupportsFMA]>;
+defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>,
+            Requires<[DoesNotSupportSM13, SupportsFMA]>;
///===- Floating-Point Intrinsic Instructions -----------------------------===//
///===- Comparison and Selection Instructions -----------------------------===//
+// .setp
+
// Compare u16
defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ, "eq">;
: PTXShaderModel(PTX_SM_1_0),
PTXVersion(PTX_VERSION_2_0),
SupportsDouble(false),
- Is64Bit(is64Bit) {
+ SupportsFMA(true),
+ Is64Bit(is64Bit) {
std::string TARGET = "generic";
ParseSubtargetFeatures(FS, TARGET);
}
// The native .f64 type is supported on the hardware.
bool SupportsDouble;
-
+
+ // FMA (fused multiply-add) and MAD (multiply-add) instructions may be used.
+ bool SupportsFMA;
+
// Use .u64 instead of .u32 for addresses.
bool Is64Bit;
bool is64Bit() const { return Is64Bit; }
+ bool supportsFMA() const { return SupportsFMA; }
+
bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
--- /dev/null
+; Check that an fmul feeding an fadd is emitted as a "mad" instruction by
+; default, and that the no-fma subtarget feature suppresses it.
+;
+; The negative check uses "not grep": "grep -v" exits successfully whenever
+; any output line lacks the pattern, so it could never detect a stray "mad".
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | grep "mad"
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | not grep "mad"
+
+; Single-precision multiply followed by add.
+define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
+entry:
+ %a = fmul float %x, %y
+ %b = fadd float %a, %z
+ ret float %b
+}
+
+; Double-precision multiply followed by add.
+define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
+entry:
+ %a = fmul double %x, %y
+ %b = fadd double %a, %z
+ ret double %b
+}