//===----------------------------------------------------------------------===//
// Addressing
-def Use32BitAddresses : Predicate<"!getSubtarget().use64BitAddresses()">;
-def Use64BitAddresses : Predicate<"getSubtarget().use64BitAddresses()">;
+def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
+def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
// Shader Model Support
def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">;
def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">;
def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">;
+def SupportsPTX23 : Predicate<"getSubtarget().supportsPTX23()">;
+def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;
+
+// Fused-Multiply Add
+def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">;
+def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">;
//===----------------------------------------------------------------------===//
// Instruction Pattern Stuff
: SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
def PTXret
: SDNode<"PTXISD::RET", SDTNone, [SDNPHasChain]>;
+def PTXcopyaddress
+ : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
+//===- Floating-Point Instructions - 2 Operand Form -----------------------===//
+multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> {
+ def rr32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a),
+ !strconcat(opcstr, ".f32\t$d, $a"),
+ [(set RRegf32:$d, (opnode RRegf32:$a))]>;
+ def ri32 : InstPTX<(outs RRegf32:$d),
+ (ins f32imm:$a),
+ !strconcat(opcstr, ".f32\t$d, $a"),
+ [(set RRegf32:$d, (opnode fpimm:$a))]>;
+ def rr64 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a),
+ !strconcat(opcstr, ".f64\t$d, $a"),
+ [(set RRegf64:$d, (opnode RRegf64:$a))]>;
+ def ri64 : InstPTX<(outs RRegf64:$d),
+ (ins f64imm:$a),
+ !strconcat(opcstr, ".f64\t$d, $a"),
+ [(set RRegf64:$d, (opnode fpimm:$a))]>;
+}
+
//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
def rr32 : InstPTX<(outs RRegf32:$d),
}
multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
+ def ripreds : InstPTX<(outs Preds:$d),
+ (ins Preds:$a, i1imm:$b),
+ !strconcat(opcstr, ".pred\t$d, $a, $b"),
+ [(set Preds:$d, (opnode Preds:$a, imm:$b))]>;
+ def rrpreds : InstPTX<(outs Preds:$d),
+ (ins Preds:$a, Preds:$b),
+ !strconcat(opcstr, ".pred\t$d, $a, $b"),
+ [(set Preds:$d, (opnode Preds:$a, Preds:$b))]>;
def rr16 : InstPTX<(outs RRegu16:$d),
(ins RRegu16:$a, RRegu16:$b),
!strconcat(opcstr, ".b16\t$d, $a, $b"),
}
multiclass INT3ntnc<string opcstr, SDNode opnode> {
- def rr : InstPTX<(outs RRegu32:$d),
- (ins RRegu32:$a, RRegu32:$b),
- !strconcat(opcstr, "\t$d, $a, $b"),
- [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
- def ri : InstPTX<(outs RRegu32:$d),
- (ins RRegu32:$a, i32imm:$b),
- !strconcat(opcstr, "\t$d, $a, $b"),
- [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
- def ir : InstPTX<(outs RRegu32:$d),
- (ins i32imm:$a, RRegu32:$b),
- !strconcat(opcstr, "\t$d, $a, $b"),
- [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>;
+ def rr16 : InstPTX<(outs RRegu16:$d),
+ (ins RRegu16:$a, RRegu16:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
+ def rr32 : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, RRegu32:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
+ def rr64 : InstPTX<(outs RRegu64:$d),
+ (ins RRegu64:$a, RRegu64:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
+ def ri16 : InstPTX<(outs RRegu16:$d),
+ (ins RRegu16:$a, i16imm:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
+ def ri32 : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, i32imm:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
+ def ri64 : InstPTX<(outs RRegu64:$d),
+ (ins RRegu64:$a, i64imm:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
+ def ir16 : InstPTX<(outs RRegu16:$d),
+ (ins i16imm:$a, RRegu16:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode imm:$a, RRegu16:$b))]>;
+ def ir32 : InstPTX<(outs RRegu32:$d),
+ (ins i32imm:$a, RRegu32:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>;
+ def ir64 : InstPTX<(outs RRegu64:$d),
+ (ins i64imm:$a, RRegu64:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>;
}
-multiclass PTX_SETP<RegisterClass RC, string regclsname, Operand immcls,
+multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
CondCode cmp, string cmpstr> {
+ // TODO support 5-operand format: p|q, a, b, c
+
def rr
- : InstPTX<(outs Preds:$d), (ins RC:$a, RC:$b),
- !strconcat("setp.", cmpstr, ".", regclsname, "\t$d, $a, $b"),
- [(set Preds:$d, (setcc RC:$a, RC:$b, cmp))]>;
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set Preds:$p, (setcc RC:$a, RC:$b, cmp))]>;
def ri
- : InstPTX<(outs Preds:$d), (ins RC:$a, immcls:$b),
- !strconcat("setp.", cmpstr, ".", regclsname, "\t$d, $a, $b"),
- [(set Preds:$d, (setcc RC:$a, imm:$b, cmp))]>;
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set Preds:$p, (setcc RC:$a, imm:$b, cmp))]>;
+
+ def rr_and_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+ def ri_and_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+ def rr_or_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+ def ri_or_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+ def rr_xor_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+ def ri_xor_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+
+ def rr_and_not_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+ def ri_and_not_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
+ def rr_or_not_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+ def ri_or_not_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
+ def rr_xor_not_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+ def ri_xor_not_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
+}
+
+multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
+ CondCode ucmp, CondCode ocmp, string cmpstr> {
+ // TODO support 5-operand format: p|q, a, b, c
+
+ def rr_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
+ [(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>;
+ def rr_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>;
+
+ def rr_and_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+ def rr_and_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+
+ def rr_or_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+ def rr_or_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+
+ def rr_xor_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+ def rr_xor_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+
+ def rr_and_not_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+ def rr_and_not_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+
+ def rr_or_not_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+ def rr_or_not_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+
+ def rr_xor_not_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+ def rr_xor_not_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+}
+
+multiclass PTX_SELP<RegisterClass RC, string regclsname> {
+ def rr
+ : InstPTX<(outs RC:$r), (ins Preds:$a, RC:$b, RC:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select Preds:$a, RC:$b, RC:$c))]>;
}
multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
defm ADD : INT3<"add", add>;
defm SUB : INT3<"sub", sub>;
defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
+defm DIV : INT3<"div", udiv>;
+defm REM : INT3<"rem", urem>;
///===- Floating-Point Arithmetic Instructions ----------------------------===//
+// Standard Unary Operations
+defm FNEG : PTX_FLOAT_2OP<"neg", fneg>;
+
// Standard Binary Operations
defm FADD : PTX_FLOAT_3OP<"add", fadd>;
defm FSUB : PTX_FLOAT_3OP<"sub", fsub>;
// In the short term, mad is supported on all PTX versions and we use a
// default rounding mode no matter what shader model or PTX version.
// TODO: Allow the rounding mode to be selectable through llc.
-defm FMAD : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>;
+defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13, SupportsFMA]>;
+defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13, SupportsFMA]>;
///===- Floating-Point Intrinsic Instructions -----------------------------===//
///===- Comparison and Selection Instructions -----------------------------===//
-defm SETPEQu32 : PTX_SETP<RRegu32, "u32", i32imm, SETEQ, "eq">;
-defm SETPNEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETNE, "ne">;
-defm SETPLTu32 : PTX_SETP<RRegu32, "u32", i32imm, SETULT, "lt">;
-defm SETPLEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETULE, "le">;
-defm SETPGTu32 : PTX_SETP<RRegu32, "u32", i32imm, SETUGT, "gt">;
-defm SETPGEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETUGE, "ge">;
+// .setp
+
+// Compare u16
+
+defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ, "eq">;
+defm SETPNEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETNE, "ne">;
+defm SETPLTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULT, "lt">;
+defm SETPLEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULE, "le">;
+defm SETPGTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGT, "gt">;
+defm SETPGEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGE, "ge">;
+
+// Compare u32
+
+defm SETPEQu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETEQ, "eq">;
+defm SETPNEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETNE, "ne">;
+defm SETPLTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULT, "lt">;
+defm SETPLEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULE, "le">;
+defm SETPGTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGT, "gt">;
+defm SETPGEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGE, "ge">;
+
+// Compare u64
+
+defm SETPEQu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETEQ, "eq">;
+defm SETPNEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETNE, "ne">;
+defm SETPLTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULT, "lt">;
+defm SETPLEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULE, "le">;
+defm SETPGTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGT, "gt">;
+defm SETPGEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGE, "ge">;
+
+// Compare f32
+
+defm SETPEQf32 : PTX_SETP_FP<RRegf32, "f32", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf32 : PTX_SETP_FP<RRegf32, "f32", SETUNE, SETONE, "ne">;
+defm SETPLTf32 : PTX_SETP_FP<RRegf32, "f32", SETULT, SETOLT, "lt">;
+defm SETPLEf32 : PTX_SETP_FP<RRegf32, "f32", SETULE, SETOLE, "le">;
+defm SETPGTf32 : PTX_SETP_FP<RRegf32, "f32", SETUGT, SETOGT, "gt">;
+defm SETPGEf32 : PTX_SETP_FP<RRegf32, "f32", SETUGE, SETOGE, "ge">;
+
+// Compare f64
+
+defm SETPEQf64 : PTX_SETP_FP<RRegf64, "f64", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf64 : PTX_SETP_FP<RRegf64, "f64", SETUNE, SETONE, "ne">;
+defm SETPLTf64 : PTX_SETP_FP<RRegf64, "f64", SETULT, SETOLT, "lt">;
+defm SETPLEf64 : PTX_SETP_FP<RRegf64, "f64", SETULE, SETOLE, "le">;
+defm SETPGTf64 : PTX_SETP_FP<RRegf64, "f64", SETUGT, SETOGT, "gt">;
+defm SETPGEf64 : PTX_SETP_FP<RRegf64, "f64", SETUGE, SETOGE, "ge">;
+
+// .selp
+
+defm PTX_SELPu16 : PTX_SELP<RRegu16, "u16">;
+defm PTX_SELPu32 : PTX_SELP<RRegu32, "u32">;
+defm PTX_SELPu64 : PTX_SELP<RRegu64, "u64">;
+defm PTX_SELPf32 : PTX_SELP<RRegf32, "f32">;
+defm PTX_SELPf64 : PTX_SELP<RRegf64, "f64">;
///===- Logic and Shift Instructions --------------------------------------===//
-defm SHL : INT3ntnc<"shl.b32", PTXshl>;
-defm SRL : INT3ntnc<"shr.u32", PTXsrl>;
-defm SRA : INT3ntnc<"shr.s32", PTXsra>;
+defm SHL : INT3ntnc<"shl.b", PTXshl>;
+defm SRL : INT3ntnc<"shr.u", PTXsrl>;
+defm SRA : INT3ntnc<"shr.s", PTXsra>;
defm AND : PTX_LOGIC<"and", and>;
defm OR : PTX_LOGIC<"or", or>;
def MOVU32ri
: InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
[(set RRegu32:$d, imm:$a)]>;
- def MOVU164ri
+ def MOVU64ri
: InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
[(set RRegu64:$d, imm:$a)]>;
def MOVF32ri
[(set RRegf64:$d, fpimm:$a)]>;
}
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def MOVaddr32
+ : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RRegu32:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+ def MOVaddr64
+ : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RRegu64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+}
+
// Loads
defm LDg : PTX_LD_ALL<"ld.global", load_global>;
defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
// TODO: Do something with st.param if/when it is needed.
+// Conversion to pred
+
+def CVT_pred_u16
+ : InstPTX<(outs Preds:$d), (ins RRegu16:$a), "cvt.pred.u16\t$d, $a",
+ [(set Preds:$d, (trunc RRegu16:$a))]>;
+
+def CVT_pred_u32
+ : InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a",
+ [(set Preds:$d, (trunc RRegu32:$a))]>;
+
+def CVT_pred_u64
+ : InstPTX<(outs Preds:$d), (ins RRegu64:$a), "cvt.pred.u64\t$d, $a",
+ [(set Preds:$d, (trunc RRegu64:$a))]>;
+
+def CVT_pred_f32
+ : InstPTX<(outs Preds:$d), (ins RRegf32:$a), "cvt.rni.pred.f32\t$d, $a",
+ [(set Preds:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_pred_f64
+ : InstPTX<(outs Preds:$d), (ins RRegf64:$a), "cvt.rni.pred.f64\t$d, $a",
+ [(set Preds:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to u16
+
+def CVT_u16_pred
+ : InstPTX<(outs RRegu16:$d), (ins Preds:$a), "cvt.u16.pred\t$d, $a",
+ [(set RRegu16:$d, (zext Preds:$a))]>;
+
+def CVT_u16_u32
+ : InstPTX<(outs RRegu16:$d), (ins RRegu32:$a), "cvt.u16.u32\t$d, $a",
+ [(set RRegu16:$d, (trunc RRegu32:$a))]>;
+
+def CVT_u16_u64
+ : InstPTX<(outs RRegu16:$d), (ins RRegu64:$a), "cvt.u16.u64\t$d, $a",
+ [(set RRegu16:$d, (trunc RRegu64:$a))]>;
+
+def CVT_u16_f32
+ : InstPTX<(outs RRegu16:$d), (ins RRegf32:$a), "cvt.rni.u16.f32\t$d, $a",
+ [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_u16_f64
+ : InstPTX<(outs RRegu16:$d), (ins RRegf64:$a), "cvt.rni.u16.f64\t$d, $a",
+ [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to u32
+
def CVT_u32_pred
: InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a",
[(set RRegu32:$d, (zext Preds:$a))]>;
+def CVT_u32_u16
+ : InstPTX<(outs RRegu32:$d), (ins RRegu16:$a), "cvt.u32.u16\t$d, $a",
+ [(set RRegu32:$d, (zext RRegu16:$a))]>;
+
+def CVT_u32_u64
+ : InstPTX<(outs RRegu32:$d), (ins RRegu64:$a), "cvt.u32.u64\t$d, $a",
+ [(set RRegu32:$d, (trunc RRegu64:$a))]>;
+
+def CVT_u32_f32
+ : InstPTX<(outs RRegu32:$d), (ins RRegf32:$a), "cvt.rni.u32.f32\t$d, $a",
+ [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_u32_f64
+ : InstPTX<(outs RRegu32:$d), (ins RRegf64:$a), "cvt.rni.u32.f64\t$d, $a",
+ [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to u64
+
+def CVT_u64_pred
+ : InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a",
+ [(set RRegu64:$d, (zext Preds:$a))]>;
+
+def CVT_u64_u16
+ : InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a",
+ [(set RRegu64:$d, (zext RRegu16:$a))]>;
+
+def CVT_u64_u32
+ : InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a",
+ [(set RRegu64:$d, (zext RRegu32:$a))]>;
+
+def CVT_u64_f32
+ : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.rni.u64.f32\t$d, $a",
+ [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_u64_f64
+ : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.rni.u64.f64\t$d, $a",
+ [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to f32
+
+def CVT_f32_pred
+ : InstPTX<(outs RRegf32:$d), (ins Preds:$a), "cvt.rn.f32.pred\t$d, $a",
+ [(set RRegf32:$d, (uint_to_fp Preds:$a))]>;
+
+def CVT_f32_u16
+ : InstPTX<(outs RRegf32:$d), (ins RRegu16:$a), "cvt.rn.f32.u16\t$d, $a",
+ [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>;
+
+def CVT_f32_u32
+ : InstPTX<(outs RRegf32:$d), (ins RRegu32:$a), "cvt.rn.f32.u32\t$d, $a",
+ [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>;
+
+def CVT_f32_u64
+ : InstPTX<(outs RRegf32:$d), (ins RRegu64:$a), "cvt.rn.f32.u64\t$d, $a",
+ [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>;
+
+def CVT_f32_f64
+ : InstPTX<(outs RRegf32:$d), (ins RRegf64:$a), "cvt.rn.f32.f64\t$d, $a",
+ [(set RRegf32:$d, (fround RRegf64:$a))]>;
+
+// Conversion to f64
+
+def CVT_f64_pred
+ : InstPTX<(outs RRegf64:$d), (ins Preds:$a), "cvt.rn.f64.pred\t$d, $a",
+ [(set RRegf64:$d, (uint_to_fp Preds:$a))]>;
+
+def CVT_f64_u16
+ : InstPTX<(outs RRegf64:$d), (ins RRegu16:$a), "cvt.rn.f64.u16\t$d, $a",
+ [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>;
+
+def CVT_f64_u32
+ : InstPTX<(outs RRegf64:$d), (ins RRegu32:$a), "cvt.rn.f64.u32\t$d, $a",
+ [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>;
+
+def CVT_f64_u64
+ : InstPTX<(outs RRegf64:$d), (ins RRegu64:$a), "cvt.rn.f64.u64\t$d, $a",
+ [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>;
+
+def CVT_f64_f32
+ : InstPTX<(outs RRegf64:$d), (ins RRegf32:$a), "cvt.f64.f32\t$d, $a",
+ [(set RRegf64:$d, (fextend RRegf32:$a))]>;
+
///===- Control Flow Instructions -----------------------------------------===//
-let isBranch = 1, isTerminator = 1 in {
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def BRAd
: InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
+}
- // FIXME: should be able to write a pattern for brcond, but can't use
- // a two-value operand where a dag node expects two operands. :(
- // NOTE: ARM & PowerPC backend also report the same problem
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: The pattern part is blank because I cannot (or do not yet know
+ // how to) use the first operand of PredicateOperand (a Preds register) here
def BRAdp
: InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
- [/*(brcond bb:$d, Preds:$p, i32imm:$c)*/]>;
+ [/*(brcond pred:$_p, bb:$d)*/]>;
}
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {