class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
RegisterWithSubRegs<n, subregs> {
+ field bits<2> chan_encoding = 0;
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1, sub2, sub3];
+ let HWEncoding{8-0} = encoding{8-0};
+ let HWEncoding{10-9} = chan_encoding;
+}
+
+class R600Reg_64<string n, list<Register> subregs, bits<16> encoding> :
+ RegisterWithSubRegs<n, subregs> {
+ field bits<2> chan_encoding = 0;
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1];
let HWEncoding = encoding;
+ let HWEncoding{8-0} = encoding{8-0};
+ let HWEncoding{10-9} = chan_encoding;
}
+class R600Reg_64Vertical<int lo, int hi, string chan> : R600Reg_64 <
+ "V"#lo#hi#"_"#chan,
+ [!cast<Register>("T"#lo#"_"#chan), !cast<Register>("T"#hi#"_"#chan)],
+ lo
+>;
+
foreach Index = 0-127 in {
foreach Chan = [ "X", "Y", "Z", "W" ] in {
// 32-bit Temporary Registers
// Indirect addressing offset registers
def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
Index, Chan>;
- def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
- Chan>;
}
// 128-bit Temporary Registers
def T#Index#_XYZW : R600Reg_128 <"T"#Index#"",
!cast<Register>("T"#Index#"_Z"),
!cast<Register>("T"#Index#"_W")],
Index>;
+
+ def T#Index#_XY : R600Reg_64 <"T"#Index#"",
+ [!cast<Register>("T"#Index#"_X"),
+ !cast<Register>("T"#Index#"_Y")],
+ Index>;
+}
+
+foreach Chan = [ "X", "Y", "Z", "W"] in {
+
+ let chan_encoding = !if(!eq(Chan, "X"), 0,
+ !if(!eq(Chan, "Y"), 1,
+ !if(!eq(Chan, "Z"), 2,
+ !if(!eq(Chan, "W"), 3, 0)))) in {
+ def V0123_#Chan : R600Reg_128 <"V0123_"#Chan,
+ [!cast<Register>("T0_"#Chan),
+ !cast<Register>("T1_"#Chan),
+ !cast<Register>("T2_"#Chan),
+ !cast<Register>("T3_"#Chan)],
+ 0>;
+ def V01_#Chan : R600Reg_64Vertical<0, 1, Chan>;
+ def V23_#Chan : R600Reg_64Vertical<2, 3, Chan>;
+ }
}
+
// KCACHE_BANK0
foreach Index = 159-128 in {
foreach Chan = [ "X", "Y", "Z", "W" ] in {
// Special Registers
+def OQA : R600Reg<"OQA", 219>;
+def OQB : R600Reg<"OQB", 220>;
+def OQAP : R600Reg<"OQAP", 221>;
+def OQBP : R600Reg<"OQAP", 222>;
+def LDS_DIRECT_A : R600Reg<"LDS_DIRECT_A", 223>;
+def LDS_DIRECT_B : R600Reg<"LDS_DIRECT_B", 224>;
def ZERO : R600Reg<"0.0", 248>;
def ONE : R600Reg<"1.0", 249>;
def NEG_ONE : R600Reg<"-1.0", 249>;
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
def AR_X : R600Reg<"AR.x", 0>;
-def OQAP : R600Reg<"OQAP", 221>;
def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "ArrayBase%u", 448, 480))>;
let isAllocatable = 0 in {
-// XXX: Only use the X channel, until we support wider stack widths
-def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>;
+def R600_Addr : RegisterClass <"AMDGPU", [i32], 32, (add (sequence "Addr%u_X", 0, 127))>;
-} // End isAllocatable = 0
+// We only use Addr_[YZW] for vertical vectors.
+// FIXME if we add more vertical vector registers we will need to ad more
+// registers to these classes.
+def R600_Addr_Y : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Y)>;
+def R600_Addr_Z : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Z)>;
+def R600_Addr_W : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_W)>;
+
+def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32,
+ (add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>;
def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "KC0_%u_X", 128, 159))>;
(interleave R600_KC1_X, R600_KC1_Y,
R600_KC1_Z, R600_KC1_W)>;
+} // End isAllocatable = 0
+
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_X", 0, 127), AR_X)>;
R600_TReg32,
R600_ArrayBase,
R600_Addr,
+ R600_KC0, R600_KC1,
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
ALU_CONST, ALU_PARAM, OQAP
)>;
let CopyCost = -1;
}
-//===----------------------------------------------------------------------===//
-// Register classes for indirect addressing
-//===----------------------------------------------------------------------===//
-
-// Super register for all the Indirect Registers. This register class is used
-// by the REG_SEQUENCE instruction to specify the registers to use for direct
-// reads / writes which may be written / read by an indirect address.
-class IndirectSuper<string n, list<Register> subregs> :
- RegisterWithSubRegs<n, subregs> {
- let Namespace = "AMDGPU";
- let SubRegIndices =
- [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
- sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15];
-}
-
-def IndirectSuperReg : IndirectSuper<"Indirect",
- [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X,
- TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X,
- TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X]
+def R600_Reg128Vertical : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
+ (add V0123_W, V0123_Z, V0123_Y, V0123_X)
>;
-def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>;
+def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
+ (add (sequence "T%u_XY", 0, 63))>;
-// This register class defines the registers that are the storage units for
-// the "Indirect Addressing" pseudo memory space.
-// XXX: Only use the X channel, until we support wider stack widths
-def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add (sequence "TRegMem%u_X", 0, 16))
->;
+def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
+ (add V01_X, V01_Y, V01_Z, V01_W,
+ V23_X, V23_Y, V23_Z, V23_W)>;