This instructions writes to an 32-bit SGPR. This change required adding
the 32-bit VCC_LO and VCC_HI registers, because the full VCC register
is 64 bits.
This fixes verifier errors on several of the indirect addressing piglit
tests.
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204055
91177308-0d34-0410-b5e6-
96231b3b80d8
continue;
}
unsigned reg = MO.getReg();
- if (reg == AMDGPU::VCC) {
+ if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
+ reg == AMDGPU::VCC_HI) {
VCCUsed = true;
continue;
}
defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
} // End neverHasSideEffects = 1, isMoveImm = 1
-defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
+let Uses = [EXEC] in {
+
+def V_READFIRSTLANE_B32 : VOP1 <
+ 0x00000002,
+ (outs SReg_32:$vdst),
+ (ins VReg_32:$src0),
+ "V_READFIRSTLANE_B32 $vdst, $src0",
+ []
+>;
+
+}
+
defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64",
[(set i32:$dst, (fp_to_sint f64:$src0))]
>;
.addReg(AMDGPU::EXEC);
// Read the next variant into VCC (lower 32 bits) <- also loop target
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ AMDGPU::VCC_LO)
.addReg(Idx);
// Move index from VCC into M0
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(AMDGPU::VCC);
+ .addReg(AMDGPU::VCC_LO);
// Compare the just read M0 value to all possible Idx values
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
}
// Special Registers
-def VCC : SIReg<"VCC", 106>;
+def VCC_LO : SIReg<"vcc_lo", 106>;
+def VCC_HI : SIReg<"vcc_hi", 107>;
+
+// VCC for 64-bit instructions
+def VCC : RegisterWithSubRegs<"VCC", [VCC_LO, VCC_HI]> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1];
+ let HWEncoding = 106;
+}
+
def EXEC : SIReg<"EXEC", 126>;
def SCC : SIReg<"SCC", 253>;
def M0 : SIReg <"M0", 124>;
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add SGPR_32, M0Reg)
+ (add SGPR_32, M0Reg, VCC_LO)
>;
def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64], 64, (add SGPR_64Regs)>;
; R600-CHECK-NOT: ALU clause
; R600-CHECK: 0 + AR.x
-; SI-CHECK: V_READFIRSTLANE
+; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo
; SI-CHECK: V_MOVRELD
; SI-CHECK: S_CBRANCH
-; SI-CHECK: V_READFIRSTLANE
+; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo
; SI-CHECK: V_MOVRELD
; SI-CHECK: S_CBRANCH
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {