MO.ChangeToRegister(Reg, false);
}
+/// Extract the value of sub-register \p SubIdx from \p SuperReg into a
+/// freshly created virtual register of class \p SubRC, inserting the
+/// required COPY instructions immediately before \p MI.
+///
+/// \param MI      Insertion point; new instructions are inserted before it.
+/// \param MRI     Used to create the new virtual registers.
+/// \param SuperReg Operand holding the wide register to extract from.
+/// \param SuperRC Register class for the intermediate full-width copy.
+/// \param SubIdx  Sub-register index (e.g. AMDGPU::sub0) to extract.
+/// \param SubRC   Register class of the returned sub-register value.
+/// \returns the new virtual register holding SuperReg:SubIdx.
+unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
+ MachineRegisterInfo &MRI,
+ MachineOperand &SuperReg,
+ const TargetRegisterClass *SuperRC,
+ unsigned SubIdx,
+ const TargetRegisterClass *SubRC)
+ const {
+ assert(SuperReg.isReg());
+
+ unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
+ unsigned SubReg = MRI.createVirtualRegister(SubRC);
+
+ // Just in case the super register is itself a sub-register, copy it to a new
+ // value so we don't need to worry about merging its subreg index with the
+ // SubIdx passed to this function. The register coalescer should be able to
+ // eliminate this extra copy.
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
+ NewSuperReg)
+ .addOperand(SuperReg);
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
+ SubReg)
+ .addReg(NewSuperReg, 0, SubIdx);
+ return SubReg;
+}
+
void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
MI->getOperand(i).setReg(DstReg);
}
}
+
+ // Legalize MUBUF* instructions
+ // FIXME: If we start using the non-addr64 instructions for compute, we
+ // may need to legalize them here.
+
+ // getNamedOperandIdx returns -1 when the named operand does not exist,
+ // so having both indices valid identifies a MUBUF-style instruction
+ // with a resource descriptor (srsrc) and a vector address (vaddr).
+ int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::srsrc);
+ int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::vaddr);
+ if (SRsrcIdx != -1 && VAddrIdx != -1) {
+ const TargetRegisterClass *VAddrRC =
+ RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
+
+ // A vaddr register class of 8 bytes means the 64-bit (addr64) form.
+ if(VAddrRC->getSize() == 8 &&
+ MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
+ // We have a MUBUF instruction that uses a 64-bit vaddr register and
+ // srsrc has the incorrect register class. In order to fix this, we
+ // need to extract the pointer from the resource descriptor (srsrc),
+ // add it to the value of vaddr, then store the result in the vaddr
+ // operand. Then, we need to set the pointer field of the resource
+ // descriptor to zero.
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
+ MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
+ unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
+ unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+
+ // NOTE(review): srsrc is an SGPR operand, but its sub-registers are
+ // extracted through VReg_128/VReg_32 classes, forcing SGPR->VGPR
+ // copies. Presumably intended since the pointer halves feed the
+ // VALU adds below -- confirm VGPR classes are the right choice here.
+
+ // SRsrcPtrLo = srsrc:sub0
+ SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+ // SRsrcPtrHi = srsrc:sub1
+ SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+ // VAddrLo = vaddr:sub0
+ VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
+ &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+ // VAddrHi = vaddr:sub1
+ VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
+ &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+ // NewVaddrLo = SRsrcPtrLo + VAddrLo
+ // The low add's carry-out is written to VCC (explicit implicit-def).
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
+ NewVAddrLo)
+ .addReg(SRsrcPtrLo)
+ .addReg(VAddrLo)
+ .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
+
+ // NewVaddrHi = SRsrcPtrHi + VAddrHi
+ // V_ADDC reads the carry-in from VCC produced by the V_ADD above,
+ // so nothing may clobber VCC between these two instructions.
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
+ NewVAddrHi)
+ .addReg(SRsrcPtrHi)
+ .addReg(VAddrHi)
+ .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
+ .addReg(AMDGPU::VCC, RegState::Implicit);
+
+ // NewVaddr = {NewVaddrHi, NewVaddrLo}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewVAddr)
+ .addReg(NewVAddrLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(NewVAddrHi)
+ .addImm(AMDGPU::sub1);
+
+ // Zero64 = 0
+ // The base pointer now lives in vaddr, so the descriptor's pointer
+ // field is replaced with zero.
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
+ Zero64)
+ .addImm(0);
+
+ // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatLo)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+
+ // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+ // Assumes AMDGPU::RSRC_DATA_FORMAT is a 64-bit constant -- TODO confirm.
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatHi)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+
+ // NewSRsrc = {Zero64, SRsrcFormat}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewSRsrc)
+ .addReg(Zero64)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(SRsrcFormatLo)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcFormatHi)
+ .addImm(AMDGPU::sub3);
+
+ // Update the instruction to use NewVaddr
+ MI->getOperand(VAddrIdx).setReg(NewVAddr);
+ // Update the instruction to use NewSRsrc
+ MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
+ }
+ }
}
void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
unsigned NewOpcode = getVALUOp(*Inst);
- if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
+ if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
+ // This instruction has no VALU equivalent, so it cannot be moved off
+ // the SALU. The best we can do is legalize its operands in place so
+ // the instruction stays valid where it is.
+ legalizeOperands(Inst);
continue;
+ }
// Use the new VALU Opcode.
const MCInstrDesc &NewDesc = get(NewOpcode);