switch (MI->getOpcode()) {
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO:
return true;
default:
return false;
uint8_t Offset0 = Offset0Imm->getImm();
uint8_t Offset1 = Offset1Imm->getImm();
- assert(Offset1 > Offset0);
- if (Offset1 - Offset0 == 1) {
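+ // The two offsets are not guaranteed to be ordered here, so check the
+ // ordering instead of asserting on it.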
+ if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
// Each of these offsets is in element-sized units, so we need to convert
// to bytes for the individual reads.
} else {
// FIXME: Hack until VReg_1 removed.
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC)
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32))
.addImm(0)
.addReg(SrcReg, getKillRegState(KillSrc));
}
}
if (Opcode != -1) {
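+ // Describe the spill store with a fixed-stack memory operand so later
+ // passes and alias analysis can reason about it.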
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
+ unsigned Size = FrameInfo->getObjectSize(FrameIndex);
+ unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
+ MachineMemOperand *MMO
+ = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ Size, Align);
+
FrameInfo->setObjectAlignment(FrameIndex, 4);
BuildMI(MBB, MI, DL, get(Opcode))
- .addReg(SrcReg)
- .addFrameIndex(FrameIndex)
- // Place-holder registers, these will be filled in by
- // SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
+ .addReg(SrcReg)
+ .addFrameIndex(FrameIndex)
+ // Placeholder registers; these will be filled in by
+ // SIPrepareScratchRegs.
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef)
+ .addMemOperand(MMO);
} else {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
}
if (Opcode != -1) {
- FrameInfo->setObjectAlignment(FrameIndex, 4);
- BuildMI(MBB, MI, DL, get(Opcode), DestReg)
- .addFrameIndex(FrameIndex)
- // Place-holder registers, these will be filled in by
- // SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
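+ // As with stores, describe the reload with a fixed-stack memory operand.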
+ unsigned Align = 4;
+ FrameInfo->setObjectAlignment(FrameIndex, Align);
+ unsigned Size = FrameInfo->getObjectSize(FrameIndex);
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad, Size, Align);
+
+ BuildMI(MBB, MI, DL, get(Opcode), DestReg)
+ .addFrameIndex(FrameIndex)
+ // Placeholder registers; these will be filled in by
+ // SIPrepareScratchRegs.
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef)
+ .addMemOperand(MMO);
} else {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
-
- if (MI->getNumOperands() < 3)
- return nullptr;
-
int CommutedOpcode = commuteOpcode(*MI);
if (CommutedOpcode == -1)
return nullptr;
}
}
-bool
-SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
- return RC != &AMDGPU::EXECRegRegClass;
-}
-
static void removeModOperands(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
return false;
unsigned Opc = UseMI->getOpcode();
- if (Opc == AMDGPU::V_MAD_F32) {
+ if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
// Don't fold if we are using source modifiers. The new VOP2 instructions
// don't have them.
if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
// instead of having to modify in place.
// Remove these first since they are at the end.
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::omod));
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::clamp));
unsigned Src1Reg = Src1->getReg();
Src1->setSubReg(Src2SubReg);
Src1->setIsKill(Src2->isKill());
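+ // V_MAC's src2 is tied to the destination operand, so untie it before
+ // it is rewritten as an immediate.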
+ if (Opc == AMDGPU::V_MAC_F32_e64) {
+ UseMI->untieRegOperand(
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
+ }
+
Src2->ChangeToImmediate(Imm);
removeModOperands(*UseMI);
// instead of having to modify in place.
// Remove these first since they are at the end.
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::omod));
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::clamp));
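+ // Again, untie V_MAC's src2 from the destination before converting it
+ // to an immediate.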
+ if (Opc == AMDGPU::V_MAC_F32_e64) {
+ UseMI->untieRegOperand(
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
+ }
+
+ // ChangeToImmediate adds Src2 back to the instruction.
Src2->ChangeToImmediate(Imm);
// These come before src2.
return false;
}
-bool
-SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
- AliasAnalysis *AA) const {
- switch(MI->getOpcode()) {
- default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
- case AMDGPU::S_MOV_B32:
- case AMDGPU::S_MOV_B64:
- case AMDGPU::V_MOV_B32_e32:
- return MI->getOperand(1).isImm();
- }
-}
-
static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
int WidthB, int OffsetB) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
// TODO: Should we check the address space from the MachineMemOperand? That
// would allow us to distinguish objects we know don't alias based on the
- // underlying addres space, even if it was lowered to a different one,
+ // underlying address space, even if it was lowered to a different one,
// e.g. private accesses lowered to use MUBUF instructions on a scratch
// buffer.
if (isDS(Opc0)) {
return false;
}
+MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
+ MachineBasicBlock::iterator &MI,
+ LiveVariables *LV) const {
+
+ switch (MI->getOpcode()) {
+ default: return nullptr;
+ case AMDGPU::V_MAC_F32_e64: break;
+ case AMDGPU::V_MAC_F32_e32: {
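+ // V_MAD_F32 is a VOP3 instruction and cannot encode a literal constant,
+ // so only allow immediates that are inline constants.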
+ const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
+ if (Src0->isImm() && !isInlineConstant(*Src0, 4))
+ return nullptr;
+ break;
+ }
+ }
+
+ const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::dst);
+ const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1);
+ const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2);
+
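+ // Rewrite the two-address MAC as a three-address MAD with all source
+ // modifiers cleared.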
+ return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32))
+ .addOperand(*Dst)
+ .addImm(0) // Src0 mods
+ .addOperand(*Src0)
+ .addImm(0) // Src1 mods
+ .addOperand(*Src1)
+ .addImm(0) // Src2 mods
+ .addOperand(*Src2)
+ .addImm(0) // clamp
+ .addImm(0); // omod
+}
+
bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
int64_t SVal = Imm.getSExtValue();
if (SVal >= -16 && SVal <= 64)
return MachineOperand::CreateReg(SubReg, false);
}
-unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineBasicBlock::iterator MI,
- MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC,
- const MachineOperand &Op) const {
- MachineBasicBlock *MBB = MI->getParent();
- DebugLoc DL = MI->getDebugLoc();
- unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned Dst = MRI.createVirtualRegister(RC);
-
- MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
- LoDst)
- .addImm(Op.getImm() & 0xFFFFFFFF);
- MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
- HiDst)
- .addImm(Op.getImm() >> 32);
-
- BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
- .addReg(LoDst)
- .addImm(AMDGPU::sub0)
- .addReg(HiDst)
- .addImm(AMDGPU::sub1);
-
- Worklist.push_back(Lo);
- Worklist.push_back(Hi);
-
- return Dst;
-}
-
// Change the order of operands from (0, 1, 2) to (0, 2, 1)
void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
assert(Inst->getNumExplicitOperands() == 3);
}
MachineBasicBlock &MBB = *MI->getParent();
- // Extract the ptr from the resource descriptor.
- // SRsrcPtrLo = srsrc:sub0
- unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass);
-
- // SRsrcPtrHi = srsrc:sub1
- unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass);
+ // Extract the ptr from the resource descriptor.
+ unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
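+ // Using sub0_sub1 keeps the 64-bit pointer in one VReg_64 rather than
+ // separate lo/hi halves.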
// Create an empty resource descriptor
unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
.addImm(RsrcDataFormat >> 32);
// NewSRsrc = {Zero64, SRsrcFormat}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
- NewSRsrc)
- .addReg(Zero64)
- .addImm(AMDGPU::sub0_sub1)
- .addReg(SRsrcFormatLo)
- .addImm(AMDGPU::sub2)
- .addReg(SRsrcFormatHi)
- .addImm(AMDGPU::sub3);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
+ .addReg(Zero64)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(SRsrcFormatLo)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcFormatHi)
+ .addImm(AMDGPU::sub3);
MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- unsigned NewVAddrLo;
- unsigned NewVAddrHi;
if (VAddr) {
// This is already an ADDR64 instruction so we need to add the pointer
// extracted from the resource descriptor to the current value of VAddr.
- NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
- // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
- NewVAddrLo)
- .addReg(SRsrcPtrLo)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
- .addReg(AMDGPU::VCC, RegState::ImplicitDefine);
-
- // NewVaddrHi = SRsrcPtrHi + VAddr:sub1
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
- NewVAddrHi)
- .addReg(SRsrcPtrHi)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
- .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
- .addReg(AMDGPU::VCC, RegState::Implicit);
-
+ unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ // NewVAddrLo = SRsrcPtr:sub0 + VAddr:sub0
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
+ .addReg(SRsrcPtr, 0, AMDGPU::sub0)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
+
+ // NewVAddrHi = SRsrcPtr:sub1 + VAddr:sub1
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
+ .addReg(SRsrcPtr, 0, AMDGPU::sub1)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
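+ // The _e32 add forms implicitly define and use VCC, so the builder
+ // supplies the carry-chain operands automatically.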
+
+ // NewVAddr = {NewVAddrHi, NewVAddrLo}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
+ .addReg(NewVAddrLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(NewVAddrHi)
+ .addImm(AMDGPU::sub1);
} else {
// This instruction is the _OFFSET variant, so we need to convert it to
// the ADDR64 form.
// Create the new instruction.
unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
MachineInstr *Addr64 =
- BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
- .addOperand(*VData)
- .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
- // This will be replaced later
- // with the new value of vaddr.
- .addOperand(*SRsrc)
- .addOperand(*SOffset)
- .addOperand(*Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0); // tfe
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
+ .addOperand(*VData)
+ .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
+ // This will be replaced later
+ // with the new value of vaddr.
+ .addOperand(*SRsrc)
+ .addOperand(*SOffset)
+ .addOperand(*Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
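+ // Preserve the original memory operands on the new ADDR64 instruction.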
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
MI->removeFromParent();
MI = Addr64;
- NewVAddrLo = SRsrcPtrLo;
- NewVAddrHi = SRsrcPtrHi;
+ // NewVAddr = {NewVAddrHi, NewVAddrLo}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
+ .addReg(SRsrcPtr, 0, AMDGPU::sub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(SRsrcPtr, 0, AMDGPU::sub1)
+ .addImm(AMDGPU::sub1);
+
VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
}
- // NewVaddr = {NewVaddrHi, NewVaddrLo}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
- NewVAddr)
- .addReg(NewVAddrLo)
- .addImm(AMDGPU::sub0)
- .addReg(NewVAddrHi)
- .addImm(AMDGPU::sub1);
-
-
// Update the instruction to use NewVAddr
VAddr->setReg(NewVAddr);
// Update the instruction to use NewSRsrc
void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
MachineBasicBlock *MBB = MI->getParent();
- switch (MI->getOpcode()) {
- case AMDGPU::S_LOAD_DWORD_IMM:
- case AMDGPU::S_LOAD_DWORD_SGPR:
- case AMDGPU::S_LOAD_DWORDX2_IMM:
- case AMDGPU::S_LOAD_DWORDX2_SGPR:
- case AMDGPU::S_LOAD_DWORDX4_IMM:
- case AMDGPU::S_LOAD_DWORDX4_SGPR: {
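+ // Select the expansion by destination register size rather than matching
+ // each S_LOAD opcode individually.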
+ int DstIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ assert(DstIdx != -1);
+ unsigned DstRCID = get(MI->getOpcode()).OpInfo[DstIdx].RegClass;
+ switch (RI.getRegClass(DstRCID)->getSize()) {
+ case 4:
+ case 8:
+ case 16: {
unsigned NewOpcode = getVALUOp(*MI);
unsigned RegOffset;
unsigned ImmOffset;
MRI.replaceRegWith(DstReg, NewDstReg);
break;
}
- case AMDGPU::S_LOAD_DWORDX8_IMM:
- case AMDGPU::S_LOAD_DWORDX8_SGPR: {
+ case 32: {
MachineInstr *Lo, *Hi;
splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM,
AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi);
break;
}
- case AMDGPU::S_LOAD_DWORDX16_IMM:
- case AMDGPU::S_LOAD_DWORDX16_SGPR: {
+ case 64: {
MachineInstr *Lo, *Hi;
splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM,
AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi);
moveSMRDToVALU(Inst, MRI);
}
break;
- case AMDGPU::S_MOV_B64: {
- DebugLoc DL = Inst->getDebugLoc();
-
- // If the source operand is a register we can replace this with a
- // copy.
- if (Inst->getOperand(1).isReg()) {
- MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
- .addOperand(Inst->getOperand(0))
- .addOperand(Inst->getOperand(1));
- Worklist.push_back(Copy);
- } else {
- // Otherwise, we need to split this into two movs, because there is
- // no 64-bit VALU move instruction.
- unsigned Reg = Inst->getOperand(0).getReg();
- unsigned Dst = split64BitImm(Worklist,
- Inst,
- MRI,
- MRI.getRegClass(Reg),
- Inst->getOperand(1));
- MRI.replaceRegWith(Reg, Dst);
- }
- Inst->eraseFromParent();
- continue;
- }
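+ // These 64-bit scalar bitops are split into two 32-bit halves, so hand
+ // the VALU opcode to the split helper directly.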
case AMDGPU::S_AND_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
Inst->eraseFromParent();
continue;
case AMDGPU::S_OR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
Inst->eraseFromParent();
continue;
case AMDGPU::S_XOR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
Inst->eraseFromParent();
continue;
case AMDGPU::S_NOT_B64:
- splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
Inst->eraseFromParent();
continue;
Inst->addOperand(MachineOperand::CreateImm(0));
}
- addDescImplicitUseDef(NewDesc, Inst);
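+ // Let MachineInstr add the implicit operands from the new descriptor
+ // rather than copying them by hand.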
+ Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent());
if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
// Legalize the operands
legalizeOperands(Inst);
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
- E = MRI.use_end(); I != E; ++I) {
- MachineInstr &UseMI = *I->getParent();
- if (!canReadVGPR(UseMI, I.getOperandNo())) {
- Worklist.push_back(&UseMI);
- }
- }
+ addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
}
}
AMDGPU::sub0, Src0SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
- const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+ const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
+ const TargetRegisterClass *NewDestSubRC
+ = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
- unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
- MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+ unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
+ BuildMI(MBB, MII, DL, InstDesc, DestSub0)
.addOperand(SrcReg0Sub0);
MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub1, Src0SubRC);
- unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
- MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+ unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
+ BuildMI(MBB, MII, DL, InstDesc, DestSub1)
.addOperand(SrcReg0Sub1);
- unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+ unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
- // Try to legalize the operands in case we need to swap the order to keep it
- // valid.
- Worklist.push_back(LoHalf);
- Worklist.push_back(HiHalf);
+ // We don't need to call legalizeOperands here because with a single
+ // operand, src0 will accept any kind of input.
+
+ // Move all users of this moved value.
+ addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
void SIInstrInfo::splitScalar64BitBinaryOp(
AMDGPU::sub0, Src1SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
- const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+ const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
+ const TargetRegisterClass *NewDestSubRC
+ = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
- unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
+ unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
.addOperand(SrcReg0Sub0)
.addOperand(SrcReg1Sub0);
MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
AMDGPU::sub1, Src1SubRC);
- unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+ unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
.addOperand(SrcReg0Sub1)
.addOperand(SrcReg1Sub1);
- unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+ unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
// Try to legalize the operands in case we need to swap the order to keep it
// valid.
- Worklist.push_back(LoHalf);
- Worklist.push_back(HiHalf);
+ legalizeOperands(LoHalf);
+ legalizeOperands(HiHalf);
+
+ // Move all users of this moved value.
+ addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
AMDGPU::sub1, SrcSubRC);
- MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
+ BuildMI(MBB, MII, DL, InstDesc, MidReg)
.addOperand(SrcRegSub0)
.addImm(0);
- MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
+ BuildMI(MBB, MII, DL, InstDesc, ResultReg)
.addOperand(SrcRegSub1)
.addReg(MidReg);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
- Worklist.push_back(First);
- Worklist.push_back(Second);
+ // We don't need to legalize operands here. src0 for either instruction can be
+ // an SGPR, and the second input is unused or determined here.
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
.addImm(AMDGPU::sub1);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
return;
}
.addImm(AMDGPU::sub1);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
-void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
- MachineInstr *Inst) const {
- // Add the implict and explicit register definitions.
- if (NewDesc.ImplicitUses) {
- for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
- unsigned Reg = NewDesc.ImplicitUses[i];
- Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
- }
- }
-
- if (NewDesc.ImplicitDefs) {
- for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
- unsigned Reg = NewDesc.ImplicitDefs[i];
- Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
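+/// Queue every user of \p DstReg that cannot read a VGPR so that it is
+/// also moved to the VALU.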
+void SIInstrInfo::addUsersToMoveToVALUWorklist(
+ unsigned DstReg,
+ MachineRegisterInfo &MRI,
+ SmallVectorImpl<MachineInstr *> &Worklist) const {
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
+ E = MRI.use_end(); I != E; ++I) {
+ MachineInstr &UseMI = *I->getParent();
+ if (!canReadVGPR(UseMI, I.getOperandNo())) {
+ Worklist.push_back(&UseMI);
}
}
}