BuildMI(MBB, MI, DL, get(Opcode))
.addReg(SrcReg) // src
.addFrameIndex(FrameIndex) // frame_idx
- // Place-holder registers, these will be filled in by
- // SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef)
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
.addMemOperand(MMO);
}
unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize());
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addFrameIndex(FrameIndex) // frame_idx
- // Place-holder registers, these will be filled in by
- // SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef)
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
.addMemOperand(MMO);
}
if (MFI->getShaderType() == ShaderType::COMPUTE &&
WorkGroupSize > WavefrontSize) {
- unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
- unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
- unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
+ unsigned TIDIGXReg
+ = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X);
+ unsigned TIDIGYReg
+ = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y);
+ unsigned TIDIGZReg
+ = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z);
unsigned InputPtrReg =
- TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
+ TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
if (!Entry.isLiveIn(Reg))
Entry.addLiveIn(Reg);
return TmpReg;
}
-void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
- int Count) const {
+void SIInstrInfo::insertWaitStates(MachineBasicBlock::iterator MI,
+ int Count) const {
while (Count > 0) {
int Arg;
if (Count >= 8)
switch (MI->getOpcode()) {
default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
- case AMDGPU::SI_CONSTDATA_PTR: {
- unsigned Reg = MI->getOperand(0).getReg();
- unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
- unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
-
- BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);
-
- // Add 32-bit offset from this instruction to the start of the constant data.
- BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo)
- .addReg(RegLo)
- .addTargetIndex(AMDGPU::TI_CONSTDATA_START)
- .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
- BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi)
- .addReg(RegHi)
- .addImm(0)
- .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
- .addReg(AMDGPU::SCC, RegState::Implicit);
- MI->eraseFromParent();
- break;
- }
case AMDGPU::SGPR_USE:
// This is just a placeholder for register allocation.
MI->eraseFromParent();
MI->eraseFromParent();
break;
}
+
+ case AMDGPU::SI_CONSTDATA_PTR: {
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
+ MachineFunction &MF = *MBB.getParent();
+ unsigned Reg = MI->getOperand(0).getReg();
+ unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
+ unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1);
+
+ // Create a bundle so these instructions won't be re-ordered by the
+ // post-RA scheduler.
+ MIBundleBuilder Bundler(MBB, MI);
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
+
+ // Add 32-bit offset from this instruction to the start of the
+ // constant data.
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
+ .addReg(RegLo)
+ .addOperand(MI->getOperand(1)));
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
+ .addReg(RegHi)
+ .addImm(0));
+
+ llvm::finalizeBundle(MBB, Bundler.begin());
+
+ MI->eraseFromParent();
+ break;
+ }
}
return true;
}
MachineOperand &Src1 = MI->getOperand(Src1Idx);
- // Make sure it's legal to commute operands for VOP2.
- if (isVOP2(*MI) &&
- (!isOperandLegal(MI, Src0Idx, &Src1) ||
- !isOperandLegal(MI, Src1Idx, &Src0))) {
- return nullptr;
+
+ if (isVOP2(*MI)) {
+ const MCInstrDesc &InstrDesc = MI->getDesc();
+ // For VOP2 instructions, any operand type is valid to use for src0. Make
+ // sure we can use the src0 as src1.
+ //
+ // We could be stricter here and only allow commuting if there is a reason
+ // to do so. i.e. if both operands are VGPRs there is no real benefit,
+ // although MachineCSE attempts to find matches by commuting.
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0))
+ return nullptr;
}
if (!Src1.isReg()) {
// Allow commuting instructions with Imm operands.
if (NewMI || !Src1.isImm() ||
- (!isVOP2(*MI) && !isVOP3(*MI))) {
+ (!isVOP2(*MI) && !isVOP3(*MI))) {
return nullptr;
}
-
// Be sure to copy the source modifiers to the right place.
if (MachineOperand *Src0Mods
= getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
Inst->addOperand(Op1);
}
+/// Check whether the register operand \p MO may be used in the operand slot
+/// described by \p OpInfo.
+///
+/// \param MRI    used to look up the register class of virtual registers.
+/// \param OpInfo operand descriptor; only its RegClass field is read here.
+/// \param MO     the operand to test.
+/// \returns false if \p MO is not a register. Otherwise true only when the
+/// common sub-class of MO's register class and the class named by
+/// OpInfo.RegClass is MO's own register class.
+bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
+ const MCOperandInfo &OpInfo,
+ const MachineOperand &MO) const {
+ // Non-register operands are never accepted by this helper.
+ if (!MO.isReg())
+ return false;
+
+ // Virtual registers get their class from MRI; any other register is looked
+ // up through RI.getPhysRegClass().
+ unsigned Reg = MO.getReg();
+ const TargetRegisterClass *RC =
+ TargetRegisterInfo::isVirtualRegister(Reg) ?
+ MRI.getRegClass(Reg) :
+ RI.getPhysRegClass(Reg);
+
+ // In order to be legal, the common sub-class must be equal to the
+ // class of the current operand. For example:
+ //
+ // v_mov_b32 s0 ; Operand defined as vsrc_32
+ // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
+ //
+ // s_sendmsg 0, s0 ; Operand defined as m0reg
+ // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
+
+ return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+}
+
+/// Check whether \p MO is acceptable in the operand slot described by
+/// \p OpInfo.
+///
+/// Register operands are checked with isLegalRegOperand(). Any other operand
+/// is asserted to be an immediate, a target index or a frame index, and is
+/// accepted unconditionally.
+///
+/// \returns the isLegalRegOperand() result for registers, true otherwise.
+bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
+ const MCOperandInfo &OpInfo,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return isLegalRegOperand(MRI, OpInfo, MO);
+
+ // Handle non-register types that are treated like immediates.
+ // The assert is the only validation performed on these operand kinds.
+ assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
+ return true;
+}
+
bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
const MachineOperand *MO) const {
const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
if (MO->isReg()) {
assert(DefinedRC);
- const TargetRegisterClass *RC =
- TargetRegisterInfo::isVirtualRegister(MO->getReg()) ?
- MRI.getRegClass(MO->getReg()) :
- RI.getPhysRegClass(MO->getReg());
-
- // In order to be legal, the common sub-class must be equal to the
- // class of the current operand. For example:
- //
- // v_mov_b32 s0 ; Operand defined as vsrc_32
- // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
- //
- // s_sendmsg 0, s0 ; Operand defined as m0reg
- // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
-
- return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+ return isLegalRegOperand(MRI, OpInfo, *MO);
}
return isImmOperandLegal(MI, OpIdx, *MO);
}
+/// Legalize the src1 operand of \p MI and, when \p MI has an implicit SGPR
+/// read, an SGPR src0 operand as well. The visible caller, legalizeOperands(),
+/// only invokes this when isVOP2(*MI) holds.
+///
+/// If src1 is not a legal register operand, the operands are swapped in place
+/// only when all of the following hold: MI has no implicit SGPR read, MI is
+/// commutable, src1 is an immediate or a register, src0 is legal in the src1
+/// slot, and commuteOpcode() yields an opcode. In every other case src1 is
+/// legalized with legalizeOpWithMove().
+///
+/// \param MRI register info used for the register-class queries.
+/// \param MI  instruction to legalize; its descriptor and operands may be
+///            modified in place.
+void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
+ MachineInstr *MI) const {
+ unsigned Opc = MI->getOpcode();
+ const MCInstrDesc &InstrDesc = get(Opc);
+
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+ MachineOperand &Src1 = MI->getOperand(Src1Idx);
+
+ // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
+ // we need to only have one constant bus use.
+ //
+ // Note we do not need to worry about literal constants here. They are
+ // disabled for the operand type for instructions because they will always
+ // violate the one constant bus use rule.
+ bool HasImplicitSGPR = findImplicitSGPRRead(*MI) != AMDGPU::NoRegister;
+ if (HasImplicitSGPR) {
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI->getOperand(Src0Idx);
+
+ // An SGPR in src0 is moved out so that the implicit read stays the only
+ // SGPR use.
+ if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
+ legalizeOpWithMove(MI, Src0Idx);
+ }
+
+ // src0 of a VOP2 instruction supports all operand types, so we don't need to
+ // check its legality. If src1 is already legal, we don't need to do anything.
+ if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
+ return;
+
+ // We do not use commuteInstruction here because it is too aggressive and will
+ // commute if it is possible. We only want to commute here if it improves
+ // legality. This can be called a fairly large number of times so don't waste
+ // compile time pointlessly swapping and checking legality again.
+ if (HasImplicitSGPR || !MI->isCommutable()) {
+ legalizeOpWithMove(MI, Src1Idx);
+ return;
+ }
+
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI->getOperand(Src0Idx);
+
+ // If src0 can be used as src1, commuting will make the operands legal.
+ // Otherwise we have to give up and insert a move.
+ //
+ // TODO: Other immediate-like operand kinds could be commuted if there was a
+ // MachineOperand::ChangeTo* for them.
+ if ((!Src1.isImm() && !Src1.isReg()) ||
+ !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
+ legalizeOpWithMove(MI, Src1Idx);
+ return;
+ }
+
+ // No commuted form of the opcode: fall back to a move as well.
+ int CommutedOpc = commuteOpcode(*MI);
+ if (CommutedOpc == -1) {
+ legalizeOpWithMove(MI, Src1Idx);
+ return;
+ }
+
+ MI->setDesc(get(CommutedOpc));
+
+ // Src0 is known to be a register here: isLegalRegOperand() above returns
+ // false for non-register operands. Save its fields before Src0 is
+ // overwritten with Src1's contents below.
+ unsigned Src0Reg = Src0.getReg();
+ unsigned Src0SubReg = Src0.getSubReg();
+ bool Src0Kill = Src0.isKill();
+
+ if (Src1.isImm())
+ Src0.ChangeToImmediate(Src1.getImm());
+ else if (Src1.isReg()) {
+ Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
+ Src0.setSubReg(Src1.getSubReg());
+ } else
+ llvm_unreachable("Should only have register or immediate operands");
+
+ // Complete the swap: src1 receives the saved src0 register, sub-register
+ // index and kill flag.
+ Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
+ Src1.setSubReg(Src0SubReg);
+}
+
// Legalize VOP3 operands. Because all operand types are supported for any
// operand, and since literal constants are not allowed and should never be
// seen, we only need to worry about inserting copies if we use multiple SGPR
void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- unsigned Opc = MI->getOpcode();
// Legalize VOP2
if (isVOP2(*MI)) {
- int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
- int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
-
- // Legalize src0
- if (!isOperandLegal(MI, Src0Idx))
- legalizeOpWithMove(MI, Src0Idx);
-
- // Legalize src1
- if (isOperandLegal(MI, Src1Idx))
- return;
-
- // Usually src0 of VOP2 instructions allow more types of inputs
- // than src1, so try to commute the instruction to decrease our
- // chances of having to insert a MOV instruction to legalize src1.
- if (MI->isCommutable()) {
- if (commuteInstruction(MI))
- // If we are successful in commuting, then we know MI is legal, so
- // we are done.
- return;
- }
-
- legalizeOpWithMove(MI, Src1Idx);
+ legalizeOperandsVOP2(MRI, MI);
return;
}
}
break;
+ case AMDGPU::S_ABS_I32:
+ lowerScalarAbs(Worklist, Inst);
+ Inst->eraseFromParent();
+ continue;
+
case AMDGPU::S_BFE_U64:
case AMDGPU::S_BFM_B64:
llvm_unreachable("Moving this op to VALU not implemented");
return &AMDGPU::VGPR_32RegClass;
}
+/// Emit a VGPR_32-based replacement for \p Inst (the visible dispatch site
+/// calls this for AMDGPU::S_ABS_I32). Two instructions are inserted before
+/// \p Inst:
+///
+///   TmpReg    = V_SUB_I32_e32 0, Src
+///   ResultReg = V_MAX_I32_e64 Src, TmpReg
+///
+/// i.e. max(Src, 0 - Src), assuming V_SUB_I32 computes src0 - src1.
+///
+/// All uses of Inst's destination register are then redirected to ResultReg,
+/// and ResultReg is handed to addUsersToMoveToVALUWorklist() together with
+/// \p Worklist. \p Inst itself is not erased here; the visible call site
+/// erases it after this returns.
+///
+/// \param Worklist forwarded to addUsersToMoveToVALUWorklist().
+/// \param Inst     instruction to lower; operand 0 is read as the
+///                 destination and operand 1 as the source.
+void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src = Inst->getOperand(1);
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ // TmpReg = 0 - Src (see the operand-order assumption in the header).
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
+ .addImm(0)
+ .addReg(Src.getReg());
+
+ // ResultReg = max(Src, TmpReg).
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
+ .addReg(Src.getReg())
+ .addReg(TmpReg);
+
+ // Redirect every use of the old destination to the new result register.
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
+}
+
void SIInstrInfo::splitScalar64BitUnaryOp(
SmallVectorImpl<MachineInstr *> &Worklist,
MachineInstr *Inst,