//
#include "AMDGPU.h"
+#include "AMDGPUMCInstLower.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/Statistic.h"
if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
return false;
- // We don't need to check src0, all input types are legal, so just make
- // sure src0 isn't using any modifiers.
- const MachineOperand *Src0Mod =
- TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
- if (Src0Mod && Src0Mod->getImm() != 0)
+ // We don't need to check src0, all input types are legal, so just make sure
+ // src0 isn't using any modifiers.
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
return false;
// Check output modifiers
- const MachineOperand *Omod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
- if (Omod && Omod->getImm() != 0)
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
return false;
- const MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
- return !Clamp || Clamp->getImm() == 0;
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+ return false;
+
+ return true;
}
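
The three explicit getNamedOperand/getImm checks collapse into one helper. A
minimal sketch of hasModifiersSet, assuming it follows the pattern the old
code spelled out by hand (the in-tree SIInstrInfo definition may differ):

  // Sketch: true iff the named modifier operand exists and is non-zero.
  bool SIInstrInfo::hasModifiersSet(MachineInstr &MI, unsigned OpName) const {
    const MachineOperand *Mods = getNamedOperand(MI, OpName);
    return Mods && Mods->getImm() != 0;
  }
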
/// \brief This function checks \p MI for operands defined by a move immediate
TII->isVOPC(MI.getOpcode()));
const SIRegisterInfo &TRI = TII->getRegisterInfo();
- MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
// Only one literal constant is allowed per instruction, so if src0 is a
// literal constant then we can't do any folding.
- if (Src0->isImm() && TII->isLiteralConstant(*Src0))
+ if (Src0.isImm() &&
+ TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
return;
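
The early-out above hinges on the literal/inline-constant split: an inline
constant (small integers and a few FP values) encodes for free in the source
field, while anything else consumes the instruction's single 32-bit literal
dword. A hedged sketch of that relationship (the real helper may be more
involved):

  // Sketch: an immediate is a "literal" exactly when it is not one of the
  // free inline constants, so it would occupy the one literal slot.
  bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
                                      unsigned OpSize) const {
    return MO.isImm() && !isInlineConstant(MO, OpSize);
  }
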
-
// Literal constants and SGPRs can only be used in Src0, so if Src0 is an
// SGPR, we cannot commute the instruction, so we can't fold any literal
// constants.
- if (Src0->isReg() && !isVGPR(Src0, TRI, MRI))
+ if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
return;
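
Both of these checks go through the file-local isVGPR helper. A sketch
consistent with how the pass uses it (details of the in-tree version may
vary):

  // Sketch: a register operand counts as a VGPR if its register class
  // (looked up via MRI for virtual registers, via TRI for physical ones)
  // contains VGPRs.
  static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI,
                     const MachineRegisterInfo &MRI) {
    if (!MO->isReg())
      return false;
    if (TargetRegisterInfo::isVirtualRegister(MO->getReg()))
      return TRI.hasVGPRs(MRI.getRegClass(MO->getReg()));
    return TRI.hasVGPRs(TRI.getPhysRegClass(MO->getReg()));
  }
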
// Try to fold Src0
- if (Src0->isReg()) {
- unsigned Reg = Src0->getReg();
+ if (Src0.isReg()) {
+ unsigned Reg = Src0.getReg();
MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
if (Def && Def->isMoveImmediate()) {
MachineOperand &MovSrc = Def->getOperand(1);
bool ConstantFolded = false;
if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
- Src0->ChangeToImmediate(MovSrc.getImm());
+ Src0.ChangeToImmediate(MovSrc.getImm());
ConstantFolded = true;
- } else if (MovSrc.isFPImm()) {
- const APFloat &APF = MovSrc.getFPImm()->getValueAPF();
- if (&APF.getSemantics() == &APFloat::IEEEsingle) {
- MRI.removeRegOperandFromUseList(Src0);
- Src0->ChangeToImmediate(APF.bitcastToAPInt().getZExtValue());
- ConstantFolded = true;
- }
}
if (ConstantFolded) {
if (MRI.use_empty(Reg))
Next = std::next(I);
MachineInstr &MI = *I;
+ // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
+ if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
+ const MachineOperand &Src = MI.getOperand(1);
+
+ if (Src.isImm()) {
+ if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
+ MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+ }
+
+ continue;
+ }
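
The 4-byte saving comes from the encodings: S_MOV_B32 with a non-inline
immediate needs a trailing 32-bit literal dword (8 bytes total), while
S_MOVK_I32 packs a signed 16-bit immediate into the 4-byte SOPK word. The two
guards mirror that accounting; a sketch with a hypothetical helper (not part
of the patch):

  // Sketch of the size accounting behind the guards above.
  static unsigned movEncodingSize(int64_t Imm, bool IsInline) {
    if (IsInline)
      return 4;          // S_MOV_B32, constant encoded in the src field
    if (isInt<16>(Imm))
      return 4;          // S_MOVK_I32, simm16 folded into the SOPK word
    return 8;            // S_MOV_B32 + 32-bit literal dword
  }
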
+
if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;
if (!canShrink(MI, TII, TRI, MRI)) {
- // Try commtuing the instruction and see if that enables us to shrink
+ // Try commuting the instruction and see if that enables us to shrink
// it.
if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
!canShrink(MI, TII, TRI, MRI))
continue;
}
- int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
-
- // Op32 could be -1 here if we started with an instruction that had a
- // 32-bit encoding and then commuted it to an instruction that did not.
- if (Op32 == -1)
+ // getVOPe32 could be -1 here if we started with an instruction that had
+ // a 32-bit encoding and then commuted it to an instruction that did not.
+ if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;
+ int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
+
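
Reordering the guard ahead of the getVOPe32 lookup is safe because the two
answer the same question. A plausible sketch of hasVALU32BitEncoding
(assuming it wraps the TableGen-generated e64-to-e32 opcode mapping):

  // Sketch: equivalent to the old Op32 == -1 test.
  bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
    return AMDGPU::getVOPe32(Opcode) != -1;
  }
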
if (TII->isVOPC(Op32)) {
unsigned DstReg = MI.getOperand(0).getReg();
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
// vreg1 = VOPC;
// S_AND_B64 vreg0, vreg1
//
- // So, instead of forcing the instruction to write to VCC, we provide a
- // hint to the register allocator to use VCC and then we
- // we will run this pass again after RA and shrink it if it outpus to
- // VCC.
+ // So, instead of forcing the instruction to write to VCC, we provide
+ // a hint to the register allocator to use VCC and then we will run
+ // this pass again after RA and shrink it if it outputs to VCC.
MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
continue;
}
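
The hint pays off on the pass's second run: after register allocation the
destination is a physical register, and the e32 VOPC encoding writes VCC
implicitly, so shrinking is only legal when the allocator actually honored
the hint. A hedged sketch of that post-RA counterpart (hypothetical, not
shown in this hunk):

  // Sketch: post-RA, only shrink the compare if its result landed in VCC.
  if (DstReg != AMDGPU::VCC)
    continue;
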