using namespace llvm;
SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st), RI(st) {}
+ : AMDGPUInstrInfo(st), RI() {}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}
+bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ // TODO: The generic check fails for VALU instructions that should be
+ // rematerializable due to implicit reads of exec. We really want all of the
+ // generic logic for this except for this.
+ switch (MI->getOpcode()) {
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
int64_t &Offset0,
int64_t &Offset1) const {
if (Load0->getOperand(0) != Load1->getOperand(0))
return false;
+ const ConstantSDNode *Load0Offset =
+ dyn_cast<ConstantSDNode>(Load0->getOperand(1));
+ const ConstantSDNode *Load1Offset =
+ dyn_cast<ConstantSDNode>(Load1->getOperand(1));
+
+ if (!Load0Offset || !Load1Offset)
+ return false;
+
// Check chain.
if (findChainOperand(Load0) != findChainOperand(Load1))
return false;
- Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue();
- Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue();
+ Offset0 = Load0Offset->getZExtValue();
+ Offset1 = Load1Offset->getZExtValue();
return true;
}
}
}
-unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
+unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
+ const unsigned Opcode = MI.getOpcode();
+
int NewOpc;
// Try to map original to commuted opcode
unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
unsigned InputPtrReg =
TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
- static const unsigned TIDIGRegs[3] = {
- TIDIGXReg, TIDIGYReg, TIDIGZReg
- };
- for (unsigned Reg : TIDIGRegs) {
+ for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
if (!Entry.isLiveIn(Reg))
Entry.addLiveIn(Reg);
}
MI->eraseFromParent();
break;
}
+
+ case AMDGPU::V_CNDMASK_B64_PSEUDO: {
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+ unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+ unsigned Src0 = MI->getOperand(1).getReg();
+ unsigned Src1 = MI->getOperand(2).getReg();
+ const MachineOperand &SrcCond = MI->getOperand(3);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
+ .addOperand(SrcCond);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
+ .addOperand(SrcCond);
+ MI->eraseFromParent();
+ break;
+ }
}
return true;
}
}
if (MI)
- MI->setDesc(get(commuteOpcode(MI->getOpcode())));
+ MI->setDesc(get(commuteOpcode(*MI)));
return MI;
}
unsigned Src2SubReg = Src2->getSubReg();
Src0->setReg(Src1Reg);
Src0->setSubReg(Src1SubReg);
+ Src0->setIsKill(Src1->isKill());
+
Src1->setReg(Src2Reg);
Src1->setSubReg(Src2SubReg);
+ Src1->setIsKill(Src2->isKill());
Src2->ChangeToImmediate(Imm);
return RI.opCanUseInlineConstant(OpInfo.OperandType);
}
-bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const {
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS: {
- // MUBUF instructions a 12-bit offset in bytes.
- return isUInt<12>(OffsetSize);
- }
- case AMDGPUAS::CONSTANT_ADDRESS: {
- // SMRD instructions have an 8-bit offset in dwords on SI and
- // a 20-bit offset in bytes on VI.
- if (RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return isUInt<20>(OffsetSize);
- else
- return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
- }
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS: {
- // The single offset versions have a 16-bit offset in bytes.
- return isUInt<16>(OffsetSize);
- }
- case AMDGPUAS::PRIVATE_ADDRESS:
- // Indirect register addressing does not use any offsets.
- default:
- return 0;
- }
-}
-
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
int Op32 = AMDGPU::getVOPe32(Opcode);
if (Op32 == -1)
case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
+ case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
}
}
MachineInstr *Addr64 =
BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
.addOperand(*VData)
- .addOperand(*SRsrc)
.addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
// This will be replaced later
// with the new value of vaddr.
+ .addOperand(*SRsrc)
.addOperand(*SOffset)
- .addOperand(*Offset);
+ .addOperand(*Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0); // tfe
MI->removeFromParent();
MI = Addr64;
// The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
// on VI.
+
+ bool IsKill = SBase->isKill();
if (OffOp) {
- bool isVI = RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ bool isVI =
+ MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
+ AMDGPUSubtarget::VOLCANIC_ISLANDS;
unsigned OffScale = isVI ? 1 : 4;
// Handle the _IMM variant
unsigned LoOffset = OffOp->getImm() * OffScale;
unsigned HiOffset = LoOffset + HalfSize;
Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
- .addOperand(*SBase)
+ // Use addReg instead of addOperand
+ // to make sure kill flag is cleared.
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
.addImm(LoOffset / OffScale);
if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) {
BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
.addImm(HiOffset); // The offset in register is in bytes.
Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addReg(OffsetSGPR);
} else {
Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addImm(HiOffset / OffScale);
}
} else {
// Handle the _SGPR variant
MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
.addOperand(*SOff);
unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
.addOperand(*SOff)
.addImm(HalfSize);
Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp))
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addReg(OffsetSGPR);
}
// SMRD instructions take a dword offsets on SI and byte offset on VI
// and MUBUF instructions always take a byte offset.
ImmOffset = MI->getOperand(2).getImm();
- if (RI.ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <=
+ AMDGPUSubtarget::SEA_ISLANDS)
ImmOffset <<= 2;
RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
.addImm(AMDGPU::sub3);
MI->setDesc(get(NewOpcode));
if (MI->getOperand(2).isReg()) {
- MI->getOperand(2).setReg(MI->getOperand(1).getReg());
+ MI->getOperand(2).setReg(SRsrc);
} else {
- MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
+ MI->getOperand(2).ChangeToRegister(SRsrc, false);
}
- MI->getOperand(1).setReg(SRsrc);
MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0));
MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe
const TargetRegisterClass *NewDstRC =
RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);