#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;
-SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
- : AMDGPUInstrInfo(tm),
- RI(tm) { }
+SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
+ : AMDGPUInstrInfo(st),
+ RI(st) { }
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ MachineFunction *MF = MBB.getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
unsigned KillFlag = isKill ? RegState::Kill : 0;
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
- unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent());
+ if (RI.hasVGPRs(RC)) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!");
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
+ .addReg(SrcReg);
+ } else if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
+ unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MF);
+ unsigned TgtReg = MFI->SpillTracker.LaneVGPR;
- BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), MFI->SpillTracker.LaneVGPR)
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), TgtReg)
.addReg(SrcReg, KillFlag)
.addImm(Lane);
- MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, Lane);
+ MFI->SpillTracker.addSpilledReg(FrameIndex, TgtReg, Lane);
} else if (RI.isSGPRClass(RC)) {
// We are only allowed to create one new instruction when spilling
// registers, so we need to use pseudo instruction for vector
// Reserve a spot in the spill tracker for each sub-register of
// the vector register.
unsigned NumSubRegs = RC->getSize() / 4;
- unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent(),
- NumSubRegs);
+ unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MF, NumSubRegs);
MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
FirstLane);
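// Illustrative sketch, not part of this patch (helper name invented): each
// spilled SGPR sub-register occupies one 32-bit lane of the tracker's spill
// VGPR, which is where the RC->getSize() / 4 lane count above comes from.
static unsigned getNumSpillLanes(const TargetRegisterClass *RC) {
  return RC->getSize() / 4; // bytes -> lanes: SReg_64 = 2, SReg_128 = 4
}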
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ MachineFunction *MF = MBB.getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MBB.findDebugLoc(MI);
- if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
- SIMachineFunctionInfo::SpilledReg Spill =
- MFI->SpillTracker.getSpilledReg(FrameIndex);
- assert(Spill.VGPR);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg)
- .addReg(Spill.VGPR)
- .addImm(Spill.Lane);
- insertNOPs(MI, 3);
+
+ if (RI.hasVGPRs(RC)) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("SIInstrInfo::loadRegToStackSlot - Can't retrieve spilled VGPR!");
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+ .addImm(0);
} else if (RI.isSGPRClass(RC)) {
unsigned Opcode;
switch (RC->getSize() * 8) {
+ case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addReg(Spill.VGPR)
.addImm(FrameIndex);
- insertNOPs(MI, 3);
} else {
llvm_unreachable("VGPR spilling not supported");
}
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S64_RESTORE:
return 2;
+ case AMDGPU::SI_SPILL_S32_RESTORE:
+ return 1;
default: llvm_unreachable("Invalid spill opcode");
}
}
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE:
case AMDGPU::SI_SPILL_S128_RESTORE:
- case AMDGPU::SI_SPILL_S64_RESTORE: {
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ case AMDGPU::SI_SPILL_S32_RESTORE: {
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
.addReg(MI->getOperand(1).getReg())
.addImm(Spill.Lane + i);
}
+ insertNOPs(MI, 3);
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::SI_CONSTDATA_PTR: {
+ unsigned Reg = MI->getOperand(0).getReg();
+ unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
+ unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);
+
+ // Add 32-bit offset from this instruction to the start of the constant data.
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_I32), RegLo)
+ .addReg(RegLo)
+ .addTargetIndex(AMDGPU::TI_CONSTDATA_START)
+ .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi)
+ .addReg(RegHi)
+ .addImm(0)
+ .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
+ .addReg(AMDGPU::SCC, RegState::Implicit);
MI->eraseFromParent();
break;
}
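// Illustrative model of the expansion above, assuming S_ADD_I32 sets SCC on
// unsigned carry and S_ADDC_U32 consumes it (helper name invented):
static uint64_t addConstDataOffset(uint64_t PC, uint32_t Offset) {
  uint32_t Lo = uint32_t(PC) + Offset;      // S_ADD_I32 RegLo, RegLo, TI
  uint32_t Carry = Lo < uint32_t(PC);       // carry-out recorded in SCC
  uint32_t Hi = uint32_t(PC >> 32) + Carry; // S_ADDC_U32 RegHi, RegHi, 0
  return (uint64_t(Hi) << 32) | Lo;
}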
return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
}
+static bool compareMachineOp(const MachineOperand &Op0,
+ const MachineOperand &Op1) {
+ if (Op0.getType() != Op1.getType())
+ return false;
+
+ switch (Op0.getType()) {
+ case MachineOperand::MO_Register:
+ return Op0.getReg() == Op1.getReg();
+ case MachineOperand::MO_Immediate:
+ return Op0.getImm() == Op1.getImm();
+ case MachineOperand::MO_FPImmediate:
+ return Op0.getFPImm() == Op1.getFPImm();
+ default:
+ llvm_unreachable("Didn't expect to be comparing these operand types");
+ }
+}
+
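// Hypothetical usage sketch: the V_DIV_SCALE verifier added below relies on
// this to test whether src0 is tied to src1 or src2, e.g.
//   bool Tied = compareMachineOp(Src0, Src1) || compareMachineOp(Src0, Src2);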
+bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
+ const MachineOperand &MO) const {
+ const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];
+
+ assert(MO.isImm() || MO.isFPImm());
+
+ if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
+ return true;
+
+ if (OpInfo.RegClass < 0)
+ return false;
+
+ return RI.regClassCanUseImmediate(OpInfo.RegClass);
+}
+
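// Hypothetical caller sketch (operand index chosen for illustration): an
// immediate-folding pass would query this before rewriting a use, e.g.
//   if (TII->isImmOperandLegal(UseMI, 1, *ImmOp))
//     UseMI->getOperand(1).ChangeToImmediate(ImmOp->getImm());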
bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
StringRef &ErrInfo) const {
uint16_t Opcode = MI->getOpcode();
// Make sure the register classes are correct
for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
switch (Desc.OpInfo[i].OperandType) {
- case MCOI::OPERAND_REGISTER:
+ case MCOI::OPERAND_REGISTER: {
+ int RegClass = Desc.OpInfo[i].RegClass;
+ if (!RI.regClassCanUseImmediate(RegClass) &&
+ (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) {
+ ErrInfo = "Expected register, but got immediate";
+ return false;
+ }
+ }
break;
case MCOI::OPERAND_IMMEDIATE:
- if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm()) {
+ // Check if this operand is an immediate.
+ // FrameIndex operands will be replaced by immediates, so they are
+ // allowed.
+ if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm() &&
+ !MI->getOperand(i).isFI()) {
ErrInfo = "Expected immediate, but got non-immediate";
return false;
}
return false;
}
}
+
+ // Verify misc. restrictions on specific instructions.
+ if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
+ Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
+ const MachineOperand &Src0 = MI->getOperand(2);
+ const MachineOperand &Src1 = MI->getOperand(3);
+ const MachineOperand &Src2 = MI->getOperand(4);
+ if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
+ if (!compareMachineOp(Src0, Src1) &&
+ !compareMachineOp(Src0, Src2)) {
+ ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
+ return false;
+ }
+ }
+ }
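// For reference, a sketch of what the check above accepts and rejects
// (assembly syntax approximate):
//   v_div_scale_f32 v0, vcc, v1, v1, v2  ; ok: src0 == src1
//   v_div_scale_f32 v0, vcc, v1, v2, v3  ; rejected: src0 matches neither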
+
return true;
}
case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
+ case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
+ case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32;
+ case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
+ case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
}
}
case AMDGPU::COPY:
case AMDGPU::REG_SEQUENCE:
case AMDGPU::PHI:
+ case AMDGPU::INSERT_SUBREG:
return RI.hasVGPRs(getOpRegClass(MI, 0));
default:
return RI.hasVGPRs(getOpRegClass(MI, OpNo));
unsigned SubReg = MRI.createVirtualRegister(SubRC);
// Just in case the super register is itself a sub-register, copy it to a new
- // value so we don't need to wory about merging its subreg index with the
- // SubIdx passed to this function. The register coalescer should be able to
+ // value so we don't need to worry about merging its subreg index with the
+ // SubIdx passed to this function. The register coalescer should be able to
// eliminate this extra copy.
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
NewSuperReg)
}
}
+ // Legalize INSERT_SUBREG
+ // src0 must have the same register class as dst
+ if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned Src0 = MI->getOperand(1).getReg();
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
+ if (DstRC != Src0RC) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
+ .addReg(Src0);
+ MI->getOperand(1).setReg(NewSrc0);
+ }
+ return;
+ }
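// Sketch of the rewrite above (virtual register names hypothetical):
//   %dst(VReg_64) = INSERT_SUBREG %src0(SReg_64), %elt, sub0
// becomes
//   %tmp(VReg_64) = COPY %src0
//   %dst(VReg_64) = INSERT_SUBREG %tmp, %elt, sub0
// so the tied dst and src0 operands share one register class.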
+
// Legalize MUBUF* instructions
// FIXME: If we start using the non-addr64 instructions for compute, we
// may need to legalize them here.
continue;
}
case AMDGPU::S_AND_B64:
- splitScalar64BitOp(Worklist, Inst, AMDGPU::S_AND_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
Inst->eraseFromParent();
continue;
case AMDGPU::S_OR_B64:
- splitScalar64BitOp(Worklist, Inst, AMDGPU::S_OR_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
Inst->eraseFromParent();
continue;
case AMDGPU::S_XOR_B64:
- splitScalar64BitOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
Inst->eraseFromParent();
continue;
case AMDGPU::S_NOT_B64:
- splitScalar64BitOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_BCNT1_I32_B64:
+ splitScalar64BitBCNT(Worklist, Inst);
Inst->eraseFromParent();
continue;
// We are converting these to a BFE, so we need to add the missing
// operands for the size and offset.
unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
+ Inst->addOperand(Inst->getOperand(1));
+ Inst->getOperand(1).ChangeToImmediate(0);
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(Size));
// 3 to not hit an assertion later in MCInstLower.
Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(0));
- Inst->addOperand(MachineOperand::CreateImm(0));
+ } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
+ // The VALU version adds the second operand to the result, so insert an
+ // extra 0 operand.
Inst->addOperand(MachineOperand::CreateImm(0));
}
uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
Inst->RemoveOperand(2); // Remove old immediate.
- Inst->addOperand(MachineOperand::CreateImm(Offset));
- Inst->addOperand(MachineOperand::CreateImm(BitWidth));
-
+ Inst->addOperand(Inst->getOperand(1));
+ Inst->getOperand(1).ChangeToImmediate(0);
Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(Offset));
Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(BitWidth));
Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(0));
}
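// Worked example (illustrative): for a packed scalar-BFE immediate of
// 0x00100008, the fields decode as
//   Offset   = 0x00100008 & 0x3F             = 8    (bits [5:0], assumed)
//   BitWidth = (0x00100008 & 0x7F0000) >> 16 = 16   (bits [22:16])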
return &AMDGPU::VReg_32RegClass;
}
-void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineInstr *Inst,
- unsigned Opcode) const {
+void SIInstrInfo::splitScalar64BitUnaryOp(
+ SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst,
+ unsigned Opcode) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src0 = Inst->getOperand(1);
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineBasicBlock::iterator MII = Inst;
+
+ const MCInstrDesc &InstDesc = get(Opcode);
+ const TargetRegisterClass *Src0RC = Src0.isReg() ?
+ MRI.getRegClass(Src0.getReg()) :
+ &AMDGPU::SGPR_32RegClass;
+
+ const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
+
+ MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub0, Src0SubRC);
+
+ const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
+ const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+
+ unsigned DestSub0 = MRI.createVirtualRegister(DestSubRC);
+ MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+ .addOperand(SrcReg0Sub0);
+
+ MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub1, Src0SubRC);
+
+ unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+ MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+ .addOperand(SrcReg0Sub1);
+
+ unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+
+ MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+ // Try to legalize the operands in case we need to swap the order to keep it
+ // valid.
+ Worklist.push_back(LoHalf);
+ Worklist.push_back(HiHalf);
+}
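// Illustrative expansion (register names hypothetical): S_NOT_B64 %dst, %src
// becomes two 32-bit halves stitched back together, with both halves pushed
// on the worklist so they can be moved to VALU in turn:
//   %lo  = S_NOT_B32 %src:sub0
//   %hi  = S_NOT_B32 %src:sub1
//   %dst = REG_SEQUENCE %lo, sub0, %hi, sub1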
+
+void SIInstrInfo::splitScalar64BitBinaryOp(
+ SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst,
+ unsigned Opcode) const {
MachineBasicBlock &MBB = *Inst->getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
Worklist.push_back(HiHalf);
}
+void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineBasicBlock::iterator MII = Inst;
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src = Inst->getOperand(1);
+
+ const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32);
+ const TargetRegisterClass *SrcRC = Src.isReg() ?
+ MRI.getRegClass(Src.getReg()) :
+ &AMDGPU::SGPR_32RegClass;
+
+ unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
+
+ MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
+ AMDGPU::sub0, SrcSubRC);
+ MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
+ AMDGPU::sub1, SrcSubRC);
+
+ MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
+ .addOperand(SrcRegSub0)
+ .addImm(0);
+
+ MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
+ .addOperand(SrcRegSub1)
+ .addReg(MidReg);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+
+ Worklist.push_back(First);
+ Worklist.push_back(Second);
+}
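// Illustrative model, assuming V_BCNT_U32_B32 computes
// dst = countpop(src0) + src1; that addend operand is why MidReg feeds the
// second instruction (helper name invented):
static uint32_t bcnt64(uint64_t Src) {
  uint32_t Mid = __builtin_popcount(uint32_t(Src)) + 0;  // First, addImm(0)
  return __builtin_popcount(uint32_t(Src >> 32)) + Mid;  // Second, MidReg
}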
+
void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
MachineInstr *Inst) const {
// Add the implicit and explicit register definitions.
for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}
+
+const MachineOperand *SIInstrInfo::getNamedOperand(const MachineInstr &MI,
+ unsigned OperandName) const {
+ int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
+ if (Idx == -1)
+ return nullptr;
+
+ return &MI.getOperand(Idx);
+}
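// Hypothetical usage (operand name chosen for illustration):
//   if (const MachineOperand *Off =
//         TII->getNamedOperand(*MI, AMDGPU::OpName::offset))
//     Imm = Off->getImm();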