+bool
+SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ switch(MI->getOpcode()) {
+ default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::V_MOV_B32_e32:
+ return MI->getOperand(1).isImm();
+ }
+}
+
+namespace llvm {
+namespace AMDGPU {
+// Helper function generated by tablegen. We are wrapping this with
+// an SIInstrInfo function that reutrns bool rather than int.
+int isDS(uint16_t Opcode);
+}
+}
+
+bool SIInstrInfo::isDS(uint16_t Opcode) const {
+ return ::AMDGPU::isDS(Opcode) != -1;
+}
+
+int SIInstrInfo::isMIMG(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::MIMG;
+}
+
+int SIInstrInfo::isSMRD(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SMRD;
+}
+
+bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP1;
+}
+
+bool SIInstrInfo::isVOP2(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP2;
+}
+
+bool SIInstrInfo::isVOP3(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP3;
+}
+
+bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOPC;
+}
+
+bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
+}
+
+bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
+ int32_t Val = Imm.getSExtValue();
+ if (Val >= -16 && Val <= 64)
+ return true;
+
+ // The actual type of the operand does not seem to matter as long
+ // as the bits match one of the inline immediate values. For example:
+ //
+ // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
+ // so it is a legal inline immediate.
+ //
+ // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
+ // floating-point, so it is a legal inline immediate.
+
+ return (APInt::floatToBits(0.0f) == Imm) ||
+ (APInt::floatToBits(1.0f) == Imm) ||
+ (APInt::floatToBits(-1.0f) == Imm) ||
+ (APInt::floatToBits(0.5f) == Imm) ||
+ (APInt::floatToBits(-0.5f) == Imm) ||
+ (APInt::floatToBits(2.0f) == Imm) ||
+ (APInt::floatToBits(-2.0f) == Imm) ||
+ (APInt::floatToBits(4.0f) == Imm) ||
+ (APInt::floatToBits(-4.0f) == Imm);
+}
+
+bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
+ if (MO.isImm())
+ return isInlineConstant(APInt(32, MO.getImm(), true));
+
+ if (MO.isFPImm()) {
+ APFloat FpImm = MO.getFPImm()->getValueAPF();
+ return isInlineConstant(FpImm.bitcastToAPInt());
+ }
+
+ return false;
+}
+
+bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
+ return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
+}
+
+bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const {
+ uint16_t Opcode = MI->getOpcode();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+ int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+
+ // Make sure the number of operands is correct.
+ const MCInstrDesc &Desc = get(Opcode);
+ if (!Desc.isVariadic() &&
+ Desc.getNumOperands() != MI->getNumExplicitOperands()) {
+ ErrInfo = "Instruction has wrong number of operands.";
+ return false;
+ }
+
+ // Make sure the register classes are correct
+ for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
+ switch (Desc.OpInfo[i].OperandType) {
+ case MCOI::OPERAND_REGISTER:
+ break;
+ case MCOI::OPERAND_IMMEDIATE:
+ if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm()) {
+ ErrInfo = "Expected immediate, but got non-immediate";
+ return false;
+ }
+ // Fall-through
+ default:
+ continue;
+ }
+
+ if (!MI->getOperand(i).isReg())
+ continue;
+
+ int RegClass = Desc.OpInfo[i].RegClass;
+ if (RegClass != -1) {
+ unsigned Reg = MI->getOperand(i).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ const TargetRegisterClass *RC = RI.getRegClass(RegClass);
+ if (!RC->contains(Reg)) {
+ ErrInfo = "Operand has incorrect register class.";
+ return false;
+ }
+ }
+ }
+
+
+ // Verify VOP*
+ if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
+ unsigned ConstantBusCount = 0;
+ unsigned SGPRUsed = AMDGPU::NoRegister;
+ for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+
+ // EXEC register uses the constant bus.
+ if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
+ ++ConstantBusCount;
+
+ // SGPRs use the constant bus
+ if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
+ (!MO.isImplicit() &&
+ (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
+ AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
+ if (SGPRUsed != MO.getReg()) {
+ ++ConstantBusCount;
+ SGPRUsed = MO.getReg();
+ }
+ }
+ }
+ // Literal constants use the constant bus.
+ if (isLiteralConstant(MO))
+ ++ConstantBusCount;
+ }
+ if (ConstantBusCount > 1) {
+ ErrInfo = "VOP* instruction uses the constant bus more than once";
+ return false;
+ }
+ }
+
+ // Verify SRC1 for VOP2 and VOPC
+ if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
+ const MachineOperand &Src1 = MI->getOperand(Src1Idx);
+ if (Src1.isImm() || Src1.isFPImm()) {
+ ErrInfo = "VOP[2C] src1 cannot be an immediate.";
+ return false;
+ }
+ }
+
+ // Verify VOP3
+ if (isVOP3(Opcode)) {
+ if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
+ ErrInfo = "VOP3 src0 cannot be a literal constant.";
+ return false;
+ }
+ if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
+ ErrInfo = "VOP3 src1 cannot be a literal constant.";
+ return false;
+ }
+ if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
+ ErrInfo = "VOP3 src2 cannot be a literal constant.";
+ return false;
+ }
+ }
+ return true;
+}
+
+unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default: return AMDGPU::INSTRUCTION_LIST_END;
+ case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
+ case AMDGPU::COPY: return AMDGPU::COPY;
+ case AMDGPU::PHI: return AMDGPU::PHI;
+ case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
+ case AMDGPU::S_MOV_B32:
+ return MI.getOperand(1).isReg() ?
+ AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
+ case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
+ case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
+ case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
+ case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
+ case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
+ case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
+ case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
+ case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
+ case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
+ case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
+ case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
+ case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
+ case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
+ case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
+ case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
+ case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
+ case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
+ case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
+ case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
+ case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
+ case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
+ case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
+ case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
+ case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
+ case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
+ case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
+ case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
+ case AMDGPU::S_LOAD_DWORD_IMM:
+ case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
+ case AMDGPU::S_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
+ case AMDGPU::S_LOAD_DWORDX4_IMM:
+ case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
+ case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32;
+ case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
+ case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
+ }
+}
+
+bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
+ return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
+}
+
+const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ const MCInstrDesc &Desc = get(MI.getOpcode());
+ if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
+ Desc.OpInfo[OpNo].RegClass == -1)
+ return MRI.getRegClass(MI.getOperand(OpNo).getReg());
+
+ unsigned RCID = Desc.OpInfo[OpNo].RegClass;
+ return RI.getRegClass(RCID);
+}
+
+bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::REG_SEQUENCE:
+ case AMDGPU::PHI:
+ case AMDGPU::INSERT_SUBREG:
+ return RI.hasVGPRs(getOpRegClass(MI, 0));
+ default:
+ return RI.hasVGPRs(getOpRegClass(MI, OpNo));
+ }
+}
+
+void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
+ MachineBasicBlock::iterator I = MI;
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
+ const TargetRegisterClass *RC = RI.getRegClass(RCID);
+ unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+ if (MO.isReg()) {
+ Opcode = AMDGPU::COPY;
+ } else if (RI.isSGPRClass(RC)) {
+ Opcode = AMDGPU::S_MOV_B32;
+ }
+
+ const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
+ unsigned Reg = MRI.createVirtualRegister(VRC);
+ BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
+ Reg).addOperand(MO);
+ MO.ChangeToRegister(Reg, false);
+}
+
+unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
+ MachineRegisterInfo &MRI,
+ MachineOperand &SuperReg,
+ const TargetRegisterClass *SuperRC,
+ unsigned SubIdx,
+ const TargetRegisterClass *SubRC)
+ const {
+ assert(SuperReg.isReg());
+
+ unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
+ unsigned SubReg = MRI.createVirtualRegister(SubRC);
+
+ // Just in case the super register is itself a sub-register, copy it to a new
+ // value so we don't need to worry about merging its subreg index with the
+ // SubIdx passed to this function. The register coalescer should be able to
+ // eliminate this extra copy.
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
+ NewSuperReg)
+ .addOperand(SuperReg);
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
+ SubReg)
+ .addReg(NewSuperReg, 0, SubIdx);
+ return SubReg;
+}
+
+MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
+ MachineBasicBlock::iterator MII,
+ MachineRegisterInfo &MRI,
+ MachineOperand &Op,
+ const TargetRegisterClass *SuperRC,
+ unsigned SubIdx,
+ const TargetRegisterClass *SubRC) const {
+ if (Op.isImm()) {
+ // XXX - Is there a better way to do this?
+ if (SubIdx == AMDGPU::sub0)
+ return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
+ if (SubIdx == AMDGPU::sub1)
+ return MachineOperand::CreateImm(Op.getImm() >> 32);
+
+ llvm_unreachable("Unhandled register index for immediate");
+ }
+
+ unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
+ SubIdx, SubRC);
+ return MachineOperand::CreateReg(SubReg, false);
+}
+
+unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineBasicBlock::iterator MI,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC,
+ const MachineOperand &Op) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned Dst = MRI.createVirtualRegister(RC);
+
+ MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
+ LoDst)
+ .addImm(Op.getImm() & 0xFFFFFFFF);
+ MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
+ HiDst)
+ .addImm(Op.getImm() >> 32);
+
+ BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
+ .addReg(LoDst)
+ .addImm(AMDGPU::sub0)
+ .addReg(HiDst)
+ .addImm(AMDGPU::sub1);
+
+ Worklist.push_back(Lo);
+ Worklist.push_back(Hi);
+
+ return Dst;
+}
+
+void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src1);
+ int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src2);
+
+ // Legalize VOP2
+ if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
+ MachineOperand &Src0 = MI->getOperand(Src0Idx);
+ MachineOperand &Src1 = MI->getOperand(Src1Idx);
+
+ // If the instruction implicitly reads VCC, we can't have any SGPR operands,
+ // so move any.
+ bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI);
+ if (ReadsVCC && Src0.isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) {
+ legalizeOpWithMove(MI, Src0Idx);
+ return;
+ }
+
+ if (ReadsVCC && Src1.isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
+ legalizeOpWithMove(MI, Src1Idx);
+ return;
+ }
+
+ // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
+ // be the first operand, and there can only be one.
+ if (Src1.isImm() || Src1.isFPImm() ||
+ (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
+ if (MI->isCommutable()) {
+ if (commuteInstruction(MI))
+ return;
+ }
+ legalizeOpWithMove(MI, Src1Idx);
+ }
+ }
+
+ // XXX - Do any VOP3 instructions read VCC?
+ // Legalize VOP3
+ if (isVOP3(MI->getOpcode())) {
+ int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
+ unsigned SGPRReg = AMDGPU::NoRegister;
+ for (unsigned i = 0; i < 3; ++i) {
+ int Idx = VOP3Idx[i];
+ if (Idx == -1)
+ continue;
+ MachineOperand &MO = MI->getOperand(Idx);
+
+ if (MO.isReg()) {
+ if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
+ continue; // VGPRs are legal
+
+ assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");
+
+ if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
+ SGPRReg = MO.getReg();
+ // We can use one SGPR in each VOP3 instruction.
+ continue;
+ }
+ } else if (!isLiteralConstant(MO)) {
+ // If it is not a register and not a literal constant, then it must be
+ // an inline constant which is always legal.
+ continue;
+ }
+ // If we make it this far, then the operand is not legal and we must
+ // legalize it.
+ legalizeOpWithMove(MI, Idx);
+ }
+ }
+
+ // Legalize REG_SEQUENCE and PHI
+ // The register class of the operands much be the same type as the register
+ // class of the output.
+ if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
+ MI->getOpcode() == AMDGPU::PHI) {
+ const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
+ if (!MI->getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ continue;
+ const TargetRegisterClass *OpRC =
+ MRI.getRegClass(MI->getOperand(i).getReg());
+ if (RI.hasVGPRs(OpRC)) {
+ VRC = OpRC;
+ } else {
+ SRC = OpRC;
+ }
+ }
+
+ // If any of the operands are VGPR registers, then they all most be
+ // otherwise we will create illegal VGPR->SGPR copies when legalizing
+ // them.
+ if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
+ if (!VRC) {
+ assert(SRC);
+ VRC = RI.getEquivalentVGPRClass(SRC);
+ }
+ RC = VRC;
+ } else {
+ RC = SRC;
+ }
+
+ // Update all the operands so they have the same type.
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
+ if (!MI->getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ continue;
+ unsigned DstReg = MRI.createVirtualRegister(RC);
+ MachineBasicBlock *InsertBB;
+ MachineBasicBlock::iterator Insert;
+ if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ InsertBB = MI->getParent();
+ Insert = MI;
+ } else {
+ // MI is a PHI instruction.
+ InsertBB = MI->getOperand(i + 1).getMBB();
+ Insert = InsertBB->getFirstTerminator();
+ }
+ BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
+ get(AMDGPU::COPY), DstReg)
+ .addOperand(MI->getOperand(i));
+ MI->getOperand(i).setReg(DstReg);
+ }
+ }
+
+ // Legalize INSERT_SUBREG
+ // src0 must have the same register class as dst
+ if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned Src0 = MI->getOperand(1).getReg();
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
+ if (DstRC != Src0RC) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
+ .addReg(Src0);
+ MI->getOperand(1).setReg(NewSrc0);
+ }
+ return;
+ }
+
+ // Legalize MUBUF* instructions
+ // FIXME: If we start using the non-addr64 instructions for compute, we
+ // may need to legalize them here.
+
+ int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::srsrc);
+ int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::vaddr);
+ if (SRsrcIdx != -1 && VAddrIdx != -1) {
+ const TargetRegisterClass *VAddrRC =
+ RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
+
+ if(VAddrRC->getSize() == 8 &&
+ MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
+ // We have a MUBUF instruction that uses a 64-bit vaddr register and
+ // srsrc has the incorrect register class. In order to fix this, we
+ // need to extract the pointer from the resource descriptor (srsrc),
+ // add it to the value of vadd, then store the result in the vaddr
+ // operand. Then, we need to set the pointer field of the resource
+ // descriptor to zero.
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
+ MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
+ unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
+ unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+
+ // SRsrcPtrLo = srsrc:sub0
+ SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+ // SRsrcPtrHi = srsrc:sub1
+ SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+ // VAddrLo = vaddr:sub0
+ VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
+ &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+ // VAddrHi = vaddr:sub1
+ VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
+ &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+ // NewVaddrLo = SRsrcPtrLo + VAddrLo
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
+ NewVAddrLo)
+ .addReg(SRsrcPtrLo)
+ .addReg(VAddrLo)
+ .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
+
+ // NewVaddrHi = SRsrcPtrHi + VAddrHi
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
+ NewVAddrHi)
+ .addReg(SRsrcPtrHi)
+ .addReg(VAddrHi)
+ .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
+ .addReg(AMDGPU::VCC, RegState::Implicit);
+
+ // NewVaddr = {NewVaddrHi, NewVaddrLo}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewVAddr)
+ .addReg(NewVAddrLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(NewVAddrHi)
+ .addImm(AMDGPU::sub1);
+
+ // Zero64 = 0
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
+ Zero64)
+ .addImm(0);
+
+ // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatLo)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+
+ // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatHi)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+
+ // NewSRsrc = {Zero64, SRsrcFormat}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewSRsrc)
+ .addReg(Zero64)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(SRsrcFormatLo)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcFormatHi)
+ .addImm(AMDGPU::sub3);
+
+ // Update the instruction to use NewVaddr
+ MI->getOperand(VAddrIdx).setReg(NewVAddr);
+ // Update the instruction to use NewSRsrc
+ MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
+ }
+ }
+}
+
+void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ switch (MI->getOpcode()) {
+ case AMDGPU::S_LOAD_DWORD_IMM:
+ case AMDGPU::S_LOAD_DWORD_SGPR:
+ case AMDGPU::S_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_LOAD_DWORDX2_SGPR:
+ case AMDGPU::S_LOAD_DWORDX4_IMM:
+ case AMDGPU::S_LOAD_DWORDX4_SGPR:
+ unsigned NewOpcode = getVALUOp(*MI);
+ unsigned RegOffset;
+ unsigned ImmOffset;
+
+ if (MI->getOperand(2).isReg()) {
+ RegOffset = MI->getOperand(2).getReg();
+ ImmOffset = 0;
+ } else {
+ assert(MI->getOperand(2).isImm());
+ // SMRD instructions take a dword offsets and MUBUF instructions
+ // take a byte offset.
+ ImmOffset = MI->getOperand(2).getImm() << 2;
+ RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ if (isUInt<12>(ImmOffset)) {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ RegOffset)
+ .addImm(0);
+ } else {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ RegOffset)
+ .addImm(ImmOffset);
+ ImmOffset = 0;
+ }
+ }
+
+ unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+ unsigned DWord0 = RegOffset;
+ unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
+ .addImm(0);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
+ .addReg(DWord0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DWord1)
+ .addImm(AMDGPU::sub1)
+ .addReg(DWord2)
+ .addImm(AMDGPU::sub2)
+ .addReg(DWord3)
+ .addImm(AMDGPU::sub3);
+ MI->setDesc(get(NewOpcode));
+ if (MI->getOperand(2).isReg()) {
+ MI->getOperand(2).setReg(MI->getOperand(1).getReg());
+ } else {
+ MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
+ }
+ MI->getOperand(1).setReg(SRsrc);
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
+ }
+}
+
+void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
+ SmallVector<MachineInstr *, 128> Worklist;
+ Worklist.push_back(&TopInst);
+
+ while (!Worklist.empty()) {
+ MachineInstr *Inst = Worklist.pop_back_val();
+ MachineBasicBlock *MBB = Inst->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ unsigned Opcode = Inst->getOpcode();
+ unsigned NewOpcode = getVALUOp(*Inst);
+
+ // Handle some special cases
+ switch (Opcode) {
+ default:
+ if (isSMRD(Inst->getOpcode())) {
+ moveSMRDToVALU(Inst, MRI);
+ }
+ break;
+ case AMDGPU::S_MOV_B64: {
+ DebugLoc DL = Inst->getDebugLoc();
+
+ // If the source operand is a register we can replace this with a
+ // copy.
+ if (Inst->getOperand(1).isReg()) {
+ MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
+ .addOperand(Inst->getOperand(0))
+ .addOperand(Inst->getOperand(1));
+ Worklist.push_back(Copy);
+ } else {
+ // Otherwise, we need to split this into two movs, because there is
+ // no 64-bit VALU move instruction.
+ unsigned Reg = Inst->getOperand(0).getReg();
+ unsigned Dst = split64BitImm(Worklist,
+ Inst,
+ MRI,
+ MRI.getRegClass(Reg),
+ Inst->getOperand(1));
+ MRI.replaceRegWith(Reg, Dst);
+ }
+ Inst->eraseFromParent();
+ continue;
+ }
+ case AMDGPU::S_AND_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_OR_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_XOR_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_NOT_B64:
+ splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_BCNT1_I32_B64:
+ splitScalar64BitBCNT(Worklist, Inst);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_BFE_U64:
+ case AMDGPU::S_BFE_I64:
+ case AMDGPU::S_BFM_B64:
+ llvm_unreachable("Moving this op to VALU not implemented");
+ }
+
+ if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
+ // We cannot move this instruction to the VALU, so we should try to
+ // legalize its operands instead.
+ legalizeOperands(Inst);
+ continue;
+ }
+
+ // Use the new VALU Opcode.
+ const MCInstrDesc &NewDesc = get(NewOpcode);
+ Inst->setDesc(NewDesc);
+
+ // Remove any references to SCC. Vector instructions can't read from it, and
+ // We're just about to add the implicit use / defs of VCC, and we don't want
+ // both.
+ for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
+ MachineOperand &Op = Inst->getOperand(i);
+ if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
+ Inst->RemoveOperand(i);
+ }
+
+ if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
+ // We are converting these to a BFE, so we need to add the missing
+ // operands for the size and offset.
+ unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
+ Inst->addOperand(Inst->getOperand(1));
+ Inst->getOperand(1).ChangeToImmediate(0);
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(Size));
+
+ // XXX - Other pointless operands. There are 4, but it seems you only need
+ // 3 to not hit an assertion later in MCInstLower.
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
+ // The VALU version adds the second operand to the result, so insert an
+ // extra 0 operand.
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ }
+
+ addDescImplicitUseDef(NewDesc, Inst);
+
+ if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
+ const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
+ // If we need to move this to VGPRs, we need to unpack the second operand
+ // back into the 2 separate ones for bit offset and width.
+ assert(OffsetWidthOp.isImm() &&
+ "Scalar BFE is only implemented for constant width and offset");
+ uint32_t Imm = OffsetWidthOp.getImm();
+
+ uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
+ uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
+
+ Inst->RemoveOperand(2); // Remove old immediate.
+ Inst->addOperand(Inst->getOperand(1));
+ Inst->getOperand(1).ChangeToImmediate(0);
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(Offset));
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(BitWidth));
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ }
+
+ // Update the destination register class.
+
+ const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
+
+ switch (Opcode) {
+ // For target instructions, getOpRegClass just returns the virtual
+ // register class associated with the operand, so we need to find an
+ // equivalent VGPR register class in order to move the instruction to the
+ // VALU.
+ case AMDGPU::COPY:
+ case AMDGPU::PHI:
+ case AMDGPU::REG_SEQUENCE:
+ case AMDGPU::INSERT_SUBREG:
+ if (RI.hasVGPRs(NewDstRC))
+ continue;
+ NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
+ if (!NewDstRC)
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ unsigned DstReg = Inst->getOperand(0).getReg();
+ unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ MRI.replaceRegWith(DstReg, NewDstReg);
+
+ // Legalize the operands
+ legalizeOperands(Inst);
+
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
+ E = MRI.use_end(); I != E; ++I) {
+ MachineInstr &UseMI = *I->getParent();
+ if (!canReadVGPR(UseMI, I.getOperandNo())) {
+ Worklist.push_back(&UseMI);
+ }
+ }
+ }
+}
+