X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FAMDGPU%2FSIInstrInfo.cpp;h=c7218dedddcc8dc75371bd5c17c97d502a9d2e13;hp=68c59a1f6ca9d77e1e5b1830ac1cea16db6cab62;hb=d89d4bccffbd23bea821af7fd09201f4e9abc546;hpb=d3ff1cd1f59149f23602146bcdcbae7ca4622468

diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 68c59a1f6ca..c7218dedddc 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -82,6 +82,7 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
   switch (MI->getOpcode()) {
   case AMDGPU::V_MOV_B32_e32:
   case AMDGPU::V_MOV_B32_e64:
+  case AMDGPU::V_MOV_B64_PSEUDO:
     return true;
   default:
     return false;
@@ -508,14 +509,23 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   }
 
   if (Opcode != -1) {
+    MachinePointerInfo PtrInfo
+      = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
+    unsigned Size = FrameInfo->getObjectSize(FrameIndex);
+    unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
+    MachineMemOperand *MMO
+      = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+                                 Size, Align);
+
     FrameInfo->setObjectAlignment(FrameIndex, 4);
     BuildMI(MBB, MI, DL, get(Opcode))
-            .addReg(SrcReg)
-            .addFrameIndex(FrameIndex)
-            // Place-holder registers, these will be filled in by
-            // SIPrepareScratchRegs.
-            .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
-            .addReg(AMDGPU::SGPR0, RegState::Undef);
+      .addReg(SrcReg)
+      .addFrameIndex(FrameIndex)
+      // Place-holder registers, these will be filled in by
+      // SIPrepareScratchRegs.
+      .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
+      .addReg(AMDGPU::SGPR0, RegState::Undef)
+      .addMemOperand(MMO);
   } else {
     LLVMContext &Ctx = MF->getFunction()->getContext();
     Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
@@ -556,14 +566,22 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   }
 
   if (Opcode != -1) {
-    FrameInfo->setObjectAlignment(FrameIndex, 4);
-    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
-            .addFrameIndex(FrameIndex)
-            // Place-holder registers, these will be filled in by
-            // SIPrepareScratchRegs.
-            .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
-            .addReg(AMDGPU::SGPR0, RegState::Undef);
+    unsigned Align = 4;
+    FrameInfo->setObjectAlignment(FrameIndex, Align);
+    unsigned Size = FrameInfo->getObjectSize(FrameIndex);
+    MachinePointerInfo PtrInfo
+      = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
+    MachineMemOperand *MMO = MF->getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOLoad, Size, Align);
+
+    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
+      .addFrameIndex(FrameIndex)
+      // Place-holder registers, these will be filled in by
+      // SIPrepareScratchRegs.
+      .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
+      .addReg(AMDGPU::SGPR0, RegState::Undef)
+      .addMemOperand(MMO);
   } else {
     LLVMContext &Ctx = MF->getFunction()->getContext();
     Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
@@ -766,10 +784,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
 
 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
                                               bool NewMI) const {
-
-  if (MI->getNumOperands() < 3)
-    return nullptr;
-
   int CommutedOpcode = commuteOpcode(*MI);
   if (CommutedOpcode == -1)
     return nullptr;
@@ -979,9 +993,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
                               AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
     }
 
-    UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
-                                                    AMDGPU::OpName::src2));
-
     // ChangingToImmediate adds Src2 back to the instruction.
     Src2->ChangeToImmediate(Imm);
     removeModOperands(*UseMI);
@@ -1088,7 +1099,7 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
 
   // TODO: Should we check the address space from the MachineMemOperand? That
   // would allow us to distinguish objects we know don't alias based on the
-  // underlying addres space, even if it was lowered to a different one,
+  // underlying address space, even if it was lowered to a different one,
   // e.g. private accesses lowered to use MUBUF instructions on a scratch
   // buffer.
   if (isDS(Opc0)) {
@@ -1588,36 +1599,6 @@ MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
   return MachineOperand::CreateReg(SubReg, false);
 }
 
-unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
-                                    MachineBasicBlock::iterator MI,
-                                    MachineRegisterInfo &MRI,
-                                    const TargetRegisterClass *RC,
-                                    const MachineOperand &Op) const {
-  MachineBasicBlock *MBB = MI->getParent();
-  DebugLoc DL = MI->getDebugLoc();
-  unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-  unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-  unsigned Dst = MRI.createVirtualRegister(RC);
-
-  MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
-                             LoDst)
-    .addImm(Op.getImm() & 0xFFFFFFFF);
-  MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
-                             HiDst)
-    .addImm(Op.getImm() >> 32);
-
-  BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
-    .addReg(LoDst)
-    .addImm(AMDGPU::sub0)
-    .addReg(HiDst)
-    .addImm(AMDGPU::sub1);
-
-  Worklist.push_back(Lo);
-  Worklist.push_back(Hi);
-
-  return Dst;
-}
-
 // Change the order of operands from (0, 1, 2) to (0, 2, 1)
 void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
   assert(Inst->getNumExplicitOperands() == 3);
@@ -1841,15 +1822,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
   }
 
   MachineBasicBlock &MBB = *MI->getParent();
 
-  // Extract the ptr from the resource descriptor.
-
-  // SRsrcPtrLo = srsrc:sub0
-  unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
-      &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass);
-  // SRsrcPtrHi = srsrc:sub1
-  unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
-      &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass);
+  // Extract the ptr from the resource descriptor.
+  unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
+    &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
 
   // Create an empty resource descriptor
   unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
@@ -1874,40 +1850,39 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
       .addImm(RsrcDataFormat >> 32);
 
   // NewSRsrc = {Zero64, SRsrcFormat}
-  BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
-          NewSRsrc)
-          .addReg(Zero64)
-          .addImm(AMDGPU::sub0_sub1)
-          .addReg(SRsrcFormatLo)
-          .addImm(AMDGPU::sub2)
-          .addReg(SRsrcFormatHi)
-          .addImm(AMDGPU::sub3);
+  BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
+      .addReg(Zero64)
+      .addImm(AMDGPU::sub0_sub1)
+      .addReg(SRsrcFormatLo)
+      .addImm(AMDGPU::sub2)
+      .addReg(SRsrcFormatHi)
+      .addImm(AMDGPU::sub3);
 
   MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
   unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
-  unsigned NewVAddrLo;
-  unsigned NewVAddrHi;
   if (VAddr) {
     // This is already an ADDR64 instruction so we need to add the pointer
     // extracted from the resource descriptor to the current value of VAddr.
-    NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-    NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
-    // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
-    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
-            NewVAddrLo)
-            .addReg(SRsrcPtrLo)
-            .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
-            .addReg(AMDGPU::VCC, RegState::ImplicitDefine);
-
-    // NewVaddrHi = SRsrcPtrHi + VAddr:sub1
-    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
-            NewVAddrHi)
-            .addReg(SRsrcPtrHi)
-            .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
-            .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
-            .addReg(AMDGPU::VCC, RegState::Implicit);
-
+    unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+    // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
+    DebugLoc DL = MI->getDebugLoc();
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
+      .addReg(SRsrcPtr, 0, AMDGPU::sub0)
+      .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
+
+    // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
+      .addReg(SRsrcPtr, 0, AMDGPU::sub1)
+      .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
+
+    // NewVaddr = {NewVaddrHi, NewVaddrLo}
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
+      .addReg(NewVAddrLo)
+      .addImm(AMDGPU::sub0)
+      .addReg(NewVAddrHi)
+      .addImm(AMDGPU::sub1);
   } else {
     // This instructions is the _OFFSET variant, so we need to convert it to
    // ADDR64.
@@ -1918,36 +1893,33 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
 
     // Create the new instruction.
     unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
     MachineInstr *Addr64 =
-      BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
-              .addOperand(*VData)
-              .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
-                                          // This will be replaced later
-                                          // with the new value of vaddr.
-              .addOperand(*SRsrc)
-              .addOperand(*SOffset)
-              .addOperand(*Offset)
-              .addImm(0) // glc
-              .addImm(0) // slc
-              .addImm(0); // tfe
+        BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
+            .addOperand(*VData)
+            .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
+                                        // This will be replaced later
+                                        // with the new value of vaddr.
+            .addOperand(*SRsrc)
+            .addOperand(*SOffset)
+            .addOperand(*Offset)
+            .addImm(0) // glc
+            .addImm(0) // slc
+            .addImm(0) // tfe
+            .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
 
     MI->removeFromParent();
     MI = Addr64;
 
-    NewVAddrLo = SRsrcPtrLo;
-    NewVAddrHi = SRsrcPtrHi;
+    // NewVaddr = {NewVaddrHi, NewVaddrLo}
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
+      .addReg(SRsrcPtr, 0, AMDGPU::sub0)
+      .addImm(AMDGPU::sub0)
+      .addReg(SRsrcPtr, 0, AMDGPU::sub1)
+      .addImm(AMDGPU::sub1);
+
     VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
     SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
   }
 
-  // NewVaddr = {NewVaddrHi, NewVaddrLo}
-  BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
-          NewVAddr)
-          .addReg(NewVAddrLo)
-          .addImm(AMDGPU::sub0)
-          .addReg(NewVAddrHi)
-          .addImm(AMDGPU::sub1);
-
-  // Update the instruction to use NewVaddr
   VAddr->setReg(NewVAddr);
   // Update the instruction to use NewSRsrc
@@ -2170,47 +2142,23 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
         moveSMRDToVALU(Inst, MRI);
       }
       break;
-    case AMDGPU::S_MOV_B64: {
-      DebugLoc DL = Inst->getDebugLoc();
-
-      // If the source operand is a register we can replace this with a
-      // copy.
-      if (Inst->getOperand(1).isReg()) {
-        MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
-                                 .addOperand(Inst->getOperand(0))
-                                 .addOperand(Inst->getOperand(1));
-        Worklist.push_back(Copy);
-      } else {
-        // Otherwise, we need to split this into two movs, because there is
-        // no 64-bit VALU move instruction.
-        unsigned Reg = Inst->getOperand(0).getReg();
-        unsigned Dst = split64BitImm(Worklist,
-                                     Inst,
-                                     MRI,
-                                     MRI.getRegClass(Reg),
-                                     Inst->getOperand(1));
-        MRI.replaceRegWith(Reg, Dst);
-      }
-      Inst->eraseFromParent();
-      continue;
-    }
     case AMDGPU::S_AND_B64:
-      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
       Inst->eraseFromParent();
       continue;
 
     case AMDGPU::S_OR_B64:
-      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
       Inst->eraseFromParent();
      continue;
 
     case AMDGPU::S_XOR_B64:
-      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
       Inst->eraseFromParent();
       continue;
 
     case AMDGPU::S_NOT_B64:
-      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
       Inst->eraseFromParent();
       continue;
 
@@ -2347,13 +2295,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
 
     // Legalize the operands
     legalizeOperands(Inst);
 
-    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
-           E = MRI.use_end(); I != E; ++I) {
-      MachineInstr &UseMI = *I->getParent();
-      if (!canReadVGPR(UseMI, I.getOperandNo())) {
-        Worklist.push_back(&UseMI);
-      }
-    }
+    addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
   }
 }
 
@@ -2395,20 +2337,21 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
                                                        AMDGPU::sub0, Src0SubRC);
 
   const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
-  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
+  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
 
-  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
-  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
+  BuildMI(MBB, MII, DL, InstDesc, DestSub0)
     .addOperand(SrcReg0Sub0);
 
   MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                        AMDGPU::sub1, Src0SubRC);
 
-  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
-  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
+  BuildMI(MBB, MII, DL, InstDesc, DestSub1)
     .addOperand(SrcReg0Sub1);
 
-  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
   BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
     .addReg(DestSub0)
     .addImm(AMDGPU::sub0)
@@ -2417,10 +2360,11 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
 
   MRI.replaceRegWith(Dest.getReg(), FullDestReg);
 
-  // Try to legalize the operands in case we need to swap the order to keep it
-  // valid.
-  Worklist.push_back(LoHalf);
-  Worklist.push_back(HiHalf);
+  // We don't need to legalizeOperands here because for a single operand, src0
+  // will support any kind of input.
+
+  // Move all users of this moved value.
+  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
 }
 
 void SIInstrInfo::splitScalar64BitBinaryOp(
@@ -2455,9 +2399,10 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
                                                        AMDGPU::sub0, Src1SubRC);
 
   const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
-  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
+  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
 
-  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
+  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
   MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
     .addOperand(SrcReg0Sub0)
     .addOperand(SrcReg1Sub0);
@@ -2467,12 +2412,12 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
   MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                        AMDGPU::sub1, Src1SubRC);
 
-  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
   MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
     .addOperand(SrcReg0Sub1)
     .addOperand(SrcReg1Sub1);
 
-  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
   BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
     .addReg(DestSub0)
     .addImm(AMDGPU::sub0)
@@ -2483,8 +2428,11 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
 
   // Try to legalize the operands in case we need to swap the order to keep it
   // valid.
-  Worklist.push_back(LoHalf);
-  Worklist.push_back(HiHalf);
+  legalizeOperands(LoHalf);
+  legalizeOperands(HiHalf);
+
+  // Move all users of this moved value.
+  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
 }
 
 void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
@@ -2513,18 +2461,19 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist
   MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                       AMDGPU::sub1, SrcSubRC);
 
-  MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
+  BuildMI(MBB, MII, DL, InstDesc, MidReg)
     .addOperand(SrcRegSub0)
     .addImm(0);
 
-  MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
+  BuildMI(MBB, MII, DL, InstDesc, ResultReg)
    .addOperand(SrcRegSub1)
     .addReg(MidReg);
 
   MRI.replaceRegWith(Dest.getReg(), ResultReg);
 
-  Worklist.push_back(First);
-  Worklist.push_back(Second);
+  // We don't need to legalize operands here. src0 for either instruction can be
+  // an SGPR, and the second input is unused or determined here.
+  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
 }
 
 void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
@@ -2568,6 +2517,7 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
       .addImm(AMDGPU::sub1);
 
     MRI.replaceRegWith(Dest.getReg(), ResultReg);
+    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
     return;
   }
 
@@ -2586,6 +2536,20 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
     .addImm(AMDGPU::sub1);
 
   MRI.replaceRegWith(Dest.getReg(), ResultReg);
+  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
+}
+
+void SIInstrInfo::addUsersToMoveToVALUWorklist(
+  unsigned DstReg,
+  MachineRegisterInfo &MRI,
+  SmallVectorImpl<MachineInstr *> &Worklist) const {
+  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
+         E = MRI.use_end(); I != E; ++I) {
+    MachineInstr &UseMI = *I->getParent();
+    if (!canReadVGPR(UseMI, I.getOperandNo())) {
+      Worklist.push_back(&UseMI);
+    }
+  }
 }
 
 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,