X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FR600%2FAMDGPUInstrInfo.cpp;h=f4de2d6ae89f58dc06b4bdacecd572294a2e6a06;hp=434c91a523156082bd4600d36ca75f8b0a031d57;hb=e003f1ac8cb8e921b50eae9a997dfc9258cc998f;hpb=04c559569f87d755c3f2828a765f5eb7308e6753

diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index 434c91a5231..f4de2d6ae89 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -20,15 +20,18 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 
-#define GET_INSTRINFO_CTOR
+using namespace llvm;
+
+#define GET_INSTRINFO_CTOR_DTOR
 #define GET_INSTRINFO_NAMED_OPS
 #define GET_INSTRMAP_INFO
 #include "AMDGPUGenInstrInfo.inc"
 
-using namespace llvm;
+// Pin the vtable to this file.
+void AMDGPUInstrInfo::anchor() {}
 
-AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
-  : AMDGPUGenInstrInfo(-1,-1), RI(tm), TM(tm) { }
+AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
+  : AMDGPUGenInstrInfo(-1,-1), RI(st), ST(st) { }
 
 const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
   return RI;
@@ -81,22 +84,7 @@ AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
                                        LiveVariables *LV) const {
 // TODO: Implement this function
-  return NULL;
-}
-bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
-                                         MachineBasicBlock &MBB) const {
-  while (iter != MBB.end()) {
-    switch (iter->getOpcode()) {
-    default:
-      break;
-    case AMDGPU::BRANCH_COND_i32:
-    case AMDGPU::BRANCH_COND_f32:
-    case AMDGPU::BRANCH:
-      return true;
-    };
-    ++iter;
-  }
-  return false;
+  return nullptr;
 }
 
 void
@@ -106,7 +94,7 @@ AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
-  assert(!"Not Implemented");
+  llvm_unreachable("Not Implemented");
 }
 
 void
@@ -115,42 +103,49 @@ AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
-  assert(!"Not Implemented");
+  llvm_unreachable("Not Implemented");
 }
 
 bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const {
   MachineBasicBlock *MBB = MI->getParent();
-
-  switch(MI->getOpcode()) {
-  default:
-    if (isRegisterLoad(*MI)) {
-      unsigned RegIndex = MI->getOperand(2).getImm();
-      unsigned Channel = MI->getOperand(3).getImm();
-      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
-      unsigned OffsetReg = MI->getOperand(1).getReg();
-      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
-        buildMovInstr(MBB, MI, MI->getOperand(0).getReg(),
-                      getIndirectAddrRegClass()->getRegister(Address));
-      } else {
-        buildIndirectRead(MBB, MI, MI->getOperand(0).getReg(),
-                          Address, OffsetReg);
-      }
-    } else if (isRegisterStore(*MI)) {
-      unsigned RegIndex = MI->getOperand(2).getImm();
-      unsigned Channel = MI->getOperand(3).getImm();
-      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
-      unsigned OffsetReg = MI->getOperand(1).getReg();
-      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
-        buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
-                      MI->getOperand(0).getReg());
-      } else {
-        buildIndirectWrite(MBB, MI, MI->getOperand(0).getReg(),
+  int OffsetOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+                                               AMDGPU::OpName::addr);
+   // addr is a custom operand with multiple MI operands, and only the
+   // first MI operand is given a name.
+  int RegOpIdx = OffsetOpIdx + 1;
+  int ChanOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+                                             AMDGPU::OpName::chan);
+  if (isRegisterLoad(*MI)) {
+    int DstOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+                                              AMDGPU::OpName::dst);
+    unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
+    unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
+    unsigned Address = calculateIndirectAddress(RegIndex, Channel);
+    unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
+    if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+      buildMovInstr(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
+                    getIndirectAddrRegClass()->getRegister(Address));
+    } else {
+      buildIndirectRead(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
+                        Address, OffsetReg);
+    }
+  } else if (isRegisterStore(*MI)) {
+    int ValOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+                                              AMDGPU::OpName::val);
+    unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
+    unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
+    unsigned Address = calculateIndirectAddress(RegIndex, Channel);
+    unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
+    if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+      buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
+                    MI->getOperand(ValOpIdx).getReg());
+    } else {
+      buildIndirectWrite(MBB, MI, MI->getOperand(ValOpIdx).getReg(),
                          calculateIndirectAddress(RegIndex, Channel),
                          OffsetReg);
-      }
-    } else {
-      return false;
     }
+  } else {
+    return false;
   }
 
   MBB->erase(MI);
@@ -164,7 +159,7 @@ AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                        const SmallVectorImpl<unsigned> &Ops,
                                        int FrameIndex) const {
 // TODO: Implement this function
-  return 0;
+  return nullptr;
 }
 MachineInstr*
 AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -172,7 +167,7 @@ AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                        const SmallVectorImpl<unsigned> &Ops,
                                        MachineInstr *LoadMI) const {
   // TODO: Implement this function
-  return 0;
+  return nullptr;
 }
 
 bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
@@ -204,15 +199,30 @@ AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
   return 0;
 }
 
-bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
-                                             int64_t Offset1, int64_t Offset2,
-                                             unsigned NumLoads) const {
-  assert(Offset2 > Offset1
-         && "Second offset should be larger than first offset!");
-  // If we have less than 16 loads in a row, and the offsets are within 16,
-  // then schedule together.
-  // TODO: Make the loads schedule near if it fits in a cacheline
-  return (NumLoads < 16 && (Offset2 - Offset1) < 16);
+bool AMDGPUInstrInfo::enableClusterLoads() const {
+  return true;
+}
+
+// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
+// the first 16 loads will be interleaved with the stores, and the next 16 will
+// be clustered as expected. It should really split into 2 16 store batches.
+//
+// Loads are clustered until this returns false, rather than trying to schedule
+// groups of stores. This also means we have to deal with saying different
+// address space loads should be clustered, and ones which might cause bank
+// conflicts.
+//
+// This might be deprecated so it might not be worth that much effort to fix.
+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
+                                              int64_t Offset0, int64_t Offset1,
+                                              unsigned NumLoads) const {
+  assert(Offset1 > Offset0 &&
+         "Second offset should be larger than first offset!");
+  // If we have less than 16 loads in a row, and the offsets are within 64
+  // bytes, then schedule together.
+
+  // A cacheline is 64 bytes (for global memory).
+  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
 }
 
 bool
@@ -263,26 +273,55 @@ bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
   return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
 }
 
+int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  int Offset = -1;
 
-void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
-                                  DebugLoc DL) const {
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  const AMDGPURegisterInfo & RI = getRegisterInfo();
-
-  for (unsigned i = 0; i < MI.getNumOperands(); i++) {
-    MachineOperand &MO = MI.getOperand(i);
-    // Convert dst regclass to one that is supported by the ISA
-    if (MO.isReg() && MO.isDef()) {
-      if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
-        const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
-        const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
+  if (MFI->getNumObjects() == 0) {
+    return -1;
+  }
 
-        assert(newRegClass);
+  if (MRI.livein_empty()) {
+    return 0;
+  }
 
-        MRI.setRegClass(MO.getReg(), newRegClass);
-      }
-    }
+  const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
+  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+                                            LE = MRI.livein_end();
+                                            LI != LE; ++LI) {
+    unsigned Reg = LI->first;
+    if (TargetRegisterInfo::isVirtualRegister(Reg) ||
+        !IndirectRC->contains(Reg))
+      continue;
+
+    unsigned RegIndex;
+    unsigned RegEnd;
+    for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
+         ++RegIndex) {
+      if (IndirectRC->getRegister(RegIndex) == Reg)
+        break;
+    }
+    Offset = std::max(Offset, (int)RegIndex);
   }
+
+  return Offset + 1;
+}
+
+int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+  int Offset = 0;
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Variable sized objects are not supported
+  assert(!MFI->hasVarSizedObjects());
+
+  if (MFI->getNumObjects() == 0) {
+    return -1;
+  }
+
+  Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
+
+  return getIndirectIndexBegin(MF) + Offset;
 }
 
 int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
@@ -293,3 +332,45 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
   case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
   }
 }
+
+// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
+// header files, so we need to wrap it in a function that takes unsigned
+// instead.
+namespace llvm {
+namespace AMDGPU {
+static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
+  return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);
+}
+}
+}
+
+// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
+enum SISubtarget {
+  SI = 0,
+  VI = 1
+};
+
+static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
+  switch (Gen) {
+  default:
+    return SI;
+  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
+    return VI;
+  }
+}
+
+int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
+  int MCOp = AMDGPU::getMCOpcode(Opcode,
+                                 AMDGPUSubtargetToSISubtarget(RI.ST.getGeneration()));
+
+  // -1 means that Opcode is already a native instruction.
+  if (MCOp == -1)
+    return Opcode;
+
+  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
+  // no encoding in the given subtarget generation.
+  if (MCOp == (uint16_t)-1)
+    return -1;
+
+  return MCOp;
+}
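
A note on the pseudoToMCOpcode() hunk above: it relies on a two-sentinel convention from the Tablegen'd getMCOpcodeGen() table. A result of -1 means the queried opcode is already a native instruction, so the pseudo opcode is returned unchanged, while (uint16_t)-1 marks a pseudo that has no encoding for the selected subtarget generation, so -1 is returned to signal failure. The standalone C++ sketch below only illustrates that return convention outside of LLVM; the table contents and the names used here (mcOpcodeTable, getMCOpcode, pseudoToMCOpcode in this sketch) are hypothetical stand-ins, not the real TableGen output or LLVM API.

#include <cstdint>
#include <iostream>
#include <map>
#include <utility>

// Hypothetical stand-in for a generated pseudo -> MC opcode table.
// Key: (pseudo opcode, subtarget generation). A missing entry models the
// "already native" case (-1); a value of 0xffff models "no encoding for
// this generation", i.e. (uint16_t)-1 in the patch above.
static const std::map<std::pair<int, unsigned>, uint16_t> mcOpcodeTable = {
  {{100, 0}, 7},       // pseudo 100 encodes as MC opcode 7 on generation 0 (SI-like)
  {{100, 1}, 0xffff},  // pseudo 100 has no encoding on generation 1 (VI-like)
};

static int getMCOpcode(int Opcode, unsigned Gen) {
  auto It = mcOpcodeTable.find({Opcode, Gen});
  return It == mcOpcodeTable.end() ? -1 : It->second;
}

// Mirrors the return convention of the pseudoToMCOpcode() added in the diff:
// a real MC opcode, the original opcode if it is already native, or -1 if
// the pseudo cannot be encoded for this generation.
static int pseudoToMCOpcode(int Opcode, unsigned Gen) {
  int MCOp = getMCOpcode(Opcode, Gen);
  if (MCOp == -1)
    return Opcode;            // already a native instruction
  if (MCOp == (uint16_t)-1)
    return -1;                // pseudo with no encoding on this subtarget
  return MCOp;
}

int main() {
  std::cout << pseudoToMCOpcode(100, 0) << "\n";  // 7
  std::cout << pseudoToMCOpcode(100, 1) << "\n";  // -1 (no encoding on generation 1)
  std::cout << pseudoToMCOpcode(42, 0) << "\n";   // 42 (already native)
  return 0;
}

Keeping "no mapping at all" and "mapping exists but is invalid for this generation" as distinct sentinels lets a caller fall back to the pseudo opcode in the first case and report an encoding error in the second.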