#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
-using namespace llvm;
-
-
// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}
-AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
- : AMDGPUGenInstrInfo(-1,-1), RI(tm), TM(tm) { }
+AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
+ : AMDGPUGenInstrInfo(-1,-1), RI(st), ST(st) { }
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
return RI;
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const {
// TODO: Implement this function
- return NULL;
-}
-bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
- MachineBasicBlock &MBB) const {
- while (iter != MBB.end()) {
- switch (iter->getOpcode()) {
- default:
- break;
- case AMDGPU::BRANCH_COND_i32:
- case AMDGPU::BRANCH_COND_f32:
- case AMDGPU::BRANCH:
- return true;
- };
- ++iter;
- }
- return false;
+ return nullptr;
}
void
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert(!"Not Implemented");
+ llvm_unreachable("Not Implemented");
}
void
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert(!"Not Implemented");
+ llvm_unreachable("Not Implemented");
}
bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const {
MachineBasicBlock *MBB = MI->getParent();
- int OffsetOpIdx =
- AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::addr);
+ int OffsetOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::addr);
// addr is a custom operand with multiple MI operands, and only the
// first MI operand is given a name.
int RegOpIdx = OffsetOpIdx + 1;
- int ChanOpIdx =
- AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::chan);
-
+ int ChanOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::chan);
if (isRegisterLoad(*MI)) {
- int DstOpIdx =
- AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ int DstOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::dst);
unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
Address, OffsetReg);
}
} else if (isRegisterStore(*MI)) {
- int ValOpIdx =
- AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::val);
- AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ int ValOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::val);
unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const {
// TODO: Implement this function
- return 0;
+ return nullptr;
}
MachineInstr*
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const {
// TODO: Implement this function
- return 0;
+ return nullptr;
}
bool
AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
return 0;
}
-bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
- int64_t Offset1, int64_t Offset2,
- unsigned NumLoads) const {
- assert(Offset2 > Offset1
- && "Second offset should be larger than first offset!");
- // If we have less than 16 loads in a row, and the offsets are within 16,
- // then schedule together.
- // TODO: Make the loads schedule near if it fits in a cacheline
- return (NumLoads < 16 && (Offset2 - Offset1) < 16);
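+// This hook presumably opts the target in to the scheduler's load clustering;
+// the exact consumer of this flag is an assumption, not stated in this file.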
+bool AMDGPUInstrInfo::enableClusterLoads() const {
+ return true;
+}
+
+// FIXME: This behaves strangely. If, for example, you have 32 loads and stores,
+// the first 16 loads will be interleaved with the stores, and the next 16 will
+// be clustered as expected. It should really split into two batches of 16
+// stores.
+//
+// Loads are clustered until this returns false, rather than trying to schedule
+// groups of stores. This also means we have to decide whether loads from
+// different address spaces should be clustered, and whether to cluster loads
+// which might cause bank conflicts.
+//
+// This might be deprecated, so it might not be worth that much effort to fix.
+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
+ int64_t Offset0, int64_t Offset1,
+ unsigned NumLoads) const {
+ assert(Offset1 > Offset0 &&
+ "Second offset should be larger than first offset!");
+ // If we have 16 or fewer loads in a row, and the offsets are within 64
+ // bytes, then schedule together.
+
+ // A cacheline is 64 bytes (for global memory).
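+ // For example, two loads at offsets 0 and 48 (within one 64-byte line) would
+ // be clustered, while loads at offsets 0 and 96 would not.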
+ return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}
bool
return -1;
}
- Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
+ Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
return getIndirectIndexBegin(MF) + Offset;
}
-
-void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
- DebugLoc DL) const {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const AMDGPURegisterInfo & RI = getRegisterInfo();
-
- for (unsigned i = 0; i < MI.getNumOperands(); i++) {
- MachineOperand &MO = MI.getOperand(i);
- // Convert dst regclass to one that is supported by the ISA
- if (MO.isReg() && MO.isDef()) {
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
- const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
- const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
-
- assert(newRegClass);
-
- MRI.setRegClass(MO.getReg(), newRegClass);
- }
- }
- }
-}
-
int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
switch (Channels) {
default: return Opcode;
case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
}
}
+
+// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
+// header files, so we need to wrap it in a function that takes unsigned
+// instead.
+namespace llvm {
+namespace AMDGPU {
+static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
+ return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);
+}
+}
+}
+
+// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
+enum SISubtarget {
+ SI = 0,
+ VI = 1
+};
+
+static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
+ switch (Gen) {
+ default:
+ return SI;
+ case AMDGPUSubtarget::VOLCANIC_ISLANDS:
+ return VI;
+ }
+}
+
+int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
+ int MCOp = AMDGPU::getMCOpcode(Opcode,
+ AMDGPUSubtargetToSISubtarget(RI.ST.getGeneration()));
+
+ // -1 means that Opcode is already a native instruction.
+ if (MCOp == -1)
+ return Opcode;
+
+ // (uint16_t)-1 means that Opcode is a pseudo instruction that has
+ // no encoding in the given subtarget generation.
+ if (MCOp == (uint16_t)-1)
+ return -1;
+
+ return MCOp;
+}
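+
+// A hypothetical caller (sketch only; names like TII, MI and OutMI are
+// assumed, not taken from this file) would translate the pseudo opcode before
+// emitting an MCInst, and give up if this generation has no encoding for it:
+//
+//   int MCOpcode = TII->pseudoToMCOpcode(MI.getOpcode());
+//   if (MCOpcode == -1)
+//     report_fatal_error("pseudo instruction has no MC encoding");
+//   OutMI.setOpcode(MCOpcode);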