1 //===-- R600CodeEmitter.cpp - Code Emitter for R600->Cayman GPU families --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This code emitter outputs bytecode that is understood by the r600g driver
11 // in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
12 // except that the sizes of the instruction fields are rounded up to the nearest byte.
15 // [1] http://www.mesa3d.org/
17 //===----------------------------------------------------------------------===//
20 #include "AMDGPUUtil.h"
21 #include "AMDILCodeEmitter.h"
22 #include "AMDILInstrInfo.h"
23 #include "AMDILUtilityFunctions.h"
24 #include "R600InstrInfo.h"
25 #include "R600RegisterInfo.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/Support/DataTypes.h"
30 #include "llvm/Support/FormattedStream.h"
31 #include "llvm/Target/TargetMachine.h"
// Number of zero bytes emitted in place of a source operand; passed to
// EmitNullBytes() to fill source slots that an instruction does not use.
35 #define SRC_BYTE_COUNT 11
// Number of zero bytes emitted in place of a destination operand; passed to
// EmitNullBytes() from EmitDst().
36 #define DST_BYTE_COUNT 5
// Machine function pass that walks the instructions of a function and writes
// a byte encoding of each one to the stream supplied at construction.
42 class R600CodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
// Stream that every Emit* helper writes to.
47 formatted_raw_ostream &_OS;
// Target, register and register-class information; TM, MRI and TRI are
// (re)assigned at the start of runOnMachineFunction().
48 const TargetMachine * TM;
49 const MachineRegisterInfo * MRI;
50 const R600RegisterInfo * TRI;
// Index (0-3) of the element currently being emitted while
// runOnMachineFunction() emits a reduction/vector/cube instruction once per
// element.
55 unsigned currentElement;
// OS is the stream the encoded bytes are written to.
60 R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
61 _OS(OS), TM(NULL), IsCube(false), IsReduction(false), IsVector(false),
64 const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
// Pass entry point: emits the encoding of every instruction in MF.
66 bool runOnMachineFunction(MachineFunction &MF);
// Returns the value to encode for operand MO of MI.
67 virtual uint64_t getMachineOpValue(const MachineInstr &MI,
68 const MachineOperand &MO) const;
// Per-instruction-class emitters.
72 void EmitALUInstr(MachineInstr &MI);
// chan_override == -1 means "no override"; any other value is emitted as the
// source channel byte.
73 void EmitSrc(const MachineOperand & MO, int chan_override = -1);
74 void EmitDst(const MachineOperand & MO);
75 void EmitALU(MachineInstr &MI, unsigned numSrc);
76 void EmitTexInstr(MachineInstr &MI);
77 void EmitFCInstr(MachineInstr &MI);
// Low-level byte writers. Multi-byte values are written least-significant
// byte first.
79 void EmitNullBytes(unsigned int byteCount);
81 void EmitByte(unsigned int byte);
83 void EmitTwoBytes(uint32_t bytes);
85 void Emit(uint32_t value);
86 void Emit(uint64_t value);
// Maps an LLVM register number to the value written into the encoding.
88 unsigned getHWReg(unsigned regNo) const;
92 } // End anonymous namespace
132 TEXTURE_SHADOW1D_ARRAY,
133 TEXTURE_SHADOW2D_ARRAY
// Definition of the static pass ID that the constructor passes to
// MachineFunctionPass.
136 char R600CodeEmitter::ID = 0;
// Factory for the emitter pass: returns a newly allocated R600CodeEmitter
// that writes its output to OS.
// NOTE(review): the object is created with a bare `new`; presumably the pass
// manager that receives the pointer takes ownership -- confirm at the caller.
138 FunctionPass *llvm::createR600CodeEmitterPass(formatted_raw_ostream &OS) {
139 return new R600CodeEmitter(OS);
// Pass entry point. Caches the target/register info for MF, then visits every
// instruction of every basic block and dispatches on the kind of opcode
// (texture, flow control, reduction/vector/cube ALU, or one of the opcodes
// handled by the switch below).
142 bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
144 TM = &MF.getTarget();
145 MRI = &MF.getRegInfo();
146 TRI = static_cast<const R600RegisterInfo *>(TM->getRegisterInfo());
147 const R600InstrInfo * TII = static_cast<const R600InstrInfo *>(TM->getInstrInfo());
148 const AMDILSubtarget &STM = TM->getSubtarget<AMDILSubtarget>();
149 std::string gpu = STM.getDeviceName();
151 if (STM.dumpCode()) {
155 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
157 MachineBasicBlock &MBB = *BB;
158 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
160 MachineInstr &MI = *I;
// Classify the instruction once; EmitSrc/EmitDst consult these flags.
161 IsReduction = AMDGPU::isReductionOp(MI.getOpcode());
162 IsVector = TII->isVector(MI);
163 IsCube = AMDGPU::isCubeOp(MI.getOpcode());
164 if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
167 if (AMDGPU::isTexOp(MI.getOpcode())) {
169 } else if (AMDGPU::isFCOp(MI.getOpcode())){
171 } else if (IsReduction || IsVector || IsCube) {
// These instructions are emitted once per element (0..3); IsLast is set on
// the fourth iteration.
173 // XXX: On Cayman, some (all?) of the vector instructions only need
174 // to fill the first three slots.
175 for (currentElement = 0; currentElement < 4; currentElement++) {
176 IsLast = (currentElement == 3);
182 } else if (MI.getOpcode() == AMDGPU::RETURN ||
183 MI.getOpcode() == AMDGPU::BUNDLE ||
184 MI.getOpcode() == AMDGPU::KILL) {
187 switch(MI.getOpcode()) {
188 case AMDGPU::RAT_WRITE_CACHELESS_eg:
190 uint64_t inst = getBinaryCodeForInstr(MI);
191 // Set End Of Program bit
192 // XXX: Need better check of end of program. EOP should be
193 // encoded in one of the operands of the MI, and it should be
194 // set in a prior pass.
// NOTE(review): NextI is dereferenced without first being compared against
// E; if this instruction is the last one in its block that would dereference
// the end iterator -- confirm a RETURN always follows.
195 MachineBasicBlock::iterator NextI = llvm::next(I);
196 MachineInstr &NextMI = *NextI;
197 if (NextMI.getOpcode() == AMDGPU::RETURN) {
// Bit 53 of the instruction word is the End Of Program bit set here.
198 inst |= (((uint64_t)1) << 53);
200 EmitByte(INSTR_NATIVE);
204 case AMDGPU::VTX_READ_PARAM_i32_eg:
205 case AMDGPU::VTX_READ_PARAM_f32_eg:
206 case AMDGPU::VTX_READ_GLOBAL_i32_eg:
207 case AMDGPU::VTX_READ_GLOBAL_f32_eg:
208 case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
209 case AMDGPU::VTX_READ_GLOBAL_v4f32_eg:
// All vertex-read opcodes share one encoding path: the generated encoder
// supplies the first 64 bits and operand 2 supplies the third word.
211 uint64_t InstWord01 = getBinaryCodeForInstr(MI);
212 uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
// Emits one ALU instruction: its source operands (padded with zero bytes up
// to three source slots) followed by the opcode fields written by EmitALU().
230 void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
233 unsigned numOperands = MI.getNumExplicitOperands();
235 // Some instructions are just place holder instructions that represent
236 // operations that the GPU does automatically. They should be ignored.
237 if (AMDGPU::isPlaceHolderOpcode(MI.getOpcode())) {
241 // XXX Check if instruction writes a result
242 if (numOperands < 1) {
// NOTE(review): this copies the operand by value rather than binding a
// reference.
245 const MachineOperand dstOp = MI.getOperand(0);
247 // Emit instruction type
// Cube path: operand 1 is emitted twice with a channel override that depends
// on the element being emitted, and the third source slot is zero-filled.
251 static const int cube_src_swz[] = {2, 2, 0, 1};
252 EmitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
253 EmitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
254 EmitNullBytes(SRC_BYTE_COUNT);
// Generic path: operand 0 is the destination, so sources start at index 1.
256 unsigned int opIndex;
257 for (opIndex = 1; opIndex < numOperands; opIndex++) {
258 // Literal constants are always stored as the last operand.
259 if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
262 EmitSrc(MI.getOperand(opIndex));
// opIndex carries over from the loop above, so only the slots not already
// emitted (up to index 3) are padded.
265 // Emit zeros for unused sources
266 for ( ; opIndex < 4; opIndex++) {
267 EmitNullBytes(SRC_BYTE_COUNT);
// numOperands - 1 excludes the destination operand from the source count.
273 EmitALU(MI, numOperands - 1);
// Emits the encoding of one source operand MO: source select, channel,
// negate/absolute flags, and further fields described by the comments below.
// chan_override, when not -1, is written as the channel byte instead of the
// channel derived from the operand.
276 void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
279 // Emit the source select (2 bytes). For GPRs, this is the register index.
280 // For other potential instruction operands, (e.g. constant registers) the
281 // value of the source select is defined in the r600isa docs.
283 unsigned reg = MO.getReg();
284 EmitTwoBytes(getHWReg(reg));
// ALU_LITERAL_X stands for a literal constant; its bits are taken from the
// last explicit operand of the parent instruction, which is either a
// floating-point or an integer immediate.
285 if (reg == AMDGPU::ALU_LITERAL_X) {
286 const MachineInstr * parent = MO.getParent();
287 unsigned immOpIndex = parent->getNumExplicitOperands() - 1;
288 MachineOperand immOp = parent->getOperand(immOpIndex);
289 if (immOp.isFPImm()) {
// Reinterpret the floating-point constant as its raw bit pattern.
290 value = immOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue();
292 assert(immOp.isImm());
293 value = immOp.getImm();
297 // XXX: Handle other operand types.
// Channel priority: explicit override, then the current element for
// reduction instructions, then the register's own hardware channel.
301 // Emit the source channel (1 byte)
302 if (chan_override != -1) {
303 EmitByte(chan_override);
304 } else if (IsReduction) {
305 EmitByte(currentElement);
306 } else if (MO.isReg()) {
307 EmitByte(TRI->getHWRegChan(MO.getReg()));
// The negate condition is never true when MO_FLAG_ABS is set on the operand.
312 // XXX: Emit isNegated (1 byte)
313 if ((!(MO.getTargetFlags() & MO_FLAG_ABS))
314 && (MO.getTargetFlags() & MO_FLAG_NEG ||
316 (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
322 // Emit isAbsolute (1 byte)
323 if (MO.getTargetFlags() & MO_FLAG_ABS) {
329 // XXX: Emit relative addressing mode (1 byte)
332 // Emit kc_bank. This will be adjusted later by r600_asm.
335 // Emit the literal value, if applicable (4 bytes).
// Emits the encoding of the destination operand MO: register index, element,
// clamp flag, write mask and further fields described by the comments below.
340 void R600CodeEmitter::EmitDst(const MachineOperand & MO)
343 // Emit the destination register index (1 byte)
344 EmitByte(getHWReg(MO.getReg()));
// Instructions that are emitted once per element write the element currently
// being emitted; the other EmitByte call uses the register's own hardware
// channel.
346 // Emit the element of the destination register (1 byte)
347 if (IsReduction || IsCube || IsVector) {
348 EmitByte(currentElement);
350 EmitByte(TRI->getHWRegChan(MO.getReg()));
353 // Emit isClamped (1 byte)
354 if (MO.getTargetFlags() & MO_FLAG_CLAMP) {
// The condition holds for reduction/vector elements other than the
// register's channel, and for any operand flagged with MO_FLAG_MASK.
// NOTE(review): presumably this disables the write for that element --
// confirm against the emitted value.
360 // Emit writemask (1 byte).
361 if (((IsReduction || IsVector) &&
362 currentElement != TRI->getHWRegChan(MO.getReg()))
363 || MO.getTargetFlags() & MO_FLAG_MASK) {
369 // XXX: Emit relative addressing mode
372 // XXX: Handle other operand types. Are there any for destination regs?
373 EmitNullBytes(DST_BYTE_COUNT);
// Emits the opcode of MI followed by the per-instruction ALU fields listed in
// the comments below. numSrc is the number of source operands; EmitALUInstr()
// passes the explicit operand count minus one.
377 void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc)
379 // Emit the instruction (2 bytes)
380 EmitTwoBytes(getBinaryCodeForInstr(MI));
382 // Emit IsLast (for this instruction group) (1 byte)
388 // Emit isOp3 (1 byte)
395 // XXX: Emit predicate (1 byte)
398 // XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like
399 // r600_asm.c sets it.
402 // XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for.
405 // XXX: Emit OMOD (1 byte) Not implemented.
408 // XXX: Emit index_mode. I think this is for indirect addressing, so we
409 // don't need to worry about it.
// Emits a texture instruction: opcode, resource id, source and destination
// registers, per-component coordinate types and the source select for each
// component. Coordinate types and source selects depend on the texture type
// operand and on the opcode.
413 void R600CodeEmitter::EmitTexInstr(MachineInstr &MI)
416 unsigned opcode = MI.getOpcode();
// For TEX_LD the sampler and texture type operands sit three positions later
// than for other texture opcodes; op_offset accounts for that shift.
417 bool hasOffsets = (opcode == AMDGPU::TEX_LD);
418 unsigned op_offset = hasOffsets ? 3 : 0;
419 int64_t sampler = MI.getOperand(op_offset+2).getImm();
420 int64_t textureType = MI.getOperand(op_offset+3).getImm();
// Default source select is the identity mapping (component i reads i).
421 unsigned srcSelect[4] = {0, 1, 2, 3};
423 // Emit instruction type
427 EmitByte(getBinaryCodeForInstr(MI));
// NOTE(review): the comment below says "sampler + 1" but the value emitted is
// sampler + 2 -- confirm which one is intended.
429 // XXX: Emit resource id r600_shader.c uses sampler + 1. Why?
430 EmitByte(sampler + 1 + 1);
432 // Emit source register
433 EmitByte(getHWReg(MI.getOperand(1).getReg()));
435 // XXX: Emit src isRelativeAddress
438 // Emit destination register
439 EmitByte(getHWReg(MI.getOperand(0).getReg()));
441 // XXX: Emit dst isRelativeAddress
444 // XXX: Emit dst select
450 // XXX: Emit lod bias
// Every component defaults to coordinate type 1; the texture-type checks
// below reset selected components to 0.
453 // XXX: Emit coord types
454 unsigned coordType[4] = {1, 1, 1, 1};
456 if (textureType == TEXTURE_RECT
457 || textureType == TEXTURE_SHADOWRECT) {
458 coordType[ELEMENT_X] = 0;
459 coordType[ELEMENT_Y] = 0;
462 if (textureType == TEXTURE_1D_ARRAY
463 || textureType == TEXTURE_SHADOW1D_ARRAY) {
464 if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) {
465 coordType[ELEMENT_Y] = 0;
467 coordType[ELEMENT_Z] = 0;
// The Z component is sourced from Y here.
468 srcSelect[ELEMENT_Z] = ELEMENT_Y;
470 } else if (textureType == TEXTURE_2D_ARRAY
471 || textureType == TEXTURE_SHADOW2D_ARRAY) {
472 coordType[ELEMENT_Z] = 0;
475 for (unsigned i = 0; i < 4; i++) {
476 EmitByte(coordType[i]);
// Operands 2..4 are emitted shifted left by one bit.
// NOTE(review): presumably these are the TEX_LD offset operands implied by
// hasOffsets -- confirm.
481 for (unsigned i = 2; i < 5; i++)
482 EmitByte(MI.getOperand(i).getImm()<<1);
// For the listed shadow texture types, unless the opcode is TEX_SAMPLE_C_L or
// TEX_SAMPLE_C_LB, the W component is sourced from Z.
489 // XXX:Emit source select
490 if ((textureType == TEXTURE_SHADOW1D
491 || textureType == TEXTURE_SHADOW2D
492 || textureType == TEXTURE_SHADOWRECT
493 || textureType == TEXTURE_SHADOW1D_ARRAY)
494 && opcode != AMDGPU::TEX_SAMPLE_C_L
495 && opcode != AMDGPU::TEX_SAMPLE_C_LB) {
496 srcSelect[ELEMENT_W] = ELEMENT_Z;
499 for (unsigned i = 0; i < 4; i++) {
500 EmitByte(srcSelect[i]);
// Emits a flow-control instruction: a source-operand slot followed by the
// flow-control opcode chosen by the switch on MI's opcode.
504 void R600CodeEmitter::EmitFCInstr(MachineInstr &MI)
506 // Emit instruction type
// A flow-control instruction carries at most one operand (asserted below).
// SRC_BYTE_COUNT zero bytes are also emitted here.
// NOTE(review): presumably the zero fill is the no-operand case -- confirm.
510 unsigned numOperands = MI.getNumOperands();
511 if (numOperands > 0) {
512 assert(numOperands == 1);
513 EmitSrc(MI.getOperand(0));
515 EmitNullBytes(SRC_BYTE_COUNT);
518 // Emit FC Instruction
// The _f32 and _i32 variants of an opcode share a case group where both are
// listed together.
520 switch (MI.getOpcode()) {
521 case AMDGPU::BREAK_LOGICALZ_f32:
524 case AMDGPU::BREAK_LOGICALNZ_f32:
525 case AMDGPU::BREAK_LOGICALNZ_i32:
526 instr = FC_BREAK_NZ_INT;
528 case AMDGPU::BREAK_LOGICALZ_i32:
529 instr = FC_BREAK_Z_INT;
531 case AMDGPU::CONTINUE_LOGICALNZ_f32:
532 case AMDGPU::CONTINUE_LOGICALNZ_i32:
535 case AMDGPU::IF_LOGICALNZ_f32:
536 case AMDGPU::IF_LOGICALNZ_i32:
539 case AMDGPU::IF_LOGICALZ_f32:
548 case AMDGPU::ENDLOOP:
551 case AMDGPU::WHILELOOP:
// Emits byteCount bytes of padding, one per loop iteration. Callers use it to
// emit zeros for unused operand slots.
561 void R600CodeEmitter::EmitNullBytes(unsigned int byteCount)
563 for (unsigned int i = 0; i < byteCount; i++) {
// Writes the low 8 bits of byte to the output stream; higher bits are
// discarded.
568 void R600CodeEmitter::EmitByte(unsigned int byte)
570 _OS.write((uint8_t) byte & 0xff);
// Writes the low 16 bits of bytes to the output stream, least-significant
// byte first.
// NOTE(review): the class declares this parameter as uint32_t while the
// definition spells it unsigned int; the two only match where uint32_t is a
// typedef of unsigned int.
572 void R600CodeEmitter::EmitTwoBytes(unsigned int bytes)
574 _OS.write((uint8_t) (bytes & 0xff));
575 _OS.write((uint8_t) ((bytes >> 8) & 0xff));
// Writes the four bytes of value to the output stream, least-significant
// byte first.
578 void R600CodeEmitter::Emit(uint32_t value)
580 for (unsigned i = 0; i < 4; i++) {
581 _OS.write((uint8_t) ((value >> (8 * i)) & 0xff));
// Writes the eight bytes of value to the output stream, least-significant
// byte first, one EmitByte() call per byte.
585 void R600CodeEmitter::Emit(uint64_t value)
587 for (unsigned i = 0; i < 8; i++) {
588 EmitByte((value >> (8 * i)) & 0xff);
// Maps LLVM register number regNo to the value used in the encoding. The
// starting point is the register's encoding value from the register info;
// registers in R600_CReg32RegClass are handled specially.
592 unsigned R600CodeEmitter::getHWReg(unsigned regNo) const
596 HWReg = TRI->getEncodingValue(regNo);
597 if (AMDGPU::R600_CReg32RegClass.contains(regNo)) {
// Returns the value to encode for operand MO of MI. A register operand is
// translated through getHWReg().
// NOTE(review): presumably this return is guarded by an MO.isReg() check --
// confirm.
603 uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr &MI,
604 const MachineOperand &MO) const
607 return getHWReg(MO.getReg());
613 #include "AMDGPUGenCodeEmitter.inc"