lib/Target/R600/AMDGPUAsmPrinter.cpp

   1 //===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 ///
  12 /// The AMDGPUAsmPrinter is used to print both assembly string and also binary
  13 /// code.  When passed an MCAsmStreamer it prints assembly and when passed
  14 /// an MCObjectStreamer it outputs binary code.
  15 //
  16 //===----------------------------------------------------------------------===//
  17 //
  18
  19 #include "AMDGPUAsmPrinter.h"
  20 #include "AMDGPU.h"
  21 #include "AMDGPUSubtarget.h"
  22 #include "R600Defines.h"
  23 #include "R600MachineFunctionInfo.h"
  24 #include "R600RegisterInfo.h"
  25 #include "SIDefines.h"
  26 #include "SIMachineFunctionInfo.h"
  27 #include "SIRegisterInfo.h"
  28 #include "llvm/MC/MCContext.h"
  29 #include "llvm/MC/MCSectionELF.h"
  30 #include "llvm/MC/MCStreamer.h"
  31 #include "llvm/Support/ELF.h"
  32 #include "llvm/Support/MathExtras.h"
  33 #include "llvm/Support/TargetRegistry.h"
  34 #include "llvm/Target/TargetLoweringObjectFile.h"
  35
  36 using namespace llvm;
  37
  38 // TODO: This should get the default rounding mode from the kernel. We just set
  39 // the default here, but this could change if the OpenCL rounding mode pragmas
  40 // are used.
  41 //
  42 // The denormal mode here should match what is reported by the OpenCL runtime
  43 // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
  44 // can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
  45 //
  46 // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
  47 // precision, and leaves single precision to flush all and does not report
  48 // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
  49 // CL_FP_DENORM for both.
  50 static uint32_t getFPMode(const MachineFunction &) {
  51   return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
  52          FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
  53          FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) |
  54          FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);
  55 }
  56
  57 static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
  58                                               MCStreamer &Streamer) {
  59   return new AMDGPUAsmPrinter(tm, Streamer);
  60 }
  61
  62 extern "C" void LLVMInitializeR600AsmPrinter() {
  63   TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
  64 }
  65
  66 AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
  67     : AsmPrinter(TM, Streamer) {
  68   DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode();
  69 }
  70
  71 bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
  72   SetupMachineFunction(MF);
  73
  74   OutStreamer.emitRawComment(Twine('@') + MF.getName() + Twine(':'));
  75
  76   MCContext &Context = getObjFileLowering().getContext();
  77   const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
  78                                               ELF::SHT_PROGBITS, 0,
  79                                               SectionKind::getReadOnly());
  80   OutStreamer.SwitchSection(ConfigSection);
  81
  82   const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
  83   SIProgramInfo KernelInfo;
  84   if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
  85     getSIProgramInfo(KernelInfo, MF);
  86     EmitProgramInfoSI(MF, KernelInfo);
  87   } else {
  88     EmitProgramInfoR600(MF);
  89   }
  90
  91   DisasmLines.clear();
  92   HexLines.clear();
  93   DisasmLineMaxLen = 0;
  94
  95   OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
  96   EmitFunctionBody();
  97
  98   if (isVerbose()) {
  99     const MCSectionELF *CommentSection
 100       = Context.getELFSection(".AMDGPU.csdata",
 101                               ELF::SHT_PROGBITS, 0,
 102                               SectionKind::getReadOnly());
 103     OutStreamer.SwitchSection(CommentSection);
 104
 105     if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
 106       OutStreamer.emitRawComment(" Kernel info:", false);
 107       OutStreamer.emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
 108                                  false);
 109       OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
 110                                  false);
 111       OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
 112                                  false);
 113       OutStreamer.emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
 114                                  false);
 115       OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
 116                                  false);
 117     } else {
 118       R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
 119       OutStreamer.emitRawComment(
 120         Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
 121     }
 122   }
 123
 124   if (STM.dumpCode()) {
 125 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 126     MF.dump();
 127 #endif
 128
 129     if (DisasmEnabled) {
 130       OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
 131                                                   ELF::SHT_NOTE, 0,
 132                                                   SectionKind::getReadOnly()));
 133
 134       for (size_t i = 0; i < DisasmLines.size(); ++i) {
 135         std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
 136         Comment += " ; " + HexLines[i] + "\n";
 137
 138         OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
 139         OutStreamer.EmitBytes(StringRef(Comment));
 140       }
 141     }
 142   }
 143
 144   return false;
 145 }
 146
 147 void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
 148   unsigned MaxGPR = 0;
 149   bool killPixel = false;
 150   const R600RegisterInfo *RI
 151     = static_cast<const R600RegisterInfo*>(TM.getRegisterInfo());
 152   const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
 153   const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
 154
 155   for (const MachineBasicBlock &MBB : MF) {
 156     for (const MachineInstr &MI : MBB) {
 157       if (MI.getOpcode() == AMDGPU::KILLGT)
 158         killPixel = true;
 159       unsigned numOperands = MI.getNumOperands();
 160       for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
 161         const MachineOperand &MO = MI.getOperand(op_idx);
 162         if (!MO.isReg())
 163           continue;
 164         unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
 165
 166         // Register with value > 127 aren't GPR
 167         if (HWReg > 127)
 168           continue;
 169         MaxGPR = std::max(MaxGPR, HWReg);
 170       }
 171     }
 172   }
 173
 174   unsigned RsrcReg;
 175   if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
 176     // Evergreen / Northern Islands
 177     switch (MFI->getShaderType()) {
 178     default: // Fall through
 179     case ShaderType::COMPUTE:  RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
 180     case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
 181     case ShaderType::PIXEL:    RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
 182     case ShaderType::VERTEX:   RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
 183     }
 184   } else {
 185     // R600 / R700
 186     switch (MFI->getShaderType()) {
 187     default: // Fall through
 188     case ShaderType::GEOMETRY: // Fall through
 189     case ShaderType::COMPUTE:  // Fall through
 190     case ShaderType::VERTEX:   RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
 191     case ShaderType::PIXEL:    RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
 192     }
 193   }
 194
 195   OutStreamer.EmitIntValue(RsrcReg, 4);
 196   OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
 197                            S_STACK_SIZE(MFI->StackSize), 4);
 198   OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
 199   OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
 200
 201   if (MFI->getShaderType() == ShaderType::COMPUTE) {
 202     OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
 203     OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
 204   }
 205 }
 206
 207 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
 208                                         const MachineFunction &MF) const {
 209   uint64_t CodeSize = 0;
 210   unsigned MaxSGPR = 0;
 211   unsigned MaxVGPR = 0;
 212   bool VCCUsed = false;
 213   const SIRegisterInfo *RI
 214     = static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
 215
 216   for (const MachineBasicBlock &MBB : MF) {
 217     for (const MachineInstr &MI : MBB) {
 218       // TODO: CodeSize should account for multiple functions.
 219       CodeSize += MI.getDesc().Size;
 220
 221       unsigned numOperands = MI.getNumOperands();
 222       for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
 223         const MachineOperand &MO = MI.getOperand(op_idx);
 224         unsigned width = 0;
 225         bool isSGPR = false;
 226
 227         if (!MO.isReg()) {
 228           continue;
 229         }
 230         unsigned reg = MO.getReg();
 231         if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
 232             reg == AMDGPU::VCC_HI) {
 233           VCCUsed = true;
 234           continue;
 235         }
 236
 237         switch (reg) {
 238         default: break;
 239         case AMDGPU::SCC:
 240         case AMDGPU::EXEC:
 241         case AMDGPU::M0:
 242           continue;
 243         }
 244
 245         if (AMDGPU::SReg_32RegClass.contains(reg)) {
 246           isSGPR = true;
 247           width = 1;
 248         } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
 249           isSGPR = false;
 250           width = 1;
 251         } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
 252           isSGPR = true;
 253           width = 2;
 254         } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
 255           isSGPR = false;
 256           width = 2;
 257         } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
 258           isSGPR = false;
 259           width = 3;
 260         } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
 261           isSGPR = true;
 262           width = 4;
 263         } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
 264           isSGPR = false;
 265           width = 4;
 266         } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
 267           isSGPR = true;
 268           width = 8;
 269         } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
 270           isSGPR = false;
 271           width = 8;
 272         } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
 273           isSGPR = true;
 274           width = 16;
 275         } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
 276           isSGPR = false;
 277           width = 16;
 278         } else {
 279           llvm_unreachable("Unknown register class");
 280         }
 281         unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
 282         unsigned maxUsed = hwReg + width - 1;
 283         if (isSGPR) {
 284           MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
 285         } else {
 286           MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
 287         }
 288       }
 289     }
 290   }
 291
 292   if (VCCUsed)
 293     MaxSGPR += 2;
 294
 295   ProgInfo.NumVGPR = MaxVGPR;
 296   ProgInfo.NumSGPR = MaxSGPR;
 297
 298   // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
 299   // register.
 300   ProgInfo.FloatMode = getFPMode(MF);
 301
 302   // XXX: Not quite sure what this does, but sc seems to unset this.
 303   ProgInfo.IEEEMode = 0;
 304
 305   // Do not clamp NAN to 0.
 306   ProgInfo.DX10Clamp = 0;
 307
 308   ProgInfo.CodeLen = CodeSize;
 309 }
 310
 311 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
 312                                          const SIProgramInfo &KernelInfo) {
 313   const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
 314   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 315
 316   unsigned RsrcReg;
 317   switch (MFI->getShaderType()) {
 318   default: // Fall through
 319   case ShaderType::COMPUTE:  RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
 320   case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
 321   case ShaderType::PIXEL:    RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
 322   case ShaderType::VERTEX:   RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
 323   }
 324
 325   unsigned LDSAlignShift;
 326   if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
 327     // LDS is allocated in 64 dword blocks.
 328     LDSAlignShift = 8;
 329   } else {
 330     // LDS is allocated in 128 dword blocks.
 331     LDSAlignShift = 9;
 332   }
 333
 334   unsigned LDSBlocks =
 335     RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
 336
 337   if (MFI->getShaderType() == ShaderType::COMPUTE) {
 338     OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
 339
 340     const uint32_t ComputePGMRSrc1 =
 341       S_00B848_VGPRS(KernelInfo.NumVGPR / 4) |
 342       S_00B848_SGPRS(KernelInfo.NumSGPR / 8) |
 343       S_00B848_PRIORITY(KernelInfo.Priority) |
 344       S_00B848_FLOAT_MODE(KernelInfo.FloatMode) |
 345       S_00B848_PRIV(KernelInfo.Priv) |
 346       S_00B848_DX10_CLAMP(KernelInfo.DX10Clamp) |
 347       S_00B848_IEEE_MODE(KernelInfo.DebugMode) |
 348       S_00B848_IEEE_MODE(KernelInfo.IEEEMode);
 349
 350     OutStreamer.EmitIntValue(ComputePGMRSrc1, 4);
 351
 352     OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
 353     OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
 354   } else {
 355     OutStreamer.EmitIntValue(RsrcReg, 4);
 356     OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
 357                              S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
 358   }
 359
 360   if (MFI->getShaderType() == ShaderType::PIXEL) {
 361     OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
 362     OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
 363     OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
 364     OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
 365   }
 366 }