AMDGPU/SI: Correctly emit agent global segment variables when targeting HSA
[oota-llvm.git] / lib / Target / AMDGPU / MCTargetDesc / AMDGPUTargetStreamer.cpp
1 //===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file provides AMDGPU specific target streamer methods.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPUTargetStreamer.h"
15 #include "SIDefines.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/MC/MCContext.h"
19 #include "llvm/MC/MCELFStreamer.h"
20 #include "llvm/MC/MCObjectFileInfo.h"
21 #include "llvm/MC/MCSectionELF.h"
22 #include "llvm/Support/ELF.h"
23 #include "llvm/Support/FormattedStream.h"
24
25 using namespace llvm;
26
27 AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
28     : MCTargetStreamer(S) { }
29
30 //===----------------------------------------------------------------------===//
31 // AMDGPUTargetAsmStreamer
32 //===----------------------------------------------------------------------===//
33
34 AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
35                                                  formatted_raw_ostream &OS)
36     : AMDGPUTargetStreamer(S), OS(OS) { }
37
38 void
39 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
40                                                            uint32_t Minor) {
41   OS << "\t.hsa_code_object_version " <<
42         Twine(Major) << "," << Twine(Minor) << '\n';
43 }
44
45 void
46 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
47                                                        uint32_t Minor,
48                                                        uint32_t Stepping,
49                                                        StringRef VendorName,
50                                                        StringRef ArchName) {
51   OS << "\t.hsa_code_object_isa " <<
52         Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
53         ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
54
55 }
56
57 void
58 AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
59   uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32);
60   bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties &
61       AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
62   bool EnableSGPRDispatchPtr = (Header.code_properties &
63       AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
64   bool EnableSGPRQueuePtr = (Header.code_properties &
65       AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
66   bool EnableSGPRKernargSegmentPtr = (Header.code_properties &
67       AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
68   bool EnableSGPRDispatchID = (Header.code_properties &
69       AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
70   bool EnableSGPRFlatScratchInit = (Header.code_properties &
71       AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
72   bool EnableSGPRPrivateSegmentSize = (Header.code_properties &
73       AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
74   bool EnableSGPRGridWorkgroupCountX = (Header.code_properties &
75       AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X);
76   bool EnableSGPRGridWorkgroupCountY = (Header.code_properties &
77       AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y);
78   bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties &
79       AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z);
80   bool EnableOrderedAppendGDS = (Header.code_properties &
81       AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS);
82   uint32_t PrivateElementSize = (Header.code_properties &
83       AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
84           AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
85   bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64);
86   bool IsDynamicCallstack = (Header.code_properties &
87       AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK);
88   bool IsDebugEnabled = (Header.code_properties &
89       AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED);
90   bool IsXNackEnabled = (Header.code_properties &
91       AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);
92
93   OS << "\t.amd_kernel_code_t\n" <<
94     "\t\tkernel_code_version_major = " <<
95         Header.amd_kernel_code_version_major << '\n' <<
96     "\t\tkernel_code_version_minor = " <<
97         Header.amd_kernel_code_version_minor << '\n' <<
98     "\t\tmachine_kind = " <<
99         Header.amd_machine_kind << '\n' <<
100     "\t\tmachine_version_major = " <<
101         Header.amd_machine_version_major << '\n' <<
102     "\t\tmachine_version_minor = " <<
103         Header.amd_machine_version_minor << '\n' <<
104     "\t\tmachine_version_stepping = " <<
105         Header.amd_machine_version_stepping << '\n' <<
106     "\t\tkernel_code_entry_byte_offset = " <<
107         Header.kernel_code_entry_byte_offset << '\n' <<
108     "\t\tkernel_code_prefetch_byte_size = " <<
109         Header.kernel_code_prefetch_byte_size << '\n' <<
110     "\t\tmax_scratch_backing_memory_byte_size = " <<
111         Header.max_scratch_backing_memory_byte_size << '\n' <<
112     "\t\tcompute_pgm_rsrc1_vgprs = " <<
113         G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' <<
114     "\t\tcompute_pgm_rsrc1_sgprs = " <<
115         G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' <<
116     "\t\tcompute_pgm_rsrc1_priority = " <<
117         G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' <<
118     "\t\tcompute_pgm_rsrc1_float_mode = " <<
119         G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' <<
120     "\t\tcompute_pgm_rsrc1_priv = " <<
121         G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' <<
122     "\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
123         G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' <<
124     "\t\tcompute_pgm_rsrc1_debug_mode = " <<
125         G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' <<
126     "\t\tcompute_pgm_rsrc1_ieee_mode = " <<
127         G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' <<
128     "\t\tcompute_pgm_rsrc2_scratch_en = " <<
129         G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' <<
130     "\t\tcompute_pgm_rsrc2_user_sgpr = " <<
131         G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' <<
132     "\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
133         G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' <<
134     "\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
135         G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' <<
136     "\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
137         G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' <<
138     "\t\tcompute_pgm_rsrc2_tg_size_en = " <<
139         G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' <<
140     "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
141         G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' <<
142     "\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
143         G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' <<
144     "\t\tcompute_pgm_rsrc2_lds_size = " <<
145         G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' <<
146     "\t\tcompute_pgm_rsrc2_excp_en = " <<
147         G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' <<
148
149     "\t\tenable_sgpr_private_segment_buffer = " <<
150         EnableSGPRPrivateSegmentBuffer << '\n' <<
151     "\t\tenable_sgpr_dispatch_ptr = " <<
152         EnableSGPRDispatchPtr << '\n' <<
153     "\t\tenable_sgpr_queue_ptr = " <<
154         EnableSGPRQueuePtr << '\n' <<
155     "\t\tenable_sgpr_kernarg_segment_ptr = " <<
156         EnableSGPRKernargSegmentPtr << '\n' <<
157     "\t\tenable_sgpr_dispatch_id = " <<
158         EnableSGPRDispatchID << '\n' <<
159     "\t\tenable_sgpr_flat_scratch_init = " <<
160         EnableSGPRFlatScratchInit << '\n' <<
161     "\t\tenable_sgpr_private_segment_size = " <<
162         EnableSGPRPrivateSegmentSize << '\n' <<
163     "\t\tenable_sgpr_grid_workgroup_count_x = " <<
164         EnableSGPRGridWorkgroupCountX << '\n' <<
165     "\t\tenable_sgpr_grid_workgroup_count_y = " <<
166         EnableSGPRGridWorkgroupCountY << '\n' <<
167     "\t\tenable_sgpr_grid_workgroup_count_z = " <<
168         EnableSGPRGridWorkgroupCountZ << '\n' <<
169     "\t\tenable_ordered_append_gds = " <<
170         EnableOrderedAppendGDS << '\n' <<
171     "\t\tprivate_element_size = " <<
172         PrivateElementSize << '\n' <<
173     "\t\tis_ptr64 = " <<
174         IsPtr64 << '\n' <<
175     "\t\tis_dynamic_callstack = " <<
176         IsDynamicCallstack << '\n' <<
177     "\t\tis_debug_enabled = " <<
178         IsDebugEnabled << '\n' <<
179     "\t\tis_xnack_enabled = " <<
180         IsXNackEnabled << '\n' <<
181     "\t\tworkitem_private_segment_byte_size = " <<
182         Header.workitem_private_segment_byte_size << '\n' <<
183     "\t\tworkgroup_group_segment_byte_size = " <<
184         Header.workgroup_group_segment_byte_size << '\n' <<
185     "\t\tgds_segment_byte_size = " <<
186         Header.gds_segment_byte_size << '\n' <<
187     "\t\tkernarg_segment_byte_size = " <<
188         Header.kernarg_segment_byte_size << '\n' <<
189     "\t\tworkgroup_fbarrier_count = " <<
190         Header.workgroup_fbarrier_count << '\n' <<
191     "\t\twavefront_sgpr_count = " <<
192         Header.wavefront_sgpr_count << '\n' <<
193     "\t\tworkitem_vgpr_count = " <<
194         Header.workitem_vgpr_count << '\n' <<
195     "\t\treserved_vgpr_first = " <<
196         Header.reserved_vgpr_first << '\n' <<
197     "\t\treserved_vgpr_count = " <<
198         Header.reserved_vgpr_count << '\n' <<
199     "\t\treserved_sgpr_first = " <<
200         Header.reserved_sgpr_first << '\n' <<
201     "\t\treserved_sgpr_count = " <<
202         Header.reserved_sgpr_count << '\n' <<
203     "\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
204         Header.debug_wavefront_private_segment_offset_sgpr << '\n' <<
205     "\t\tdebug_private_segment_buffer_sgpr = " <<
206         Header.debug_private_segment_buffer_sgpr << '\n' <<
207     "\t\tkernarg_segment_alignment = " <<
208         (uint32_t)Header.kernarg_segment_alignment << '\n' <<
209     "\t\tgroup_segment_alignment = " <<
210         (uint32_t)Header.group_segment_alignment << '\n' <<
211     "\t\tprivate_segment_alignment = " <<
212         (uint32_t)Header.private_segment_alignment << '\n' <<
213     "\t\twavefront_size = " <<
214         (uint32_t)Header.wavefront_size << '\n' <<
215     "\t\tcall_convention = " <<
216         Header.call_convention << '\n' <<
217     "\t\truntime_loader_kernel_symbol = " <<
218         Header.runtime_loader_kernel_symbol << '\n' <<
219     // TODO: control_directives
220     "\t.end_amd_kernel_code_t\n";
221
222 }
223
224 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
225                                                    unsigned Type) {
226   switch (Type) {
227     default: llvm_unreachable("Invalid AMDGPU symbol type");
228     case ELF::STT_AMDGPU_HSA_KERNEL:
229       OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
230       break;
231   }
232 }
233
234 void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal(
235     StringRef GlobalName) {
236   OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n';
237 }
238
239 void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
240     StringRef GlobalName) {
241   OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
242 }
243
244 //===----------------------------------------------------------------------===//
245 // AMDGPUTargetELFStreamer
246 //===----------------------------------------------------------------------===//
247
248 AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S)
249     : AMDGPUTargetStreamer(S), Streamer(S) { }
250
251 MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
252   return static_cast<MCELFStreamer &>(Streamer);
253 }
254
255 void
256 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
257                                                            uint32_t Minor) {
258   MCStreamer &OS = getStreamer();
259   MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
260
261   unsigned NameSZ = 4;
262
263   OS.PushSection();
264   OS.SwitchSection(Note);
265   OS.EmitIntValue(NameSZ, 4);                            // namesz
266   OS.EmitIntValue(8, 4);                                 // descz
267   OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type
268   OS.EmitBytes(StringRef("AMD", NameSZ));                // name
269   OS.EmitIntValue(Major, 4);                             // desc
270   OS.EmitIntValue(Minor, 4);
271   OS.EmitValueToAlignment(4);
272   OS.PopSection();
273 }
274
275 void
276 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
277                                                        uint32_t Minor,
278                                                        uint32_t Stepping,
279                                                        StringRef VendorName,
280                                                        StringRef ArchName) {
281   MCStreamer &OS = getStreamer();
282   MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
283
284   unsigned NameSZ = 4;
285   uint16_t VendorNameSize = VendorName.size() + 1;
286   uint16_t ArchNameSize = ArchName.size() + 1;
287   unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
288                     sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
289                     VendorNameSize + ArchNameSize;
290
291   OS.PushSection();
292   OS.SwitchSection(Note);
293   OS.EmitIntValue(NameSZ, 4);                            // namesz
294   OS.EmitIntValue(DescSZ, 4);                            // descsz
295   OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4);                 // type
296   OS.EmitBytes(StringRef("AMD", 4));                     // name
297   OS.EmitIntValue(VendorNameSize, 2);                    // desc
298   OS.EmitIntValue(ArchNameSize, 2);
299   OS.EmitIntValue(Major, 4);
300   OS.EmitIntValue(Minor, 4);
301   OS.EmitIntValue(Stepping, 4);
302   OS.EmitBytes(VendorName);
303   OS.EmitIntValue(0, 1); // NULL terminate VendorName
304   OS.EmitBytes(ArchName);
305   OS.EmitIntValue(0, 1); // NULL terminte ArchName
306   OS.EmitValueToAlignment(4);
307   OS.PopSection();
308 }
309
310 void
311 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
312
313   MCStreamer &OS = getStreamer();
314   OS.PushSection();
315   // The MCObjectFileInfo that is available to the assembler is a generic
316   // implementation and not AMDGPUHSATargetObjectFile, so we can't use
317   // MCObjectFileInfo::getTextSection() here for fetching the HSATextSection.
318   OS.SwitchSection(AMDGPU::getHSATextSection(OS.getContext()));
319   OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
320   OS.PopSection();
321 }
322
323 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
324                                                    unsigned Type) {
325   MCSymbolELF *Symbol = cast<MCSymbolELF>(
326       getStreamer().getContext().getOrCreateSymbol(SymbolName));
327   Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
328 }
329
330 void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal(
331     StringRef GlobalName) {
332
333   MCSymbolELF *Symbol = cast<MCSymbolELF>(
334       getStreamer().getContext().getOrCreateSymbol(GlobalName));
335   Symbol->setType(ELF::STT_OBJECT);
336   Symbol->setBinding(ELF::STB_LOCAL);
337 }
338
339 void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
340     StringRef GlobalName) {
341
342   MCSymbolELF *Symbol = cast<MCSymbolELF>(
343       getStreamer().getContext().getOrCreateSymbol(GlobalName));
344   Symbol->setType(ELF::STT_OBJECT);
345   Symbol->setBinding(ELF::STB_GLOBAL);
346 }