1 //===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file provides AMDGPU specific target streamer methods.
12 //===----------------------------------------------------------------------===//
14 #include "AMDGPUTargetStreamer.h"
15 #include "SIDefines.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCELFStreamer.h"
19 #include "llvm/MC/MCObjectFileInfo.h"
20 #include "llvm/MC/MCSectionELF.h"
21 #include "llvm/Support/ELF.h"
22 #include "llvm/Support/FormattedStream.h"
// Constructs the base AMDGPU target streamer by forwarding the MCStreamer
// reference S to the MCTargetStreamer base-class constructor. The body is
// empty; no other state is initialized here.
26 AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
27 : MCTargetStreamer(S) { }
29 //===----------------------------------------------------------------------===//
30 // AMDGPUTargetAsmStreamer
31 //===----------------------------------------------------------------------===//
// Constructs the textual target streamer: forwards S to the
// AMDGPUTargetStreamer base and initializes the OS member from the
// formatted_raw_ostream argument of the same name.
33 AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
34 formatted_raw_ostream &OS)
35 : AMDGPUTargetStreamer(S), OS(OS) { }
// Writes one directive line to OS of the form
//   <tab>.hsa_code_object_version <Major>,<Minor><newline>
// NOTE(review): the return-type line, the rest of the parameter list (Minor)
// and the closing brace are not visible in this excerpt — confirm against the
// full file.
38 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
// Major and Minor are wrapped in Twine before being streamed.
40 OS << "\t.hsa_code_object_version " <<
41 Twine(Major) << "," << Twine(Minor) << '\n';
// Writes one directive line to OS of the form
//   <tab>.hsa_code_object_isa <Major>,<Minor>,<Stepping>,"<VendorName>","<ArchName>"<newline>
// VendorName and ArchName are wrapped in double quotes in the output; they
// are streamed as-is, with no escaping applied here.
// NOTE(review): the return-type line, the remaining parameters (Minor,
// Stepping, VendorName, ArchName) and the closing brace are not visible in
// this excerpt — confirm against the full file.
45 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
50 OS << "\t.hsa_code_object_isa " <<
51 Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
52 ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
// Prints Header to OS as a textual block: an opening ".amd_kernel_code_t"
// line, one "<field> = <value>" line per field (each indented by two tabs),
// and a closing ".end_amd_kernel_code_t" line.
//
// Packed values are decoded into locals first (the upper half of
// compute_pgm_resource_registers and the individual code_properties bits),
// then everything is streamed in a single chained << expression.
//
// NOTE(review): the return-type line and the closing brace of this definition
// are not visible in this excerpt — confirm against the full file.
57 AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
// Bits 32 and up of compute_pgm_resource_registers. The G_00B84C_* accessors
// below are applied to this value; the G_00B848_* accessors are applied to
// the unshifted field.
58 uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32);
// Decode code_properties: each bool below is true when the bit(s) selected by
// the corresponding AMD_CODE_PROPERTY_* mask are set.
59 bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties &
60 AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
61 bool EnableSGPRDispatchPtr = (Header.code_properties &
62 AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
63 bool EnableSGPRQueuePtr = (Header.code_properties &
64 AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
65 bool EnableSGPRKernargSegmentPtr = (Header.code_properties &
66 AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
67 bool EnableSGPRDispatchID = (Header.code_properties &
68 AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
69 bool EnableSGPRFlatScratchInit = (Header.code_properties &
70 AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
71 bool EnableSGPRPrivateSegmentSize = (Header.code_properties &
72 AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
73 bool EnableSGPRGridWorkgroupCountX = (Header.code_properties &
74 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X);
75 bool EnableSGPRGridWorkgroupCountY = (Header.code_properties &
76 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y);
77 bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties &
78 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z);
79 bool EnableOrderedAppendGDS = (Header.code_properties &
80 AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS);
// Unlike the flags above, this value is kept as an integer: it is masked and
// then shifted down by its _SHIFT constant.
81 uint32_t PrivateElementSize = (Header.code_properties &
82 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
83 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
// NOTE(review): IsPtr64 is computed here but no "is_ptr64" line is printed in
// the visible part of the output chain below — confirm against the full file
// whether the corresponding print lines exist.
84 bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64);
85 bool IsDynamicCallstack = (Header.code_properties &
86 AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK);
87 bool IsDebugEnabled = (Header.code_properties &
88 AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED);
89 bool IsXNackEnabled = (Header.code_properties &
90 AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);
// Single chained output expression; every field line ends with '\n'.
92 OS << "\t.amd_kernel_code_t\n" <<
// Fields streamed directly from Header.
93 "\t\tkernel_code_version_major = " <<
94 Header.amd_kernel_code_version_major << '\n' <<
95 "\t\tkernel_code_version_minor = " <<
96 Header.amd_kernel_code_version_minor << '\n' <<
97 "\t\tmachine_kind = " <<
98 Header.amd_machine_kind << '\n' <<
99 "\t\tmachine_version_major = " <<
100 Header.amd_machine_version_major << '\n' <<
101 "\t\tmachine_version_minor = " <<
102 Header.amd_machine_version_minor << '\n' <<
103 "\t\tmachine_version_stepping = " <<
104 Header.amd_machine_version_stepping << '\n' <<
105 "\t\tkernel_code_entry_byte_offset = " <<
106 Header.kernel_code_entry_byte_offset << '\n' <<
107 "\t\tkernel_code_prefetch_byte_size = " <<
108 Header.kernel_code_prefetch_byte_size << '\n' <<
109 "\t\tmax_scratch_backing_memory_byte_size = " <<
110 Header.max_scratch_backing_memory_byte_size << '\n' <<
// compute_pgm_rsrc1_* fields: G_00B848_* accessors applied to the unshifted
// compute_pgm_resource_registers value.
111 "\t\tcompute_pgm_rsrc1_vgprs = " <<
112 G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' <<
113 "\t\tcompute_pgm_rsrc1_sgprs = " <<
114 G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' <<
115 "\t\tcompute_pgm_rsrc1_priority = " <<
116 G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' <<
117 "\t\tcompute_pgm_rsrc1_float_mode = " <<
118 G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' <<
119 "\t\tcompute_pgm_rsrc1_priv = " <<
120 G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' <<
121 "\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
122 G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' <<
123 "\t\tcompute_pgm_rsrc1_debug_mode = " <<
124 G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' <<
125 "\t\tcompute_pgm_rsrc1_ieee_mode = " <<
126 G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' <<
// compute_pgm_rsrc2_* fields: G_00B84C_* accessors applied to the shifted
// ComputePgmRsrc2 value computed at the top of the function.
127 "\t\tcompute_pgm_rsrc2_scratch_en = " <<
128 G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' <<
129 "\t\tcompute_pgm_rsrc2_user_sgpr = " <<
130 G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' <<
131 "\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
132 G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' <<
133 "\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
134 G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' <<
135 "\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
136 G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' <<
137 "\t\tcompute_pgm_rsrc2_tg_size_en = " <<
138 G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' <<
139 "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
140 G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' <<
141 "\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
142 G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' <<
143 "\t\tcompute_pgm_rsrc2_lds_size = " <<
144 G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' <<
145 "\t\tcompute_pgm_rsrc2_excp_en = " <<
146 G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' <<
// Values decoded from code_properties at the top of the function.
148 "\t\tenable_sgpr_private_segment_buffer = " <<
149 EnableSGPRPrivateSegmentBuffer << '\n' <<
150 "\t\tenable_sgpr_dispatch_ptr = " <<
151 EnableSGPRDispatchPtr << '\n' <<
152 "\t\tenable_sgpr_queue_ptr = " <<
153 EnableSGPRQueuePtr << '\n' <<
154 "\t\tenable_sgpr_kernarg_segment_ptr = " <<
155 EnableSGPRKernargSegmentPtr << '\n' <<
156 "\t\tenable_sgpr_dispatch_id = " <<
157 EnableSGPRDispatchID << '\n' <<
158 "\t\tenable_sgpr_flat_scratch_init = " <<
159 EnableSGPRFlatScratchInit << '\n' <<
160 "\t\tenable_sgpr_private_segment_size = " <<
161 EnableSGPRPrivateSegmentSize << '\n' <<
162 "\t\tenable_sgpr_grid_workgroup_count_x = " <<
163 EnableSGPRGridWorkgroupCountX << '\n' <<
164 "\t\tenable_sgpr_grid_workgroup_count_y = " <<
165 EnableSGPRGridWorkgroupCountY << '\n' <<
166 "\t\tenable_sgpr_grid_workgroup_count_z = " <<
167 EnableSGPRGridWorkgroupCountZ << '\n' <<
168 "\t\tenable_ordered_append_gds = " <<
169 EnableOrderedAppendGDS << '\n' <<
170 "\t\tprivate_element_size = " <<
171 PrivateElementSize << '\n' <<
174 "\t\tis_dynamic_callstack = " <<
175 IsDynamicCallstack << '\n' <<
176 "\t\tis_debug_enabled = " <<
177 IsDebugEnabled << '\n' <<
178 "\t\tis_xnack_enabled = " <<
179 IsXNackEnabled << '\n' <<
// Remaining fields are streamed directly from Header.
180 "\t\tworkitem_private_segment_byte_size = " <<
181 Header.workitem_private_segment_byte_size << '\n' <<
182 "\t\tworkgroup_group_segment_byte_size = " <<
183 Header.workgroup_group_segment_byte_size << '\n' <<
184 "\t\tgds_segment_byte_size = " <<
185 Header.gds_segment_byte_size << '\n' <<
186 "\t\tkernarg_segment_byte_size = " <<
187 Header.kernarg_segment_byte_size << '\n' <<
188 "\t\tworkgroup_fbarrier_count = " <<
189 Header.workgroup_fbarrier_count << '\n' <<
190 "\t\twavefront_sgpr_count = " <<
191 Header.wavefront_sgpr_count << '\n' <<
192 "\t\tworkitem_vgpr_count = " <<
193 Header.workitem_vgpr_count << '\n' <<
194 "\t\treserved_vgpr_first = " <<
195 Header.reserved_vgpr_first << '\n' <<
196 "\t\treserved_vgpr_count = " <<
197 Header.reserved_vgpr_count << '\n' <<
198 "\t\treserved_sgpr_first = " <<
199 Header.reserved_sgpr_first << '\n' <<
200 "\t\treserved_sgpr_count = " <<
201 Header.reserved_sgpr_count << '\n' <<
202 "\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
203 Header.debug_wavefront_private_segment_offset_sgpr << '\n' <<
204 "\t\tdebug_private_segment_buffer_sgpr = " <<
205 Header.debug_private_segment_buffer_sgpr << '\n' <<
// The next four fields are explicitly cast to uint32_t before printing —
// presumably they are byte-sized and would otherwise be streamed as
// characters; TODO confirm against the amd_kernel_code_t declaration.
206 "\t\tkernarg_segment_alignment = " <<
207 (uint32_t)Header.kernarg_segment_alignment << '\n' <<
208 "\t\tgroup_segment_alignment = " <<
209 (uint32_t)Header.group_segment_alignment << '\n' <<
210 "\t\tprivate_segment_alignment = " <<
211 (uint32_t)Header.private_segment_alignment << '\n' <<
212 "\t\twavefront_size = " <<
213 (uint32_t)Header.wavefront_size << '\n' <<
214 "\t\tcall_convention = " <<
215 Header.call_convention << '\n' <<
216 "\t\truntime_loader_kernel_symbol = " <<
217 Header.runtime_loader_kernel_symbol << '\n' <<
218 // TODO: control_directives
219 "\t.end_amd_kernel_code_t\n";
223 //===----------------------------------------------------------------------===//
224 // AMDGPUTargetELFStreamer
225 //===----------------------------------------------------------------------===//
// Constructs the ELF target streamer: forwards S to the AMDGPUTargetStreamer
// base and also initializes the Streamer member from S, which getStreamer()
// later casts to MCELFStreamer.
227 AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S)
228 : AMDGPUTargetStreamer(S), Streamer(S) { }
// Returns the Streamer member as an MCELFStreamer reference. The conversion
// is a static_cast, so no runtime type check is performed here.
// NOTE(review): the closing brace of this definition is not visible in this
// excerpt — confirm against the full file.
230 MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
231 return static_cast<MCELFStreamer &>(Streamer);
// Emits a note record carrying the code object version into the ".note"
// section (ELF::SHT_NOTE, flags 0). The record consists of three 4-byte
// header words (namesz, descsz, type), the name bytes, and a descriptor made
// of Major and Minor as 4-byte values.
// NOTE(review): the return-type line, the rest of the parameter list (Minor),
// the declaration of NameSZ and the closing brace are not visible in this
// excerpt; other statements may be missing as well — confirm against the full
// file.
235 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
237 MCStreamer &OS = getStreamer();
238 MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
243 OS.SwitchSection(Note);
244 OS.EmitIntValue(NameSZ, 4); // namesz
// Descriptor size is 8: the two 4-byte values (Major, Minor) emitted below.
245 OS.EmitIntValue(8, 4); // descsz
246 OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type
// NameSZ bytes are taken starting at the "AMD" literal. The ISA variant of
// this routine passes a literal 4 here, which includes the literal's trailing
// NUL — presumably NameSZ is 4 as well; TODO confirm.
247 OS.EmitBytes(StringRef("AMD", NameSZ)); // name
248 OS.EmitIntValue(Major, 4); // desc
249 OS.EmitIntValue(Minor, 4);
250 OS.EmitValueToAlignment(4);
// Emits a note record describing the ISA into the ".note" section
// (ELF::SHT_NOTE, flags 0). After the three 4-byte header words (namesz,
// descsz, type) and the name bytes, the descriptor is laid out as:
//   2 bytes  VendorNameSize (string length + 1)
//   2 bytes  ArchNameSize   (string length + 1)
//   4 bytes  Major
//   4 bytes  Minor
//   4 bytes  Stepping
//   VendorName bytes followed by a 0 byte
//   ArchName bytes followed by a 0 byte
// NOTE(review): the return-type line, the Minor/Stepping parameter lines, the
// declaration of NameSZ and the closing brace are not visible in this
// excerpt; other statements may be missing as well — confirm against the full
// file.
255 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
258 StringRef VendorName,
259 StringRef ArchName) {
260 MCStreamer &OS = getStreamer();
261 MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
// The +1 accounts for the 0 byte emitted after each string below.
// NOTE(review): the sizes are narrowed to uint16_t, so a name of 65535 bytes
// or more would wrap.
264 uint16_t VendorNameSize = VendorName.size() + 1;
265 uint16_t ArchNameSize = ArchName.size() + 1;
// Descriptor size: the two size fields, the three version fields, and both
// terminated strings.
266 unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
267 sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
268 VendorNameSize + ArchNameSize;
271 OS.SwitchSection(Note);
272 OS.EmitIntValue(NameSZ, 4); // namesz
273 OS.EmitIntValue(DescSZ, 4); // descsz
274 OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4); // type
// Length 4 includes the trailing NUL of the "AMD" literal.
275 OS.EmitBytes(StringRef("AMD", 4)); // name
276 OS.EmitIntValue(VendorNameSize, 2); // desc
277 OS.EmitIntValue(ArchNameSize, 2);
278 OS.EmitIntValue(Major, 4);
279 OS.EmitIntValue(Minor, 4);
280 OS.EmitIntValue(Stepping, 4);
281 OS.EmitBytes(VendorName);
282 OS.EmitIntValue(0, 1); // NULL terminate VendorName
283 OS.EmitBytes(ArchName);
284 OS.EmitIntValue(0, 1); // NULL terminate ArchName
285 OS.EmitValueToAlignment(4);
// Switches to the text section obtained from the context's MCObjectFileInfo
// and emits Header there as raw bytes: sizeof(Header) bytes read directly
// from the object's in-memory representation, with no per-field
// serialization.
// NOTE(review): the return-type line and the closing brace are not visible in
// this excerpt; other statements may be missing as well — confirm against the
// full file.
290 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
292 MCStreamer &OS = getStreamer();
294 OS.SwitchSection(OS.getContext().getObjectFileInfo()->getTextSection());
295 OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));