rk: ion: resolve build err
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / arm / t6xx / kbase / src / common / mali_kbase_gpuprops.c
1 /*
2  *
3  * (C) COPYRIGHT ARM Limited. All rights reserved.
4  *
5  * This program is free software and is provided to you under the terms of the
6  * GNU General Public License version 2 as published by the Free Software
7  * Foundation, and any use by you of this program is subject to the terms
8  * of such GNU licence.
9  *
10  * A copy of the licence is included with the program, and can also be obtained
11  * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
12  * Boston, MA  02110-1301, USA.
13  *
14  */
15
16
17
18
19
20 /**
21  * @file mali_kbase_gpuprops.c
22  * Base kernel property query APIs
23  */
24
25 #include <kbase/src/common/mali_kbase.h>
26 #include <kbase/src/common/mali_midg_regmap.h>
27 #include <kbase/src/common/mali_kbase_gpuprops.h>
28
/**
 * @brief Unsigned bitfield extract from a 32-bit word.
 * @hideinitializer
 *
 * @param[in]    value       Word to read the field from (converted to u32).
 * @param[in]    offset      Index of the field's lowest bit (bit 0 is the LSB).
 * @param[in]    size        Width of the field, in bits.
 * @return                   The @a size bits of @a value starting at bit
 *                           @a offset, moved down so they start at bit 0.
 *
 * @pre offset + size <= 32.
 *
 * The mask is computed from a 64-bit one, so a @a size of 32 yields a full
 * 32-bit mask rather than an out-of-range 32-bit shift.
 */
/* from mali_cdsb.h */
#define KBASE_UBFX32(value, offset, size) \
        ((u32)((1ULL << (u32)(size)) - 1) & ((u32)(value) >> (u32)(offset)))
43
44 mali_error kbase_gpuprops_uk_get_props(kbase_context *kctx, kbase_uk_gpuprops * const kbase_props)
45 {
46         kbase_gpuprops_clock_speed_function get_gpu_speed_mhz;
47         u32 gpu_speed_mhz;
48         int rc = 1;
49
50         KBASE_DEBUG_ASSERT(NULL != kctx);
51         KBASE_DEBUG_ASSERT(NULL != kbase_props);
52
53         /* Current GPU speed is requested from the system integrator via the KBASE_CONFIG_ATTR_GPU_SPEED_FUNC function.
54          * If that function fails, or the function is not provided by the system integrator, we report the maximum
55          * GPU speed as specified by KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX.
56          */
57         get_gpu_speed_mhz = (kbase_gpuprops_clock_speed_function) kbasep_get_config_value(kctx->kbdev, kctx->kbdev->config_attributes, KBASE_CONFIG_ATTR_GPU_SPEED_FUNC);
58         if (get_gpu_speed_mhz != NULL) {
59                 rc = get_gpu_speed_mhz(&gpu_speed_mhz);
60 #ifdef CONFIG_MALI_DEBUG
61                 /* Issue a warning message when the reported GPU speed falls outside the min/max range */
62                 if (rc == 0) {
63                         u32 gpu_speed_khz = gpu_speed_mhz * 1000;
64                         if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min || gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
65                                 KBASE_DEBUG_PRINT_WARN(KBASE_CORE, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n", (unsigned long)gpu_speed_khz, (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min, (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
66                 }
67 #endif                          /* CONFIG_MALI_DEBUG */
68         }
69         if (rc != 0)
70                 gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
71
72         kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
73
74         memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
75
76         return MALI_ERROR_NONE;
77 }
78
79 STATIC void kbase_gpuprops_dump_registers(kbase_device *kbdev, kbase_gpuprops_regdump *regdump)
80 {
81         int i;
82
83         KBASE_DEBUG_ASSERT(NULL != kbdev);
84         KBASE_DEBUG_ASSERT(NULL != regdump);
85
86         /* Fill regdump with the content of the relevant registers */
87         regdump->gpu_id = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
88
89         regdump->l2_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES));
90         regdump->l3_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L3_FEATURES));
91         regdump->tiler_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(TILER_FEATURES));
92         regdump->mem_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(MEM_FEATURES));
93         regdump->mmu_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(MMU_FEATURES));
94         regdump->as_present = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(AS_PRESENT));
95         regdump->js_present = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(JS_PRESENT));
96
97         for (i = 0; i < MIDG_MAX_JOB_SLOTS; i++)
98                 regdump->js_features[i] = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(JS_FEATURES_REG(i)));
99
100         for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
101                 regdump->texture_features[i] = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)));
102
103         regdump->thread_max_threads = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(THREAD_MAX_THREADS));
104         regdump->thread_max_workgroup_size = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE));
105         regdump->thread_max_barrier_size = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE));
106         regdump->thread_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(THREAD_FEATURES));
107
108         regdump->shader_present_lo = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_LO));
109         regdump->shader_present_hi = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_HI));
110
111         regdump->tiler_present_lo = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(TILER_PRESENT_LO));
112         regdump->tiler_present_hi = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(TILER_PRESENT_HI));
113
114         regdump->l2_present_lo = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L2_PRESENT_LO));
115         regdump->l2_present_hi = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L2_PRESENT_HI));
116
117         regdump->l3_present_lo = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L3_PRESENT_LO));
118         regdump->l3_present_hi = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L3_PRESENT_HI));
119 }
120
121 STATIC void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
122 {
123         struct mali_base_gpu_coherent_group *current_group;
124         u64 group_present;
125         u64 group_mask;
126         u64 first_set, first_set_prev;
127         u32 num_groups = 0;
128
129         KBASE_DEBUG_ASSERT(NULL != props);
130
131         props->coherency_info.coherency = props->raw_props.mem_features;
132         props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
133
134         if (props->coherency_info.coherency & GROUPS_L3_COHERENT) {
135                 /* Group is l3 coherent */
136                 group_present = props->raw_props.l3_present;
137         } else if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
138                 /* Group is l2 coherent */
139                 group_present = props->raw_props.l2_present;
140         } else {
141                 /* Group is l1 coherent */
142                 group_present = props->raw_props.shader_present;
143         }
144
145         /*
146          * The coherent group mask can be computed from the l2/l3 present
147          * register.
148          *
149          * For the coherent group n:
150          * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
151          * where first_set is group_present with only its nth set-bit kept
152          * (i.e. the position from where a new group starts).
153          *
154          * For instance if the groups are l2 coherent and l2_present=0x0..01111:
155          * The first mask is:
156          * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
157          *               = (0x0..010     - 1) & ~(0x0..01      - 1)
158          *               =  0x0..00f
159          * The second mask is:
160          * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
161          *               = (0x0..100     - 1) & ~(0x0..010     - 1)
162          *               =  0x0..0f0
163          * And so on until all the bits from group_present have been cleared
164          * (i.e. there is no group left).
165          */
166
167         current_group = props->coherency_info.group;
168         first_set = group_present & ~(group_present - 1);
169
170         while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
171                 group_present -= first_set;     /* Clear the current group bit */
172                 first_set_prev = first_set;
173
174                 first_set = group_present & ~(group_present - 1);
175                 group_mask = (first_set - 1) & ~(first_set_prev - 1);
176
177                 /* Populate the coherent_group structure for each group */
178                 current_group->core_mask = group_mask & props->raw_props.shader_present;
179                 current_group->num_cores = hweight64(current_group->core_mask);
180
181                 num_groups++;
182                 current_group++;
183         }
184
185         if (group_present != 0)
186                 KBASE_DEBUG_PRINT_WARN(KBASE_CORE, "Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
187
188         props->coherency_info.num_groups = num_groups;
189 }
190
191 /**
192  * @brief Get the GPU configuration
193  *
194  * Fill the base_gpu_props structure with values from the GPU configuration registers.
195  * Only the raw properties are filled in this function
196  *
197  * @param gpu_props  The base_gpu_props structure
198  * @param kbdev      The kbase_device structure for the device
199  */
200 static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, kbase_device *kbdev)
201 {
202         kbase_gpuprops_regdump regdump;
203         int i;
204
205         KBASE_DEBUG_ASSERT(NULL != kbdev);
206         KBASE_DEBUG_ASSERT(NULL != gpu_props);
207
208         /* Dump relevant registers */
209         kbase_gpuprops_dump_registers(kbdev, &regdump);
210
211         gpu_props->raw_props.gpu_id = regdump.gpu_id;
212         gpu_props->raw_props.tiler_features = regdump.tiler_features;
213         gpu_props->raw_props.mem_features = regdump.mem_features;
214         gpu_props->raw_props.mmu_features = regdump.mmu_features;
215         gpu_props->raw_props.l2_features = regdump.l2_features;
216         gpu_props->raw_props.l3_features = regdump.l3_features;
217
218         gpu_props->raw_props.as_present = regdump.as_present;
219         gpu_props->raw_props.js_present = regdump.js_present;
220         gpu_props->raw_props.shader_present = ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo;
221         gpu_props->raw_props.tiler_present = ((u64) regdump.tiler_present_hi << 32) + regdump.tiler_present_lo;
222         gpu_props->raw_props.l2_present = ((u64) regdump.l2_present_hi << 32) + regdump.l2_present_lo;
223         gpu_props->raw_props.l3_present = ((u64) regdump.l3_present_hi << 32) + regdump.l3_present_lo;
224
225         for (i = 0; i < MIDG_MAX_JOB_SLOTS; i++)
226                 gpu_props->raw_props.js_features[i] = regdump.js_features[i];
227
228         for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
229                 gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
230
231         gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
232         gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
233         gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
234         gpu_props->raw_props.thread_features = regdump.thread_features;
235 }
236
237 /**
238  * @brief Calculate the derived properties
239  *
240  * Fill the base_gpu_props structure with values derived from the GPU configuration registers
241  *
242  * @param gpu_props  The base_gpu_props structure
243  * @param kbdev      The kbase_device structure for the device
244  */
245 static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, kbase_device *kbdev)
246 {
247         int i;
248
249         /* Populate the base_gpu_props structure */
250         gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
251         gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
252         gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
253         gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
254         gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
255         gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
256
257         for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
258                 gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
259
260         gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
261         gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
262         gpu_props->l2_props.num_l2_slices = 1;
263         if (gpu_props->core_props.product_id == GPU_ID_PI_T76X) {
264                 gpu_props->l2_props.num_l2_slices = KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
265         }
266
267         gpu_props->l3_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l3_features, 0U, 8);
268         gpu_props->l3_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l3_features, 16U, 8);
269
270         gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
271         gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
272
273         if (gpu_props->raw_props.thread_max_threads == 0)
274                 gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
275         else
276                 gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
277
278         if (gpu_props->raw_props.thread_max_workgroup_size == 0)
279                 gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
280         else
281                 gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
282
283         if (gpu_props->raw_props.thread_max_barrier_size == 0)
284                 gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
285         else
286                 gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
287
288         gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
289         gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
290         gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
291         gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
292
293         /* If values are not specified, then use defaults */
294         if (gpu_props->thread_props.max_registers == 0) {
295                 gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
296                 gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
297                 gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
298         }
299         /* Initialize the coherent_group structure for each group */
300         kbase_gpuprops_construct_coherent_groups(gpu_props);
301 }
302
303 void kbase_gpuprops_set(kbase_device *kbdev)
304 {
305         kbase_gpu_props *gpu_props;
306         struct midg_raw_gpu_props *raw;
307
308         KBASE_DEBUG_ASSERT(NULL != kbdev);
309         gpu_props = &kbdev->gpu_props;
310         raw = &gpu_props->props.raw_props;
311
312         /* Initialize the base_gpu_props structure from the hardware */
313         kbase_gpuprops_get_props(&gpu_props->props, kbdev);
314
315         /* Populate the derived properties */
316         kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
317
318         /* Populate kbase-only fields */
319         gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
320         gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
321
322         gpu_props->l3_props.associativity = KBASE_UBFX32(raw->l3_features, 8U, 8);
323         gpu_props->l3_props.external_bus_width = KBASE_UBFX32(raw->l3_features, 24U, 8);
324
325         gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
326         gpu_props->mem.supergroup = KBASE_UBFX32(raw->mem_features, 1U, 1);
327
328         gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
329         gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
330
331         gpu_props->num_cores = hweight64(raw->shader_present);
332         gpu_props->num_core_groups = hweight64(raw->l2_present);
333         gpu_props->num_supergroups = hweight64(raw->l3_present);
334         gpu_props->num_address_spaces = hweight32(raw->as_present);
335         gpu_props->num_job_slots = hweight32(raw->js_present);
336 }