63b94174224963bc8ea85f96651a2fb188276ba0
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / arm / mali400 / mali / common / mali_kernel_utilization.c
1 /*
2  * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
3  * 
4  * This program is free software and is provided to you under the terms of the GNU General Public License version 2
5  * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
6  * 
7  * A copy of the licence is included with the program, and can also be obtained from Free Software
8  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
9  */
10
11 #include "mali_kernel_utilization.h"
12 #include "mali_osk.h"
13 #include "mali_osk_mali.h"
14 #include "mali_kernel_common.h"
15 #include "mali_session.h"
16 #include "mali_scheduler.h"
17
18 #include "mali_executor.h"
19 #include "mali_dvfs_policy.h"
20 #include "mali_control_timer.h"
21
/*
 * Thresholds for GP bound detection.
 * Utilization values are expressed in parts of 256. The GP-bound hint is
 * enabled when GP utilization is above the first threshold while PP
 * utilization is below the second one.
 */
#define MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD 240
#define MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD 250

/* Spinlock protecting the counters and timestamps below */
static _mali_osk_spinlock_irq_t *utilization_data_lock;

/* Number of GP / PP cores currently marked as running */
static u32 num_running_gp_cores = 0;
static u32 num_running_pp_cores = 0;

/*
 * Timestamps (from _mali_osk_time_get_ns()) at which the current busy
 * stretch started; zero means the unit is currently idle.
 */
static u64 work_start_time_gpu = 0;
static u64 work_start_time_gp = 0;
static u64 work_start_time_pp = 0;
/* Busy time accumulated so far in the current sampling period */
static u64 accumulated_work_time_gpu = 0;
static u64 accumulated_work_time_gp = 0;
static u64 accumulated_work_time_pp = 0;

/* Utilization (in parts of 256) computed for the most recent period */
static u32 last_utilization_gpu = 0 ;
static u32 last_utilization_gp = 0 ;
static u32 last_utilization_pp = 0 ;

/* Optional platform handler; installed by mali_utilization_init() */
void (*mali_utilization_callback)(struct mali_gpu_utilization_data *data) = NULL;

/* Timeout, in milliseconds, of the first control timer period after a resume */
static u32 mali_control_first_timeout = 100;
/* Result buffer handed back to callers of mali_utilization_calculate() */
static struct mali_gpu_utilization_data mali_util_data = {0, };
47
48 struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer)
49 {
50         u64 time_now;
51         u32 leading_zeroes;
52         u32 shift_val;
53         u32 work_normalized_gpu;
54         u32 work_normalized_gp;
55         u32 work_normalized_pp;
56         u32 period_normalized;
57         u32 utilization_gpu;
58         u32 utilization_gp;
59         u32 utilization_pp;
60
61         mali_utilization_data_lock();
62
63         time_now = _mali_osk_time_get_ns();
64
65         *time_period = time_now - *start_time;
66
67         if (accumulated_work_time_gpu == 0 && work_start_time_gpu == 0) {
68                 mali_control_timer_pause();
69                 /*
70                  * No work done for this period
71                  * - No need to reschedule timer
72                  * - Report zero usage
73                  */
74                 last_utilization_gpu = 0;
75                 last_utilization_gp = 0;
76                 last_utilization_pp = 0;
77
78                 mali_util_data.utilization_gpu = last_utilization_gpu;
79                 mali_util_data.utilization_gp = last_utilization_gp;
80                 mali_util_data.utilization_pp = last_utilization_pp;
81
82                 mali_utilization_data_unlock();
83
84                 *need_add_timer = MALI_FALSE;
85
86                 mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
87
88                 MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
89                 MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
90                 MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
91
92                 return &mali_util_data;
93         }
94
95         /* If we are currently busy, update working period up to now */
96         if (work_start_time_gpu != 0) {
97                 accumulated_work_time_gpu += (time_now - work_start_time_gpu);
98                 work_start_time_gpu = time_now;
99
100                 /* GP and/or PP will also be busy if the GPU is busy at this point */
101
102                 if (work_start_time_gp != 0) {
103                         accumulated_work_time_gp += (time_now - work_start_time_gp);
104                         work_start_time_gp = time_now;
105                 }
106
107                 if (work_start_time_pp != 0) {
108                         accumulated_work_time_pp += (time_now - work_start_time_pp);
109                         work_start_time_pp = time_now;
110                 }
111         }
112
113         /*
114          * We have two 64-bit values, a dividend and a divisor.
115          * To avoid dependencies to a 64-bit divider, we shift down the two values
116          * equally first.
117          * We shift the dividend up and possibly the divisor down, making the result X in 256.
118          */
119
120         /* Shift the 64-bit values down so they fit inside a 32-bit integer */
121         leading_zeroes = _mali_osk_clz((u32)(*time_period >> 32));
122         shift_val = 32 - leading_zeroes;
123         work_normalized_gpu = (u32)(accumulated_work_time_gpu >> shift_val);
124         work_normalized_gp = (u32)(accumulated_work_time_gp >> shift_val);
125         work_normalized_pp = (u32)(accumulated_work_time_pp >> shift_val);
126         period_normalized = (u32)(*time_period >> shift_val);
127
128         /*
129          * Now, we should report the usage in parts of 256
130          * this means we must shift up the dividend or down the divisor by 8
131          * (we could do a combination, but we just use one for simplicity,
132          * but the end result should be good enough anyway)
133          */
134         if (period_normalized > 0x00FFFFFF) {
135                 /* The divisor is so big that it is safe to shift it down */
136                 period_normalized >>= 8;
137         } else {
138                 /*
139                  * The divisor is so small that we can shift up the dividend, without loosing any data.
140                  * (dividend is always smaller than the divisor)
141                  */
142                 work_normalized_gpu <<= 8;
143                 work_normalized_gp <<= 8;
144                 work_normalized_pp <<= 8;
145         }
146
147         utilization_gpu = work_normalized_gpu / period_normalized;
148         utilization_gp = work_normalized_gp / period_normalized;
149         utilization_pp = work_normalized_pp / period_normalized;
150
151         last_utilization_gpu = utilization_gpu;
152         last_utilization_gp = utilization_gp;
153         last_utilization_pp = utilization_pp;
154
155         if ((MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD < last_utilization_gp) &&
156             (MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD > last_utilization_pp)) {
157                 mali_executor_hint_enable(MALI_EXECUTOR_HINT_GP_BOUND);
158         } else {
159                 mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
160         }
161
162         /* starting a new period */
163         accumulated_work_time_gpu = 0;
164         accumulated_work_time_gp = 0;
165         accumulated_work_time_pp = 0;
166
167         *start_time = time_now;
168
169         mali_util_data.utilization_gp = last_utilization_gp;
170         mali_util_data.utilization_gpu = last_utilization_gpu;
171         mali_util_data.utilization_pp = last_utilization_pp;
172
173         mali_utilization_data_unlock();
174
175         *need_add_timer = MALI_TRUE;
176
177         MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
178         MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
179         MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
180
181         return &mali_util_data;
182 }
183
184 _mali_osk_errcode_t mali_utilization_init(void)
185 {
186 #if USING_GPU_UTILIZATION
187         _mali_osk_device_data data;
188
189         if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
190                 if (NULL != data.utilization_callback) {
191                         mali_utilization_callback = data.utilization_callback;
192                         MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Utilization handler installed \n"));
193                 }
194         }
195 #endif /* defined(USING_GPU_UTILIZATION) */
196
197         if (NULL == mali_utilization_callback) {
198                 MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: No platform utilization handler installed\n"));
199         }
200
201         utilization_data_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION);
202         if (NULL == utilization_data_lock) {
203                 return _MALI_OSK_ERR_FAULT;
204         }
205
206         num_running_gp_cores = 0;
207         num_running_pp_cores = 0;
208
209         return _MALI_OSK_ERR_OK;
210 }
211
212 void mali_utilization_term(void)
213 {
214         if (NULL != utilization_data_lock) {
215                 _mali_osk_spinlock_irq_term(utilization_data_lock);
216         }
217 }
218
219 void mali_utilization_gp_start(void)
220 {
221         mali_utilization_data_lock();
222
223         ++num_running_gp_cores;
224         if (1 == num_running_gp_cores) {
225                 u64 time_now = _mali_osk_time_get_ns();
226
227                 /* First GP core started, consider GP busy from now and onwards */
228                 work_start_time_gp = time_now;
229
230                 if (0 == num_running_pp_cores) {
231                         mali_bool is_resume = MALI_FALSE;
232                         /*
233                          * There are no PP cores running, so this is also the point
234                          * at which we consider the GPU to be busy as well.
235                          */
236                         work_start_time_gpu = time_now;
237
238                         is_resume  = mali_control_timer_resume(time_now);
239
240                         mali_utilization_data_unlock();
241
242                         if (is_resume) {
243                                 /* Do some policy in new period for performance consideration */
244 #if defined(CONFIG_MALI_DVFS)
245                                 /* Clear session->number_of_window_jobs, prepare parameter for dvfs */
246                                 mali_session_max_window_num();
247                                 if (0 == last_utilization_gpu) {
248                                         /*
249                                          * for mali_dev_pause is called in set clock,
250                                          * so each time we change clock, we will set clock to
251                                          * highest step even if under down clock case,
252                                          * it is not nessesary, so we only set the clock under
253                                          * last time utilization equal 0, we stop the timer then
254                                          * start the GPU again case
255                                          */
256                                         mali_dvfs_policy_new_period();
257                                 }
258 #endif
259                                 /*
260                                  * First timeout using short interval for power consideration
261                                  * because we give full power in the new period, but if the
262                                  * job loading is light, finish in 10ms, the other time all keep
263                                  * in high freq it will wast time.
264                                  */
265                                 mali_control_timer_add(mali_control_first_timeout);
266                         }
267                 } else {
268                         mali_utilization_data_unlock();
269                 }
270
271         } else {
272                 /* Nothing to do */
273                 mali_utilization_data_unlock();
274         }
275 }
276
277 void mali_utilization_pp_start(void)
278 {
279         mali_utilization_data_lock();
280
281         ++num_running_pp_cores;
282         if (1 == num_running_pp_cores) {
283                 u64 time_now = _mali_osk_time_get_ns();
284
285                 /* First PP core started, consider PP busy from now and onwards */
286                 work_start_time_pp = time_now;
287
288                 if (0 == num_running_gp_cores) {
289                         mali_bool is_resume = MALI_FALSE;
290                         /*
291                          * There are no GP cores running, so this is also the point
292                          * at which we consider the GPU to be busy as well.
293                          */
294                         work_start_time_gpu = time_now;
295
296                         /* Start a new period if stoped */
297                         is_resume = mali_control_timer_resume(time_now);
298
299                         mali_utilization_data_unlock();
300
301                         if (is_resume) {
302 #if defined(CONFIG_MALI_DVFS)
303                                 /* Clear session->number_of_window_jobs, prepare parameter for dvfs */
304                                 mali_session_max_window_num();
305                                 if (0 == last_utilization_gpu) {
306                                         /*
307                                          * for mali_dev_pause is called in set clock,
308                                          * so each time we change clock, we will set clock to
309                                          * highest step even if under down clock case,
310                                          * it is not nessesary, so we only set the clock under
311                                          * last time utilization equal 0, we stop the timer then
312                                          * start the GPU again case
313                                          */
314                                         mali_dvfs_policy_new_period();
315                                 }
316 #endif
317
318                                 /*
319                                  * First timeout using short interval for power consideration
320                                  * because we give full power in the new period, but if the
321                                  * job loading is light, finish in 10ms, the other time all keep
322                                  * in high freq it will wast time.
323                                  */
324                                 mali_control_timer_add(mali_control_first_timeout);
325                         }
326                 } else {
327                         mali_utilization_data_unlock();
328                 }
329         } else {
330                 /* Nothing to do */
331                 mali_utilization_data_unlock();
332         }
333 }
334
335 void mali_utilization_gp_end(void)
336 {
337         mali_utilization_data_lock();
338
339         --num_running_gp_cores;
340         if (0 == num_running_gp_cores) {
341                 u64 time_now = _mali_osk_time_get_ns();
342
343                 /* Last GP core ended, consider GP idle from now and onwards */
344                 accumulated_work_time_gp += (time_now - work_start_time_gp);
345                 work_start_time_gp = 0;
346
347                 if (0 == num_running_pp_cores) {
348                         /*
349                          * There are no PP cores running, so this is also the point
350                          * at which we consider the GPU to be idle as well.
351                          */
352                         accumulated_work_time_gpu += (time_now - work_start_time_gpu);
353                         work_start_time_gpu = 0;
354                 }
355         }
356
357         mali_utilization_data_unlock();
358 }
359
360 void mali_utilization_pp_end(void)
361 {
362         mali_utilization_data_lock();
363
364         --num_running_pp_cores;
365         if (0 == num_running_pp_cores) {
366                 u64 time_now = _mali_osk_time_get_ns();
367
368                 /* Last PP core ended, consider PP idle from now and onwards */
369                 accumulated_work_time_pp += (time_now - work_start_time_pp);
370                 work_start_time_pp = 0;
371
372                 if (0 == num_running_gp_cores) {
373                         /*
374                          * There are no GP cores running, so this is also the point
375                          * at which we consider the GPU to be idle as well.
376                          */
377                         accumulated_work_time_gpu += (time_now - work_start_time_gpu);
378                         work_start_time_gpu = 0;
379                 }
380         }
381
382         mali_utilization_data_unlock();
383 }
384
385 mali_bool mali_utilization_enabled(void)
386 {
387 #if defined(CONFIG_MALI_DVFS)
388         return mali_dvfs_policy_enabled();
389 #else
390         return (NULL != mali_utilization_callback);
391 #endif /* defined(CONFIG_MALI_DVFS) */
392 }
393
394 void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data)
395 {
396         MALI_DEBUG_ASSERT_POINTER(mali_utilization_callback);
397
398         mali_utilization_callback(util_data);
399 }
400
401 void mali_utilization_reset(void)
402 {
403         accumulated_work_time_gpu = 0;
404         accumulated_work_time_gp = 0;
405         accumulated_work_time_pp = 0;
406
407         last_utilization_gpu = 0;
408         last_utilization_gp = 0;
409         last_utilization_pp = 0;
410 }
411
/*
 * Acquire the utilization data spinlock. Every function in this file that
 * updates the running-core counters or busy-time accumulators, except
 * mali_utilization_init() and mali_utilization_reset(), does so between
 * this call and mali_utilization_data_unlock().
 */
void mali_utilization_data_lock(void)
{
        _mali_osk_spinlock_irq_lock(utilization_data_lock);
}
416
/* Release the utilization data spinlock taken by mali_utilization_data_lock(). */
void mali_utilization_data_unlock(void)
{
        _mali_osk_spinlock_irq_unlock(utilization_data_lock);
}
421
/*
 * Debug helper: assert that the utilization data spinlock is currently held.
 * NOTE(review): presumably a no-op in non-debug builds - confirm against the
 * definition of MALI_DEBUG_ASSERT_LOCK_HELD.
 */
void mali_utilization_data_assert_locked(void)
{
        MALI_DEBUG_ASSERT_LOCK_HELD(utilization_data_lock);
}
426
/*
 * Return the overall GPU utilization (in parts of 256) computed for the
 * most recent period. The value is read without taking the data lock.
 */
u32 _mali_ukk_utilization_gp_pp(void)
{
        return last_utilization_gpu;
}
431
/*
 * Return the GP utilization (in parts of 256) computed for the most recent
 * period. The value is read without taking the data lock.
 */
u32 _mali_ukk_utilization_gp(void)
{
        return last_utilization_gp;
}
436
/*
 * Return the PP utilization (in parts of 256) computed for the most recent
 * period. The value is read without taking the data lock.
 */
u32 _mali_ukk_utilization_pp(void)
{
        return last_utilization_pp;
}