MALI: rockchip: upgrade midgard DDK to r14p0-01rel0
[firefly-linux-kernel-4.4.55.git] drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
/*
 *
 * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 *
 */

/*
 * GPU backend instrumentation APIs.
 */

#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_hwaccess_instr.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_instr_internal.h>

/**
 * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
 * hardware
 *
 * @kbdev: Kbase device
 */
static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
{
        unsigned long flags;
        unsigned long pm_flags;
        u32 irq_mask;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
        KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
                                        KBASE_INSTR_STATE_REQUEST_CLEAN);

        /* Enable interrupt */
        spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
        irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                irq_mask | CLEAN_CACHES_COMPLETED, NULL);
        spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);

        /* Clean & invalidate the caches so we're sure the MMU tables for the
         * dump buffer are valid */
        KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
                                        GPU_COMMAND_CLEAN_INV_CACHES, NULL);
        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}

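/**
 * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
 * @kbdev: Kbase device
 * @kctx: Kbase context that will own the counter dump
 * @setup: HW counter setup parameters (dump buffer address and per-block
 *         enable bitmaps)
 *
 * Return: 0 on success, -EINVAL if the dump buffer is missing or unaligned,
 * or if instrumentation is already enabled
 */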
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
                                        struct kbase_context *kctx,
                                        struct kbase_uk_hwcnt_setup *setup)
{
        unsigned long flags, pm_flags;
        int err = -EINVAL;
        u32 irq_mask;
        int ret;
        u64 shader_cores_needed;
        u32 prfcnt_config;

        shader_cores_needed = kbase_pm_get_present_cores(kbdev,
                                                        KBASE_PM_CORE_SHADER);

        /* The dump buffer address must be non-NULL and 2kB aligned */
        if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
                goto out_err;

        /* Override core availability policy to ensure all cores are available
         */
        kbase_pm_ca_instr_enable(kbdev);

        /* Request the cores early on synchronously - we'll release them on any
         * errors (e.g. instrumentation already active) */
        kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
                /* Instrumentation is already enabled */
                spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
                goto out_unrequest_cores;
        }

        /* Enable interrupt */
        spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
        irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
                                                PRFCNT_SAMPLE_COMPLETED, NULL);
        spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);

        /* In use, this context is the owner */
        kbdev->hwcnt.kctx = kctx;
        /* Remember the dump address so we can reprogram it later */
        kbdev->hwcnt.addr = setup->dump_buffer;

        /* Request the clean */
        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
        kbdev->hwcnt.backend.triggered = 0;
        /* Clean & invalidate the caches so we're sure the MMU tables for the
         * dump buffer are valid */
        ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
                                        &kbdev->hwcnt.backend.cache_clean_work);
        KBASE_DEBUG_ASSERT(ret);

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        /* Wait for cacheclean to complete */
        wait_event(kbdev->hwcnt.backend.wait,
                                        kbdev->hwcnt.backend.triggered != 0);

        KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
                                                        KBASE_INSTR_STATE_IDLE);

        kbase_pm_request_l2_caches(kbdev);

        /* Configure */
        prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
        {
                u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
                u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
                        >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
                int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);

                if (arch_v6)
                        prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
        }
#endif

        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
                        prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);

        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
                                        setup->dump_buffer & 0xFFFFFFFF, kctx);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
                                        setup->dump_buffer >> 32,        kctx);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
                                        setup->jm_bm,                    kctx);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
                                        setup->shader_bm,                kctx);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
                                        setup->mmu_l2_bm,                kctx);
        /* Due to PRLAM-8186 we need to disable the Tiler before we enable the
         * HW counter dump. */
        if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
                kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
                                                                        kctx);
        else
                kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
                                                        setup->tiler_bm, kctx);

        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
                        prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);

        /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
         */
        if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
                kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
                                                        setup->tiler_bm, kctx);

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
        kbdev->hwcnt.backend.triggered = 1;
        wake_up(&kbdev->hwcnt.backend.wait);

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        err = 0;

        dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
        return err;
 out_unrequest_cores:
        spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
        spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 out_err:
        return err;
}

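/**
 * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
 * @kctx: Kbase context that owns the counter dump
 *
 * Waits for any ongoing dump or setup to complete before disabling.
 *
 * Return: 0 on success, -EINVAL if instrumentation is not enabled or is
 * owned by another context
 */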
int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
{
        unsigned long flags, pm_flags;
        int err = -EINVAL;
        u32 irq_mask;
        struct kbase_device *kbdev = kctx->kbdev;

        while (1) {
                spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

                if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
                        /* Instrumentation is not enabled */
                        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
                        goto out;
                }

                if (kbdev->hwcnt.kctx != kctx) {
                        /* Instrumentation has been setup for another context */
                        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
                        goto out;
                }

                if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
                        break;

                spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

                /* Ongoing dump/setup - wait for its completion */
                wait_event(kbdev->hwcnt.backend.wait,
                                        kbdev->hwcnt.backend.triggered != 0);
        }

        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
        kbdev->hwcnt.backend.triggered = 0;

        /* Disable interrupt */
        spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
        irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);

        /* Disable the counters */
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);

        kbdev->hwcnt.kctx = NULL;
        kbdev->hwcnt.addr = 0ULL;

        kbase_pm_ca_instr_disable(kbdev);

        kbase_pm_unrequest_cores(kbdev, true,
                kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));

        kbase_pm_release_l2_caches(kbdev);

        spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
                                                                        kctx);

        err = 0;

 out:
        return err;
}

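/**
 * kbase_instr_hwcnt_request_dump - Request a HW counter dump
 * @kctx: Kbase context that owns the counter dump
 *
 * The dump itself completes asynchronously; it can be waited for with
 * kbase_instr_hwcnt_wait_for_dump() or polled with
 * kbase_instr_hwcnt_dump_complete().
 *
 * Return: 0 on success, -EINVAL if the context does not own the dump or the
 * backend is not idle
 */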
int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
{
        unsigned long flags;
        int err = -EINVAL;
        struct kbase_device *kbdev = kctx->kbdev;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.kctx != kctx) {
                /* The instrumentation has been setup for another context */
                goto unlock;
        }

        if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
                /* HW counters are disabled or another dump is ongoing, or we're
                 * resetting */
                goto unlock;
        }

        kbdev->hwcnt.backend.triggered = 0;

        /* Mark that we're dumping - the PF handler can signal that we faulted
         */
        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;

        /* Reconfigure the dump address */
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
                                        kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
                                        kbdev->hwcnt.addr >> 32, NULL);

        /* Start dumping */
        KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
                                        kbdev->hwcnt.addr, 0);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
                                        GPU_COMMAND_PRFCNT_SAMPLE, kctx);

        dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);

        err = 0;

 unlock:
        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
        return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);

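/**
 * kbase_instr_hwcnt_dump_complete - Check whether a HW counter dump has
 * completed
 * @kctx: Kbase context that owns the counter dump
 * @success: Set to true if the dump succeeded, false if it faulted; only
 *           meaningful when the function returns true
 *
 * Return: true if the dump has completed (with or without a fault), false if
 * it is still in progress
 */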
bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
                                                bool * const success)
{
        unsigned long flags;
        bool complete = false;
        struct kbase_device *kbdev = kctx->kbdev;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
                *success = true;
                complete = true;
        } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
                *success = false;
                complete = true;
                kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
        }

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        return complete;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);

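/**
 * kbasep_cache_clean_worker - Workqueue item that issues a cache clean &
 * invalidate and waits for it to complete
 * @data: The cache_clean_work member of struct kbase_device
 */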
void kbasep_cache_clean_worker(struct work_struct *data)
{
        struct kbase_device *kbdev;
        unsigned long flags;

        kbdev = container_of(data, struct kbase_device,
                                                hwcnt.backend.cache_clean_work);

        mutex_lock(&kbdev->cacheclean_lock);
        kbasep_instr_hwcnt_cacheclean(kbdev);

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
        /* Wait for our condition, and any reset to complete */
        while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
                spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
                wait_event(kbdev->hwcnt.backend.cache_clean_wait,
                                kbdev->hwcnt.backend.state !=
                                                KBASE_INSTR_STATE_CLEANING);
                spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
        }
        KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
                                                KBASE_INSTR_STATE_CLEANED);

        /* All finished and idle */
        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
        kbdev->hwcnt.backend.triggered = 1;
        wake_up(&kbdev->hwcnt.backend.wait);

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
        mutex_unlock(&kbdev->cacheclean_lock);
}

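/**
 * kbase_instr_hwcnt_sample_done - Signal that a PRFCNT_SAMPLE command has
 * completed
 * @kbdev: Kbase device
 *
 * Called when the GPU raises PRFCNT_SAMPLE_COMPLETED. On a successful dump
 * this queues the cache clean work item; on a fault it wakes any waiter.
 */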
void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
{
        unsigned long flags;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
                kbdev->hwcnt.backend.triggered = 1;
                wake_up(&kbdev->hwcnt.backend.wait);
        } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
                int ret;
                /* Always clean and invalidate the cache after a successful dump
                 */
                kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
                ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
                                        &kbdev->hwcnt.backend.cache_clean_work);
                KBASE_DEBUG_ASSERT(ret);
        }

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}

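/**
 * kbase_clean_caches_done - Signal that a GPU cache clean & invalidate has
 * completed
 * @kbdev: Kbase device
 *
 * Called when the GPU raises CLEAN_CACHES_COMPLETED. Masks the interrupt
 * again and wakes the cache clean worker.
 */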
void kbase_clean_caches_done(struct kbase_device *kbdev)
{
        u32 irq_mask;

        if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
                unsigned long flags;
                unsigned long pm_flags;

                spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
                /* Disable interrupt */
                spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
                irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                                                        NULL);
                kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
                spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);

                /* Wakeup... */
                if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
                        /* Only wake if we weren't resetting */
                        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
                        wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
                }

                spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
        }
}

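/**
 * kbase_instr_hwcnt_wait_for_dump - Wait for a HW counter dump (and the
 * cache clean that follows it) to complete
 * @kctx: Kbase context that owns the counter dump
 *
 * Return: 0 if the dump completed successfully, -EINVAL if it faulted
 */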
int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
{
        struct kbase_device *kbdev = kctx->kbdev;
        unsigned long flags;
        int err;

        /* Wait for dump & cacheclean to complete */
        wait_event(kbdev->hwcnt.backend.wait,
                                        kbdev->hwcnt.backend.triggered != 0);

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
                err = -EINVAL;
                kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
        } else {
                /* Dump done */
                KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
                                                        KBASE_INSTR_STATE_IDLE);
                err = 0;
        }

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        return err;
}

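/**
 * kbase_instr_hwcnt_clear - Zero the HW counters
 * @kctx: Kbase context that owns the counter dump
 *
 * Return: 0 on success, -EINVAL if the context does not own the dump or a
 * dump is currently in progress
 */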
int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
{
        unsigned long flags;
        int err = -EINVAL;
        struct kbase_device *kbdev = kctx->kbdev;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        /* Check it's the context previously set up and we're not already
         * dumping */
        if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
                                                        KBASE_INSTR_STATE_IDLE)
                goto out;

        /* Clear the counters */
        KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
                                                GPU_COMMAND_PRFCNT_CLEAR, kctx);

        err = 0;

out:
        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
        return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);

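/**
 * kbase_instr_backend_init - Initialise the HW counters backend
 * @kbdev: Kbase device
 *
 * Sets up the wait queues, the cache clean work item and its workqueue.
 *
 * Return: 0 on success, -EINVAL if the workqueue could not be allocated
 */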
int kbase_instr_backend_init(struct kbase_device *kbdev)
{
        int ret = 0;

        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;

        init_waitqueue_head(&kbdev->hwcnt.backend.wait);
        init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
        INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
                                                kbasep_cache_clean_worker);
        kbdev->hwcnt.backend.triggered = 0;

        kbdev->hwcnt.backend.cache_clean_wq =
                        alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
        if (!kbdev->hwcnt.backend.cache_clean_wq)
                ret = -EINVAL;

        return ret;
}

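/**
 * kbase_instr_backend_term - Tear down the HW counters backend
 * @kbdev: Kbase device
 */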
void kbase_instr_backend_term(struct kbase_device *kbdev)
{
        destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
}