Revert "MALI: rockchip: upgrade midgard DDK to r14p0-01rel0"
firefly-linux-kernel-4.4.55.git: drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
/*
 *
 * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 *
 */


/*
 * GPU backend instrumentation APIs.
 */

#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_hwaccess_instr.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_instr_internal.h>

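/*
 * Typical usage by the hwcnt front-end (a sketch, not a contract):
 *
 *   kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup);
 *   kbase_instr_hwcnt_request_dump(kctx);
 *   kbase_instr_hwcnt_wait_for_dump(kctx);     (or poll _dump_complete())
 *   kbase_instr_hwcnt_clear(kctx);             (optional)
 *   kbase_instr_hwcnt_disable_internal(kctx);
 *
 * The backend serialises these through kbdev->hwcnt.lock and the
 * KBASE_INSTR_STATE_* state machine; cache clean/invalidate work is
 * deferred to the cache_clean_wq workqueue.
 */
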
/**
 * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
 * hardware
 *
 * @kbdev: Kbase device
 */
static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
{
        unsigned long flags;
        unsigned long pm_flags;
        u32 irq_mask;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
        KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
                                        KBASE_INSTR_STATE_REQUEST_CLEAN);

        /* Enable interrupt */
        spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
        irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                irq_mask | CLEAN_CACHES_COMPLETED, NULL);
        spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);

        /* Clean & invalidate the caches so we're sure the MMU tables for the
         * dump buffer are valid */
        KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
                                        GPU_COMMAND_CLEAN_INV_CACHES, NULL);
        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}

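/**
 * kbase_instr_hwcnt_enable_internal - Enable HW counter collection for a
 *                                     context
 * @kbdev: Kbase device
 * @kctx:  Kbase context that will own the dump
 * @setup: Dump buffer address and per-block counter enable bitmaps
 *
 * Requests the shader cores and L2 caches, performs a cache clean/invalidate
 * so the MMU tables covering the dump buffer are valid, then programs the
 * PRFCNT registers. The dump buffer must be non-NULL and 2048-byte aligned.
 *
 * Return: 0 on success, -EINVAL if the dump buffer is invalid or
 * instrumentation is already enabled.
 */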
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
                                        struct kbase_context *kctx,
                                        struct kbase_uk_hwcnt_setup *setup)
{
        unsigned long flags, pm_flags;
        int err = -EINVAL;
        u32 irq_mask;
        int ret;
        u64 shader_cores_needed;
        u32 prfcnt_config;

        shader_cores_needed = kbase_pm_get_present_cores(kbdev,
                                                        KBASE_PM_CORE_SHADER);

        /* Reject a missing dump buffer, or one that is not 2048-byte aligned */
        if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
                goto out_err;

        /* Override core availability policy to ensure all cores are available
         */
        kbase_pm_ca_instr_enable(kbdev);

        /* Request the cores early on synchronously - we'll release them on any
         * errors (e.g. instrumentation already active) */
        kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
                /* Instrumentation is already enabled */
                spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
                goto out_unrequest_cores;
        }

        /* Enable interrupt */
        spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
        irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
                                                PRFCNT_SAMPLE_COMPLETED, NULL);
        spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);

        /* In use, this context is the owner */
        kbdev->hwcnt.kctx = kctx;
        /* Remember the dump address so we can reprogram it later */
        kbdev->hwcnt.addr = setup->dump_buffer;

        /* Request the clean */
        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
        kbdev->hwcnt.backend.triggered = 0;
        /* Clean & invalidate the caches so we're sure the MMU tables for the
         * dump buffer are valid */
        ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
                                        &kbdev->hwcnt.backend.cache_clean_work);
        KBASE_DEBUG_ASSERT(ret);

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        /* Wait for cacheclean to complete */
        wait_event(kbdev->hwcnt.backend.wait,
                                        kbdev->hwcnt.backend.triggered != 0);

        KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
                                                        KBASE_INSTR_STATE_IDLE);

        kbase_pm_request_l2_caches(kbdev);

        /* Configure */
        prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
        {
                u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
                u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
                        >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
                int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);

                if (arch_v6)
                        prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
        }
#endif

        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
                        prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);

        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
                                        setup->dump_buffer & 0xFFFFFFFF, kctx);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
                                        setup->dump_buffer >> 32,        kctx);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
                                        setup->jm_bm,                    kctx);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
                                        setup->shader_bm,                kctx);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
                                        setup->mmu_l2_bm,                kctx);
        /* Due to PRLAM-8186 we need to disable the Tiler before we enable the
         * HW counter dump. */
        if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
                kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
                                                                        kctx);
        else
                kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
                                                        setup->tiler_bm, kctx);

        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
                        prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);

        /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
         */
        if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
                kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
                                                        setup->tiler_bm, kctx);

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
        kbdev->hwcnt.backend.triggered = 1;
        wake_up(&kbdev->hwcnt.backend.wait);

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        err = 0;

        dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
        return err;
 out_unrequest_cores:
        kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
 out_err:
        return err;
}

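/**
 * kbase_instr_hwcnt_disable_internal - Disable HW counter collection
 * @kctx: Kbase context that currently owns the instrumentation
 *
 * Waits for any ongoing dump or setup to complete, masks the sample-complete
 * interrupt, disables the counters and releases the shader cores and L2
 * caches requested at enable time.
 *
 * Return: 0 on success, -EINVAL if instrumentation is not enabled or is owned
 * by a different context.
 */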
int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
{
        unsigned long flags, pm_flags;
        int err = -EINVAL;
        u32 irq_mask;
        struct kbase_device *kbdev = kctx->kbdev;

        while (1) {
                spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

                if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
                        /* Instrumentation is not enabled */
                        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
                        goto out;
                }

                if (kbdev->hwcnt.kctx != kctx) {
                        /* Instrumentation has been set up for another context */
                        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
                        goto out;
                }

                if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
                        break;

                spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

                /* Ongoing dump/setup - wait for its completion */
                wait_event(kbdev->hwcnt.backend.wait,
                                        kbdev->hwcnt.backend.triggered != 0);
        }

        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
        kbdev->hwcnt.backend.triggered = 0;

        /* Disable interrupt */
        spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
        irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
        spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);

        /* Disable the counters */
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);

        kbdev->hwcnt.kctx = NULL;
        kbdev->hwcnt.addr = 0ULL;

        kbase_pm_ca_instr_disable(kbdev);

        kbase_pm_unrequest_cores(kbdev, true,
                kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        kbase_pm_release_l2_caches(kbdev);

        dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
                                                                        kctx);

        err = 0;

 out:
        return err;
}

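/**
 * kbase_instr_hwcnt_request_dump - Trigger a HW counter dump
 * @kctx: Kbase context that owns the instrumentation
 *
 * Reprograms the dump address and issues GPU_COMMAND_PRFCNT_SAMPLE. The dump
 * completes asynchronously; callers either sleep in
 * kbase_instr_hwcnt_wait_for_dump() or poll kbase_instr_hwcnt_dump_complete().
 *
 * Return: 0 if the dump was started, -EINVAL if the context does not own the
 * instrumentation or the backend is not idle.
 */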
int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
{
        unsigned long flags;
        int err = -EINVAL;
        struct kbase_device *kbdev = kctx->kbdev;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.kctx != kctx) {
                /* The instrumentation has been set up for another context */
                goto unlock;
        }

        if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
                /* HW counters are disabled or another dump is ongoing, or we're
                 * resetting */
                goto unlock;
        }

        kbdev->hwcnt.backend.triggered = 0;

        /* Mark that we're dumping - the PF handler can signal that we faulted
         */
        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;

        /* Reconfigure the dump address */
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
                                        kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
                                        kbdev->hwcnt.addr >> 32, NULL);

        /* Start dumping */
        KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
                                        kbdev->hwcnt.addr, 0);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
                                        GPU_COMMAND_PRFCNT_SAMPLE, kctx);

        dev_dbg(kbdev->dev, "HW counters dump requested for context %p", kctx);

        err = 0;

 unlock:
        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
        return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);

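/**
 * kbase_instr_hwcnt_dump_complete - Poll for completion of a HW counter dump
 * @kctx:    Kbase context
 * @success: Set to true if the dump succeeded, false if it faulted
 *
 * Return: true if the dump has finished (successfully or not), false if it is
 * still in progress.
 */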
bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
                                                bool * const success)
{
        unsigned long flags;
        bool complete = false;
        struct kbase_device *kbdev = kctx->kbdev;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
                *success = true;
                complete = true;
        } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
                *success = false;
                complete = true;
                kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
        }

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        return complete;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);

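/**
 * kbasep_cache_clean_worker - Workqueue item that performs the cache clean
 * @data: &struct work_struct embedded in the device's hwcnt backend
 *
 * Issues the cache clean/invalidate, waits for it to complete, then marks the
 * backend idle and wakes any waiter.
 */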
void kbasep_cache_clean_worker(struct work_struct *data)
{
        struct kbase_device *kbdev;
        unsigned long flags;

        kbdev = container_of(data, struct kbase_device,
                                                hwcnt.backend.cache_clean_work);

        mutex_lock(&kbdev->cacheclean_lock);
        kbasep_instr_hwcnt_cacheclean(kbdev);

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
        /* Wait for our condition, and any reset to complete */
        while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
                spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
                wait_event(kbdev->hwcnt.backend.cache_clean_wait,
                                kbdev->hwcnt.backend.state !=
                                                KBASE_INSTR_STATE_CLEANING);
                spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
        }
        KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
                                                KBASE_INSTR_STATE_CLEANED);

        /* All finished and idle */
        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
        kbdev->hwcnt.backend.triggered = 1;
        wake_up(&kbdev->hwcnt.backend.wait);

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
        mutex_unlock(&kbdev->cacheclean_lock);
}

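/**
 * kbase_instr_hwcnt_sample_done - Notification that a counter sample completed
 * @kbdev: Kbase device
 *
 * Called from the GPU IRQ handler. On a successful dump this schedules a
 * cache clean/invalidate; on a fault it simply wakes any waiter.
 */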
void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
{
        unsigned long flags;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
                kbdev->hwcnt.backend.triggered = 1;
                wake_up(&kbdev->hwcnt.backend.wait);
        } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
                int ret;
                /* Always clean and invalidate the cache after a successful dump
                 */
                kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
                ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
                                        &kbdev->hwcnt.backend.cache_clean_work);
                KBASE_DEBUG_ASSERT(ret);
        }

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}

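/**
 * kbase_clean_caches_done - Notification that a GPU cache clean completed
 * @kbdev: Kbase device
 *
 * Masks the CLEAN_CACHES_COMPLETED interrupt again and, if a clean was in
 * progress, marks it complete and wakes the cache clean worker.
 */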
void kbase_clean_caches_done(struct kbase_device *kbdev)
{
        u32 irq_mask;

        if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
                unsigned long flags;
                unsigned long pm_flags;

                spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
                /* Disable interrupt */
                spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
                irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                                                        NULL);
                kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
                spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);

                /* Wakeup... */
                if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
                        /* Only wake if we weren't resetting */
                        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
                        wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
                }

                spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
        }
}

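/**
 * kbase_instr_hwcnt_wait_for_dump - Sleep until the current dump and cache
 *                                   clean have completed
 * @kctx: Kbase context
 *
 * Return: 0 if the dump completed successfully, -EINVAL if it faulted.
 */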
int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
{
        struct kbase_device *kbdev = kctx->kbdev;
        unsigned long flags;
        int err;

        /* Wait for dump & cacheclean to complete */
        wait_event(kbdev->hwcnt.backend.wait,
                                        kbdev->hwcnt.backend.triggered != 0);

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
                err = -EINVAL;
                kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
        } else {
                /* Dump done */
                KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
                                                        KBASE_INSTR_STATE_IDLE);
                err = 0;
        }

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        return err;
}

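/**
 * kbase_instr_hwcnt_clear - Zero the HW counters
 * @kctx: Kbase context that owns the instrumentation
 *
 * Return: 0 on success, -EINVAL if the context does not own the
 * instrumentation or a dump is in progress.
 */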
int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
{
        unsigned long flags;
        int err = -EINVAL;
        struct kbase_device *kbdev = kctx->kbdev;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        /* Check it's the context previously set up and we're not already
         * dumping */
        if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
                                                        KBASE_INSTR_STATE_IDLE)
                goto out;

        /* Clear the counters */
        KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
                                                GPU_COMMAND_PRFCNT_CLEAR, kctx);

        err = 0;

out:
        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
        return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);

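/**
 * kbase_instr_backend_init - Initialise the instrumentation backend
 * @kbdev: Kbase device
 *
 * Sets up the wait queues, the cache clean work item and its workqueue.
 *
 * Return: 0 on success, -EINVAL if the workqueue could not be allocated.
 */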
int kbase_instr_backend_init(struct kbase_device *kbdev)
{
        int ret = 0;

        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;

        init_waitqueue_head(&kbdev->hwcnt.backend.wait);
        init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
        INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
                                                kbasep_cache_clean_worker);
        kbdev->hwcnt.backend.triggered = 0;

        kbdev->hwcnt.backend.cache_clean_wq =
                        alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
        if (!kbdev->hwcnt.backend.cache_clean_wq)
                ret = -EINVAL;

        return ret;
}

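/**
 * kbase_instr_backend_term - Tear down the instrumentation backend
 * @kbdev: Kbase device
 */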
void kbase_instr_backend_term(struct kbase_device *kbdev)
{
        destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
}