/*
 *
 * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

/*
 * GPU backend instrumentation APIs.
 */
24 #include <mali_kbase.h>
25 #include <mali_midg_regmap.h>
26 #include <mali_kbase_hwaccess_instr.h>
27 #include <backend/gpu/mali_kbase_device_internal.h>
28 #include <backend/gpu/mali_kbase_pm_internal.h>
29 #include <backend/gpu/mali_kbase_instr_internal.h>
32 * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
35 * @kbdev: Kbase device
37 static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
40 unsigned long pm_flags;
43 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
44 KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
45 KBASE_INSTR_STATE_REQUEST_CLEAN);
47 /* Enable interrupt */
48 spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
49 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
50 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
51 irq_mask | CLEAN_CACHES_COMPLETED, NULL);
52 spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
54 /* clean&invalidate the caches so we're sure the mmu tables for the dump
56 KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
57 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
58 GPU_COMMAND_CLEAN_INV_CACHES, NULL);
59 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
61 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
64 int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
65 struct kbase_context *kctx,
66 struct kbase_uk_hwcnt_setup *setup)
68 unsigned long flags, pm_flags;
72 u64 shader_cores_needed;
75 shader_cores_needed = kbase_pm_get_present_cores(kbdev,
76 KBASE_PM_CORE_SHADER);
78 /* alignment failure */
79 if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
82 /* Override core availability policy to ensure all cores are available
84 kbase_pm_ca_instr_enable(kbdev);
86 /* Request the cores early on synchronously - we'll release them on any
87 * errors (e.g. instrumentation already active) */
88 kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
90 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
92 if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
93 /* Instrumentation is already enabled */
94 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
95 goto out_unrequest_cores;
98 /* Enable interrupt */
99 spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
100 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
101 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
102 PRFCNT_SAMPLE_COMPLETED, NULL);
103 spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
105 /* In use, this context is the owner */
106 kbdev->hwcnt.kctx = kctx;
107 /* Remember the dump address so we can reprogram it later */
108 kbdev->hwcnt.addr = setup->dump_buffer;
110 /* Request the clean */
111 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
112 kbdev->hwcnt.backend.triggered = 0;
113 /* Clean&invalidate the caches so we're sure the mmu tables for the dump
115 ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
116 &kbdev->hwcnt.backend.cache_clean_work);
117 KBASE_DEBUG_ASSERT(ret);
119 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
121 /* Wait for cacheclean to complete */
122 wait_event(kbdev->hwcnt.backend.wait,
123 kbdev->hwcnt.backend.triggered != 0);
125 KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
126 KBASE_INSTR_STATE_IDLE);
128 kbase_pm_request_l2_caches(kbdev);
131 prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
132 #ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
134 u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
135 u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
136 >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
137 int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
140 prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
144 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
145 prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
147 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
148 setup->dump_buffer & 0xFFFFFFFF, kctx);
149 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
150 setup->dump_buffer >> 32, kctx);
151 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
153 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
154 setup->shader_bm, kctx);
155 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
156 setup->mmu_l2_bm, kctx);
157 /* Due to PRLAM-8186 we need to disable the Tiler before we enable the
158 * HW counter dump. */
159 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
160 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
163 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
164 setup->tiler_bm, kctx);
166 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
167 prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
169 /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
171 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
172 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
173 setup->tiler_bm, kctx);
175 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
177 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
178 kbdev->hwcnt.backend.triggered = 1;
179 wake_up(&kbdev->hwcnt.backend.wait);
181 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
185 dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
188 kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
193 int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
195 unsigned long flags, pm_flags;
198 struct kbase_device *kbdev = kctx->kbdev;
201 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
203 if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
204 /* Instrumentation is not enabled */
205 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
209 if (kbdev->hwcnt.kctx != kctx) {
210 /* Instrumentation has been setup for another context */
211 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
215 if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
218 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
220 /* Ongoing dump/setup - wait for its completion */
221 wait_event(kbdev->hwcnt.backend.wait,
222 kbdev->hwcnt.backend.triggered != 0);
225 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
226 kbdev->hwcnt.backend.triggered = 0;
228 /* Disable interrupt */
229 spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
230 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
231 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
232 irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
233 spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
235 /* Disable the counters */
236 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
238 kbdev->hwcnt.kctx = NULL;
239 kbdev->hwcnt.addr = 0ULL;
241 kbase_pm_ca_instr_disable(kbdev);
243 kbase_pm_unrequest_cores(kbdev, true,
244 kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
246 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
248 kbase_pm_release_l2_caches(kbdev);
250 dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
259 int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
263 struct kbase_device *kbdev = kctx->kbdev;
265 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
267 if (kbdev->hwcnt.kctx != kctx) {
268 /* The instrumentation has been setup for another context */
272 if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
273 /* HW counters are disabled or another dump is ongoing, or we're
278 kbdev->hwcnt.backend.triggered = 0;
280 /* Mark that we're dumping - the PF handler can signal that we faulted
282 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
284 /* Reconfigure the dump address */
285 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
286 kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
287 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
288 kbdev->hwcnt.addr >> 32, NULL);
291 KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
292 kbdev->hwcnt.addr, 0);
293 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
294 GPU_COMMAND_PRFCNT_SAMPLE, kctx);
296 dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
301 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
304 KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
306 bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
307 bool * const success)
310 bool complete = false;
311 struct kbase_device *kbdev = kctx->kbdev;
313 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
315 if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
318 } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
321 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
324 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
328 KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
330 void kbasep_cache_clean_worker(struct work_struct *data)
332 struct kbase_device *kbdev;
335 kbdev = container_of(data, struct kbase_device,
336 hwcnt.backend.cache_clean_work);
338 mutex_lock(&kbdev->cacheclean_lock);
339 kbasep_instr_hwcnt_cacheclean(kbdev);
341 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
342 /* Wait for our condition, and any reset to complete */
343 while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
344 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
345 wait_event(kbdev->hwcnt.backend.cache_clean_wait,
346 kbdev->hwcnt.backend.state !=
347 KBASE_INSTR_STATE_CLEANING);
348 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
350 KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
351 KBASE_INSTR_STATE_CLEANED);
353 /* All finished and idle */
354 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
355 kbdev->hwcnt.backend.triggered = 1;
356 wake_up(&kbdev->hwcnt.backend.wait);
358 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
359 mutex_unlock(&kbdev->cacheclean_lock);
362 void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
366 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
368 if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
369 kbdev->hwcnt.backend.triggered = 1;
370 wake_up(&kbdev->hwcnt.backend.wait);
371 } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
373 /* Always clean and invalidate the cache after a successful dump
375 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
376 ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
377 &kbdev->hwcnt.backend.cache_clean_work);
378 KBASE_DEBUG_ASSERT(ret);
381 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
384 void kbase_clean_caches_done(struct kbase_device *kbdev)
388 if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
390 unsigned long pm_flags;
392 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
393 /* Disable interrupt */
394 spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
395 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
397 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
398 irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
399 spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
402 if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
403 /* Only wake if we weren't resetting */
404 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
405 wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
408 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
412 int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
414 struct kbase_device *kbdev = kctx->kbdev;
418 /* Wait for dump & cacheclean to complete */
419 wait_event(kbdev->hwcnt.backend.wait,
420 kbdev->hwcnt.backend.triggered != 0);
422 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
424 if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
426 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
429 KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
430 KBASE_INSTR_STATE_IDLE);
434 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
439 int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
443 struct kbase_device *kbdev = kctx->kbdev;
445 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
447 /* Check it's the context previously set up and we're not already
449 if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
450 KBASE_INSTR_STATE_IDLE)
453 /* Clear the counters */
454 KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
455 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
456 GPU_COMMAND_PRFCNT_CLEAR, kctx);
461 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
464 KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
466 int kbase_instr_backend_init(struct kbase_device *kbdev)
470 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
472 init_waitqueue_head(&kbdev->hwcnt.backend.wait);
473 init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
474 INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
475 kbasep_cache_clean_worker);
476 kbdev->hwcnt.backend.triggered = 0;
478 kbdev->hwcnt.backend.cache_clean_wq =
479 alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
480 if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
486 void kbase_instr_backend_term(struct kbase_device *kbdev)
488 destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);