/*
 *
 * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

/*
 * GPU backend instrumentation APIs.
 */
24 #include <mali_kbase.h>
25 #include <mali_midg_regmap.h>
26 #include <mali_kbase_hwaccess_instr.h>
27 #include <backend/gpu/mali_kbase_device_internal.h>
28 #include <backend/gpu/mali_kbase_pm_internal.h>
29 #include <backend/gpu/mali_kbase_instr_internal.h>
32 * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
35 * @kbdev: Kbase device
37 static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
40 unsigned long pm_flags;
43 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
44 KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
45 KBASE_INSTR_STATE_REQUEST_CLEAN);
47 /* Enable interrupt */
48 spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
49 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
50 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
51 irq_mask | CLEAN_CACHES_COMPLETED, NULL);
52 spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
54 /* clean&invalidate the caches so we're sure the mmu tables for the dump
56 KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
57 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
58 GPU_COMMAND_CLEAN_INV_CACHES, NULL);
59 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
61 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
64 int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
65 struct kbase_context *kctx,
66 struct kbase_uk_hwcnt_setup *setup)
68 unsigned long flags, pm_flags;
72 u64 shader_cores_needed;
75 shader_cores_needed = kbase_pm_get_present_cores(kbdev,
76 KBASE_PM_CORE_SHADER);
78 /* alignment failure */
79 if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
82 /* Override core availability policy to ensure all cores are available
84 kbase_pm_ca_instr_enable(kbdev);
86 /* Request the cores early on synchronously - we'll release them on any
87 * errors (e.g. instrumentation already active) */
88 kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
90 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
92 if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
93 /* Instrumentation is already enabled */
94 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
95 goto out_unrequest_cores;
98 /* Enable interrupt */
99 spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
100 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
101 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
102 PRFCNT_SAMPLE_COMPLETED, NULL);
103 spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
105 /* In use, this context is the owner */
106 kbdev->hwcnt.kctx = kctx;
107 /* Remember the dump address so we can reprogram it later */
108 kbdev->hwcnt.addr = setup->dump_buffer;
110 /* Request the clean */
111 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
112 kbdev->hwcnt.backend.triggered = 0;
113 /* Clean&invalidate the caches so we're sure the mmu tables for the dump
115 ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
116 &kbdev->hwcnt.backend.cache_clean_work);
117 KBASE_DEBUG_ASSERT(ret);
119 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
121 /* Wait for cacheclean to complete */
122 wait_event(kbdev->hwcnt.backend.wait,
123 kbdev->hwcnt.backend.triggered != 0);
125 KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
126 KBASE_INSTR_STATE_IDLE);
128 kbase_pm_request_l2_caches(kbdev);
131 prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
132 #ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
134 u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
135 u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
136 >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
137 int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
140 prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
144 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
145 prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
147 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
148 setup->dump_buffer & 0xFFFFFFFF, kctx);
149 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
150 setup->dump_buffer >> 32, kctx);
151 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
153 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
154 setup->shader_bm, kctx);
155 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
156 setup->mmu_l2_bm, kctx);
157 /* Due to PRLAM-8186 we need to disable the Tiler before we enable the
158 * HW counter dump. */
159 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
160 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
163 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
164 setup->tiler_bm, kctx);
166 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
167 prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
169 /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
171 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
172 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
173 setup->tiler_bm, kctx);
175 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
177 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
178 kbdev->hwcnt.backend.triggered = 1;
179 wake_up(&kbdev->hwcnt.backend.wait);
181 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
185 dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
188 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
189 kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
190 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
195 int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
197 unsigned long flags, pm_flags;
200 struct kbase_device *kbdev = kctx->kbdev;
203 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
205 if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
206 /* Instrumentation is not enabled */
207 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
211 if (kbdev->hwcnt.kctx != kctx) {
212 /* Instrumentation has been setup for another context */
213 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
217 if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
220 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
222 /* Ongoing dump/setup - wait for its completion */
223 wait_event(kbdev->hwcnt.backend.wait,
224 kbdev->hwcnt.backend.triggered != 0);
227 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
228 kbdev->hwcnt.backend.triggered = 0;
230 /* Disable interrupt */
231 spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
232 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
233 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
234 irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
236 /* Disable the counters */
237 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
239 kbdev->hwcnt.kctx = NULL;
240 kbdev->hwcnt.addr = 0ULL;
242 kbase_pm_ca_instr_disable(kbdev);
244 kbase_pm_unrequest_cores(kbdev, true,
245 kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
247 kbase_pm_release_l2_caches(kbdev);
249 spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
250 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
252 dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
261 int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
265 struct kbase_device *kbdev = kctx->kbdev;
267 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
269 if (kbdev->hwcnt.kctx != kctx) {
270 /* The instrumentation has been setup for another context */
274 if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
275 /* HW counters are disabled or another dump is ongoing, or we're
280 kbdev->hwcnt.backend.triggered = 0;
282 /* Mark that we're dumping - the PF handler can signal that we faulted
284 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
286 /* Reconfigure the dump address */
287 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
288 kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
289 kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
290 kbdev->hwcnt.addr >> 32, NULL);
293 KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
294 kbdev->hwcnt.addr, 0);
295 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
296 GPU_COMMAND_PRFCNT_SAMPLE, kctx);
298 dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
303 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
306 KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
308 bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
309 bool * const success)
312 bool complete = false;
313 struct kbase_device *kbdev = kctx->kbdev;
315 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
317 if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
320 } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
323 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
326 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
330 KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
332 void kbasep_cache_clean_worker(struct work_struct *data)
334 struct kbase_device *kbdev;
337 kbdev = container_of(data, struct kbase_device,
338 hwcnt.backend.cache_clean_work);
340 mutex_lock(&kbdev->cacheclean_lock);
341 kbasep_instr_hwcnt_cacheclean(kbdev);
343 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
344 /* Wait for our condition, and any reset to complete */
345 while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
346 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
347 wait_event(kbdev->hwcnt.backend.cache_clean_wait,
348 kbdev->hwcnt.backend.state !=
349 KBASE_INSTR_STATE_CLEANING);
350 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
352 KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
353 KBASE_INSTR_STATE_CLEANED);
355 /* All finished and idle */
356 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
357 kbdev->hwcnt.backend.triggered = 1;
358 wake_up(&kbdev->hwcnt.backend.wait);
360 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
361 mutex_unlock(&kbdev->cacheclean_lock);
364 void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
368 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
370 if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
371 kbdev->hwcnt.backend.triggered = 1;
372 wake_up(&kbdev->hwcnt.backend.wait);
373 } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
375 /* Always clean and invalidate the cache after a successful dump
377 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
378 ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
379 &kbdev->hwcnt.backend.cache_clean_work);
380 KBASE_DEBUG_ASSERT(ret);
383 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
386 void kbase_clean_caches_done(struct kbase_device *kbdev)
390 if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
392 unsigned long pm_flags;
394 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
395 /* Disable interrupt */
396 spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
397 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
399 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
400 irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
401 spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
404 if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
405 /* Only wake if we weren't resetting */
406 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
407 wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
410 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
414 int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
416 struct kbase_device *kbdev = kctx->kbdev;
420 /* Wait for dump & cacheclean to complete */
421 wait_event(kbdev->hwcnt.backend.wait,
422 kbdev->hwcnt.backend.triggered != 0);
424 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
426 if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
428 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
431 KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
432 KBASE_INSTR_STATE_IDLE);
436 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
441 int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
445 struct kbase_device *kbdev = kctx->kbdev;
447 spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
449 /* Check it's the context previously set up and we're not already
451 if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
452 KBASE_INSTR_STATE_IDLE)
455 /* Clear the counters */
456 KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
457 kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
458 GPU_COMMAND_PRFCNT_CLEAR, kctx);
463 spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
466 KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
468 int kbase_instr_backend_init(struct kbase_device *kbdev)
472 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
474 init_waitqueue_head(&kbdev->hwcnt.backend.wait);
475 init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
476 INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
477 kbasep_cache_clean_worker);
478 kbdev->hwcnt.backend.triggered = 0;
480 kbdev->hwcnt.backend.cache_clean_wq =
481 alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
482 if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
488 void kbase_instr_backend_term(struct kbase_device *kbdev)
490 destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);