/*
 * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

/*
 * Register-based HW access backend specific APIs
 */
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_jm.h>
#include <mali_kbase_jm.h>
#include <mali_kbase_js.h>
#include <mali_kbase_10969_workaround.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
#include <backend/gpu/mali_kbase_js_affinity.h>
#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
/* Return whether the specified ringbuffer is empty. HW access lock must be
 * held when calling this function */
#define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
/* Return number of atoms currently in the specified ringbuffer. HW access lock
 * must be held when calling this function */
#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
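/* NOTE: this arithmetic assumes (as in the backend ringbuffer structure) that
 * read_idx and write_idx are free-running u8 counters, so the subtraction is
 * performed modulo 256 and the cast through s8 keeps the count correct across
 * wraparound, e.g. write_idx == 1, read_idx == 255 gives (s8)(u8)(1 - 255)
 * == 2. This holds as long as the ring never exceeds 127 entries, which is
 * comfortably true for SLOT_RB_SIZE. */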
static void kbase_gpu_release_atom(struct kbase_device *kbdev,
					struct kbase_jd_atom *katom);
/**
 * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
 * @kbdev: Device pointer
 * @katom: Atom to enqueue
 *
 * Context: Caller must hold the HW access lock
 */
static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
					struct kbase_jd_atom *katom)
{
	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];

	WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
	rb->write_idx++;

	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
}
/**
 * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
 * it has been completed
 * @kbdev: Device pointer
 * @js: Job slot to remove atom from
 *
 * Context: Caller must hold the HW access lock
 *
 * Return: Atom removed from ringbuffer
 */
static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
						int js)
{
	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
	struct kbase_jd_atom *katom;

	if (SLOT_RB_EMPTY(rb)) {
		WARN(1, "GPU ringbuffer unexpectedly empty\n");
		return NULL;
	}

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;

	kbase_gpu_release_atom(kbdev, katom);

	rb->read_idx++;

	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;

	kbase_js_debug_log_current_affinities(kbdev);

	return katom;
}
struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
					int idx)
{
	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
		return NULL; /* idx out of range */

	return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
}
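/* NOTE: idx is relative to the read (oldest) end of the ringbuffer, so
 * kbase_gpu_inspect(kbdev, js, 0) is the current head atom for the slot and
 * idx 1 is the atom queued immediately behind it. */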
struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
					int js)
{
	return kbase_gpu_inspect(kbdev, js, 0);
}
struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
					int js)
{
	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];

	if (SLOT_RB_EMPTY(rb))
		return NULL;

	return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
}
/**
 * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
 * on the GPU
 * @kbdev: Device pointer
 * @js: Job slot to inspect
 *
 * Return: true if there are atoms on the GPU for slot js,
 * false otherwise
 */
static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
{
	int i;

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	for (i = 0; i < SLOT_RB_SIZE; i++) {
		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);

		if (!katom)
			return false;
		if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
				katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
			return true;
	}

	return false;
}
/**
 * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
 * currently on the GPU
 * @kbdev: Device pointer
 *
 * Return: true if there are any atoms on the GPU, false otherwise
 */
static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
{
	int js;
	int i;

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
		for (i = 0; i < SLOT_RB_SIZE; i++) {
			struct kbase_jd_atom *katom =
					kbase_gpu_inspect(kbdev, js, i);

			if (katom && katom->gpu_rb_state ==
						KBASE_ATOM_GPU_RB_SUBMITTED)
				return true;
		}
	}
	return false;
}
int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
{
	int nr = 0;
	int i;

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	for (i = 0; i < SLOT_RB_SIZE; i++) {
		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);

		if (katom && (katom->gpu_rb_state ==
						KBASE_ATOM_GPU_RB_SUBMITTED))
			nr++;
	}

	return nr;
}
int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
{
	int nr = 0;
	int i;

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	for (i = 0; i < SLOT_RB_SIZE; i++) {
		if (kbase_gpu_inspect(kbdev, js, i))
			nr++;
	}

	return nr;
}
static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
				enum kbase_atom_gpu_rb_state min_rb_state)
{
	int nr = 0;
	int i;

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	for (i = 0; i < SLOT_RB_SIZE; i++) {
		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);

		if (katom && (katom->gpu_rb_state >= min_rb_state))
			nr++;
	}

	return nr;
}
int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
{
	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
						KBASE_RESET_GPU_NOT_PENDING) {
		/* The GPU is being reset - so prevent submission */
		return 0;
	}

	return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
}
static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
						struct kbase_jd_atom *katom);
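/* Overview of the state machine below: an atom's core references advance
 * NO_CORES_REQUESTED -> WAITING_FOR_REQUESTED_CORES -> RECHECK_AFFINITY ->
 * CHECK_AFFINITY_VIOLATIONS -> READY. The function may be called repeatedly
 * for the same atom; each call resumes from katom->coreref_state and either
 * makes progress, stays put (e.g. cores not yet powered), or drops back to an
 * earlier state (affinity changed or would be violated). It returns true only
 * once the atom reaches READY. */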
static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
						int js,
						struct kbase_jd_atom *katom)
{
	/* The most recently checked affinity. Having this at this scope allows
	 * us to guarantee that we've checked the affinity in this function
	 * call. */
	u64 recently_chosen_affinity = 0;
	bool chosen_affinity = false;
	bool retry;

	do {
		retry = false;

		/* NOTE: The following uses a number of FALLTHROUGHs to optimize
		 * the calls to this function. Ending of the function is
		 * indicated by BREAK OUT */
		switch (katom->coreref_state) {
			/* State when job is first attempted to be run */
		case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
			KBASE_DEBUG_ASSERT(katom->affinity == 0);

			/* Compute affinity */
			if (false == kbase_js_choose_affinity(
					&recently_chosen_affinity, kbdev, katom,
					js)) {
				/* No cores are currently available */
				/* *** BREAK OUT: No state transition *** */
				break;
			}

			chosen_affinity = true;

			/* Request the cores */
			kbase_pm_request_cores(kbdev,
					katom->core_req & BASE_JD_REQ_T,
					recently_chosen_affinity);

			katom->affinity = recently_chosen_affinity;

			/* Proceed to next state */
			katom->coreref_state =
			KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;

			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */

		case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
		{
			enum kbase_pm_cores_ready cores_ready;

			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
					(katom->core_req & BASE_JD_REQ_T));

			cores_ready = kbase_pm_register_inuse_cores(kbdev,
					katom->core_req & BASE_JD_REQ_T,
					katom->affinity);
			if (cores_ready == KBASE_NEW_AFFINITY) {
				/* Affinity no longer valid - return to
				 * previous state */
				kbasep_js_job_check_deref_cores(kbdev,
								katom);
				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
					JS_CORE_REF_REGISTER_INUSE_FAILED,
						katom->kctx, katom,
						katom->jc, js,
						(u32) katom->affinity);
				/* *** BREAK OUT: Return to previous
				 * state, retry *** */
				retry = true;
				break;
			}
			if (cores_ready == KBASE_CORES_NOT_READY) {
				/* Stay in this state and return, to
				 * retry at this state later */
				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
					JS_CORE_REF_REGISTER_INUSE_FAILED,
						katom->kctx, katom,
						katom->jc, js,
						(u32) katom->affinity);
				/* *** BREAK OUT: No state transition
				 * *** */
				break;
			}
			/* Proceed to next state */
			katom->coreref_state =
				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
		}

			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */

		case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
					(katom->core_req & BASE_JD_REQ_T));

			/* Optimize out choosing the affinity twice in the same
			 * function call */
			if (chosen_affinity == false) {
				/* See if the affinity changed since a previous
				 * call. */
				if (false == kbase_js_choose_affinity(
						&recently_chosen_affinity,
						kbdev, katom, js)) {
					/* No cores are currently available */
					kbasep_js_job_check_deref_cores(kbdev,
								katom);
					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
					JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
						katom->kctx, katom,
						katom->jc, js,
						(u32) recently_chosen_affinity);
					/* *** BREAK OUT: Transition to lower
					 * state *** */
					break;
				}
				chosen_affinity = true;
			}

			/* Now see if this requires a different set of cores */
			if (recently_chosen_affinity != katom->affinity) {
				enum kbase_pm_cores_ready cores_ready;

				kbase_pm_request_cores(kbdev,
						katom->core_req & BASE_JD_REQ_T,
						recently_chosen_affinity);

				/* Register new cores whilst we still hold the
				 * old ones, to minimize power transitions */
				cores_ready =
					kbase_pm_register_inuse_cores(kbdev,
						katom->core_req & BASE_JD_REQ_T,
						recently_chosen_affinity);
				kbasep_js_job_check_deref_cores(kbdev, katom);

				/* Fixup the state that was reduced by
				 * deref_cores: */
				katom->coreref_state =
					KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
				katom->affinity = recently_chosen_affinity;
				if (cores_ready == KBASE_NEW_AFFINITY) {
					/* Affinity no longer valid - return to
					 * previous state */
					katom->coreref_state =
					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;

					kbasep_js_job_check_deref_cores(kbdev,
								katom);

					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
					JS_CORE_REF_REGISTER_INUSE_FAILED,
						katom->kctx, katom,
						katom->jc, js,
						(u32) katom->affinity);
					/* *** BREAK OUT: Return to previous
					 * state, retry *** */
					retry = true;
					break;
				}
				/* Now might be waiting for powerup again, with
				 * a new affinity */
				if (cores_ready == KBASE_CORES_NOT_READY) {
					/* Return to previous state */
					katom->coreref_state =
					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
					JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
						katom->kctx, katom,
						katom->jc, js,
						(u32) katom->affinity);
					/* *** BREAK OUT: Transition to lower
					 * state *** */
					break;
				}
			}
			/* Proceed to next state */
			katom->coreref_state =
			KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;

			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
		case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
					(katom->core_req & BASE_JD_REQ_T));
			KBASE_DEBUG_ASSERT(katom->affinity ==
						recently_chosen_affinity);

			/* Note: this is where the caller must've taken the
			 * runpool_irq.lock */

			/* Check for affinity violations - if there are any,
			 * then we just ask the caller to requeue and try again
			 * later */
			if (kbase_js_affinity_would_violate(kbdev, js,
					katom->affinity) != false) {
				/* Return to previous state */
				katom->coreref_state =
					KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
				/* *** BREAK OUT: Transition to lower state ***
				 */
				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
					JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
					katom->kctx, katom, katom->jc, js,
					(u32) katom->affinity);
				break;
			}

			/* No affinity violations would result, so the cores
			 * are ready */
			katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
			/* *** BREAK OUT: Cores Ready *** */
			break;

		default:
			KBASE_DEBUG_ASSERT_MSG(false,
					"Unhandled kbase_atom_coreref_state %d",
					katom->coreref_state);
			break;
		}
	} while (retry != false);

	return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
}
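/* Counterpart to kbasep_js_job_check_ref_cores(): releases whatever core
 * references the atom holds, according to how far the state machine above
 * progressed, then resets the atom to NO_CORES_REQUESTED with no affinity. */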
static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
						struct kbase_jd_atom *katom)
{
	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(katom != NULL);

	switch (katom->coreref_state) {
	case KBASE_ATOM_COREREF_STATE_READY:
		/* State where atom was submitted to the HW - just proceed to
		 * power-down */
		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
					(katom->core_req & BASE_JD_REQ_T));

		/* *** FALLTHROUGH *** */

	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
		/* State where cores were registered */
		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
					(katom->core_req & BASE_JD_REQ_T));
		kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
							katom->affinity);

		break;

	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
		/* State where cores were requested, but not registered */
		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
					(katom->core_req & BASE_JD_REQ_T));
		kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
							katom->affinity);
		break;

	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
		/* Initial state - nothing required */
		KBASE_DEBUG_ASSERT(katom->affinity == 0);
		break;

	default:
		KBASE_DEBUG_ASSERT_MSG(false,
						"Unhandled coreref_state: %d",
						katom->coreref_state);
		break;
	}

	katom->affinity = 0;
	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
}
static void kbase_gpu_release_atom(struct kbase_device *kbdev,
					struct kbase_jd_atom *katom)
{
	switch (katom->gpu_rb_state) {
	case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
		/* Should be impossible */
		WARN(1, "Attempting to release atom not in ringbuffer\n");
		break;

	case KBASE_ATOM_GPU_RB_SUBMITTED:
		/* Inform power management at start/finish of atom
		 * so it can update its GPU utilisation metrics. */
		kbase_pm_metrics_release_atom(kbdev, katom);

		if (katom->core_req & BASE_JD_REQ_PERMON)
			kbase_pm_release_gpu_cycle_counter(kbdev);
		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */

	case KBASE_ATOM_GPU_RB_READY:
		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */

	case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE:
		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */

	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
		kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
							katom->affinity);
		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */

	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
		kbasep_js_job_check_deref_cores(kbdev, katom);
		break;

	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */

	case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
		break;
	}

	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
}
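/* NOTE: the cases above run from most- to least-advanced state, so an atom
 * falls through exactly the teardown steps appropriate to how far it got:
 * e.g. a SUBMITTED atom releases its PM metrics and cycle counter reference,
 * then its slot affinity cores, then its core refcounts. */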
static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
						struct kbase_jd_atom *katom)
{
	kbase_gpu_release_atom(kbdev, katom);
	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
}
static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
{
	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
	bool slot_busy[3];

	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
		return true;
	slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
	slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
	slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);

	if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
		(js != 2 && !slot_busy[2]))
		return true;

	/* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
	if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
			kbase_gpu_atoms_submitted(kbdev, 1) ||
			backend->rmu_workaround_flag))
		return false;

	/* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
	if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
			!backend->rmu_workaround_flag))
		return false;

	backend->rmu_workaround_flag = !backend->rmu_workaround_flag;

	return true;
}
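/* NOTE on the flag above: when both sides (slot 2 vs slots 0/1) have pending
 * work, rmu_workaround_flag alternates on each successful submission, so the
 * two sides take turns and neither can starve the other while the PRLAM-8987
 * restriction (never mix slot-2 atoms with slot-0/1 atoms on the GPU) is
 * enforced. */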
static bool kbase_gpu_in_secure_mode(struct kbase_device *kbdev)
{
	return kbdev->js_data.runpool_irq.secure_mode;
}
static int kbase_gpu_secure_mode_enable(struct kbase_device *kbdev)
{
	int err = -EINVAL;

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	WARN_ONCE(!kbdev->secure_ops,
		"Cannot enable secure mode: secure callbacks not specified.\n");

	if (kbdev->secure_ops) {
		/* Switch GPU to secure mode */
		err = kbdev->secure_ops->secure_mode_enable(kbdev);

		if (err)
			dev_warn(kbdev->dev, "Failed to enable secure mode: %d\n", err);
		else
			kbdev->js_data.runpool_irq.secure_mode = true;
	}

	return err;
}
static int kbase_gpu_secure_mode_disable(struct kbase_device *kbdev)
{
	int err = -EINVAL;

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	WARN_ONCE(!kbdev->secure_ops,
		"Cannot disable secure mode: secure callbacks not specified.\n");

	if (kbdev->secure_ops) {
		/* Switch GPU to non-secure mode */
		err = kbdev->secure_ops->secure_mode_disable(kbdev);

		if (err)
			dev_warn(kbdev->dev, "Failed to disable secure mode: %d\n", err);
		else
			kbdev->js_data.runpool_irq.secure_mode = false;
	}

	return err;
}
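/* The core per-slot state machine: each atom in a slot's ringbuffer advances
 * WAITING_BLOCKED -> WAITING_FOR_CORE_AVAILABLE -> WAITING_AFFINITY ->
 * WAITING_SECURE_MODE -> READY -> SUBMITTED, falling through as many states
 * as possible in a single pass; atoms in RETURN_TO_JS are instead handed back
 * to the job scheduler once ordering allows. */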
void kbase_gpu_slot_update(struct kbase_device *kbdev)
{
	int js;

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
		struct kbase_jd_atom *katom[2];
		int idx;

		katom[0] = kbase_gpu_inspect(kbdev, js, 0);
		katom[1] = kbase_gpu_inspect(kbdev, js, 1);
		WARN_ON(katom[1] && !katom[0]);

		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
			bool cores_ready;

			if (!katom[idx])
				continue;

			switch (katom[idx]->gpu_rb_state) {
			case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
				/* Should be impossible */
				WARN(1, "Attempting to update atom not in ringbuffer\n");
				break;

			case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
				if (katom[idx]->atom_flags &
						KBASE_KATOM_FLAG_X_DEP_BLOCKED)
					break;

				katom[idx]->gpu_rb_state =
				KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;

			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
			case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
				cores_ready = kbasep_js_job_check_ref_cores(
						kbdev, js, katom[idx]);

				if (katom[idx]->event_code ==
						BASE_JD_EVENT_PM_EVENT) {
					katom[idx]->gpu_rb_state =
						KBASE_ATOM_GPU_RB_RETURN_TO_JS;
					break;
				}

				if (!cores_ready)
					break;

				kbase_js_affinity_retain_slot_cores(kbdev, js,
							katom[idx]->affinity);
				katom[idx]->gpu_rb_state =
					KBASE_ATOM_GPU_RB_WAITING_AFFINITY;

			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */

			case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
				if (!kbase_gpu_rmu_workaround(kbdev, js))
					break;

				katom[idx]->gpu_rb_state =
					KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE;

			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */

			case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE:
				if (kbase_gpu_in_secure_mode(kbdev) !=
					kbase_jd_katom_is_secure(katom[idx])) {
					int err = 0;

					/* Not in correct mode, take action */
					if (kbase_gpu_atoms_submitted_any(kbdev)) {
						/*
						 * We are not in the correct
						 * GPU mode for this job, and
						 * we can't switch now because
						 * there are jobs already
						 * running.
						 */
						break;
					}

					/* No jobs running, so we can switch
					 * GPU mode right now */
					if (kbase_jd_katom_is_secure(katom[idx]))
						err = kbase_gpu_secure_mode_enable(kbdev);
					else
						err = kbase_gpu_secure_mode_disable(kbdev);

					if (err) {
						/* Failed to switch secure
						 * mode, fail atom */
						katom[idx]->event_code =
							BASE_JD_EVENT_JOB_INVALID;
						kbase_gpu_mark_atom_for_return(
								kbdev,
								katom[idx]);
						break;
					}
				}

				/* Secure mode sanity checks */
				KBASE_DEBUG_ASSERT_MSG(
					kbase_jd_katom_is_secure(katom[idx]) ==
						kbase_gpu_in_secure_mode(kbdev),
					"Secure mode of atom (%d) doesn't match secure mode of GPU (%d)",
					kbase_jd_katom_is_secure(katom[idx]),
					kbase_gpu_in_secure_mode(kbdev));
				KBASE_DEBUG_ASSERT_MSG(
					(kbase_jd_katom_is_secure(katom[idx]) &&
						js == 0) ||
					!kbase_jd_katom_is_secure(katom[idx]),
					"Secure atom on JS%d not supported", js);

				katom[idx]->gpu_rb_state =
					KBASE_ATOM_GPU_RB_READY;

			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */

			case KBASE_ATOM_GPU_RB_READY:
				/* Only submit if head atom or previous atom
				 * already submitted */
				if (idx == 1 &&
					(katom[0]->gpu_rb_state !=
						KBASE_ATOM_GPU_RB_SUBMITTED &&
					katom[0]->gpu_rb_state !=
					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
					break;

				/* Check if this job needs the cycle counter
				 * enabled before submission */
				if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
					kbase_pm_request_gpu_cycle_counter_l2_is_on(
									kbdev);

				/* Inform power management at start/finish of
				 * atom so it can update its GPU utilisation
				 * metrics. */
				kbase_pm_metrics_run_atom(kbdev, katom[idx]);

				kbase_job_hw_submit(kbdev, katom[idx], js);
				katom[idx]->gpu_rb_state =
						KBASE_ATOM_GPU_RB_SUBMITTED;

			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */

			case KBASE_ATOM_GPU_RB_SUBMITTED:
				/* Atom submitted to HW, nothing else to do */
				break;

			case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
				/* Only return if head atom or previous atom
				 * already removed - as atoms must be returned
				 * in order */
				if (idx == 0 || katom[0]->gpu_rb_state ==
					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
					kbase_gpu_dequeue_atom(kbdev, js);
					kbase_jm_return_atom_to_js(kbdev,
								katom[idx]);
				}
				break;
			}
		}
	}

	/* Warn if PRLAM-8987 affinity restrictions are violated */
	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
		WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
			kbase_gpu_atoms_submitted(kbdev, 1)) &&
			kbase_gpu_atoms_submitted(kbdev, 2));
}
void kbase_backend_run_atom(struct kbase_device *kbdev,
				struct kbase_jd_atom *katom)
{
	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	kbase_gpu_enqueue_atom(kbdev, katom);
	kbase_gpu_slot_update(kbdev);
}
bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
{
	struct kbase_jd_atom *katom;
	struct kbase_jd_atom *next_katom;

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	katom = kbase_gpu_inspect(kbdev, js, 0);
	next_katom = kbase_gpu_inspect(kbdev, js, 1);

	if (next_katom && katom->kctx == next_katom->kctx &&
		next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
									!= 0 ||
		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
									!= 0)) {
		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
				JS_COMMAND_NOP, NULL);
		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
		return true;
	}

	return false;
}
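/* Completion handling: on BASE_JD_EVENT_STOPPED the head atom (and a
 * same-context follower already evicted from the NEXT registers) is returned
 * to the job scheduler; on any other failure code, submission from the
 * context is disallowed and its not-yet-submitted atoms are removed from all
 * slot ringbuffers so they cannot overtake the failed atom. */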
void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
				u32 completion_code,
				u64 job_tail,
				ktime_t *end_timestamp)
{
	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
	struct kbase_context *kctx = katom->kctx;

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) &&
			completion_code != BASE_JD_EVENT_DONE &&
			!(completion_code & BASE_JD_SW_EVENT)) {
		katom->need_cache_flush_cores_retained = katom->affinity;
		kbase_pm_request_cores(kbdev, false, katom->affinity);
	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
		if (kbdev->gpu_props.num_core_groups > 1 &&
			!(katom->affinity &
			kbdev->gpu_props.props.coherency_info.group[0].core_mask
									) &&
			(katom->affinity &
			kbdev->gpu_props.props.coherency_info.group[1].core_mask
									)) {
			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
			katom->need_cache_flush_cores_retained =
							katom->affinity;
			kbase_pm_request_cores(kbdev, false,
							katom->affinity);
		}
	}

	katom = kbase_gpu_dequeue_atom(kbdev, js);

	kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);

	if (completion_code == BASE_JD_EVENT_STOPPED) {
		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
									0);

		/*
		 * Dequeue next atom from ringbuffers on same slot if required.
		 * This atom will already have been removed from the NEXT
		 * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
		 * the atoms on this slot are returned in the correct order.
		 */
		if (next_katom && katom->kctx == next_katom->kctx) {
			kbase_gpu_dequeue_atom(kbdev, js);
			kbase_jm_return_atom_to_js(kbdev, next_katom);
		}
	} else if (completion_code != BASE_JD_EVENT_DONE) {
		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
		int i;

#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
		KBASE_TRACE_DUMP(kbdev);
#endif
		kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);

		/*
		 * Remove all atoms on the same context from ringbuffers. This
		 * will not remove atoms that are already on the GPU, as these
		 * are guaranteed not to have fail dependencies on the failed
		 * atom.
		 */
		for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
			struct kbase_jd_atom *katom_idx0 =
						kbase_gpu_inspect(kbdev, i, 0);
			struct kbase_jd_atom *katom_idx1 =
						kbase_gpu_inspect(kbdev, i, 1);

			if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
				katom_idx0->gpu_rb_state !=
						KBASE_ATOM_GPU_RB_SUBMITTED) {
				/* Dequeue katom_idx0 from ringbuffer */
				kbase_gpu_dequeue_atom(kbdev, i);

				if (katom_idx1 &&
					katom_idx1->kctx == katom->kctx &&
					katom_idx1->gpu_rb_state !=
						KBASE_ATOM_GPU_RB_SUBMITTED) {
					/* Dequeue katom_idx1 from ringbuffer */
					kbase_gpu_dequeue_atom(kbdev, i);

					katom_idx1->event_code =
							BASE_JD_EVENT_STOPPED;
					kbase_jm_return_atom_to_js(kbdev,
								katom_idx1);
				}
				katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
				kbase_jm_return_atom_to_js(kbdev, katom_idx0);

			} else if (katom_idx1 &&
					katom_idx1->kctx == katom->kctx &&
					katom_idx1->gpu_rb_state !=
						KBASE_ATOM_GPU_RB_SUBMITTED) {
				/* Can not dequeue this atom yet - will be
				 * dequeued when atom at idx0 completes */
				katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
				kbase_gpu_mark_atom_for_return(kbdev,
								katom_idx1);
			}
		}
	}

	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
					js, completion_code);

	if (job_tail != 0 && job_tail != katom->jc) {
		bool was_updated = (job_tail != katom->jc);

		/* Some of the job has been executed, so we update the job chain
		 * address to where we should resume from */
		katom->jc = job_tail;
		if (was_updated)
			KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
						katom, job_tail, js);
	}

	/* Only update the event code for jobs that weren't cancelled */
	if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
		katom->event_code = (base_jd_event_code)completion_code;

	kbase_device_trace_register_access(kctx, REG_WRITE,
					JOB_CONTROL_REG(JOB_IRQ_CLEAR),
					1 << js);

	/* Complete the job, and start new ones
	 *
	 * Also defer remaining work onto the workqueue:
	 * - Re-queue Soft-stopped jobs
	 * - For any other jobs, queue the job back into the dependency system
	 * - Schedule out the parent context if necessary, and schedule a new
	 *   one in.
	 */
#ifdef CONFIG_GPU_TRACEPOINTS
	{
		/* The atom in the HEAD */
		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
									0);

		if (next_katom && next_katom->gpu_rb_state ==
						KBASE_ATOM_GPU_RB_SUBMITTED) {
			char js_string[16];

			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
							js_string),
						ktime_to_ns(*end_timestamp),
						(u32)next_katom->kctx, 0,
						next_katom->work_id);
			kbdev->hwaccess.backend.slot_rb[js].last_context =
							next_katom->kctx;
		} else {
			char js_string[16];

			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
							js_string),
						ktime_to_ns(ktime_get()), 0, 0,
						0);
			kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
		}
	}
#endif

	if (completion_code == BASE_JD_EVENT_STOPPED)
		kbase_jm_return_atom_to_js(kbdev, katom);
	else
		kbase_jm_complete(kbdev, katom, end_timestamp);

	/* Job completion may have unblocked other atoms. Try to update all job
	 * slots */
	kbase_gpu_slot_update(kbdev);
}
void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
{
	int js;

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
		int idx;

		for (idx = 0; idx < 2; idx++) {
			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
									js, 0);

			if (katom) {
				enum kbase_atom_gpu_rb_state gpu_rb_state =
							katom->gpu_rb_state;

				kbase_gpu_release_atom(kbdev, katom);
				kbase_gpu_dequeue_atom(kbdev, js);

				if (gpu_rb_state ==
						KBASE_ATOM_GPU_RB_SUBMITTED) {
					katom->event_code =
						BASE_JD_EVENT_JOB_CANCELLED;
					kbase_jm_complete(kbdev, katom,
								end_timestamp);
				} else {
					katom->event_code =
						BASE_JD_EVENT_STOPPED;
					kbase_jm_return_atom_to_js(kbdev,
									katom);
				}
			}
		}
	}
}
static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
					int js,
					struct kbase_jd_atom *katom,
					u32 action)
{
	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
	u32 hw_action = action & JS_COMMAND_MASK;

	kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
	kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
							katom->core_req, katom);
	kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
}
static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
						struct kbase_jd_atom *katom,
						u32 action,
						bool disjoint)
{
	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;

	katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
	kbase_gpu_mark_atom_for_return(kbdev, katom);
	kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);

	if (disjoint)
		kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
									katom);
}
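/* If the atom being stopped has a cross-slot dependent atom that is already
 * in (or on its way through) another slot's ringbuffer, that slot must be
 * stopped as well; the helper below reports its slot number, or -1 if there
 * is nothing to stop. */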
static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
{
	if (katom->x_post_dep) {
		struct kbase_jd_atom *dep_atom = katom->x_post_dep;

		if (dep_atom->gpu_rb_state !=
					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
			dep_atom->gpu_rb_state !=
					KBASE_ATOM_GPU_RB_RETURN_TO_JS)
			return dep_atom->slot_nr;
	}
	return -1;
}
bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
					struct kbase_context *kctx,
					int js,
					struct kbase_jd_atom *katom,
					u32 action)
{
	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;

	struct kbase_jd_atom *katom_idx0;
	struct kbase_jd_atom *katom_idx1;

	bool katom_idx0_valid, katom_idx1_valid;

	bool ret = false;

	int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;

	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
	katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);

	if (katom) {
		katom_idx0_valid = (katom_idx0 == katom);
		/* If idx0 is to be removed and idx1 is on the same context,
		 * then idx1 must also be removed otherwise the atoms might be
		 * returned out of order */
		if (katom_idx1)
			katom_idx1_valid = (katom_idx1 == katom) ||
						(katom_idx0_valid &&
							(katom_idx0->kctx ==
							katom_idx1->kctx));
		else
			katom_idx1_valid = false;
	} else {
		katom_idx0_valid = (katom_idx0 &&
					(!kctx || katom_idx0->kctx == kctx));
		katom_idx1_valid = (katom_idx1 &&
					(!kctx || katom_idx1->kctx == kctx));
	}

	if (katom_idx0_valid)
		stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
	if (katom_idx1_valid)
		stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);

	if (katom_idx0_valid) {
		if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
			/* Simple case - just dequeue and return */
			kbase_gpu_dequeue_atom(kbdev, js);
			if (katom_idx1_valid) {
				kbase_gpu_dequeue_atom(kbdev, js);
				katom_idx1->event_code =
						BASE_JD_EVENT_REMOVED_FROM_NEXT;
				kbase_jm_return_atom_to_js(kbdev, katom_idx1);
				kbasep_js_clear_submit_allowed(js_devdata,
							katom_idx1->kctx);
			}

			katom_idx0->event_code =
						BASE_JD_EVENT_REMOVED_FROM_NEXT;
			kbase_jm_return_atom_to_js(kbdev, katom_idx0);
			kbasep_js_clear_submit_allowed(js_devdata,
							katom_idx0->kctx);
		} else {
			/* katom_idx0 is on GPU */
			if (katom_idx1 && katom_idx1->gpu_rb_state ==
						KBASE_ATOM_GPU_RB_SUBMITTED) {
				/* katom_idx0 and katom_idx1 are on GPU */

				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
						JS_COMMAND_NEXT), NULL) == 0) {
					/* idx0 has already completed - stop
					 * idx1 if needed */
					if (katom_idx1_valid) {
						kbase_gpu_stop_atom(kbdev, js,
								katom_idx1,
								action);
						ret = true;
					}
				} else {
					/* idx1 is in NEXT registers - attempt
					 * to remove */
					kbase_reg_write(kbdev,
							JOB_SLOT_REG(js,
							JS_COMMAND_NEXT),
							JS_COMMAND_NOP, NULL);

					if (kbase_reg_read(kbdev,
							JOB_SLOT_REG(js,
							JS_HEAD_NEXT_LO), NULL)
									!= 0 ||
					    kbase_reg_read(kbdev,
							JOB_SLOT_REG(js,
							JS_HEAD_NEXT_HI), NULL)
									!= 0) {
						/* idx1 removed successfully,
						 * will be handled in IRQ */
						kbase_gpu_remove_atom(kbdev,
								katom_idx1,
								action, true);
						stop_x_dep_idx1 =
					should_stop_x_dep_slot(katom_idx1);

						/* stop idx0 if still on GPU */
						kbase_gpu_stop_atom(kbdev, js,
								katom_idx0,
								action);
						ret = true;
					} else if (katom_idx1_valid) {
						/* idx0 has already completed,
						 * stop idx1 if needed */
						kbase_gpu_stop_atom(kbdev, js,
								katom_idx1,
								action);
						ret = true;
					}
				}
			} else if (katom_idx1_valid) {
				/* idx1 not on GPU but must be dequeued */

				/* idx1 will be handled in IRQ */
				kbase_gpu_remove_atom(kbdev, katom_idx1, action,
									false);
				/* stop idx0 */
				/* This will be repeated for anything removed
				 * from the next registers, since their normal
				 * flow was also interrupted, and this function
				 * might not enter disjoint state e.g. if we
				 * don't actually do a hard stop on the head
				 * atom */
				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
									action);
				ret = true;
			} else {
				/* no atom in idx1 */
				/* just stop idx0 */
				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
									action);
				ret = true;
			}
		}
	} else if (katom_idx1_valid) {
		if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
			/* Mark for return */
			/* idx1 will be returned once idx0 completes */
			kbase_gpu_remove_atom(kbdev, katom_idx1, action,
									false);
		} else {
			/* idx1 is on GPU */
			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
					JS_COMMAND_NEXT), NULL) == 0) {
				/* idx0 has already completed - stop idx1 */
				kbase_gpu_stop_atom(kbdev, js, katom_idx1,
									action);
				ret = true;
			} else {
				/* idx1 is in NEXT registers - attempt to
				 * remove */
				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
						JS_COMMAND_NEXT),
						JS_COMMAND_NOP, NULL);

				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
						JS_HEAD_NEXT_LO), NULL) != 0 ||
				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
						JS_HEAD_NEXT_HI), NULL) != 0) {
					/* idx1 removed successfully, will be
					 * handled in IRQ once idx0 completes */
					kbase_gpu_remove_atom(kbdev, katom_idx1,
									action,
									false);
				} else {
					/* idx0 has already completed - stop
					 * idx1 */
					kbase_gpu_stop_atom(kbdev, js,
								katom_idx1,
								action);
					ret = true;
				}
			}
		}
	}

	if (stop_x_dep_idx0 != -1)
		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
								NULL, action);

	if (stop_x_dep_idx1 != -1)
		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
								NULL, action);

	return ret;
}
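/* The cache clean below services the BASE_HW_ISSUE_6787/10676 paths in
 * kbase_gpu_complete_hw(): those paths retained the atom's cores via
 * kbase_pm_request_cores() so that they stay powered until the clean and
 * invalidate has completed here on the workqueue. */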
static void kbasep_gpu_cacheclean(struct kbase_device *kbdev,
					struct kbase_jd_atom *katom)
{
	/* Limit the number of loops to avoid a hang if the interrupt is missed
	 */
	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;

	mutex_lock(&kbdev->cacheclean_lock);

	/* use GPU_COMMAND completion solution */
	/* clean & invalidate the caches */
	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
					GPU_COMMAND_CLEAN_INV_CACHES, NULL);

	/* wait for cache flush to complete before continuing */
	while (--max_loops &&
		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
					CLEAN_CACHES_COMPLETED) == 0)
		;

	/* clear the CLEAN_CACHES_COMPLETED irq */
	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
						CLEAN_CACHES_COMPLETED);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
					CLEAN_CACHES_COMPLETED, NULL);
	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
					KBASE_INSTR_STATE_CLEANING,
		"Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");

	mutex_unlock(&kbdev->cacheclean_lock);

	kbase_pm_unrequest_cores(kbdev, false,
				katom->need_cache_flush_cores_retained);
}
void kbase_backend_complete_wq(struct kbase_device *kbdev,
				struct kbase_jd_atom *katom)
{
	/*
	 * If cache flush required due to HW workaround then perform the flush
	 * now
	 */
	if (katom->need_cache_flush_cores_retained) {
		kbasep_gpu_cacheclean(kbdev, katom);
		katom->need_cache_flush_cores_retained = 0;
	}

	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) &&
			(katom->core_req & BASE_JD_REQ_FS) &&
			katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT &&
			(katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
			!(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
			/* The job had a TILE_RANGE_FAULT after it was
			 * soft-stopped. Due to an HW issue we try to execute
			 * the job again.
			 */
			dev_dbg(kbdev->dev,
				"Clamping has been executed, try to rerun the job\n");
			katom->event_code = BASE_JD_EVENT_STOPPED;
			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
		}
	}
}
void kbase_gpu_dump_slots(struct kbase_device *kbdev)
{
	struct kbasep_js_device_data *js_devdata;
	unsigned long flags;
	int js;

	js_devdata = &kbdev->js_data;

	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);

	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");

	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
		int idx;

		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
									js,
									idx);

			if (katom)
				dev_info(kbdev->dev,
					"  js%d idx%d : katom=%p gpu_rb_state=%d\n",
					js, idx, katom, katom->gpu_rb_state);
			else
				dev_info(kbdev->dev, "  js%d idx%d : empty\n",
									js, idx);
		}
	}

	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
}