/*
 *
 * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */
/**
 * @file mali_kbase_defs.h
 *
 * Definitions (types, defines, etc.) common to Kbase. They are placed here to
 * allow the hierarchy of header files to work.
 */
#ifndef _KBASE_DEFS_H_
#define _KBASE_DEFS_H_

#include <mali_kbase_config.h>
#include <mali_base_hwconfig_features.h>
#include <mali_base_hwconfig_issues.h>
#include <mali_kbase_mem_lowlevel.h>
#include <mali_kbase_mmu_hw.h>
#include <mali_kbase_mmu_mode.h>
#include <mali_kbase_instr_defs.h>

#include <linux/atomic.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/file.h>
#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
#include <linux/bus_logger.h>
#endif

#ifdef CONFIG_KDS
#include <linux/kds.h>
#endif /* CONFIG_KDS */

#ifdef CONFIG_SYNC
#include "sync.h"
#endif /* CONFIG_SYNC */
#include "mali_kbase_dma_fence.h"

#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#endif /* CONFIG_DEBUG_FS */

#ifdef CONFIG_PM_DEVFREQ
#include <linux/devfreq.h>
#endif /* CONFIG_PM_DEVFREQ */

#include <linux/clk.h>
#include <linux/regulator/consumer.h>

#if defined(CONFIG_PM)
#define KBASE_PM_RUNTIME 1
#endif /* CONFIG_PM */
/** Enable SW tracing when set */
#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
#define KBASE_TRACE_ENABLE 1
#endif

#ifndef KBASE_TRACE_ENABLE
#ifdef CONFIG_MALI_DEBUG
#define KBASE_TRACE_ENABLE 1
#else
#define KBASE_TRACE_ENABLE 0
#endif /* CONFIG_MALI_DEBUG */
#endif /* KBASE_TRACE_ENABLE */
/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1

/**
 * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
 * Note that the time is actually ZAP_TIMEOUT + SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
 * actually being reset, to give other contexts time for their jobs to be soft-stopped and removed from the hardware
 * before the reset occurs.
 */
#define ZAP_TIMEOUT 1000

/** Number of milliseconds before we time out on a GPU soft/hard reset */
#define RESET_TIMEOUT 500
/**
 * Prevent soft-stops from occurring in scheduling situations
 *
 * This is not due to HW issues, but when scheduling is desired to be more predictable.
 *
 * Therefore, soft stop may still be disabled due to HW issues.
 *
 * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
 *
 * @note if not in use, define this value to 0 instead of \#undef'ing it
 */
#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0

/**
 * Prevent hard-stops from occurring in scheduling situations
 *
 * This is not due to HW issues, but when scheduling is desired to be more predictable.
 *
 * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
 *
 * @note if not in use, define this value to 0 instead of \#undef'ing it
 */
#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
/**
 * The maximum number of Job Slots to support in the Hardware.
 *
 * You can optimize this down if your target devices will only ever support a
 * small number of job slots.
 */
#define BASE_JM_MAX_NR_SLOTS 3

/**
 * The maximum number of Address Spaces to support in the Hardware.
 *
 * You can optimize this down if your target devices will only ever support a
 * small number of Address Spaces.
 */
#define BASE_MAX_NR_AS 16

#define MIDGARD_MMU_VA_BITS 48

#if MIDGARD_MMU_VA_BITS > 39
#define MIDGARD_MMU_TOPLEVEL 0
#else
#define MIDGARD_MMU_TOPLEVEL 1
#endif

#define MIDGARD_MMU_BOTTOMLEVEL 3
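
/*
 * Editor's note (illustrative sketch, not from the original source): with
 * 4 kB pages and 9 translation bits per level, page-table level N resolves
 * VA bits starting at 12 + 9 * (3 - N), so levels 1..3 cover a 39-bit
 * address space, while anything wider needs the full four-level walk from
 * level 0:
 *
 *   int top_level = (MIDGARD_MMU_VA_BITS > 39) ? 0 : 1;
 *   // top_level == MIDGARD_MMU_TOPLEVEL
 */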
#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)

/** setting in kbase_context::as_nr that indicates it's invalid */
#define KBASEP_AS_NR_INVALID (-1)

#define KBASE_LOCK_REGION_MAX_SIZE (63)
#define KBASE_LOCK_REGION_MIN_SIZE (11)

#define KBASE_TRACE_SIZE_LOG2 8 /* 256 entries */
#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
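
/*
 * Example (editor's sketch, not from the original source): because
 * KBASE_TRACE_SIZE is a power of two, trace ring-buffer indices can wrap
 * with a cheap bitwise AND instead of a modulo:
 *
 *   u16 next_in = (trace_next_in + 1) & KBASE_TRACE_MASK;
 *   // equivalent to (trace_next_in + 1) % KBASE_TRACE_SIZE
 */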
#include "mali_kbase_js_defs.h"
#include "mali_kbase_hwaccess_defs.h"

#define KBASEP_FORCE_REPLAY_DISABLED 0

/* Maximum force replay limit when randomization is enabled */
#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
/** Atom has been previously soft-stopped */
#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
/** Atom has been previously retried to execute */
#define KBASE_KATOM_FLAGS_RERUN (1<<2)
#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
/** Atom has been previously hard-stopped. */
#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
/** Atom has caused us to enter disjoint state */
#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
/* Atom blocked on cross-slot dependency */
#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
/* Atom has fail dependency on cross-slot dependency */
#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
/* Atom is currently holding a context reference */
#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
/* Atom requires GPU to be in protected mode */
#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
/* Atom has been stored in runnable_tree */
#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
/* SW related flags about types of JS_COMMAND action
 * NOTE: These must be masked off by JS_COMMAND_MASK */

/** This command causes a disjoint event */
#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100

/** Bitmask of all SW related flags */
#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT)

#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
#endif

/** Soft-stop command that causes a Disjoint event. This of course isn't
 * entirely masked off by JS_COMMAND_MASK */
#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
		(JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
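
/*
 * Example (editor's sketch, not from the original source): the SW bits only
 * drive driver-side bookkeeping, so a command must be masked before it is
 * written to the hardware JS_COMMAND register:
 *
 *   u32 hw_cmd = cmd & JS_COMMAND_MASK;
 *   if (cmd & JS_COMMAND_SW_CAUSES_DISJOINT)
 *           kbase_disjoint_event(kbdev);  // helper name as recalled from the
 *                                         // driver; verify before relying on it
 */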
#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
#ifdef CONFIG_DEBUG_FS
struct base_job_fault_event {
	struct kbase_jd_atom *katom;
	struct work_struct job_fault_work;
	struct list_head head;
};
#endif /* CONFIG_DEBUG_FS */
struct kbase_jd_atom_dependency {
	struct kbase_jd_atom *atom;
	u8 dep_type;
};
/**
 * @brief The function retrieves a read-only reference to the atom field from
 * the kbase_jd_atom_dependency structure
 *
 * @param[in] dep kbase jd atom dependency.
 *
 * @return readonly reference to dependent ATOM.
 */
static inline const struct kbase_jd_atom *kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
{
	LOCAL_ASSERT(dep != NULL);

	return (const struct kbase_jd_atom *)(dep->atom);
}
/**
 * @brief The function retrieves a read-only reference to the dependency type field from
 * the kbase_jd_atom_dependency structure
 *
 * @param[in] dep kbase jd atom dependency.
 *
 * @return A dependency type value.
 */
static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
{
	LOCAL_ASSERT(dep != NULL);

	return dep->dep_type;
}
/**
 * @brief Setter for a dep_atom array entry in kbase_jd_atom
 *
 * @param[in] const_dep The kbase jd atom dependency.
 * @param[in] a The ATOM to be set as a dependency.
 * @param type The ATOM dependency type to be set.
 */
static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
		struct kbase_jd_atom *a, u8 type)
{
	struct kbase_jd_atom_dependency *dep;

	LOCAL_ASSERT(const_dep != NULL);

	dep = (struct kbase_jd_atom_dependency *)const_dep;

	dep->atom = a;
	dep->dep_type = type;
}
/**
 * @brief Clears a dep_atom array entry in kbase_jd_atom
 *
 * @param[in] const_dep The kbase jd atom dependency to be cleared.
 */
static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
{
	struct kbase_jd_atom_dependency *dep;

	LOCAL_ASSERT(const_dep != NULL);

	dep = (struct kbase_jd_atom_dependency *)const_dep;

	dep->atom = NULL;
	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
}
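
/*
 * Example (editor's sketch, not from the original source): wiring up and
 * tearing down a dependency slot; "katom" and "dep_atom" are hypothetical
 * locals, and BASE_JD_DEP_TYPE_DATA is one of the base dependency types:
 *
 *   kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
 *   ...
 *   if (kbase_jd_katom_dep_atom(&katom->dep[0]) == dep_atom)
 *           kbase_jd_katom_dep_clear(&katom->dep[0]);
 */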
enum kbase_atom_gpu_rb_state {
	/* Atom is not currently present in slot ringbuffer */
	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
	/* Atom is in slot ringbuffer but is blocked on a previous atom */
	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
	/* Atom is in slot ringbuffer but is waiting for protected mode exit */
	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT,
	/* Atom is in slot ringbuffer but is waiting for cores to become
	 * available */
	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
	/* Atom is in slot ringbuffer but is blocked on affinity */
	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
	/* Atom is in slot ringbuffer but is waiting for protected mode entry */
	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY,
	/* Atom is in slot ringbuffer and ready to run */
	KBASE_ATOM_GPU_RB_READY,
	/* Atom is in slot ringbuffer and has been submitted to the GPU */
	KBASE_ATOM_GPU_RB_SUBMITTED,
	/* Atom must be returned to JS as soon as it reaches the head of the
	 * ringbuffer due to a previous failure */
	KBASE_ATOM_GPU_RB_RETURN_TO_JS
};
enum kbase_atom_exit_protected_state {
	/*
	 * Check if a transition out of protected mode is required.
	 */
	KBASE_ATOM_EXIT_PROTECTED_CHECK,
	/* Wait for the L2 to become idle in preparation for the reset. */
	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
	/* Issue the protected reset. */
	KBASE_ATOM_EXIT_PROTECTED_RESET,
	/*
	 * Wait for the reset to complete.
	 */
	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
};
struct kbase_ext_res {
	u64 gpu_address;
	struct kbase_mem_phy_alloc *alloc;
};
struct kbase_jd_atom {
	struct work_struct work;
	ktime_t start_timestamp;
	u64 time_spent_us; /**< Total time spent on the GPU in microseconds */

	struct base_jd_udata udata;
	struct kbase_context *kctx;

	struct list_head dep_head[2];
	struct list_head dep_item[2];
	const struct kbase_jd_atom_dependency dep[2];
	/* List head used during job dispatch job_done processing - as
	 * dependencies may not be entirely resolved at this point, we need to
	 * use a separate list head. */
	struct list_head jd_item;
	/* true if atom's jd_item is currently on a list. Prevents atom being
	 * processed twice. */
	bool in_jd_list;

	struct kbase_ext_res *extres;

	enum kbase_atom_coreref_state coreref_state;
#ifdef CONFIG_KDS
	struct list_head node;
	struct kds_resource_set *kds_rset;
	bool kds_dep_satisfied;
#endif /* CONFIG_KDS */
#ifdef CONFIG_SYNC
	struct sync_fence *fence;
	struct sync_fence_waiter sync_waiter;
#endif /* CONFIG_SYNC */
#ifdef CONFIG_MALI_DMA_FENCE
	struct {
		/* This points to the dma-buf fence for this atom. If this is
		 * NULL then there is no fence for this atom and the other
		 * fields related to dma_fence may have invalid data.
		 *
		 * The context and seqno fields contain the details for this
		 * fence.
		 *
		 * This fence is signaled when the katom is completed,
		 * regardless of the event_code of the katom (signal also on
		 * failure).
		 */
		struct fence *fence;
		/* The dma-buf fence context number for this atom. A unique
		 * context number is allocated to each katom in the context on
		 * context creation.
		 */
		unsigned int context;
		/* The dma-buf fence sequence number for this atom. This is
		 * increased every time this katom uses dma-buf fence.
		 */
		atomic_t seqno;
		/* This contains a list of all callbacks set up to wait on
		 * other fences. This atom must be held back from JS until all
		 * these callbacks have been called and dep_count has reached
		 * 0. The initial value of dep_count must be equal to the
		 * number of callbacks on this list.
		 *
		 * This list is protected by jctx.lock. Callbacks are added to
		 * this list when the atom is built and the waits are set up.
		 * All the callbacks then stay on the list until all callbacks
		 * have been called and the atom is queued, or cancelled, and
		 * then all callbacks are taken off the list and freed.
		 */
		struct list_head callbacks;
		/* Atomic counter of number of outstanding dma-buf fence
		 * dependencies for this atom. When dep_count reaches 0 the
		 * atom may be queued.
		 *
		 * The special value "-1" may only be set after the count
		 * reaches 0, while holding jctx.lock. This indicates that the
		 * atom has been handled, either queued in JS or cancelled.
		 *
		 * If anyone but the dma-fence worker sets this to -1 they must
		 * ensure that any potentially queued worker must have
		 * completed before allowing the atom to be marked as unused.
		 * This can be done by flushing the fence work queue:
		 * kctx->dma_fence.wq.
		 */
		atomic_t dep_count;
	} dma_fence;
#endif /* CONFIG_MALI_DMA_FENCE */
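
	/*
	 * Example (editor's sketch of the dep_count protocol described above,
	 * not driver code): each fence callback decrements dep_count, and
	 * whichever callback sees it reach zero may queue the atom;
	 * "kbase_queue_atom" is a hypothetical helper standing in for the
	 * real queueing path:
	 *
	 *   if (atomic_dec_and_test(&katom->dma_fence.dep_count))
	 *           kbase_queue_atom(katom);
	 */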
	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
	enum base_jd_event_code event_code;
	base_jd_core_req core_req; /**< core requirements */
	/** Job Slot to retry submitting to if submission from IRQ handler failed
	 *
	 * NOTE: see if this can be unified into another member, e.g. the event */
	int retry_submit_on_slot;

	union kbasep_js_policy_job_info sched_info;
	/* JS atom priority with respect to other atoms on its kctx. */
	int sched_priority;

	int poking; /* BASE_HW_ISSUE_8316 */

	wait_queue_head_t completed;
	enum kbase_jd_atom_state status;
#ifdef CONFIG_GPU_TRACEPOINTS
	int work_id;
#endif
	/* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
	int slot_nr;

	u32 atom_flags;

	/* Number of times this atom has been retried. Used by replay soft job.
	 */
	int retry_count;

	enum kbase_atom_gpu_rb_state gpu_rb_state;

	u64 need_cache_flush_cores_retained;

	/* Pointer to atom that this atom has same-slot dependency on */
	struct kbase_jd_atom *pre_dep;
	/* Pointer to atom that has same-slot dependency on this atom */
	struct kbase_jd_atom *post_dep;

	/* Pointer to atom that this atom has cross-slot dependency on */
	struct kbase_jd_atom *x_pre_dep;
	/* Pointer to atom that has cross-slot dependency on this atom */
	struct kbase_jd_atom *x_post_dep;

	/* The GPU's flush count recorded at the time of submission, used for
	 * the cache flush optimisation */
	u32 flush_id;

	struct kbase_jd_atom_backend backend;
#ifdef CONFIG_DEBUG_FS
	struct base_job_fault_event fault_event;
#endif

	/* List head used for two different purposes:
	 *  1. Overflow list for JS ring buffers. If an atom is ready to run,
	 *     but there is no room in the JS ring buffer, then the atom is put
	 *     on the ring buffer's overflow list using this list node.
	 *  2. List of waiting soft jobs.
	 */
	struct list_head queue;

	struct kbase_va_region *jit_addr_reg;

	/* If non-zero, this indicates that the atom will fail with the set
	 * event_code when the atom is processed. */
	enum base_jd_event_code will_fail_event_code;

	enum kbase_atom_exit_protected_state exit_protected_state;

	struct rb_node runnable_tree_node;

	/* 'Age' of atom relative to other atoms in the context. */
	u32 age;
};
static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
{
	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
}
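
/*
 * Example (editor's sketch, not from the original source): the flag test
 * above lets submission code decide whether the GPU's current mode matches
 * what the atom needs:
 *
 *   if (kbase_jd_katom_is_protected(katom) != kbdev->protected_mode)
 *           return false;  // a mode transition is required before submit
 */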
/*
 * Theory of operations:
 *
 * Atom objects are statically allocated within the context structure.
 *
 * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
 */

#define KBASE_JD_DEP_QUEUE_SIZE 256
struct kbase_jd_context {
	struct mutex lock;
	struct kbasep_js_kctx_info sched_info;
	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];

	/** Tracks all job-dispatch jobs. This includes those not tracked by
	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
	u32 job_nr;

	/** Waitq that reflects whether there are no jobs (including SW-only
	 * dependency jobs). This is set when no jobs are present on the ctx,
	 * and clear when there are jobs.
	 *
	 * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
	 * the Job Scheduler is unaware of jobs that are blocked on dependencies,
	 * and SW-only dependency jobs.
	 *
	 * This waitq can be waited upon to find out when the context jobs are all
	 * done/cancelled (including those that might've been blocked on
	 * dependencies) - and so, whether it can be terminated. However, it should
	 * only be terminated once it is neither present in the policy-queue (see
	 * kbasep_js_policy_try_evict_ctx() ) nor the run-pool (see
	 * kbasep_js_kctx_info::ctx::is_scheduled).
	 *
	 * Since the waitq is only set under kbase_jd_context::lock,
	 * the waiter should also briefly obtain and drop kbase_jd_context::lock to
	 * guarantee that the setter has completed its work on the kbase_context
	 *
	 * This must be updated atomically with:
	 * - kbase_jd_context::job_nr */
	wait_queue_head_t zero_jobs_wait;

	/** Job Done workqueue. */
	struct workqueue_struct *job_done_wq;

	size_t tb_wrap_offset;

#ifdef CONFIG_KDS
	struct kds_callback kds_cb;
#endif /* CONFIG_KDS */
#ifdef CONFIG_GPU_TRACEPOINTS
	atomic_t work_id;
#endif
};
struct kbase_device_info {
	u32 features;
};
/** Poking state for BASE_HW_ISSUE_8316 */
enum {
	KBASE_AS_POKE_STATE_IN_FLIGHT = 1<<0,
	KBASE_AS_POKE_STATE_KILLING_POKE = 1<<1
};

/** Poking state for BASE_HW_ISSUE_8316 */
typedef u32 kbase_as_poke_state;
struct kbase_mmu_setup {
	u64 transtab;
	u64 memattr;
	u64 transcfg;
};
/**
 * Important: Our code makes assumptions that a struct kbase_as structure is always at
 * kbase_device->as[number]. This is used to recover the containing
 * struct kbase_device from a struct kbase_as structure.
 *
 * Therefore, struct kbase_as structures must not be allocated anywhere else.
 */
struct kbase_as {
	int number;

	struct workqueue_struct *pf_wq;
	struct work_struct work_pagefault;
	struct work_struct work_busfault;
	enum kbase_mmu_fault_type fault_type;
	u32 fault_status;
	u64 fault_addr;
	u64 fault_extra_addr;
	struct mutex transaction_mutex;

	struct kbase_mmu_setup current_setup;

	/* BASE_HW_ISSUE_8316 */
	struct workqueue_struct *poke_wq;
	struct work_struct poke_work;
	/** Protected by kbasep_js_device_data::runpool_irq::lock */
	int poke_refcount;
	/** Protected by kbasep_js_device_data::runpool_irq::lock */
	kbase_as_poke_state poke_state;
	struct hrtimer poke_timer;
};
static inline int kbase_as_has_bus_fault(struct kbase_as *as)
{
	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
}

static inline int kbase_as_has_page_fault(struct kbase_as *as)
{
	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
}
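
/*
 * Example (editor's sketch, not from the original source): an IRQ handler
 * would typically dispatch on the recorded fault type onto the address
 * space's workqueue; "as" is a hypothetical struct kbase_as pointer:
 *
 *   if (kbase_as_has_bus_fault(as))
 *           queue_work(as->pf_wq, &as->work_busfault);
 *   else if (kbase_as_has_page_fault(as))
 *           queue_work(as->pf_wq, &as->work_pagefault);
 */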
struct kbasep_mem_device {
	atomic_t used_pages;	/* Tracks usage of OS shared memory. Updated
				   when OS memory is allocated/freed. */
};
#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X

enum kbase_trace_code {
	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
	 * THIS MUST BE USED AT THE START OF THE ENUM */
#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
#include "mali_kbase_trace_defs.h"
#undef KBASE_TRACE_CODE_MAKE_CODE
	/* Comma on its own, to extend the list */
	,
	/* Must be the last in the enum */
	KBASE_TRACE_CODE_COUNT
};
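
/*
 * Example (editor's sketch of the X-macro technique used above, not from the
 * original source): because mali_kbase_trace_defs.h is written purely in
 * terms of KBASE_TRACE_CODE_MAKE_CODE, the same list can also be expanded
 * into a matching string table elsewhere:
 *
 *   #define KBASE_TRACE_CODE_MAKE_CODE(X) #X,
 *   static const char *const kbase_trace_code_string[] = {
 *   #include "mali_kbase_trace_defs.h"
 *   };
 *   #undef KBASE_TRACE_CODE_MAKE_CODE
 */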
#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
#define KBASE_TRACE_FLAG_JOBSLOT (((u8)1) << 1)

struct kbase_trace {
	struct timespec timestamp;
	unsigned long info_val;
};
/** Event IDs for the power management framework.
 *
 * Any of these events might be missed, so they should not be relied upon to
 * find the precise state of the GPU at a particular time in the
 * trace. Overall, we should get a high percentage of these events for
 * statistical purposes, and so a few missing should not be a problem */
enum kbase_timeline_pm_event {
	/* helper for tests */
	KBASEP_TIMELINE_PM_EVENT_FIRST,

	/** Event reserved for backwards compatibility with 'init' events */
	KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,

	/** The power state of the device has changed.
	 *
	 * Specifically, the device has reached a desired or available state.
	 */
	KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,

	/** The GPU is becoming active.
	 *
	 * This event is sent when the first context is about to use the GPU.
	 */
	KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,

	/** The GPU is becoming idle.
	 *
	 * This event is sent when the last context has finished using the GPU.
	 */
	KBASE_TIMELINE_PM_EVENT_GPU_IDLE,

	/** Event reserved for backwards compatibility with 'policy_change'
	 * events */
	KBASE_TIMELINE_PM_EVENT_RESERVED_4,

	/** Event reserved for backwards compatibility with 'system_suspend'
	 * events */
	KBASE_TIMELINE_PM_EVENT_RESERVED_5,

	/** Event reserved for backwards compatibility with 'system_resume'
	 * events */
	KBASE_TIMELINE_PM_EVENT_RESERVED_6,

	/** The job scheduler is requesting to power up/down cores.
	 *
	 * This event is sent when:
	 * - powered down cores are needed to complete a job
	 * - powered up cores are not needed anymore
	 */
	KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,

	KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
};
#ifdef CONFIG_MALI_TRACE_TIMELINE
struct kbase_trace_kctx_timeline {
	atomic_t jd_atoms_in_flight;
	atomic_t owner_tgid;
};

struct kbase_trace_kbdev_timeline {
	/* Note: strictly speaking, not needed, because it's in sync with
	 * kbase_device::jm_slots[]::submitted_nr
	 *
	 * But it's kept as an example of how to add global timeline tracking
	 * information.
	 *
	 * The caller must hold kbasep_js_device_data::runpool_irq::lock when
	 * accessing this */
	u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];

	/* Last UID for each PM event */
	atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
	/* Counter for generating PM event UIDs */
	atomic_t pm_event_uid_counter;

	/*
	 * L2 transition state - true indicates that the transition is ongoing.
	 * Expected to be protected by pm.power_change_lock */
	bool l2_transitioning;
};
#endif /* CONFIG_MALI_TRACE_TIMELINE */
struct kbasep_kctx_list_element {
	struct list_head link;
	struct kbase_context *kctx;
};
/**
 * Data stored per device for power management.
 *
 * This structure contains data for the power management framework. There is one
 * instance of this structure per device in the system.
 */
struct kbase_pm_device_data {
	/**
	 * The lock protecting Power Management structures accessed outside of
	 * IRQ.
	 *
	 * This lock must also be held whenever the GPU is being powered on or
	 * off.
	 */
	struct mutex lock;

	/** The reference count of active contexts on this device. */
	int active_count;
	/** Flag indicating suspending/suspended */
	bool suspending;
	/* Wait queue set when active_count == 0 */
	wait_queue_head_t zero_active_count_wait;

	/**
	 * Bit masks identifying the available shader cores that are specified
	 * via sysfs. One mask per job slot.
	 */
	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
	u64 debug_core_mask_all;

	/**
	 * Lock protecting the power state of the device.
	 *
	 * This lock must be held when accessing the shader_available_bitmap,
	 * tiler_available_bitmap, l2_available_bitmap, shader_inuse_bitmap and
	 * tiler_inuse_bitmap fields of kbase_device, and the ca_in_transition
	 * and shader_poweroff_pending fields of kbase_pm_device_data. It is
	 * also held when the hardware power registers are being written to, to
	 * ensure that two threads do not conflict over the power transitions
	 * that the hardware should make.
	 */
	spinlock_t power_change_lock;

	/**
	 * Callback for initializing the runtime power management.
	 *
	 * @param kbdev The kbase device
	 *
	 * @return 0 on success, else error code
	 */
	int (*callback_power_runtime_init)(struct kbase_device *kbdev);

	/**
	 * Callback for terminating the runtime power management.
	 *
	 * @param kbdev The kbase device
	 */
	void (*callback_power_runtime_term)(struct kbase_device *kbdev);

	/* Time in milliseconds between each dvfs sample */
	u32 dvfs_period;

	/* Period of GPU poweroff timer */
	ktime_t gpu_poweroff_time;

	/* Number of ticks of GPU poweroff timer before shader is powered off */
	int poweroff_shader_ticks;

	/* Number of ticks of GPU poweroff timer before GPU is powered off */
	int poweroff_gpu_ticks;

	struct kbase_pm_backend_data backend;
};
/**
 * struct kbase_protected_ops - Platform specific functions for GPU protected
 * mode operations
 * @protected_mode_enter: Callback to enter protected mode on the GPU
 * @protected_mode_reset: Callback to reset the GPU and exit protected mode.
 * @protected_mode_supported: Callback to check if protected mode is supported.
 */
struct kbase_protected_ops {
	/**
	 * protected_mode_enter() - Enter protected mode on the GPU
	 * @kbdev: The kbase device
	 *
	 * Return: 0 on success, non-zero on error
	 */
	int (*protected_mode_enter)(struct kbase_device *kbdev);

	/**
	 * protected_mode_reset() - Reset the GPU and exit protected mode
	 * @kbdev: The kbase device
	 *
	 * Return: 0 on success, non-zero on error
	 */
	int (*protected_mode_reset)(struct kbase_device *kbdev);

	/**
	 * protected_mode_supported() - Check if protected mode is supported
	 * @kbdev: The kbase device
	 *
	 * Return: true if protected mode is supported, false otherwise
	 */
	bool (*protected_mode_supported)(struct kbase_device *kbdev);
};
/**
 * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
 * @kbdev:     Kbase device where memory is used
 * @cur_size:  Number of free pages currently in the pool (may exceed @max_size
 *             in some corner cases)
 * @max_size:  Maximum number of free pages in the pool
 * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size
 *             and @page_list
 * @page_list: List of free pages in the pool
 * @reclaim:   Shrinker for kernel reclaim of free pages
 * @next_pool: Pointer to next pool where pages can be allocated when this pool
 *             is empty. Pages will spill over to the next pool when this pool
 *             is full. Can be NULL if there is no next pool.
 */
struct kbase_mem_pool {
	struct kbase_device *kbdev;
	size_t              cur_size;
	size_t              max_size;
	spinlock_t          pool_lock;
	struct list_head    page_list;
	struct shrinker     reclaim;

	struct kbase_mem_pool *next_pool;
};
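
/*
 * Example (editor's sketch of the spill-over behaviour documented above; the
 * alloc/free helper signatures are recalled from the driver and should be
 * treated as assumptions to verify):
 *
 *   struct page *p = kbase_mem_pool_alloc(pool);  // falls back to
 *                                                 // pool->next_pool when empty
 *   if (p)
 *           kbase_mem_pool_free(pool, p, false);  // spills to next_pool
 *                                                 // when this pool is full
 */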
#define DEVNAME_SIZE 16

struct kbase_device {
	s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];

	struct list_head entry;
	unsigned int kbase_group_error;
	struct miscdevice mdev;

	/*
	 * current freq of clk_gpu, in Hz.
	 */

	/*
	 * mutex for setting freq of clk_gpu.
	 */
	struct mutex mutex_for_clk;

#ifdef CONFIG_REGULATOR
	struct regulator *regulator;
#endif
	char devname[DEVNAME_SIZE];

#ifdef CONFIG_MALI_NO_MALI
	struct kmem_cache *irq_slab;
	struct workqueue_struct *irq_workq;
	atomic_t serving_job_irq;
	atomic_t serving_gpu_irq;
	atomic_t serving_mmu_irq;
	spinlock_t reg_op_lock;
#endif /* CONFIG_MALI_NO_MALI */

	struct kbase_pm_device_data pm;
	struct kbasep_js_device_data js_data;
	struct kbase_mem_pool mem_pool;
	struct kbasep_mem_device memdev;
	struct kbase_mmu_mode const *mmu_mode;

	struct kbase_as as[BASE_MAX_NR_AS];
	spinlock_t mmu_mask_change;

	struct kbase_gpu_props gpu_props;

	/** List of SW workarounds for HW issues */
	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
	/** List of features available */
	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];

	/* Bitmaps of cores that are currently in use (running jobs).
	 * These should be kept up to date by the job scheduler.
	 *
	 * pm.power_change_lock should be held when accessing these members.
	 *
	 * kbase_pm_check_transitions_nolock() should be called when bits are
	 * cleared to update the power management system and allow transitions to
	 * occur. */
	u64 shader_inuse_bitmap;

	/* Refcount for cores in use */
	u32 shader_inuse_cnt[64];

	/* Bitmaps of cores the JS needs for jobs ready to run */
	u64 shader_needed_bitmap;

	/* Refcount for cores needed */
	u32 shader_needed_cnt[64];

	u32 tiler_needed_cnt;

	/* struct for keeping track of the disjoint information
	 *
	 * The state is > 0 if the GPU is in a disjoint state. Otherwise 0
	 * The count is the number of disjoint events that have occurred on the GPU
	 */
	struct {
		atomic_t count;
		atomic_t state;
	} disjoint_event;

	/* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
	u32 l2_users_count;

	/* Bitmaps of cores that are currently available (powered up, and the
	 * power policy is happy for jobs to be submitted to these cores).
	 * These are updated by the power management code. The job scheduler
	 * should avoid submitting new jobs to any cores that are not marked as
	 * available.
	 *
	 * pm.power_change_lock should be held when accessing these members.
	 */
	u64 shader_available_bitmap;
	u64 tiler_available_bitmap;
	u64 l2_available_bitmap;

	u64 shader_ready_bitmap;
	u64 shader_transitioning_bitmap;

	s8 nr_hw_address_spaces;   /**< Number of address spaces in the GPU (constant after driver initialisation) */
	s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */

	/* Structure used for instrumentation and HW counters dumping */
	struct kbase_hwcnt {
		/* The lock should be used when accessing any of the following members */
		spinlock_t lock;

		struct kbase_context *kctx;

		struct kbase_instr_backend backend;
	} hwcnt;
	struct kbase_vinstr_context *vinstr_ctx;

	/* Value to be written to the irq_throttle register each time an irq is served */
	atomic_t irq_throttle_cycles;

#if KBASE_TRACE_ENABLE
	spinlock_t trace_lock;
	u16 trace_first_out;
	u16 trace_next_in;
	struct kbase_trace *trace_rbuf;
#endif

	u32 reset_timeout_ms;

	struct mutex cacheclean_lock;

	/* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
	void *platform_context;

	/* List of kbase_contexts created */
	struct list_head kctx_list;
	struct mutex kctx_list_lock;

#ifdef CONFIG_PM_DEVFREQ
	struct devfreq_dev_profile devfreq_profile;
	struct devfreq *devfreq;
	unsigned long current_freq;
	unsigned long current_voltage;
#ifdef CONFIG_DEVFREQ_THERMAL
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
	struct devfreq_cooling_device *devfreq_cooling;
#else
	struct thermal_cooling_device *devfreq_cooling;
#endif
#endif /* CONFIG_DEVFREQ_THERMAL */
#endif /* CONFIG_PM_DEVFREQ */

	struct kbase_ipa_context *ipa_ctx;

#ifdef CONFIG_MALI_TRACE_TIMELINE
	struct kbase_trace_kbdev_timeline timeline;
#endif

	/*
	 * Control for enabling job dump on failure, set when control debugfs
	 * is opened.
	 */
	bool job_fault_debug;

#ifdef CONFIG_DEBUG_FS
	/* directory for debugfs entries */
	struct dentry *mali_debugfs_directory;
	/* Root directory for per context entry */
	struct dentry *debugfs_ctx_directory;

#ifdef CONFIG_MALI_DEBUG
	/* bit for each as, set if there is new data to report */
	u64 debugfs_as_read_bitmap;
#endif /* CONFIG_MALI_DEBUG */

	/* failed job dump, used for separate debug process */
	wait_queue_head_t job_fault_wq;
	wait_queue_head_t job_fault_resume_wq;
	struct workqueue_struct *job_fault_resume_workq;
	struct list_head job_fault_event_list;
	spinlock_t job_fault_event_lock;
	struct kbase_context *kctx_fault;

#if !MALI_CUSTOMER_RELEASE
	/* Per-device data for register dumping interface */
	struct {
		u16 reg_offset; /* Offset of a GPU_CONTROL register to be
				   dumped upon request */
	} regs_dump_debugfs_data;
#endif /* !MALI_CUSTOMER_RELEASE */
#endif /* CONFIG_DEBUG_FS */
	/* fbdump profiling controls set by gator */
	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];

#if MALI_CUSTOMER_RELEASE == 0
	/* Number of jobs that are run before a job is forced to fail and
	 * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
	 * replays. */
	int force_replay_limit;
	/* Count of jobs between forced failures. Incremented on each job. A
	 * job is forced to fail once this is greater than or equal to
	 * force_replay_limit. */
	int force_replay_count;
	/* Core requirement for jobs to be failed and replayed. May be zero. */
	base_jd_core_req force_replay_core_req;
	/* true if force_replay_limit should be randomized. The random
	 * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
	 */
	bool force_replay_random;
#endif

	/* Total number of created contexts */
	atomic_t ctx_num;

	struct kbase_hwaccess_data hwaccess;

	/* Count of page/bus faults waiting for workqueues to process */
	atomic_t faults_pending;

	/* true if GPU is powered off or power off operation is in progress */
	bool poweroff_pending;

	/* defaults for new context created for this device */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
	bool infinite_cache_active_default;
#else
	u32 infinite_cache_active_default;
#endif
	size_t mem_pool_max_size_default;

	/* system coherency mode */
	u32 system_coherency;
	/* Flag to track when cci snoops have been enabled on the interface */
	bool cci_snoop_enabled;

	/* SMC function IDs to call into Trusted firmware to enable/disable
	 * cache snooping. Value of 0 indicates that they are not used
	 */
	u32 snoop_enable_smc;
	u32 snoop_disable_smc;

	/* Protected operations */
	struct kbase_protected_ops *protected_ops;

	/*
	 * true when GPU is put into protected mode
	 */
	bool protected_mode;

	/*
	 * true when GPU is transitioning into or out of protected mode
	 */
	bool protected_mode_transition;

	/*
	 * true if protected mode is supported
	 */
	bool protected_mode_support;

#ifdef CONFIG_MALI_DEBUG
	wait_queue_head_t driver_inactive_wait;
	bool driver_inactive;
#endif /* CONFIG_MALI_DEBUG */

#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
	/*
	 * Bus logger integration.
	 */
	struct bus_logger_client *buslogger;
#endif

	/* Boolean indicating if an IRQ flush during reset is in progress. */
	bool irq_reset_flush;

	/* list of inited sub systems. Used during terminate/error recovery */
	u32 inited_subsys;
};
/**
 * struct jsctx_queue - JS context atom queue
 * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
 *                 job slot.
 * @x_dep_head:    Head item of the linked list of atoms blocked on cross-slot
 *                 dependencies. Atoms on this list will be moved to the
 *                 runnable_tree when the blocking atom completes.
 *
 * runpool_irq.lock must be held when accessing this structure.
 */
struct jsctx_queue {
	struct rb_root runnable_tree;
	struct list_head x_dep_head;
};
#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \
					 (((minor) & 0xFFF) << 8) | \
					 ((0 & 0xFF) << 0))
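
/*
 * Example (editor's sketch, not from the original source): unpacking a
 * version value encoded by the macro above:
 *
 *   unsigned int major = (api_version >> 20) & 0xFFF;
 *   unsigned int minor = (api_version >> 8)  & 0xFFF;
 */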
struct kbase_context {
	struct file *filp;
	struct kbase_device *kbdev;
	int id; /* System wide unique id */
	unsigned long api_version;

	struct list_head event_list;
	struct list_head event_coalesce_list;
	struct mutex event_mutex;
	atomic_t event_closed;
	struct workqueue_struct *event_workq;
	atomic_t event_count;
	int event_coalesce_count;

	atomic_t setup_complete;
	atomic_t setup_in_progress;

	u64 *mmu_teardown_pages;

	struct page *aliasing_sink_page;

	struct mutex mmu_lock;
	struct mutex reg_lock;     /* To be converted to a rwlock? */
	struct rb_root reg_rbtree; /* Red-Black tree of GPU regions (live regions) */

	unsigned long cookies;
	struct kbase_va_region *pending_regions[BITS_PER_LONG];

	wait_queue_head_t event_queue;
	struct kbase_jd_context jctx;
	atomic_t used_pages;
	atomic_t nonmapped_pages;

	struct kbase_mem_pool mem_pool;

	struct shrinker reclaim;
	struct list_head evict_list;
	struct mutex evict_lock;

	struct list_head waiting_soft_jobs;
	spinlock_t waiting_soft_jobs_lock;
#ifdef CONFIG_KDS
	struct list_head waiting_kds_resource;
#endif
#ifdef CONFIG_MALI_DMA_FENCE
	struct {
		struct list_head waiting_resource;
		struct workqueue_struct *wq;
	} dma_fence;
#endif /* CONFIG_MALI_DMA_FENCE */
	/** This is effectively part of the Run Pool, because it only has a valid
	 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
	 *
	 * The kbasep_js_device_data::runpool_irq::lock must be held whilst accessing
	 * this.
	 *
	 * If the context relating to this as_nr is required, you must use
	 * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
	 * whilst you're using it. Alternatively, just hold the kbasep_js_device_data::runpool_irq::lock
	 * to ensure the context doesn't disappear (but this has restrictions on what other locks
	 * you can take whilst doing this) */
	int as_nr;

	/* NOTE:
	 *
	 * Flags are in jctx.sched_info.ctx.flags
	 * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
	 *
	 * All other flags must be added there */
	spinlock_t mm_update_lock;
	struct mm_struct *process_mm;
	/* End of the SAME_VA zone */
	u64 same_va_end;
#ifdef CONFIG_MALI_TRACE_TIMELINE
	struct kbase_trace_kctx_timeline timeline;
#endif
#ifdef CONFIG_DEBUG_FS
	/* Content of mem_profile file */
	char *mem_profile_data;
	/* Size of @c mem_profile_data */
	size_t mem_profile_size;
	/* Mutex guarding memory profile state */
	struct mutex mem_profile_lock;
	/* Memory profile file created */
	bool mem_profile_initialized;
	struct dentry *kctx_dentry;

	/* for job fault debug */
	unsigned int *reg_dump;
	atomic_t job_fault_count;
	/* This list keeps the atoms that follow the faulting atom
	 * in the same context during the dump
	 */
	struct list_head job_fault_resume_event_list;

#endif /* CONFIG_DEBUG_FS */

	struct jsctx_queue jsctx_queue
		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
	/* Number of atoms currently pulled from this context */
	atomic_t atoms_pulled;
	/* Number of atoms currently pulled from this context, per slot */
	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
	/* true if last kick() caused atoms to be pulled from this context */
	bool pulled;
	/* true if infinite cache is to be enabled for new allocations. Existing
	 * allocations will not change. bool stored as a u32 per Linux API */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
	bool infinite_cache_active;
#else
	u32 infinite_cache_active;
#endif
	/* Bitmask of slots that can be pulled from */
	u32 slots_pullable;

	/* Backend specific data */
	struct kbase_context_backend backend;

	/* Work structure used for deferred ASID assignment */
	struct work_struct work;

	/* Only one userspace vinstr client per kbase context */
	struct kbase_vinstr_client *vinstr_cli;
	struct mutex vinstr_cli_lock;

	/* Must hold queue_mutex when accessing */
	bool ctx_active;

	/* List of completed jobs waiting for events to be posted */
	struct list_head completed_jobs;
	/* Number of work items currently pending on job_done_wq */
	atomic_t work_count;

	/* true if context is counted in kbdev->js_data.nr_contexts_runnable */
	bool ctx_runnable_ref;

	/* Waiting soft-jobs will fail when this timer expires */
	struct timer_list soft_job_timeout;

	/* JIT allocation management */
	struct kbase_va_region *jit_alloc[256];
	struct list_head jit_active_head;
	struct list_head jit_pool_head;
	struct list_head jit_destroy_head;
	struct mutex jit_lock;
	struct work_struct jit_work;

	/* External sticky resource management */
	struct list_head ext_res_meta_head;

	/* Used to record that a drain was requested from atomic context */
	atomic_t drain_pending;

	/* Current age count, used to determine age for newly submitted atoms */
	u32 age_count;
};
/**
 * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
 *                                 to a @kbase_context.
 * @ext_res_node:                  List head for adding the metadata to a
 *                                 @kbase_context.
 * @alloc:                         The physical memory allocation structure
 *                                 which is mapped.
 * @gpu_addr:                      The GPU virtual address the resource is
 *                                 mapped to.
 *
 * External resources can be mapped into multiple contexts as well as the same
 * context multiple times.
 * As kbase_va_region itself isn't refcounted we can't attach our extra
 * information to it as it could be removed under our feet leaving external
 * resources pinned.
 * This metadata structure binds a single external resource to a single
 * context, ensuring that per context mapping is tracked separately so it can
 * be overridden when needed and abuses by the application (freeing the resource
 * multiple times) don't affect the refcount of the physical allocation.
 */
struct kbase_ctx_ext_res_meta {
	struct list_head ext_res_node;
	struct kbase_mem_phy_alloc *alloc;
	u64 gpu_addr;
};
enum kbase_reg_access_type {
	REG_READ,
	REG_WRITE
};

enum kbase_share_attr_bits {
	/* (1ULL << 8) bit is reserved */
	SHARE_BOTH_BITS = (2ULL << 8),	/* inner and outer shareable coherency */
	SHARE_INNER_BITS = (3ULL << 8)	/* inner shareable coherency */
};
/**
 * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
 * @kbdev: kbase device
 *
 * Return: true if the device accesses are coherent, false if not.
 */
static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
{
	if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
			(kbdev->system_coherency == COHERENCY_ACE))
		return true;

	return false;
}
/* Conversion helpers for setting up high resolution timers */
#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
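
/*
 * Example (editor's sketch, not from the original source): arming an hrtimer
 * with the helpers above; "timer" is a hypothetical struct hrtimer that has
 * already been initialised with hrtimer_init():
 *
 *   hrtimer_start(&timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
 *                 HRTIMER_MODE_REL);
 */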
/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000
/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
#define KBASE_AS_INACTIVE_MAX_LOOPS 100000

/* Maximum number of times a job can be replayed */
#define BASEP_JD_REPLAY_LIMIT 15
/* JobDescriptorHeader - taken from the architecture specifications, the layout
 * is currently identical for all GPU archs. */
struct job_descriptor_header {
	u32 exception_status;
	u32 first_incomplete_task;
	u64 fault_pointer;
	u8 job_descriptor_size : 1;
	u8 job_type : 7;
	u8 job_barrier : 1;
	u8 _reserved_01 : 1;
	u8 _reserved_1 : 1;
	u8 _reserved_02 : 1;
	u8 _reserved_03 : 1;
	u8 _reserved_2 : 1;
	u8 _reserved_04 : 1;
	u8 _reserved_05 : 1;
	u16 job_index;
	u16 job_dependency_index_1;
	u16 job_dependency_index_2;
	union {
		u64 _64;
		u32 _32;
	} next_job;
};

#endif /* _KBASE_DEFS_H_ */