/*
 * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

/**
 * @file mali_kbase_js.h
 * Job Scheduler Type Definitions
 */
#ifndef _KBASE_JS_DEFS_H_
#define _KBASE_JS_DEFS_H_
/**
 * @addtogroup base_api
 * @{
 */

/**
 * @addtogroup base_kbase_api
 * @{
 */

/**
 * @addtogroup kbase_js
 * @{
 */
/* Types used by the policies must go here */
enum {
	/** Context will not submit any jobs */
	KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0),

	/** Set if the context uses an address space and should be kept scheduled in */
	KBASE_CTX_FLAG_PRIVILEGED = (1u << 1)

	/* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */
};
58 typedef u32 kbase_context_flags;
60 struct kbasep_atom_req {
61 base_jd_core_req core_req;
62 kbase_context_flags ctx_req;
#include "mali_kbase_js_policy_cfs.h"
68 /* Wrapper Interface - doxygen is elsewhere */
69 union kbasep_js_policy {
70 struct kbasep_js_policy_cfs cfs;
73 /* Wrapper Interface - doxygen is elsewhere */
74 union kbasep_js_policy_ctx_info {
75 struct kbasep_js_policy_cfs_ctx cfs;
78 /* Wrapper Interface - doxygen is elsewhere */
79 union kbasep_js_policy_job_info {
80 struct kbasep_js_policy_cfs_job cfs;
/** Callback function run on all of a context's jobs registered with the Job
 * Scheduler */
typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
/**
 * @brief Maximum number of jobs that can be submitted to a job slot whilst
 * inside the IRQ handler.
 *
 * This is important because GPU NULL jobs can complete whilst the IRQ handler
 * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
 * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
 */
#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
/**
 * @brief the IRQ_THROTTLE time in microseconds
 *
 * This will be converted via the GPU's clock frequency into a cycle-count.
 *
 * @note we can make an estimate of the GPU's frequency by periodically
 * sampling its CYCLE_COUNT register
 */
#define KBASE_JS_IRQ_THROTTLE_TIME_US 20
/**
 * @brief Context attributes
 *
 * Each context attribute can be thought of as a boolean value that caches some
 * state information about either the runpool, or the context:
 * - In the case of the runpool, it is a cache of "Do any contexts owned by
 * the runpool have attribute X?"
 * - In the case of a context, it is a cache of "Do any atoms owned by the
 * context have attribute X?"
 *
 * The boolean value of the context attributes often affect scheduling
 * decisions, such as affinities to use and job slots to use.
 *
 * To accommodate changes of state in the context, each attribute is refcounted
 * in the context, and in the runpool for all running contexts. Specifically:
 * - The runpool holds a refcount of how many contexts in the runpool have this
 * attribute.
 * - The context holds a refcount of how many atoms have this attribute.
 */
enum kbasep_js_ctx_attr {
	/** Attribute indicating a context that contains Compute jobs. That is,
	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
	 *
	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
	 * both types of jobs.
	 */
	KBASEP_JS_CTX_ATTR_COMPUTE,

	/** Attribute indicating a context that contains Non-Compute jobs. That is,
	 * the context has some jobs that are \b not of type @ref
	 * BASE_JD_REQ_ONLY_COMPUTE. The context usually has
	 * BASE_CONTEXT_HINT_COMPUTE \b clear, but this depends on the HW
	 * workarounds in use in the Job Scheduling Policy.
	 *
	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
	 * both types of jobs.
	 */
	KBASEP_JS_CTX_ATTR_NON_COMPUTE,

	/** Attribute indicating that a context contains compute-job atoms that
	 * aren't restricted to a coherent group, and can run on all cores.
	 *
	 * Specifically, this is when the atom's \a core_req satisfy:
	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
	 *
	 * Such atoms could be blocked from running if one of the coherent groups
	 * is being used by another job slot, so tracking this context attribute
	 * allows us to prevent such situations.
	 *
	 * @note This doesn't take into account the 1-coregroup case, where all
	 * compute atoms would effectively be able to run on 'all cores', but
	 * contexts will still not always get marked with this attribute. Instead,
	 * it is the caller's responsibility to take into account the number of
	 * coregroups when interpreting this attribute.
	 *
	 * @note Whilst Tiler atoms are normally combined with
	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
	 * enough to handle anyway.
	 */
	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,

	/** Must be the last in the enum */
	KBASEP_JS_CTX_ATTR_COUNT
};
/** Bit values that may be combined into a kbasep_js_atom_done_code */
enum {
	/** Bit indicating that new atom should be started because this atom completed */
	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
};
182 /** Combination of KBASE_JS_ATOM_DONE_<...> bits */
183 typedef u32 kbasep_js_atom_done_code;
/**
 * Data used by the scheduler that is unique for each Address Space.
 *
 * This is used in IRQ context and kbasep_js_device_data::runpool_irq::lock
 * must be held whilst accessing this data (including reads and atomic
 * decisions based on the read).
 */
struct kbasep_js_per_as_data {
	/**
	 * Ref count of whether this AS is busy, and must not be scheduled out
	 *
	 * When jobs are running this is always positive. However, it can still be
	 * positive when no jobs are running. If all you need is a heuristic to
	 * tell you whether jobs might be running, this should be sufficient.
	 */
	int as_busy_refcount;

	/** Pointer to the current context on this address space, or NULL for no context */
	struct kbase_context *kctx;
};
207 * @brief KBase Device Data Job Scheduler sub-structure
209 * This encapsulates the current context of the Job Scheduler on a particular
210 * device. This context is global to the device, and is not tied to any
211 * particular struct kbase_context running on the device.
213 * nr_contexts_running and as_free are optimized for packing together (by making
214 * them smaller types than u32). The operations on them should rarely involve
215 * masking. The use of signed types for arithmetic indicates to the compiler that
216 * the value will not rollover (which would be undefined behavior), and so under
217 * the Total License model, it is free to make optimizations based on that (i.e.
218 * to remove masking).
220 struct kbasep_js_device_data {
221 /** Sub-structure to collect together Job Scheduling data used in IRQ context */
224 * Lock for accessing Job Scheduling data used in IRQ context
226 * This lock must be held whenever this data is accessed (read, or
227 * write). Even for read-only access, memory barriers would be needed.
228 * In any case, it is likely that decisions based on only reading must
229 * also be atomic with respect to data held here and elsewhere in the
232 * This lock must also be held for accessing:
233 * - kbase_context::as_nr
234 * - kbase_device::jm_slots
235 * - Parts of the kbasep_js_policy, dependent on the policy (refer to
236 * the policy in question for more information)
237 * - Parts of kbasep_js_policy_ctx_info, dependent on the policy (refer to
238 * the policy in question for more information)
242 /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
243 * When bit 'N' is set in this, it indicates whether the context bound to address space
244 * 'N' (per_as_data[N].kctx) is allowed to submit jobs.
246 * It is placed here because it's much more memory efficient than having a u8 in
247 * struct kbasep_js_per_as_data to store this flag */
250 /** Context Attributes:
251 * Each is large enough to hold a refcount of the number of contexts
252 * that can fit into the runpool. This is currently BASE_MAX_NR_AS
254 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
255 * the refcount. Hence, it's not worthwhile reducing this to
256 * bit-manipulation on u32s to save space (where in contrast, 4 bit
257 * sub-fields would be easy to do and would save space).
259 * Whilst this must not become negative, the sign bit is used for:
260 * - error detection in debug builds
261 * - Optimization: it is undefined for a signed int to overflow, and so
262 * the compiler can optimize for that never happening (thus, no masking
263 * is required on updating the variable) */
264 s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
266 /** Data that is unique for each AS */
267 struct kbasep_js_per_as_data per_as_data[BASE_MAX_NR_AS];
270 * Affinity management and tracking
272 /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
273 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
274 u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
275 /** Refcount for each core owned by each slot. Used to generate the
276 * slot_affinities array of bitvectors
278 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
279 * because it is refcounted only when a job is definitely about to be
280 * submitted to a slot, and is de-refcounted immediately after a job
282 s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
286 * Run Pool mutex, for managing contexts within the runpool.
287 * Unless otherwise specified, you must hold this lock whilst accessing any
288 * members that follow
290 * In addition, this is used to access:
291 * - the kbasep_js_kctx_info::runpool substructure
293 struct mutex runpool_mutex;
296 * Queue Lock, used to access the Policy's queue of contexts independently
299 * Of course, you don't need the Run Pool lock to access this.
301 struct mutex queue_mutex;
304 * Scheduling semaphore. This must be held when calling
307 struct semaphore schedule_sem;
310 * List of contexts that can currently be pulled from
312 struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
314 * List of contexts that can not currently be pulled from, but have
315 * jobs currently running.
317 struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
319 u16 as_free; /**< Bitpattern of free Address Spaces */
321 /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
322 s8 nr_user_contexts_running;
323 /** Number of currently scheduled contexts (including ones that are not submitting jobs) */
324 s8 nr_all_contexts_running;
327 * Policy-specific information.
329 * Refer to the structure defined by the current policy to determine which
330 * locks must be held when accessing this.
332 union kbasep_js_policy policy;
334 /** Core Requirements to match up with base_js_atom's core_req memeber
335 * @note This is a write-once member, and so no locking is required to read */
336 base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
338 u32 scheduling_period_ns; /*< Value for JS_SCHEDULING_PERIOD_NS */
339 u32 soft_stop_ticks; /*< Value for JS_SOFT_STOP_TICKS */
340 u32 soft_stop_ticks_cl; /*< Value for JS_SOFT_STOP_TICKS_CL */
341 u32 hard_stop_ticks_ss; /*< Value for JS_HARD_STOP_TICKS_SS */
342 u32 hard_stop_ticks_cl; /*< Value for JS_HARD_STOP_TICKS_CL */
343 u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
344 u32 gpu_reset_ticks_ss; /*< Value for JS_RESET_TICKS_SS */
345 u32 gpu_reset_ticks_cl; /*< Value for JS_RESET_TICKS_CL */
346 u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
347 u32 ctx_timeslice_ns; /**< Value for JS_CTX_TIMESLICE_NS */
348 u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */
349 u32 cfs_ctx_runtime_min_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */
351 /** List of suspended soft jobs */
352 struct list_head suspended_soft_jobs_list;
354 #ifdef CONFIG_MALI_DEBUG
355 /* Support soft-stop on a single context */
356 bool softstop_always;
357 #endif /* CONFIG_MALI_DEBUG */
359 /** The initalized-flag is placed at the end, to avoid cache-pollution (we should
360 * only be using this during init/term paths).
361 * @note This is a write-once member, and so no locking is required to read */
364 /* Number of contexts that can currently be pulled from */
365 u32 nr_contexts_pullable;
367 /* Number of contexts that can either be pulled from or are currently
369 atomic_t nr_contexts_runnable;
373 * @brief KBase Context Job Scheduling information structure
375 * This is a substructure in the struct kbase_context that encapsulates all the
376 * scheduling information.
378 struct kbasep_js_kctx_info {
380 * Runpool substructure. This must only be accessed whilst the Run Pool
381 * mutex ( kbasep_js_device_data::runpool_mutex ) is held.
383 * In addition, the kbasep_js_device_data::runpool_irq::lock may need to be
384 * held for certain sub-members.
386 * @note some of the members could be moved into struct kbasep_js_device_data for
387 * improved d-cache/tlb efficiency.
390 union kbasep_js_policy_ctx_info policy_ctx; /**< Policy-specific context */
394 * Job Scheduler Context information sub-structure. These members are
395 * accessed regardless of whether the context is:
396 * - In the Policy's Run Pool
397 * - In the Policy's Queue
398 * - Not queued nor in the Run Pool.
400 * You must obtain the jsctx_mutex before accessing any other members of
403 * You may not access any of these members from IRQ context.
406 struct mutex jsctx_mutex; /**< Job Scheduler Context lock */
408 /** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
409 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
413 /** Context Attributes:
414 * Each is large enough to hold a refcount of the number of atoms on
416 u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
418 kbase_context_flags flags;
419 /* NOTE: Unify the following flags into kbase_context_flags */
421 * Is the context scheduled on the Run Pool?
423 * This is only ever updated whilst the jsctx_mutex is held.
427 * Wait queue to wait for is_scheduled state changes.
429 wait_queue_head_t is_scheduled_wait;
431 bool is_dying; /**< Is the context in the process of being evicted? */
433 /** Link implementing JS queues. Context can be present on one
436 struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
439 /* The initalized-flag is placed at the end, to avoid cache-pollution (we should
440 * only be using this during init/term paths) */
444 /** Subset of atom state that can be available after jd_done_nolock() is called
445 * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
446 * because the original atom could disappear. */
447 struct kbasep_js_atom_retained_state {
448 /** Event code - to determine whether the atom has finished */
449 enum base_jd_event_code event_code;
450 /** core requirements */
451 base_jd_core_req core_req;
454 /** Job Slot to retry submitting to if submission from IRQ handler failed */
455 int retry_submit_on_slot;
456 /* Core group atom was executed on */
/**
 * Value signifying 'no retry on a slot required' for:
 * - kbase_js_atom_retained_state::retry_submit_on_slot
 * - kbase_jd_atom::retry_submit_on_slot
 */
#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)

/**
 * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
 *
 * @see kbase_atom_retained_state_is_valid()
 */
#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP

/**
 * @brief The JS timer resolution, in microseconds
 *
 * Any non-zero difference in time will be at least this size.
 */
#define KBASEP_JS_TICK_RESOLUTION_US 1
/**
 * Internal atom priority defines for kbase_jd_atom::sched_prio
 */
enum {
	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
	KBASE_JS_ATOM_SCHED_PRIO_MED,
	KBASE_JS_ATOM_SCHED_PRIO_LOW,
	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
};

/* Invalid priority for kbase_jd_atom::sched_prio */
#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1

/* Default priority in the case of contexts with no atoms, or being lenient
 * about invalid priorities from userspace */
#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
/** @} *//* end group kbase_js */
/** @} *//* end group base_kbase_api */
/** @} *//* end group base_api */

#endif				/* _KBASE_JS_DEFS_H_ */