drivers/gpu/arm/midgard/mali_kbase_js_policy.h

   1 /*
   2  *
   3  * (C) COPYRIGHT ARM Limited. All rights reserved.
   4  *
   5  * This program is free software and is provided to you under the terms of the
   6  * GNU General Public License version 2 as published by the Free Software
   7  * Foundation, and any use by you of this program is subject to the terms
   8  * of such GNU licence.
   9  *
  10  * A copy of the licence is included with the program, and can also be obtained
  11  * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  12  * Boston, MA  02110-1301, USA.
  13  *
  14  */
  15
  16
  17
  18
  19
  20 /**
  21  * @file mali_kbase_js_policy.h
  22  * Job Scheduler Policy APIs.
  23  */
  24
  25 #ifndef _KBASE_JS_POLICY_H_
  26 #define _KBASE_JS_POLICY_H_
  27
  28 /**
  29  * @page page_kbase_js_policy Job Scheduling Policies
  30  * The Job Scheduling system is described in the following:
  31  * - @subpage page_kbase_js_policy_overview
  32  * - @subpage page_kbase_js_policy_operation
  33  *
  34  * The API details are as follows:
  35  * - @ref kbase_jm
  36  * - @ref kbase_js
  37  * - @ref kbase_js_policy
  38  */
  39
  40 /**
  41  * @page page_kbase_js_policy_overview Overview of the Policy System
  42  *
  43  * The Job Scheduler Policy manages:
  44  * - The assigning of KBase Contexts to GPU Address Spaces (\em ASs)
  45  * - The choosing of Job Chains (\em Jobs) from a KBase context, to run on the
  46  * GPU's Job Slots (\em JSs).
  47  * - The amount of \em time a context is assigned to (<em>scheduled on</em>) an
  48  * Address Space
  49  * - The amount of \em time a Job spends running on the GPU
  50  *
  51  * The Policy implements this management via 2 components:
  52  * - A Policy Queue, which manages a set of contexts that are ready to run,
  53  * but not currently running.
  54  * - A Policy Run Pool, which manages the currently running contexts (one per Address
  55  * Space) and the jobs to run on the Job Slots.
  56  *
  57  * Each Graphics Process in the system has at least one KBase Context. Therefore,
  58  * the Policy Queue can be seen as a queue of Processes waiting to run Jobs on
  59  * the GPU.
  60  *
  61  * <!-- The following needs to be all on one line, due to doxygen's parser -->
  62  * @dotfile policy_overview.dot "Diagram showing a very simplified overview of the Policy System. IRQ handling, soft/hard-stopping, contexts re-entering the system and Policy details are omitted"
  63  *
  64  * The main operations on the queue are:
  65  * - Enqueuing a Context to it
  66  * - Dequeuing a Context from it, to run it.
  67  * - Note: requeuing a context is much the same as enqueuing a context, but
  68  * occurs when a context is scheduled out of the system to allow other contexts
  69  * to run.
  70  *
  71  * These operations have much the same meaning for the Run Pool - Jobs are
  72  * dequeued to run on a Jobslot, and requeued when they are scheduled out of
  73  * the GPU.
  74  *
  75  * @note This is an over-simplification of the Policy APIs - there are more
  76  * operations than 'Enqueue'/'Dequeue', and a Dequeue from the Policy Queue
  77  * takes at least two function calls: one to Dequeue from the Queue, one to add
  78  * to the Run Pool.
  79  *
  80  * As indicated on the diagram, Jobs permanently leave the scheduling system
  81  * when they are completed, otherwise they get dequeued/requeued until this
  82  * happens. Similarly, Contexts leave the scheduling system when their jobs
  83  * have all completed. However, Contexts may later return to the scheduling
  84  * system (not shown on the diagram) if more Bags of Jobs are submitted to
  85  * them.
  86  */
  87
  88 /**
  89  * @page page_kbase_js_policy_operation Policy Operation
  90  *
  91  * We describe the actions that the Job Scheduler Core takes on the Policy in
  92  * the following cases:
  93  * - The IRQ Path
  94  * - The Job Submission Path
  95  * - The High Priority Job Submission Path
  96  *
  97  * This shows how the Policy APIs will be used by the Job Scheduler core.
  98  *
  99  * The following diagram shows an example Policy that contains a Low Priority
 100  * queue, and a Real-time (High Priority) Queue. The RT queue is examined
 101  * before the LowP one on dequeuing from the head. The Low Priority Queue is
 102  * ordered by time, and the RT queue is ordered by RT-priority, and then by
 103  * time. In addition, it shows that the Job Scheduler Core will start a
 104  * Soft-Stop Timer (SS-Timer) when it dequeues and submits a job. The
 105  * Soft-Stop time is set by a global configuration value, and must be a value
 106  * appropriate for the policy. For example, this could include "don't run a
 107  * soft-stop timer" for a First-Come-First-Served (FCFS) policy.
 108  *
 109  * <!-- The following needs to be all on one line, due to doxygen's parser -->
 110  * @dotfile policy_operation_diagram.dot "Diagram showing the objects managed by an Example Policy, and the operations made upon these objects by the Job Scheduler Core."
 111  *
 112  * @section sec_kbase_js_policy_operation_prio Dealing with Priority
 113  *
 114  * Priority applies both to a context as a whole, and to the jobs within a
 115  * context. The jobs specify a priority in the base_jd_atom::prio member, which
 116  * is relative to that of the context. A positive setting indicates a reduction
 117  * in priority, whereas a negative setting indicates a boost in priority. Of
 118  * course, the boost in priority should only be honoured when the originating
 119  * process has sufficient privileges, and should be ignored for unprivileged
 120  * processes. The meaning of the combined priority value is up to the policy
 121  * itself, and could be a logarithmic scale instead of a linear scale (e.g. the
 122  * policy could implement an increase/decrease in priority by 1 results in an
 123  * increase/decrease in \em proportion of time spent scheduled in by 25%, an
 124  * effective change in timeslice by 11%).
 125  *
 126  * It is up to the policy whether a boost in priority boosts the priority of
 127  * the entire context (e.g. to such an extent where it may pre-empt other
 128  * running contexts). If it chooses to do this, the Policy must make sure that
 129  * only the high-priority jobs are run, and that the context is scheduled out
 130  * once only low priority jobs remain. This ensures that the low priority jobs
 131  * within the context do not gain from the priority boost, yet they still get
 132  * scheduled correctly with respect to other low priority contexts.
 133  *
 134  *
 135  * @section sec_kbase_js_policy_operation_irq IRQ Path
 136  *
 137  * The following happens on the IRQ path from the Job Scheduler Core:
 138  * - Note the slot that completed (for later)
 139  * - Log the time spent by the job (and implicitly, the time spent by the
 140  * context)
 141  *  - call kbasep_js_policy_log_job_result() <em>in the context of the irq
 142  * handler.</em>
 143  *  - This must happen regardless of whether the job completed successfully or
 144  * not (otherwise the context gets away with DoS'ing the system with faulty jobs)
 145  * - What was the result of the job?
 146  *  - If Completed: job is just removed from the system
 147  *  - If Hard-stop or failure: job is removed from the system
 148  *  - If Soft-stop: queue the book-keeping work onto a work-queue: have a
 149  * work-queue call kbasep_js_policy_enqueue_job()
 150  * - Check the timeslice used by the owning context
 151  *  - call kbasep_js_policy_should_remove_ctx() <em>in the context of the irq
 152  * handler.</em>
 153  *  - If this returns true, clear the "allowed" flag.
 154  * - Check the ctx's flags for "allowed", "has jobs to run" and "is running
 155  * jobs"
 156  * - And so, should the context stay scheduled in?
 157  *  - If No, push onto a work-queue the work of scheduling out the old context,
 158  * and getting a new one. That is:
 159  *   - kbasep_js_policy_runpool_remove_ctx() on old_ctx
 160  *   - kbasep_js_policy_enqueue_ctx() on old_ctx
 161  *   - kbasep_js_policy_dequeue_head_ctx() to get new_ctx
 162  *   - kbasep_js_policy_runpool_add_ctx() on new_ctx
 163  *   - (all of this work is deferred on a work-queue to keep the IRQ handler quick)
 164  * - If there is space in the completed job slots' HEAD/NEXT registers, run the next job:
 165  *  - kbasep_js_policy_dequeue_job() <em>in the context of the irq
 166  * handler</em> with core_req set to that of the completing slot
 167  *  - if this returned MALI_TRUE, submit the job to the completed slot.
 168  *  - This is repeated until kbasep_js_policy_dequeue_job() returns
 169  * MALI_FALSE, or the job slot has a job queued on both the HEAD and NEXT registers.
 170  *  - If kbasep_js_policy_dequeue_job() returned false, submit some work to
 171  * the work-queue to retry from outside of IRQ context (calling
 172  * kbasep_js_policy_dequeue_job() from a work-queue).
 173  *
 174  * Since the IRQ handler submits new jobs \em and re-checks the IRQ_RAWSTAT,
 175  * this sequence could loop a large number of times: this could happen if
 176  * the jobs submitted completed on the GPU very quickly (in a few cycles), such
 177  * as GPU NULL jobs. Then, the HEAD/NEXT registers will always be free to take
 178  * more jobs, causing us to loop until we run out of jobs.
 179  *
 180  * To mitigate this, we must limit the number of jobs submitted per slot during
 181  * the IRQ handler - for example, no more than 2 jobs per slot per IRQ should
 182  * be sufficient (to fill up the HEAD + NEXT registers in normal cases). For
 183  * Mali-T600 with 3 job slots, this means that up to 6 jobs could be submitted per
 184  * IRQ. Note that IRQ Throttling can make this situation commonplace: 6 jobs
 185  * could complete but the IRQ for each of them is delayed by the throttling. By
 186  * the time you get the IRQ, all 6 jobs could've completed, meaning you can
 187  * submit jobs to fill all 6 HEAD+NEXT registers again.
 188  *
 189  * @note As much work is deferred as possible, which includes the scheduling
 190  * out of a context and scheduling in a new context. However, we can still make
 191  * starting a single high-priority context quick despite this:
 192  * - On Mali-T600 family, there is one more AS than JSs.
 193  * - This means we can very quickly schedule out one AS, no matter what the
 194  * situation (because there will always be one AS that's not currently running
 195  * on the job slot - it can only have a job in the NEXT register).
 196  *  - Even with this scheduling out, fair-share can still be guaranteed e.g. by
 197  * a timeline-based Completely Fair Scheduler.
 198  * - When our high-priority context comes in, we can do this quick-scheduling
 199  * out immediately, and then schedule in the high-priority context without having to block.
 200  * - This all assumes that the context to schedule out is of lower
 201  * priority. Otherwise, we will have to block waiting for some other low
 202  * priority context to finish its jobs. Note that it's likely (but not
 203  * impossible) that the high-priority context \b is running jobs, by virtue of
 204  * it being high priority.
 205  * - Therefore, we can give a high likelihood that on Mali-T600 at least one
 206  * high-priority context can be started very quickly. For the general case, we
 207  * can guarantee starting (no. ASs) - (no. JSs) high priority contexts
 208  * quickly. In any case, there is a high likelihood that we're able to start
 209  * more than one high priority context quickly.
 210  *
 211  * In terms of the functions used in the IRQ handler directly, these are the
 212  * performance considerations:
 213  * - kbasep_js_policy_log_job_result():
 214  *  - This is just adding to a 64-bit value (possibly even a 32-bit value if we
 215  * only store the time the job's recently spent - see below on 'priority weighting')
 216  *  - For priority weighting, a divide operation ('div') could happen, but
 217  * this can happen in a deferred context (outside of IRQ) when scheduling out
 218  * the ctx; as per our Engineering Specification, the contexts of different
 219  * priority still stay scheduled in for the same timeslice, but higher priority
 220  * ones scheduled back in more often.
 221  *  - That is, the weighted and unweighted times must be stored separately, and
 222  * the weighted time is only updated \em outside of IRQ context.
 223  *  - Of course, this divide is more likely to be a 'multiply by inverse of the
 224  * weight', assuming that the weight (priority) doesn't change.
 225  * - kbasep_js_policy_should_remove_ctx():
 226  *  - This is usually just a comparison of the stored time value against some
 227  * maximum value.
 228  *
 229  * @note all deferred work can be wrapped up into one call - we usually need to
 230  * indicate that a job/bag is done outside of IRQ context anyway.
 231  *
 232  *
 233  *
 234  * @section sec_kbase_js_policy_operation_submit Submission path
 235  *
 236  * Start with a Context with no jobs present, and assume equal priority of all
 237  * contexts in the system. The following work all happens outside of IRQ
 238  * Context :
 239  * - As soon as a job is made 'ready to run', it must be registered with the Job
 240  * Scheduler Policy:
 241  *  - 'Ready to run' means it has satisfied its dependencies in the
 242  * Kernel-side Job Dispatch system.
 243  *  - Call kbasep_js_policy_enqueue_job()
 244  *  - This indicates that the job should be scheduled (it is ready to run).
 245  * - As soon as a ctx changes from having 0 jobs 'ready to run' to >0 jobs
 246  * 'ready to run', we enqueue the context on the policy queue:
 247  *  - Call kbasep_js_policy_enqueue_ctx()
 248  *  - This indicates that the \em ctx should be scheduled (it is ready to run)
 249  *
 250  * Next, we need to handle adding a context to the Run Pool - if it's sensible
 251  * to do so. This can happen due to two reasons:
 252  * -# A context is enqueued as above, and there are ASs free for it to run on
 253  * (e.g. it is the first context to be run, in which case it can be added to
 254  * the Run Pool immediately after enqueuing on the Policy Queue)
 255  * -# A previous IRQ caused another ctx to be scheduled out, requiring that the
 256  * context at the head of the queue be scheduled in. Such steps would happen in
 257  * a work queue (work deferred from the IRQ context).
 258  *
 259  * In both cases, we'd handle it as follows:
 260  * - Get the context at the Head of the Policy Queue:
 261  *  - Call kbasep_js_policy_dequeue_head_ctx()
 262  * - Assign the Context an Address Space (Assert that there will be one free,
 263  * given the above two reasons)
 264  * - Add this context to the Run Pool:
 265  *  - Call kbasep_js_policy_runpool_add_ctx()
 266  * - Now see if a job should be run:
 267  *  - Mostly, this will be done in the IRQ handler at the completion of a
 268  * previous job.
 269  *  - However, there are two cases where this cannot be done: a) The first job
 270  * enqueued to the system (there is no previous IRQ to act upon) b) When jobs
 271  * are submitted at a low enough rate to not fill up all Job Slots (or, not to
 272  * fill both the 'HEAD' and 'NEXT' registers in the job-slots)
 273  *  - Hence, on each ctx <b>and job</b> submission we should try to see if we
 274  * can run a job:
 275  *  - For each job slot that has free space (in NEXT or HEAD+NEXT registers):
 276  *   - Call kbasep_js_policy_dequeue_job() with core_req set to that of the
 277  * slot
 278  *   - if we got one, submit it to the job slot.
 279  *   - This is repeated until kbasep_js_policy_dequeue_job() returns
 280  * MALI_FALSE, or the job slot has a job queued on both the HEAD and NEXT registers.
 281  *
 282  * The above case shows that we should attempt to run jobs in cases where a) a ctx
 283  * has been added to the Run Pool, and b) new jobs have been added to a context
 284  * in the Run Pool:
 285  * - In the latter case, the context is in the runpool because it's got a job
 286  * ready to run, or is already running a job
 287  * - We could just wait until the IRQ handler fires, but for certain types of
 288  * jobs this can take a comparatively long time to complete, e.g. GLES FS jobs
 289  * generally take much longer to run than GLES CS jobs, which are vertex shader
 290  * jobs.
 291  * - Therefore, when a new job appears in the ctx, we must check the job-slots
 292  * to see if they're free, and run the jobs as before.
 293  *
 294  *
 295  *
 296  * @section sec_kbase_js_policy_operation_submit_hipri Submission path for High Priority Contexts
 297  *
 298  * For High Priority Contexts on Mali-T600, we can make sure that at least 1 of
 299  * them can be scheduled in immediately to start high priority jobs. In general,
 300  * (no. ASs) - (no. JSs) high priority contexts may be started immediately. The
 301  * following describes how this happens:
 302  *
 303  * Similar to the previous section, consider what happens with a high-priority
 304  * context (a context with a priority higher than that of any in the Run Pool)
 305  * that starts out with no jobs:
 306  * - A job becomes ready to run on the context, and so we enqueue the context
 307  * on the Policy's Queue.
 308  * - However, we'd like to schedule in this context immediately, instead of
 309  * waiting for one of the Run Pool contexts' timeslice to expire
 310  * - The policy's Enqueue function must detect this (because it is the policy
 311  * that embodies the concept of priority), and take appropriate action
 312  *  - That is, kbasep_js_policy_enqueue_ctx() should check the Policy's Run
 313  * Pool to see if a lower priority context should be scheduled out, and then
 314  * schedule in the High Priority context.
 315  *  - For Mali-T600, we can always pick a context to schedule out immediately
 316  * (because there are more ASs than JSs), and so scheduling out a victim context
 317  * and scheduling in the high priority context can happen immediately.
 318  *   - If a policy implements fair-sharing, then this can still ensure the
 319  * victim later on gets a fair share of the GPU.
 320  *   - As a note, consider whether the victim can be of equal/higher priority
 321  * than the incoming context:
 322  *   - Usually, higher priority contexts will be the ones currently running
 323  * jobs, and so the context with the lowest priority is usually not running
 324  * jobs.
 325  *   - This makes it likely that the victim context is low priority, but
 326  * it's not impossible for it to be a high priority one:
 327  *    - Suppose 3 high priority contexts are submitting only FS jobs, and one low
 328  * priority context submitting CS jobs. Then, the context not running jobs will
 329  * be one of the hi priority contexts (because only 2 FS jobs can be
 330  * queued/running on the GPU HW for Mali-T600).
 331  *   - The problem can be mitigated by extra action, but it's questionable
 332  * whether we need to: we already have a high likelihood that there's at least
 333  * one high priority context - that should be good enough.
 334  *   - And so, this method makes sure that at least one high priority context
 335  * can be started very quickly, but more than one high priority contexts could be
 336  * delayed (up to one timeslice).
 337  *   - To improve this, use a GPU with a higher number of Address Spaces vs Job
 338  * Slots.
 339  * - At this point, let's assume this high priority context has been scheduled
 340  * in immediately. The next step is to ensure it can start some jobs quickly.
 341  *  - It must do this by Soft-Stopping jobs on any of the Job Slots that it can
 342  * submit to.
 343  *  - The rest of the logic for starting the jobs is taken care of by the IRQ
 344  * handler. All the policy needs to do is ensure that
 345  * kbasep_js_policy_dequeue_job() will return the jobs from the high priority
 346  * context.
 347  *
 348  * @note in SS state, we currently only use 2 job-slots (even for T608, but
 349  * this might change in future). In this case, it's always possible to schedule
 350  * out 2 ASs quickly (their jobs won't be in the HEAD registers). At the same
 351  * time, this maximizes usage of the job-slots (only 2 are in use), because you
 352  * can guarantee starting of the jobs from the High Priority contexts immediately too.
 353  *
 354  *
 355  *
 356  * @section sec_kbase_js_policy_operation_notes Notes
 357  *
 358  * - In this design, a separate 'init' is needed from dequeue/requeue, so that
 359  * information can be retained between the dequeue/requeue calls. For example,
 360  * the total time spent for a context/job could be logged between
 361  * dequeue/requeuing, to implement Fair Sharing. In this case, 'init' just
 362  * initializes that information to some known state.
 363  *
 364  *
 365  *
 366  */
 367
 368 /**
 369  * @addtogroup base_api
 370  * @{
 371  */
 372
 373 /**
 374  * @addtogroup base_kbase_api
 375  * @{
 376  */
 377
 378 /**
 379  * @addtogroup kbase_js_policy Job Scheduler Policy APIs
 380  * @{
 381  *
 382  * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
 383  * the Job Scheduler Policy and its use from the Job Scheduler Core.</b>
 384  */
 385
 386 /**
 387  * @brief Job Scheduler Policy structure
 388  */
 389 union kbasep_js_policy;
 390
 391 /**
 392  * @brief Initialize the Job Scheduler Policy
 393  */
 394 mali_error kbasep_js_policy_init(kbase_device *kbdev);
 395
 396 /**
 397  * @brief Terminate the Job Scheduler Policy
 398  */
 399 void kbasep_js_policy_term(kbasep_js_policy *js_policy);
 400
 401 /**
 402  * @addtogroup kbase_js_policy_ctx Job Scheduler Policy, Context Management API
 403  * @{
 404  *
 405  * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
 406  * the Job Scheduler Policy and its use from the Job Scheduler Core.</b>
 407  */
 408
 409 /**
 410  * @brief Job Scheduler Policy Ctx Info structure
 411  *
 412  * This structure is embedded in the kbase_context structure. It is used to:
 413  * - track information needed for the policy to schedule the context (e.g. time
 414  * used, OS priority etc.)
 415  * - link together kbase_contexts into a queue, so that a kbase_context can be
 416  * obtained as the container of the policy ctx info. This allows the API to
 417  * return what "the next context" should be.
 418  * - obtain other information already stored in the kbase_context for
 419  * scheduling purposes (e.g. process ID to get the priority of the originating
 420  * process)
 421  */
 422 union kbasep_js_policy_ctx_info;
 423
 424 /**
 425  * @brief Initialize a ctx for use with the Job Scheduler Policy
 426  *
 427  * This effectively initializes the kbasep_js_policy_ctx_info structure within
 428  * the kbase_context (itself located within the kctx->jctx.sched_info structure).
 429  */
 430 mali_error kbasep_js_policy_init_ctx(kbase_device *kbdev, kbase_context *kctx);
 431
 432 /**
 433  * @brief Terminate resources associated with using a ctx in the Job Scheduler
 434  * Policy.
 435  */
 436 void kbasep_js_policy_term_ctx(kbasep_js_policy *js_policy, kbase_context *kctx);
 437
 438 /**
 439  * @brief Enqueue a context onto the Job Scheduler Policy Queue
 440  *
 441  * If the context enqueued has a priority higher than any in the Run Pool, then
 442  * it is the Policy's responsibility to decide whether to schedule out a low
 443  * priority context from the Run Pool to allow the high priority context to be
 444  * scheduled in.
 445  *
 446  * If the context has the privileged flag set, it will always be kept at the
 447  * head of the queue.
 448  *
 449  * The caller will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
 450  * The caller will be holding kbasep_js_device_data::queue_mutex.
 451  */
 452 void kbasep_js_policy_enqueue_ctx(kbasep_js_policy *js_policy, kbase_context *kctx);
 453
 454 /**
 455  * @brief Dequeue a context from the Head of the Job Scheduler Policy Queue
 456  *
 457  * The caller will be holding kbasep_js_device_data::queue_mutex.
 458  *
 459  * @return MALI_TRUE if a context was available, and *kctx_ptr points to
 460  * the kctx dequeued.
 461  * @return MALI_FALSE if no contexts were available.
 462  */
 463 mali_bool kbasep_js_policy_dequeue_head_ctx(kbasep_js_policy *js_policy, kbase_context ** const kctx_ptr);
 464
 465 /**
 466  * @brief Evict a context from the Job Scheduler Policy Queue
 467  *
 468  * This is only called as part of destroying a kbase_context.
 469  *
 470  * There are many reasons why this might fail during the lifetime of a
 471  * context. For example, the context is in the process of being scheduled. In
 472  * that case a thread doing the scheduling might have a pointer to it, but the
 473  * context is neither in the Policy Queue, nor is it in the Run
 474  * Pool. Crucially, neither the Policy Queue, Run Pool, or the Context itself
 475  * are locked.
 476  *
 477  * Hence to find out where in the system the context is, it is important to do
 478  * more than just check the kbasep_js_kctx_info::ctx::is_scheduled member.
 479  *
 480  * The caller will be holding kbasep_js_device_data::queue_mutex.
 481  *
 482  * @return MALI_TRUE if the context was evicted from the Policy Queue
 483  * @return MALI_FALSE if the context was not found in the Policy Queue
 484  */
 485 mali_bool kbasep_js_policy_try_evict_ctx(kbasep_js_policy *js_policy, kbase_context *kctx);
 486
 487 /**
 488  * @brief Call a function on all jobs belonging to a non-queued, non-running
 489  * context, optionally detaching the jobs from the context as it goes.
 490  *
 491  * At the time of the call, the context is guaranteed to be not-currently
 492  * scheduled on the Run Pool (is_scheduled == MALI_FALSE), and not present in
 493  * the Policy Queue. This is because one of the following functions was used
 494  * recently on the context:
 495  * - kbasep_js_policy_try_evict_ctx()
 496  * - kbasep_js_policy_runpool_remove_ctx()
 497  *
 498  * In both cases, no subsequent call was made on the context to any of:
 499  * - kbasep_js_policy_runpool_add_ctx()
 500  * - kbasep_js_policy_enqueue_ctx()
 501  *
 502  * Due to the locks that might be held at the time of the call, the callback
 503  * may need to defer work on a workqueue to complete its actions (e.g. when
 504  * cancelling jobs).
 505  *
 506  * \a detach_jobs must only be set when cancelling jobs (which occurs as part
 507  * of context destruction).
 508  *
 509  * The locking conditions on the caller are as follows:
 510  * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
 511  */
 512 void kbasep_js_policy_foreach_ctx_job(kbasep_js_policy *js_policy, kbase_context *kctx,
 513         kbasep_js_policy_ctx_job_cb callback, mali_bool detach_jobs);
 514
 515 /**
 516  * @brief Add a context to the Job Scheduler Policy's Run Pool
 517  *
 518  * If the context enqueued has a priority higher than any in the Run Pool, then
 519  * it is the Policy's responsibility to decide whether to schedule out low
 520  * priority jobs that are currently running on the GPU.
 521  *
 522  * The number of contexts present in the Run Pool will never be more than the
 523  * number of Address Spaces.
 524  *
 525  * The following guarantees are made about the state of the system when this
 526  * is called:
 527  * - kctx->as_nr member is valid
 528  * - the context has its submit_allowed flag set
 529  * - kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is valid
 530  * - The refcount of the context is guaranteed to be zero.
 531  * - kbasep_js_kctx_info::ctx::is_scheduled will be MALI_TRUE.
 532  *
 533  * The locking conditions on the caller are as follows:
 534  * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
 535  * - it will be holding kbasep_js_device_data::runpool_mutex.
 536  * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
 537  *
 538  * Due to a spinlock being held, this function must not call any APIs that sleep.
 539  */
 540 void kbasep_js_policy_runpool_add_ctx(kbasep_js_policy *js_policy, kbase_context *kctx);
 541
 542 /**
 543  * @brief Remove a context from the Job Scheduler Policy's Run Pool
 544  *
 545  * The kctx->as_nr member is valid and the context has its submit_allowed flag
 546  * set when this is called. The state of
 547  * kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is also
 548  * valid. The refcount of the context is guaranteed to be zero.
 549  *
 550  * The locking conditions on the caller are as follows:
 551  * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
 552  * - it will be holding kbasep_js_device_data::runpool_mutex.
 553  * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
 554  *
 555  * Due to a spinlock being held, this function must not call any APIs that sleep.
 556  */
 557 void kbasep_js_policy_runpool_remove_ctx(kbasep_js_policy *js_policy, kbase_context *kctx);
 558
 559 /**
 560  * @brief Indicate whether a context should be removed from the Run Pool
 561  * (should be scheduled out).
 562  *
 563  * The kbasep_js_device_data::runpool_irq::lock will be held by the caller.
 564  *
 565  * @note This API is called from IRQ context.
 566  */
 567 mali_bool kbasep_js_policy_should_remove_ctx(kbasep_js_policy *js_policy, kbase_context *kctx);
 568
 569 /**
 570  * @brief Synchronize with any timers acting upon the runpool
 571  *
 572  * The policy should check whether any timers it owns should be running. If
 573  * they should not, the policy must cancel such timers and ensure they are not
 574  * re-run by the time this function finishes.
 575  *
 576  * In particular, the timers must not be running when there are no more contexts
 577  * on the runpool, because the GPU could be powered off soon after this call.
 578  *
 579  * The locking conditions on the caller are as follows:
 580  * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
 581  * - it will be holding kbasep_js_device_data::runpool_mutex.
 582  */
 583 void kbasep_js_policy_runpool_timers_sync(kbasep_js_policy *js_policy);
 584
 585
 586 /**
 587  * @brief Indicate whether a new context has a higher priority than the current context.
 588  *
 589  *
 590  * The caller has the following conditions on locking:
 591  * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held for \a new_ctx
 592  *
 593  * This function must not sleep, because an IRQ spinlock might be held whilst
 594  * this is called.
 595  *
 596  * @note There is nothing to stop the priority of \a current_ctx changing
 597  * during or immediately after this function is called (because its jsctx_mutex
 598  * cannot be held). Therefore, this function should only be seen as a heuristic
 599  * guide as to whether \a new_ctx is higher priority than \a current_ctx.
 600  */
 601 mali_bool kbasep_js_policy_ctx_has_priority(kbasep_js_policy *js_policy, kbase_context *current_ctx, kbase_context *new_ctx);
 602
 603           /** @} *//* end group kbase_js_policy_ctx */
 604
 605 /**
 606  * @addtogroup kbase_js_policy_job Job Scheduler Policy, Job Chain Management API
 607  * @{
 608  *
 609  * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
 610  * the Job Scheduler Policy and its use from the Job Scheduler Core.</b>
 611  */
 612
 613 /**
 614  * @brief Job Scheduler Policy Job Info structure
 615  *
 616  * This structure is embedded in the kbase_jd_atom structure. It is used to:
 617  * - track information needed for the policy to schedule the job (e.g. time
 618  * used, OS priority etc.)
 619  * - link together jobs into a queue/buffer, so that a kbase_jd_atom can be
 620  * obtained as the container of the policy job info. This allows the API to
 621  * return what "the next job" should be.
 622  * - obtain other information already stored in the kbase_jd_atom for
 623  * scheduling purposes (e.g. user-side relative priority)
 624  */
 625 union kbasep_js_policy_job_info;
 626
 627 /**
 628  * @brief Initialize a job for use with the Job Scheduler Policy
 629  *
 630  * This function initializes the kbasep_js_policy_job_info structure within the
 631  * kbase_jd_atom. It will only initialize/allocate resources that are specific
 632  * to the job.
 633  *
 634  * That is, this function makes \b no attempt to:
 635  * - initialize any context/policy-wide information
 636  * - enqueue the job on the policy.
 637  *
 638  * At some later point, the following functions must be called on the job, in this order:
 639  * - kbasep_js_policy_register_job() to register the job and initialize policy/context wide data.
 640  * - kbasep_js_policy_enqueue_job() to enqueue the job
 641  *
 642  * A job must only ever be initialized on the Policy once, and must be
 643  * terminated on the Policy before the job is freed.
 644  *
 645  * The caller will not be holding any locks, and so this function will not
 646  * modify any information in \a kctx or \a js_policy.
 647  *
 648  * @return MALI_ERROR_NONE if initialization succeeded.
 649  */
 650 mali_error kbasep_js_policy_init_job(const kbasep_js_policy *js_policy, const kbase_context *kctx, kbase_jd_atom *katom);
 651
 652 /**
 653  * @brief Register context/policy-wide information for a job on the Job Scheduler Policy.
 654  *
 655  * Registers the job with the policy. This is used to track the job before it
 656  * has been enqueued/requeued by kbasep_js_policy_enqueue_job(). Specifically,
 657  * it is used to update information under a lock that could not be updated at
 658  * kbasep_js_policy_init_job() time (such as context/policy-wide data).
 659  *
 660  * @note This function will not fail, and hence does not allocate any
 661  * resources. Any failures that could occur on registration will be caught
 662  * during kbasep_js_policy_init_job() instead.
 663  *
 664  * A job must only ever be registered on the Policy once, and must be
 665  * deregistered on the Policy on completion (whether or not that completion was
 666  * success/failure).
 667  *
 668  * The caller has the following conditions on locking:
 669  * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
 670  */
 671 void kbasep_js_policy_register_job(kbasep_js_policy *js_policy, kbase_context *kctx, kbase_jd_atom *katom);
 672
 673 /**
 674  * @brief De-register context/policy-wide information for a job on the Job Scheduler Policy.
 675  *
 676  * This must be used before terminating the resources associated with using a
 677  * job in the Job Scheduler Policy. This function does not itself terminate any
 678  * resources, at most it just updates information in the policy and context.
 679  *
 680  * The caller has the following conditions on locking:
 681  * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
 682  */
 683 void kbasep_js_policy_deregister_job(kbasep_js_policy *js_policy, kbase_context *kctx, kbase_jd_atom *katom);
 684
 685 /**
 686  * @brief Dequeue a Job for a job slot from the Job Scheduler Policy Run Pool
 687  *
 688  * The job returned by the policy will match at least one of the bits in the
 689  * job slot's core requirements (but it may match more than one, or all @ref
 690  * base_jd_core_req bits supported by the job slot).
 691  *
 692  * In addition, the requirements of the job returned will be a subset of those
 693  * requested - the job returned will not have requirements that \a job_slot_idx
 694  * cannot satisfy.
 695  *
 696  * The caller will submit the job to the GPU as soon as the GPU's NEXT register
 697  * for the corresponding slot is empty. Of course, the GPU will then only run
 698  * this new job when the currently executing job (in the jobslot's HEAD
 699  * register) has completed.
 700  *
 701  * @return MALI_TRUE if a job was available, and \a *katom_ptr points to
 702  * the job (atom) dequeued.
 703  * @return MALI_FALSE if no jobs were available among all ctxs in the Run Pool.
 704  *
 705  * @note base_jd_core_req is currently a u8 - beware of type conversion.
 706  *
 707  * The caller has the following conditions on locking:
 708  * - kbasep_js_device_data::runpool_irq::lock will be held.
 709  * - kbasep_js_device_data::runpool_mutex will be held.
 710  * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
 711  */
 712 mali_bool kbasep_js_policy_dequeue_job(kbase_device *kbdev, int job_slot_idx, kbase_jd_atom ** const katom_ptr);
 713
 714 /**
 715  * @brief Requeue a Job back into the Job Scheduler Policy Run Pool
 716  *
 717  * This will be used to enqueue a job after its creation and also to requeue
 718  * a job into the Run Pool that was previously dequeued (running). It notifies
 719  * the policy that the job should be run again at some point later.
 720  *
 721  * The caller has the following conditions on locking:
 722  * - kbasep_js_device_data::runpool_irq::lock (a spinlock) will be held.
 723  * - kbasep_js_device_data::runpool_mutex will be held.
 724  * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
 725  */
 726 void kbasep_js_policy_enqueue_job(kbasep_js_policy *js_policy, kbase_jd_atom *katom);
 727
 728 /**
 729  * @brief Log the result of a job: the time spent on a job/context, and whether
 730  * the job failed or not.
 731  *
 732  * Since a kbase_jd_atom contains a pointer to the kbase_context owning it,
 733  * then this can also be used to log time on either/both the job and the
 734  * containing context.
 735  *
 736  * The completion state of the job can be found by examining \a katom->event.event_code
 737  *
 738  * If the Job failed and the policy is implementing fair-sharing, then the
 739  * policy must penalize the failing job/context:
 740  * - At the very least, it should penalize the time taken by the amount of
 741  * time spent processing the IRQ in SW. This is because a job in the NEXT slot
 742  * waiting to run will be delayed until the failing job has had the IRQ
 743  * cleared.
 744  * - \b Optionally, the policy could apply other penalties. For example, based
 745  * on a threshold of a number of failing jobs, after which a large penalty is
 746  * applied.
 747  *
 748  * The kbasep_js_device_data::runpool_mutex will be held by the caller.
 749  *
 750  * @note This API is called from IRQ context.
 751  *
 752  * The caller has the following conditions on locking:
 753  * - kbasep_js_device_data::runpool_irq::lock will be held.
 754  *
 755  * @param js_policy     job scheduler policy
 756  * @param katom         job dispatch atom
 757  * @param time_spent_us the time spent by the job, in microseconds (10^-6 seconds).
 758  */
 759 void kbasep_js_policy_log_job_result(kbasep_js_policy *js_policy, kbase_jd_atom *katom, u64 time_spent_us);
 760
 761           /** @} *//* end group kbase_js_policy_job */
 762
 763           /** @} *//* end group kbase_js_policy */
 764           /** @} *//* end group base_kbase_api */
 765           /** @} *//* end group base_api */
 766
 767 #endif                          /* _KBASE_JS_POLICY_H_ */