drivers/gpu/arm/midgard/mali_kbase_js_defs.h

   1 /*
   2  *
   3  * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
   4  *
   5  * This program is free software and is provided to you under the terms of the
   6  * GNU General Public License version 2 as published by the Free Software
   7  * Foundation, and any use by you of this program is subject to the terms
   8  * of such GNU licence.
   9  *
  10  * A copy of the licence is included with the program, and can also be obtained
  11  * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  12  * Boston, MA  02110-1301, USA.
  13  *
  14  */
  15
  16
  17
  18
  19
  20 /**
  21  * @file mali_kbase_js_defs.h
  22  * Job Scheduler Type Definitions
  23  */
  24
  25 #ifndef _KBASE_JS_DEFS_H_
  26 #define _KBASE_JS_DEFS_H_
  27
  28 /**
  29  * @addtogroup base_api
  30  * @{
  31  */
  32
  33 /**
  34  * @addtogroup base_kbase_api
  35  * @{
  36  */
  37
  38 /**
  39  * @addtogroup kbase_js
  40  * @{
  41  */
  42 /* Forward declarations; this header only refers to these types through pointers */
  43 struct kbase_device;
  44 struct kbase_jd_atom;
  45
  46
  47 /* Types used by the policies must go here */
  48 enum {
  49         /** Set if the context will not submit any jobs (bit 0) */
  50         KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0),
  51
  52         /** Set if the context uses an address space and should be kept scheduled in (bit 1) */
  53         KBASE_CTX_FLAG_PRIVILEGED = (1u << 1)
  54
  55             /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */
  56 };
  57
  58 typedef u32 kbase_context_flags; /**< Bitmask type; presumably holds KBASE_CTX_FLAG_<...> bits - confirm against users */
  59
  60 struct kbasep_atom_req { /* NOTE(review): appears to bundle an atom's requirements for the policies - confirm */
  61         base_jd_core_req core_req; /* core requirements */
  62         kbase_context_flags ctx_req; /* context flags; presumably KBASE_CTX_FLAG_<...> bits - confirm */
  63         u32 device_nr; /* NOTE(review): looks like a core group number - confirm */
  64 };
  65
  66 #include "mali_kbase_js_policy_cfs.h"
  67
  68 /* Wrapper Interface - doxygen is elsewhere. Policy-specific data held in struct kbasep_js_device_data; CFS is the only policy listed */
  69 union kbasep_js_policy {
  70         struct kbasep_js_policy_cfs cfs;
  71 };
  72
  73 /* Wrapper Interface - doxygen is elsewhere. Policy-specific context data (see kbasep_js_kctx_info::runpool); CFS is the only policy listed */
  74 union kbasep_js_policy_ctx_info {
  75         struct kbasep_js_policy_cfs_ctx cfs;
  76 };
  77
  78 /* Wrapper Interface - doxygen is elsewhere. Presumably the policy-specific per-job data (confirm); CFS is the only policy listed */
  79 union kbasep_js_policy_job_info {
  80         struct kbasep_js_policy_cfs_job cfs;
  81 };
  82
  83
  84 /** Callback function run on all of a context's jobs registered with the Job
  85  * Scheduler. It receives the device (kbdev) and the atom (katom) it is run on. */
  86 typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
  87
  88 /**
  89  * @brief Maximum number of jobs that can be submitted to a job slot whilst
  90  * inside the IRQ handler.
  91  *
  92  * This is important because GPU NULL jobs can complete whilst the IRQ handler
  93  * is running. Without this limit, an unlimited number of GPU NULL jobs could
  94  * be submitted inside the IRQ handler, which would increase IRQ latency.
  95  */
  96 #define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
  97
  98 /**
  99  * @brief The IRQ_THROTTLE time, in microseconds
 100  *
 101  * This will be converted via the GPU's clock frequency into a cycle-count.
 102  *
 103  * @note We can estimate the GPU's frequency by periodically sampling its
 104  * CYCLE_COUNT register
 105  */
 106 #define KBASE_JS_IRQ_THROTTLE_TIME_US 20
 107
 108 /**
 109  * @brief Context attributes
 110  *
 111  * Each context attribute can be thought of as a boolean value that caches some
 112  * state information about either the runpool, or the context:
 113  * - In the case of the runpool, it is a cache of "Do any contexts owned by
 114  * the runpool have attribute X?"
 115  * - In the case of a context, it is a cache of "Do any atoms owned by the
 116  * context have attribute X?"
 117  *
 118  * The boolean values of the context attributes often affect scheduling
 119  * decisions, such as affinities to use and job slots to use.
 120  *
 121  * To accommodate changes of state in the context, each attribute is refcounted
 122  * in the context, and in the runpool for all running contexts. Specifically:
 123  * - The runpool holds a refcount of how many contexts in the runpool have this
 124  * attribute.
 125  * - The context holds a refcount of how many atoms have this attribute.
 126  */
 127 enum kbasep_js_ctx_attr {
 128         /** Attribute indicating a context that contains Compute jobs. That is,
 129          * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
 130          *
 131          * @note A context can be both 'Compute' and 'Non Compute' if it contains
 132          * both types of jobs.
 133          */
 134         KBASEP_JS_CTX_ATTR_COMPUTE,
 135
 136         /** Attribute indicating a context that contains Non-Compute jobs. That is,
 137          * the context has some jobs that are \b not of type @ref
 138          * BASE_JD_REQ_ONLY_COMPUTE. The context usually has
 139          * BASE_CONTEXT_HINT_COMPUTE \b clear, but this depends on the HW
 140          * workarounds in use in the Job Scheduling Policy.
 141          *
 142          * @note A context can be both 'Compute' and 'Non Compute' if it contains
 143          * both types of jobs.
 144          */
 145         KBASEP_JS_CTX_ATTR_NON_COMPUTE,
 146
 147         /** Attribute indicating that a context contains compute-job atoms that
 148          * aren't restricted to a coherent group, and can run on all cores.
 149          *
 150          * Specifically, this is when the atom's \a core_req satisfy:
 151          * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) // uses slot 1 or slot 2
 152          * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
 153          *
 154          * Such atoms could be blocked from running if one of the coherent groups
 155          * is being used by another job slot, so tracking this context attribute
 156          * allows us to prevent such situations.
 157          *
 158          * @note This doesn't take into account the 1-coregroup case, where all
 159          * compute atoms would effectively be able to run on 'all cores', but
 160          * contexts will still not always get marked with this attribute. Instead,
 161          * it is the caller's responsibility to take into account the number of
 162          * coregroups when interpreting this attribute.
 163          *
 164          * @note Whilst Tiler atoms are normally combined with
 165          * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
 166          * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
 167          * enough to handle anyway.
 168          */
 169         KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
 170
 171         /** Number of attributes; must be the last in the enum */
 172         KBASEP_JS_CTX_ATTR_COUNT
 173 };
 174
 175 enum {
 176         /** Bit indicating that new atoms should be started because this atom completed */
 177         KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
 178         /** Bit indicating that the atom was evicted from the JS_NEXT registers */
 179         KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
 180 };
 181
 182 /** Bitmask holding a combination of KBASE_JS_ATOM_DONE_<...> bits */
 183 typedef u32 kbasep_js_atom_done_code;
 184
 185 /**
 186  * Data used by the scheduler that is unique for each Address Space.
 187  *
 188  * This is used in IRQ context and kbasep_js_device_data::runpool_irq::lock
 189  * must be held whilst accessing this data (including reads and atomic
 190  * decisions based on the read).
 191  */
 192 struct kbasep_js_per_as_data {
 193         /**
 194          * Ref count of whether this AS is busy, and must not be scheduled out
 195          *
 196          * When jobs are running this is always positive. However, it can still be
 197          * positive when no jobs are running. If all you need is a heuristic to
 198          * tell you whether jobs might be running, this should be sufficient.
 199          */
 200         int as_busy_refcount;
 201
 202         /** Pointer to the current context on this address space, or NULL for no context */
 203         struct kbase_context *kctx;
 204 };
 205
 206 /**
 207  * @brief KBase Device Data Job Scheduler sub-structure
 208  *
 209  * This encapsulates the current context of the Job Scheduler on a particular
 210  * device. This context is global to the device, and is not tied to any
 211  * particular struct kbase_context running on the device.
 212  *
 213  * nr_user_contexts_running, nr_all_contexts_running and as_free are optimized
 214  * for packing together (by making them smaller types than u32). The operations
 215  * on them should rarely involve masking. The use of signed types for arithmetic
 216  * indicates to the compiler that the value will not rollover (which would be
 217  * undefined behavior), and so under the Total License model, it is free to make
 218  * optimizations based on that (i.e. to remove masking).
 219  */
 220 struct kbasep_js_device_data {
 221         /** Sub-structure to collect together Job Scheduling data used in IRQ context */
 222         struct runpool_irq {
 223                 /**
 224                  * Lock for accessing Job Scheduling data used in IRQ context
 225                  *
 226                  * This lock must be held whenever this data is accessed (read, or
 227                  * write). Even for read-only access, memory barriers would be needed.
 228                  * In any case, it is likely that decisions based on only reading must
 229                  * also be atomic with respect to data held here and elsewhere in the
 230                  * Job Scheduler.
 231                  *
 232                  * This lock must also be held for accessing:
 233                  * - kbase_context::as_nr
 234                  * - kbase_device::jm_slots
 235                  * - Parts of the kbasep_js_policy, dependent on the policy (refer to
 236                  * the policy in question for more information)
 237                  * - Parts of kbasep_js_policy_ctx_info, dependent on the policy (refer to
 238                  * the policy in question for more information)
 239                  */
 240                 spinlock_t lock;
 241
 242                 /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
 243                  * When bit 'N' is set in this, it indicates that the context bound to address space
 244                  * 'N' (per_as_data[N].kctx) is allowed to submit jobs.
 245                  *
 246                  * It is placed here because it's much more memory efficient than having a u8 in
 247                  * struct kbasep_js_per_as_data to store this flag */
 248                 u16 submit_allowed;
 249
 250                 /** Context Attributes:
 251                  * Each is large enough to hold a refcount of the number of contexts
 252                  * that can fit into the runpool. This is currently BASE_MAX_NR_AS
 253                  *
 254                  * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
 255                  * the refcount. Hence, it's not worthwhile reducing this to
 256                  * bit-manipulation on u32s to save space (where in contrast, 4 bit
 257                  * sub-fields would be easy to do and would save space).
 258                  *
 259                  * Whilst this must not become negative, the sign bit is used for:
 260                  * - error detection in debug builds
 261                  * - Optimization: it is undefined for a signed int to overflow, and so
 262                  * the compiler can optimize for that never happening (thus, no masking
 263                  * is required on updating the variable) */
 264                 s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
 265
 266                 /** Data that is unique for each AS */
 267                 struct kbasep_js_per_as_data per_as_data[BASE_MAX_NR_AS];
 268
 269                 /*
 270                  * Affinity management and tracking
 271                  */
 272                 /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
 273                  * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
 274                 u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
 275                 /** Refcount for each core owned by each slot. Used to generate the
 276                  * slot_affinities array of bitvectors
 277                  *
 278                  * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
 279                  * because it is refcounted only when a job is definitely about to be
 280                  * submitted to a slot, and is de-refcounted immediately after a job
 281                  * finishes */
 282                 s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
 283         } runpool_irq;
 284
 285         /**
 286          * Run Pool mutex, for managing contexts within the runpool.
 287          * Unless otherwise specified, you must hold this lock whilst accessing any
 288          * members that follow
 289          *
 290          * In addition, this is used to access:
 291          * - the kbasep_js_kctx_info::runpool substructure
 292          */
 293         struct mutex runpool_mutex;
 294
 295         /**
 296          * Queue Lock, used to access the Policy's queue of contexts independently
 297          * of the Run Pool.
 298          *
 299          * Of course, you don't need the Run Pool lock to access this.
 300          */
 301         struct mutex queue_mutex;
 302
 303         /**
 304          * Scheduling semaphore. This must be held when calling
 305          * kbase_jm_kick()
 306          */
 307         struct semaphore schedule_sem;
 308
 309         /**
 310          * List of contexts that can currently be pulled from (one list per job slot)
 311          */
 312         struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
 313         /**
 314          * List of contexts that can not currently be pulled from, but have
 315          * jobs currently running (one list per job slot).
 316          */
 317         struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
 318
 319         u16 as_free;                            /**< Bitpattern of free Address Spaces */
 320
 321         /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
 322         s8 nr_user_contexts_running;
 323         /** Number of currently scheduled contexts (including ones that are not submitting jobs) */
 324         s8 nr_all_contexts_running;
 325
 326         /**
 327          * Policy-specific information.
 328          *
 329          * Refer to the structure defined by the current policy to determine which
 330          * locks must be held when accessing this.
 331          */
 332         union kbasep_js_policy policy;
 333
 334         /** Core Requirements to match up with base_js_atom's core_req member
 335          * @note This is a write-once member, and so no locking is required to read */
 336         base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
 337
 338         u32 scheduling_period_ns;    /**< Value for JS_SCHEDULING_PERIOD_NS */
 339         u32 soft_stop_ticks;         /**< Value for JS_SOFT_STOP_TICKS */
 340         u32 soft_stop_ticks_cl;      /**< Value for JS_SOFT_STOP_TICKS_CL */
 341         u32 hard_stop_ticks_ss;      /**< Value for JS_HARD_STOP_TICKS_SS */
 342         u32 hard_stop_ticks_cl;      /**< Value for JS_HARD_STOP_TICKS_CL */
 343         u32 hard_stop_ticks_dumping; /**< Value for JS_HARD_STOP_TICKS_DUMPING */
 344         u32 gpu_reset_ticks_ss;      /**< Value for JS_RESET_TICKS_SS */
 345         u32 gpu_reset_ticks_cl;      /**< Value for JS_RESET_TICKS_CL */
 346         u32 gpu_reset_ticks_dumping; /**< Value for JS_RESET_TICKS_DUMPING */
 347         u32 ctx_timeslice_ns;            /**< Value for JS_CTX_TIMESLICE_NS */
 348         u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */
 349         u32 cfs_ctx_runtime_min_slices;  /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */
 350
 351         /** List of suspended soft jobs */
 352         struct list_head suspended_soft_jobs_list;
 353
 354 #ifdef CONFIG_MALI_DEBUG
 355         /* Support soft-stop on a single context */
 356         bool softstop_always;
 357 #endif                          /* CONFIG_MALI_DEBUG */
 358
 359         /** The initialized-flag is placed at the end, to avoid cache-pollution (we should
 360          * only be using this during init/term paths).
 361          * @note This is a write-once member, and so no locking is required to read */
 362         int init_status;
 363
 364         /** Number of contexts that can currently be pulled from */
 365         u32 nr_contexts_pullable;
 366
 367         /** Number of contexts that can either be pulled from or are currently
 368          * running */
 369         atomic_t nr_contexts_runnable;
 370 };
 371
 372 /**
 373  * @brief KBase Context Job Scheduling information structure
 374  *
 375  * This is a substructure in the struct kbase_context that encapsulates all the
 376  * scheduling information.
 377  */
 378 struct kbasep_js_kctx_info {
 379         /**
 380          * Runpool substructure. This must only be accessed whilst the Run Pool
 381          * mutex ( kbasep_js_device_data::runpool_mutex ) is held.
 382          *
 383          * In addition, the kbasep_js_device_data::runpool_irq::lock may need to be
 384          * held for certain sub-members.
 385          *
 386          * @note some of the members could be moved into struct kbasep_js_device_data for
 387          * improved d-cache/tlb efficiency.
 388          */
 389         struct {
 390                 union kbasep_js_policy_ctx_info policy_ctx;     /**< Policy-specific context */
 391         } runpool;
 392
 393         /**
 394          * Job Scheduler Context information sub-structure. These members are
 395          * accessed regardless of whether the context is:
 396          * - In the Policy's Run Pool
 397          * - In the Policy's Queue
 398          * - Not queued nor in the Run Pool.
 399          *
 400          * You must obtain the jsctx_mutex before accessing any other members of
 401          * this substructure.
 402          *
 403          * You may not access any of these members from IRQ context.
 404          */
 405         struct {
 406                 struct mutex jsctx_mutex;                   /**< Job Scheduler Context lock */
 407
 408                 /** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
 409                  * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
 410                  * for such jobs */
 411                 u32 nr_jobs;
 412
 413                 /** Context Attributes:
 414                  * Each is large enough to hold a refcount of the number of atoms on
 415                  * the context. */
 416                 u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
 417
 418                 kbase_context_flags flags;
 419                 /* NOTE: Unify the following flags into kbase_context_flags */
 420                 /**
 421                  * Is the context scheduled on the Run Pool?
 422                  *
 423                  * This is only ever updated whilst the jsctx_mutex is held.
 424                  */
 425                 bool is_scheduled;
 426                 /**
 427                  * Wait queue to wait for is_scheduled state changes.
 428                  */
 429                 wait_queue_head_t is_scheduled_wait;
 430
 431                 bool is_dying;                  /**< Is the context in the process of being evicted? */
 432
 433                 /** Link implementing JS queues. Context can be present on one
 434                  * list per job slot
 435                  */
 436                 struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
 437         } ctx;
 438
 439         /* The initialized-flag is placed at the end, to avoid cache-pollution (we should
 440          * only be using this during init/term paths) */
 441         int init_status;
 442 };
 443
 444 /** Subset of atom state that can be available after jd_done_nolock() is called
 445  * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
 446  * because the original atom could disappear. */
 447 struct kbasep_js_atom_retained_state {
 448         /** Event code - to determine whether the atom has finished */
 449         enum base_jd_event_code event_code;
 450         /** Core requirements; KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID signifies 'invalid' */
 451         base_jd_core_req core_req;
 452         /** Priority */
 453         int sched_priority;
 454         /** Job Slot to retry submitting to if submission from IRQ handler failed, or KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID */
 455         int retry_submit_on_slot;
 456         /** Core group atom was executed on */
 457         u32 device_nr;
 458
 459 };
 460
 461 /**
 462  * Value signifying 'no retry on a slot required' for:
 463  * - kbasep_js_atom_retained_state::retry_submit_on_slot
 464  * - kbase_jd_atom::retry_submit_on_slot
 465  */
 466 #define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
 467
 468 /**
 469  * base_jd_core_req value signifying 'invalid' for a kbasep_js_atom_retained_state.
 470  *
 471  * @see kbase_atom_retained_state_is_valid()
 472  */
 473 #define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
 474
 475 /**
 476  * @brief The JS timer resolution, in microseconds
 477  *
 478  * Any non-zero difference between two times will be at least this large.
 479  */
 480 #define KBASEP_JS_TICK_RESOLUTION_US 1
 481
 482 /*
 483  * Internal atom priority defines for kbase_jd_atom::sched_prio
 484  */
 485 enum {
 486         KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0, /* numbering starts at 0 with the highest priority */
 487         KBASE_JS_ATOM_SCHED_PRIO_MED,
 488         KBASE_JS_ATOM_SCHED_PRIO_LOW,
 489         KBASE_JS_ATOM_SCHED_PRIO_COUNT, /* number of priority levels above; keep as the last entry */
 490 };
 491
 492 /* Invalid priority for kbase_jd_atom::sched_prio (parenthesized so it expands safely inside any expression) */
 493 #define KBASE_JS_ATOM_SCHED_PRIO_INVALID (-1)
 494
 495 /* Default priority (medium), used for contexts with no atoms, or when being
 496  * lenient about invalid priorities from userspace */
 497 #define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
 498
 499           /** @} *//* end group kbase_js */
 500           /** @} *//* end group base_kbase_api */
 501           /** @} *//* end group base_api */
 502
 503 #endif                          /* _KBASE_JS_DEFS_H_ */