3 * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
5 * This program is free software and is provided to you under the terms of the
6 * GNU General Public License version 2 as published by the Free Software
7 * Foundation, and any use by you of this program is subject to the terms
10 * A copy of the licence is included with the program, and can also be obtained
11 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
12 * Boston, MA 02110-1301, USA.
21 * Base kernel affinity manager APIs
24 #include <mali_kbase.h>
25 #include "mali_kbase_js_affinity.h"
26 #include "mali_kbase_hw.h"
28 #include <backend/gpu/mali_kbase_pm_internal.h>
/* Decide whether a context's atoms are allowed to be submitted on job slot
 * @js right now (caller holds the scheduler lock, hence "_no_lock").
 *
 * NOTE(review): this excerpt is truncated -- original line numbers are baked
 * into the text and several lines are missing (the "int js" parameter, the
 * opening brace, and the return statements). The annotations below cover
 * only the logic that is visible; recover the complete function before
 * attempting to build this file. */
31 bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
35 * Here are the reasons for using job slot 2:
36 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
37 * - In absence of the above, then:
38 * - Atoms with BASE_JD_REQ_COHERENT_GROUP
39 * - But, only when there aren't contexts with
40 * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
41 * all cores on slot 1 could be blocked by those using a coherent group
43 * - And, only when you actually have 2 or more coregroups - if you
44 * only have 1 coregroup, then having jobs for slot 2 implies they'd
45 * also be for slot 1, meaning you'll get interference from them. Jobs
46 * able to run on slot 2 could also block jobs that can only run on
/* HW workaround 8987 dedicates slot 2; presumably an early "return true"
 * follows here in the full source -- body line missing, confirm upstream. */
49 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
55 /* Only deal with js==2 now: */
56 if (kbdev->gpu_props.num_core_groups > 1) {
57 /* Only use slot 2 in the 2+ coregroup case */
58 if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
59 KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
61 /* ...But only when we *don't* have atoms that run on
64 /* No specific check for BASE_JD_REQ_COHERENT_GROUP
65 * atoms - the policy will sort that out */
70 /* Above checks failed mean we shouldn't use slot 2 */
/* Compute the shader-core affinity mask (*affinity) for @katom on slot @js,
 * intersecting the PM core-availability mask with the per-slot debug core
 * mask, and splitting by core group for coherent-group atoms.
 *
 * NOTE(review): excerpt is truncated (baked-in line numbers, missing braces,
 * returns and several statements, e.g. the "flags" declaration and the
 * final availability re-check). Annotations describe visible logic only. */
75 * As long as it has been decided to have a deeper modification of
76 * what job scheduler, power manager and affinity manager will
77 * implement, this function is just an intermediate step that
79 * - all working cores will be powered on when this is called.
80 * - largest current configuration is 2 core groups.
81 * - It has been decided not to have hardcoded values so the low
82 * and high cores in a core split will be evenly distributed.
83 * - Odd combinations of core requirements have been filtered out
84 * and do not get to this function (e.g. CS+T+NSS is not
86 * - This function is frequently called and can be optimized,
87 * (see notes in loops), but as the functionality will likely
88 * be modified, optimization has not been addressed.
90 bool kbase_js_choose_affinity(u64 * const affinity,
91 struct kbase_device *kbdev,
92 struct kbase_jd_atom *katom, int js)
94 base_jd_core_req core_req = katom->core_req;
95 unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
96 u64 core_availability_mask;
/* Availability mask must be sampled under the PM power-change lock. */
99 spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
101 core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
104 * If no cores are currently available (core availability policy is
105 * transitioning) then fail.
107 if (0 == core_availability_mask) {
108 spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
113 KBASE_DEBUG_ASSERT(js >= 0);
/* Tiler-only atom (no FS/CS/T shader requirement bits set beyond T):
 * condition's right-hand side is on a missing line -- confirm upstream. */
115 if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
117 spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
118 /* If the hardware supports XAFFINITY then we'll only enable
119 * the tiler (which is the default so this is a no-op),
120 * otherwise enable shader core 0. */
121 if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
129 if (1 == kbdev->gpu_props.num_cores) {
130 /* trivial case only one core, nothing to do */
131 *affinity = core_availability_mask &
132 kbdev->pm.debug_core_mask[js];
134 if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
135 BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
136 if (js == 0 || num_core_groups == 1) {
137 /* js[0] and single-core-group systems just get
138 * the first core group */
140 kbdev->gpu_props.props.coherency_info.group[0].core_mask
141 & core_availability_mask &
142 kbdev->pm.debug_core_mask[js];
144 /* js[1], js[2] use core groups 0, 1 for
145 * dual-core-group systems */
146 u32 core_group_idx = ((u32) js) - 1;
148 KBASE_DEBUG_ASSERT(core_group_idx <
151 kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
152 & core_availability_mask &
153 kbdev->pm.debug_core_mask[js];
155 /* If the job is specifically targeting core
156 * group 1 and the core availability policy is
157 * keeping that core group off, then fail */
158 if (*affinity == 0 && core_group_idx == 1 &&
159 kbdev->pm.backend.cg1_disabled
/* Atom is failed with a PM event so it can be retried later. */
162 BASE_JD_EVENT_PM_EVENT;
165 /* All cores are available when no core split is
167 *affinity = core_availability_mask &
168 kbdev->pm.debug_core_mask[js];
172 spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
175 * If no cores are currently available in the desired core group(s)
176 * (core availability policy is transitioning) then fail.
181 /* Enable core 0 if tiler required for hardware without XAFFINITY
182 * support (notes above) */
183 if (core_req & BASE_JD_REQ_T) {
184 if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
185 *affinity = *affinity | 1;
/* Return true when the proposed per-slot affinity masks violate the
 * hardware constraint: slots 1 and 2 must not target intersecting core
 * sets. Pure predicate -- reads only the @affinities array.
 *
 * NOTE(review): truncated excerpt (missing the "u64 *affinities" parameter
 * line, braces and the "intersection" declaration). */
191 static inline bool kbase_js_affinity_is_violating(
192 struct kbase_device *kbdev,
195 /* This implementation checks whether the two slots involved in Generic
196 * thread creation have intersecting affinity. This is due to micro-
197 * architectural issues where a job in slot A targeting cores used by
198 * slot B could prevent the job in slot B from making progress until the
199 * job in slot A has completed.
201 u64 affinity_set_left;
202 u64 affinity_set_right;
205 KBASE_DEBUG_ASSERT(affinities != NULL);
/* Slot 1 vs slot 2 masks; slot 0 (fragment) is not constrained. */
207 affinity_set_left = affinities[1];
209 affinity_set_right = affinities[2];
211 /* A violation occurs when any bit in the left_set is also in the
213 intersection = affinity_set_left & affinity_set_right;
215 return (bool) (intersection != (u64) 0u);
/* Check (without committing) whether OR-ing @affinity into slot @js's
 * current mask would violate the slot-affinity constraint: copy the live
 * per-slot masks, apply the candidate, and test with
 * kbase_js_affinity_is_violating().
 *
 * NOTE(review): truncated excerpt (missing the "u64 affinity" parameter
 * line and braces). */
218 bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
221 struct kbasep_js_device_data *js_devdata;
222 u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
224 KBASE_DEBUG_ASSERT(kbdev != NULL);
225 KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
226 js_devdata = &kbdev->js_data;
/* Work on a scratch copy so the live masks are untouched. */
228 memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
229 sizeof(js_devdata->runpool_irq.slot_affinities));
231 new_affinities[js] |= affinity;
233 return kbase_js_affinity_is_violating(kbdev, new_affinities);
/* Take a reference on every core in @affinity for slot @js: bump the
 * per-core refcount and set the core's bit in the slot's aggregate mask.
 * Caller must already have checked kbase_js_affinity_would_violate().
 *
 * NOTE(review): truncated excerpt -- the "u64 affinity" parameter, the
 * "u64 cores" local, the "while (cores)" loop header and the bit-clearing
 * step are on missing lines. */
236 void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
239 struct kbasep_js_device_data *js_devdata;
242 KBASE_DEBUG_ASSERT(kbdev != NULL);
243 KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
244 js_devdata = &kbdev->js_data;
246 KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
/* Peel off the highest set bit of the remaining cores mask. */
251 int bitnum = fls64(cores) - 1;
252 u64 bit = 1ULL << bitnum;
256 ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
259 js_devdata->runpool_irq.slot_affinities[js] |= bit;
/* Drop the reference taken by kbase_js_affinity_retain_slot_cores(): for
 * every core in the mask, decrement the per-core refcount and, when the
 * count presumably reaches zero (guard on a missing line -- confirm
 * upstream), clear the core's bit from the slot's aggregate mask.
 *
 * NOTE(review): truncated excerpt, same missing pieces as the retain
 * function (parameter line, "cores" local, loop header, braces). */
265 void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
268 struct kbasep_js_device_data *js_devdata;
271 KBASE_DEBUG_ASSERT(kbdev != NULL);
272 KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
273 js_devdata = &kbdev->js_data;
/* Peel off the highest set bit of the remaining cores mask. */
277 int bitnum = fls64(cores) - 1;
278 u64 bit = 1ULL << bitnum;
/* Releasing a core that was never retained is a driver bug. */
282 js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
285 --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
288 js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
294 #if KBASE_TRACE_ENABLE
/* Trace the current aggregate affinity mask of each job slot (debug aid;
 * compiled only when KBASE_TRACE_ENABLE is set).
 *
 * NOTE(review): truncated excerpt -- the "int slot_nr" declaration, braces
 * and part of the trace-macro argument list are on missing lines. The
 * hard-coded loop bound 3 mirrors the number of job slots; the trace value
 * is truncated to the low 32 bits of the u64 mask. */
295 void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
297 struct kbasep_js_device_data *js_devdata;
300 KBASE_DEBUG_ASSERT(kbdev != NULL);
301 js_devdata = &kbdev->js_data;
303 for (slot_nr = 0; slot_nr < 3; ++slot_nr)
304 KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
306 (u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
308 #endif /* KBASE_TRACE_ENABLE */