3 * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
5 * This program is free software and is provided to you under the terms of the
6 * GNU General Public License version 2 as published by the Free Software
7 * Foundation, and any use by you of this program is subject to the terms
10 * A copy of the licence is included with the program, and can also be obtained
11 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
12 * Boston, MA 02110-1301, USA.
21 * Base kernel affinity manager APIs
24 #include <mali_kbase.h>
25 #include "mali_kbase_js_affinity.h"
26 #include "mali_kbase_hw.h"
28 #include <backend/gpu/mali_kbase_pm_internal.h>
/* Decide whether a context's atoms are allowed to be submitted on job slot
 * @js right now (caller holds the scheduler lock, hence "_no_lock").
 *
 * NOTE(review): this excerpt is truncated -- original line numbers are baked
 * into the text and several lines are missing (the "int js" parameter, the
 * opening brace, and the return statements). The annotations below cover
 * only the logic that is visible; recover the complete function before
 * attempting to build this file. */
31 bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
35 * Here are the reasons for using job slot 2:
36 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
37 * - In absence of the above, then:
38 * - Atoms with BASE_JD_REQ_COHERENT_GROUP
39 * - But, only when there aren't contexts with
40 * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
41 * all cores on slot 1 could be blocked by those using a coherent group
43 * - And, only when you actually have 2 or more coregroups - if you
44 * only have 1 coregroup, then having jobs for slot 2 implies they'd
45 * also be for slot 1, meaning you'll get interference from them. Jobs
46 * able to run on slot 2 could also block jobs that can only run on
/* HW workaround 8987 dedicates slot 2; presumably an early "return true"
 * follows here in the full source -- body line missing, confirm upstream. */
49 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
55 /* Only deal with js==2 now: */
56 if (kbdev->gpu_props.num_core_groups > 1) {
57 /* Only use slot 2 in the 2+ coregroup case */
58 if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
59 KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
61 /* ...But only when we *don't* have atoms that run on
64 /* No specific check for BASE_JD_REQ_COHERENT_GROUP
65 * atoms - the policy will sort that out */
70 /* Above checks failed mean we shouldn't use slot 2 */
/* Compute the shader-core affinity mask (*affinity) for @katom on slot @js,
 * intersecting the PM core-availability mask with the per-slot debug core
 * mask, and splitting by core group for coherent-group atoms.
 *
 * NOTE(review): excerpt is truncated (baked-in line numbers, missing braces,
 * returns and several statements, e.g. the "flags" declaration and the
 * final availability re-check). Annotations describe visible logic only. */
75 * As long as it has been decided to have a deeper modification of
76 * what job scheduler, power manager and affinity manager will
77 * implement, this function is just an intermediate step that
79 * - all working cores will be powered on when this is called.
80 * - largest current configuration is 2 core groups.
81 * - It has been decided not to have hardcoded values so the low
82 * and high cores in a core split will be evenly distributed.
83 * - Odd combinations of core requirements have been filtered out
84 * and do not get to this function (e.g. CS+T+NSS is not
86 * - This function is frequently called and can be optimized,
87 * (see notes in loops), but as the functionality will likely
88 * be modified, optimization has not been addressed.
90 bool kbase_js_choose_affinity(u64 * const affinity,
91 struct kbase_device *kbdev,
92 struct kbase_jd_atom *katom, int js)
94 base_jd_core_req core_req = katom->core_req;
95 unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
96 u64 core_availability_mask;
/* Availability mask must be sampled under the PM power-change lock. */
99 spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
101 core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
104 * If no cores are currently available (core availability policy is
105 * transitioning) then fail.
107 if (0 == core_availability_mask) {
108 spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
113 KBASE_DEBUG_ASSERT(js >= 0);
/* Tiler-only atom (no FS/CS/T shader requirement bits set beyond T):
 * condition's right-hand side is on a missing line -- confirm upstream. */
115 if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
117 spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
118 /* If the hardware supports XAFFINITY then we'll only enable
119 * the tiler (which is the default so this is a no-op),
120 * otherwise enable shader core 0. */
121 if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
129 if (1 == kbdev->gpu_props.num_cores) {
130 /* trivial case only one core, nothing to do */
131 *affinity = core_availability_mask &
132 kbdev->pm.debug_core_mask[js];
134 if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
135 BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
136 if (js == 0 || num_core_groups == 1) {
137 /* js[0] and single-core-group systems just get
138 * the first core group */
140 kbdev->gpu_props.props.coherency_info.group[0].core_mask
141 & core_availability_mask &
142 kbdev->pm.debug_core_mask[js];
144 /* js[1], js[2] use core groups 0, 1 for
145 * dual-core-group systems */
146 u32 core_group_idx = ((u32) js) - 1;
148 KBASE_DEBUG_ASSERT(core_group_idx <
151 kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
152 & core_availability_mask &
153 kbdev->pm.debug_core_mask[js];
155 /* If the job is specifically targeting core
156 * group 1 and the core availability policy is
157 * keeping that core group off, then fail */
158 if (*affinity == 0 && core_group_idx == 1 &&
159 kbdev->pm.backend.cg1_disabled
/* Atom is failed with a PM event so it can be retried later. */
162 BASE_JD_EVENT_PM_EVENT;
165 /* All cores are available when no core split is
167 *affinity = core_availability_mask &
168 kbdev->pm.debug_core_mask[js];
172 spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
175 * If no cores are currently available in the desired core group(s)
176 * (core availability policy is transitioning) then fail.
181 /* Enable core 0 if tiler required for hardware without XAFFINITY
182 * support (notes above) */
183 if (core_req & BASE_JD_REQ_T) {
184 if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
185 *affinity = *affinity | 1;
/* Return true when the proposed per-slot affinity masks violate the
 * hardware constraint: slots 1 and 2 must not target intersecting core
 * sets. Pure predicate -- reads only the @affinities array.
 *
 * NOTE(review): truncated excerpt (missing the "u64 *affinities" parameter
 * line, braces and the "intersection" declaration). */
191 static inline bool kbase_js_affinity_is_violating(
192 struct kbase_device *kbdev,
195 /* This implementation checks whether the two slots involved in Generic
196 * thread creation have intersecting affinity. This is due to micro-
197 * architectural issues where a job in slot A targeting cores used by
198 * slot B could prevent the job in slot B from making progress until the
199 * job in slot A has completed.
201 u64 affinity_set_left;
202 u64 affinity_set_right;
205 KBASE_DEBUG_ASSERT(affinities != NULL);
/* Slot 1 vs slot 2 masks; slot 0 (fragment) is not constrained. */
207 affinity_set_left = affinities[1];
209 affinity_set_right = affinities[2];
211 /* A violation occurs when any bit in the left_set is also in the
213 intersection = affinity_set_left & affinity_set_right;
215 return (bool) (intersection != (u64) 0u);
/* Check (without committing) whether OR-ing @affinity into slot @js's
 * current mask would violate the slot-affinity constraint: copy the live
 * per-slot masks, apply the candidate, and test with
 * kbase_js_affinity_is_violating().
 *
 * NOTE(review): truncated excerpt (missing the "u64 affinity" parameter
 * line and braces). */
218 bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
221 struct kbasep_js_device_data *js_devdata;
222 u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
224 KBASE_DEBUG_ASSERT(kbdev != NULL);
225 KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
226 js_devdata = &kbdev->js_data;
/* Work on a scratch copy so the live masks are untouched. */
228 memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
229 sizeof(js_devdata->runpool_irq.slot_affinities));
231 new_affinities[js] |= affinity;
233 return kbase_js_affinity_is_violating(kbdev, new_affinities);
/* Take a reference on every core in @affinity for slot @js: bump the
 * per-core refcount and set the core's bit in the slot's aggregate mask.
 * Caller must already have checked kbase_js_affinity_would_violate().
 *
 * NOTE(review): truncated excerpt -- the "u64 affinity" parameter, the
 * "u64 cores" local, the "while (cores)" loop header and the bit-clearing
 * step are on missing lines. */
236 void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
239 struct kbasep_js_device_data *js_devdata;
242 KBASE_DEBUG_ASSERT(kbdev != NULL);
243 KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
244 js_devdata = &kbdev->js_data;
246 KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
/* Peel off the highest set bit of the remaining cores mask. */
251 int bitnum = fls64(cores) - 1;
252 u64 bit = 1ULL << bitnum;
256 ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
259 js_devdata->runpool_irq.slot_affinities[js] |= bit;
/* Drop the reference taken by kbase_js_affinity_retain_slot_cores(): for
 * every core in the mask, decrement the per-core refcount and, when the
 * count presumably reaches zero (guard on a missing line -- confirm
 * upstream), clear the core's bit from the slot's aggregate mask.
 *
 * NOTE(review): truncated excerpt, same missing pieces as the retain
 * function (parameter line, "cores" local, loop header, braces). */
265 void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
268 struct kbasep_js_device_data *js_devdata;
271 KBASE_DEBUG_ASSERT(kbdev != NULL);
272 KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
273 js_devdata = &kbdev->js_data;
/* Peel off the highest set bit of the remaining cores mask. */
277 int bitnum = fls64(cores) - 1;
278 u64 bit = 1ULL << bitnum;
/* Releasing a core that was never retained is a driver bug. */
282 js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
285 --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
288 js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
294 #if KBASE_TRACE_ENABLE
/* Trace the current aggregate affinity mask of each job slot (debug aid;
 * compiled only when KBASE_TRACE_ENABLE is set).
 *
 * NOTE(review): truncated excerpt -- the "int slot_nr" declaration, braces
 * and part of the trace-macro argument list are on missing lines. The
 * hard-coded loop bound 3 mirrors the number of job slots; the trace value
 * is truncated to the low 32 bits of the u64 mask. */
295 void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
297 struct kbasep_js_device_data *js_devdata;
300 KBASE_DEBUG_ASSERT(kbdev != NULL);
301 js_devdata = &kbdev->js_data;
303 for (slot_nr = 0; slot_nr < 3; ++slot_nr)
304 KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
306 (u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
308 #endif /* KBASE_TRACE_ENABLE */