#include <linux/cgroup.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#include "sched.h"

unsigned int sysctl_sched_cfs_boost __read_mostly;

#ifdef CONFIG_CGROUP_SCHEDTUNE

/*
 * EAS scheduler tunables for task groups.
 */

/* SchedTune tunables for a group of tasks */
struct schedtune {
        /* SchedTune CGroup subsystem */
        struct cgroup_subsys_state css;

        /* Boost group allocated ID */
        int idx;

        /* Boost value for tasks on that SchedTune CGroup */
        int boost;
};

static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
{
        return css ? container_of(css, struct schedtune, css) : NULL;
}

static inline struct schedtune *task_schedtune(struct task_struct *tsk)
{
        return css_st(task_css(tsk, schedtune_cgrp_id));
}

static inline struct schedtune *parent_st(struct schedtune *st)
{
        return css_st(st->css.parent);
}

/*
 * SchedTune root control group
 * The root control group is used to define a system-wide boosting tuning,
 * which is applied to all tasks in the system.
 * Task specific boost tuning could be specified by creating and
 * configuring a child control group under the root one.
 * By default, system-wide boosting is disabled, i.e. no boosting is applied
 * to tasks which are not in a child control group.
 */
static struct schedtune
root_schedtune = {
        .boost = 0,
};
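
/*
 * Example (illustrative, assuming the schedtune controller is mounted at
 * /sys/fs/cgroup/stune): a per-class boost is configured by creating a
 * child control group under the root one and moving tasks into it, e.g.:
 *
 *   mkdir /sys/fs/cgroup/stune/foreground
 *   echo <pid> > /sys/fs/cgroup/stune/foreground/tasks
 *
 * Only a single level of child groups is accepted, see
 * schedtune_css_alloc() below.
 */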

/*
 * Maximum number of boost groups to support
 * When per-task boosting is used we still allow only a limited number of
 * boost groups for two main reasons:
 * 1. on a real system we usually have only a few classes of workloads which
 *    make sense to boost with different values (e.g. background vs foreground
 *    tasks, interactive vs low-priority tasks)
 * 2. a limited number allows for a simpler and more memory/time efficient
 *    implementation especially for the computation of the per-CPU boost
 *    value
 */
#define BOOSTGROUPS_COUNT 4

/* Array of configured boostgroups */
static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
        &root_schedtune,
        NULL,
};

/* SchedTune boost groups
 * Keep track of all the boost groups which impact a CPU, for example when a
 * CPU has two RUNNABLE tasks belonging to two different boost groups and thus
 * likely with different boost values.
 * Since on each system we expect only a limited number of boost groups, here
 * we use a simple array to keep track of the metrics required to compute the
 * maximum per-CPU boosting value.
 */
struct boost_groups {
        /* Maximum boost value for all RUNNABLE tasks on a CPU */
        unsigned int boost_max;
        struct {
                /* The boost for tasks on that boost group */
                unsigned int boost;
                /* Count of RUNNABLE tasks on that boost group */
                unsigned int tasks;
        } group[BOOSTGROUPS_COUNT];
};

/* Boost groups affecting each CPU in the system */
DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);

static void
schedtune_cpu_update(int cpu)
{
        struct boost_groups *bg;
        unsigned int boost_max;
        int idx;

        bg = &per_cpu(cpu_boost_groups, cpu);

        /* The root boost group is always active */
        boost_max = bg->group[0].boost;
        for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) {
                /*
                 * A boost group affects a CPU only if it has
                 * RUNNABLE tasks on that CPU
                 */
                if (bg->group[idx].tasks == 0)
                        continue;
                boost_max = max(boost_max, bg->group[idx].boost);
        }

        bg->boost_max = boost_max;
}
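
/*
 * Worked example (illustrative): with group[0].boost = 10 (root, always
 * active), group[1].boost = 50 with two RUNNABLE tasks on this CPU and
 * group[2].boost = 90 with none, the loop above yields boost_max = 50;
 * idle boost groups do not contribute to the per-CPU maximum.
 */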

static int
schedtune_boostgroup_update(int idx, int boost)
{
        struct boost_groups *bg;
        int cur_boost_max;
        int old_boost;
        int cpu;

        /* Update per CPU boost groups */
        for_each_possible_cpu(cpu) {
                bg = &per_cpu(cpu_boost_groups, cpu);

                /*
                 * Keep track of current boost values to compute the per CPU
                 * maximum only when it has been affected by the new value of
                 * the updated boost group
                 */
                cur_boost_max = bg->boost_max;
                old_boost = bg->group[idx].boost;

                /* Update the boost value of this boost group */
                bg->group[idx].boost = boost;

                /* Check if this update increases the current max */
                if (boost > cur_boost_max && bg->group[idx].tasks) {
                        bg->boost_max = boost;
                        continue;
                }

                /* Check if this update has decreased the current max */
                if (cur_boost_max == old_boost && old_boost > boost)
                        schedtune_cpu_update(cpu);
        }

        return 0;
}
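
/*
 * Worked example (illustrative): if boost_max is currently 50 because an
 * active group[1] defines it, raising an active group[2] from 10 to 80
 * takes the first branch above and sets boost_max = 80 directly, while
 * lowering group[1] from 50 to 20 takes the second branch and triggers a
 * full recomputation, since another active group may now define the max.
 */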

static inline void
schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count)
{
        struct boost_groups *bg;
        int tasks;

        bg = &per_cpu(cpu_boost_groups, cpu);

        /* Update the boosted tasks count while avoiding making it negative */
        if (task_count < 0 && bg->group[idx].tasks <= -task_count)
                bg->group[idx].tasks = 0;
        else
                bg->group[idx].tasks += task_count;

        /* Boost group activation or deactivation on that RQ */
        tasks = bg->group[idx].tasks;
        if (tasks == 1 || tasks == 0)
                schedtune_cpu_update(cpu);
}
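
/*
 * Illustrative note: only the 0 -> 1 transition (the group becomes active
 * on this CPU and may raise boost_max) and the 1 -> 0 transition (the group
 * becomes idle and may have been the one defining boost_max) require
 * refreshing the cached per-CPU maximum above.
 */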

/*
 * NOTE: This function must be called while holding the lock on the CPU RQ
 */
void schedtune_enqueue_task(struct task_struct *p, int cpu)
{
        struct schedtune *st;
        int idx;

        /*
         * When a task is marked PF_EXITING by do_exit() it's going to be
         * dequeued and enqueued multiple times in the exit path.
         * Thus we avoid any further update, since we do not want to change
         * CPU boosting while the task is exiting.
         */
        if (p->flags & PF_EXITING)
                return;

        /* Get task boost group */
        rcu_read_lock();
        st = task_schedtune(p);
        idx = st->idx;
        rcu_read_unlock();

        schedtune_tasks_update(p, cpu, idx, 1);
}

/*
 * NOTE: This function must be called while holding the lock on the CPU RQ
 */
void schedtune_dequeue_task(struct task_struct *p, int cpu)
{
        struct schedtune *st;
        int idx;

        /*
         * When a task is marked PF_EXITING by do_exit() it's going to be
         * dequeued and enqueued multiple times in the exit path.
         * Thus we avoid any further update, since we do not want to change
         * CPU boosting while the task is exiting.
         * The last dequeue will be done by the cgroup exit() callback.
         */
        if (p->flags & PF_EXITING)
                return;

        /* Get task boost group */
        rcu_read_lock();
        st = task_schedtune(p);
        idx = st->idx;
        rcu_read_unlock();

        schedtune_tasks_update(p, cpu, idx, -1);
}

int schedtune_cpu_boost(int cpu)
{
        struct boost_groups *bg;

        bg = &per_cpu(cpu_boost_groups, cpu);
        return bg->boost_max;
}

int schedtune_task_boost(struct task_struct *p)
{
        struct schedtune *st;
        int task_boost;

        /* Get task boost value */
        rcu_read_lock();
        st = task_schedtune(p);
        task_boost = st->boost;
        rcu_read_unlock();

        return task_boost;
}

static u64
boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
        struct schedtune *st = css_st(css);

        return st->boost;
}

static int
boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
            u64 boost)
{
        struct schedtune *st = css_st(css);

        if (boost < 0 || boost > 100)
                return -EINVAL;

        st->boost = boost;
        if (css == &root_schedtune.css)
                sysctl_sched_cfs_boost = boost;

        /* Update CPU boost */
        schedtune_boostgroup_update(st->idx, st->boost);

        return 0;
}

static struct cftype files[] = {
        {
                .name = "boost",
                .read_u64 = boost_read,
                .write_u64 = boost_write,
        },
        { }     /* terminate */
};
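
/*
 * Example (illustrative, assuming the same /sys/fs/cgroup/stune mount as
 * above): the attribute defined here is exposed as "schedtune.boost" in
 * each group's directory and accepts values in the [0..100] range:
 *
 *   cat /sys/fs/cgroup/stune/foreground/schedtune.boost
 *   echo 25 > /sys/fs/cgroup/stune/foreground/schedtune.boost
 */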

static int
schedtune_boostgroup_init(struct schedtune *st)
{
        struct boost_groups *bg;
        int cpu;

        /* Keep track of allocated boost groups */
        allocated_group[st->idx] = st;

        /* Initialize the per CPU boost groups */
        for_each_possible_cpu(cpu) {
                bg = &per_cpu(cpu_boost_groups, cpu);
                bg->group[st->idx].boost = 0;
                bg->group[st->idx].tasks = 0;
        }

        return 0;
}

static int
schedtune_init(void)
{
        struct boost_groups *bg;
        int cpu;

        /* Initialize the per CPU boost groups */
        for_each_possible_cpu(cpu) {
                bg = &per_cpu(cpu_boost_groups, cpu);
                memset(bg, 0, sizeof(struct boost_groups));
        }

        pr_info(" schedtune configured to support %d boost groups\n",
                BOOSTGROUPS_COUNT);

        return 0;
}

static struct cgroup_subsys_state *
schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
{
        struct schedtune *st;
        int idx;

        /* The first allocation is for the root boost group */
        if (!parent_css) {
                schedtune_init();
                return &root_schedtune.css;
        }

        /* Allow only single level hierarchies */
        if (parent_css != &root_schedtune.css) {
                pr_err("Nested SchedTune boosting groups not allowed\n");
                return ERR_PTR(-ENOMEM);
        }

        /* Allow only a limited number of boosting groups */
        for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx)
                if (!allocated_group[idx])
                        break;
        if (idx == BOOSTGROUPS_COUNT) {
                pr_err("Trying to create more than %d SchedTune boosting groups\n",
                       BOOSTGROUPS_COUNT);
                return ERR_PTR(-ENOSPC);
        }

        st = kzalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                goto out;

        /* Initialize per CPUs boost group support */
        st->idx = idx;
        if (schedtune_boostgroup_init(st))
                goto release;

        return &st->css;

release:
        kfree(st);
out:
        return ERR_PTR(-ENOMEM);
}

static void
schedtune_boostgroup_release(struct schedtune *st)
{
        /* Reset this boost group */
        schedtune_boostgroup_update(st->idx, 0);

        /* Keep track of allocated boost groups */
        allocated_group[st->idx] = NULL;
}

static void
schedtune_css_free(struct cgroup_subsys_state *css)
{
        struct schedtune *st = css_st(css);

        schedtune_boostgroup_release(st);
        kfree(st);
}

struct cgroup_subsys schedtune_cgrp_subsys = {
        .css_alloc      = schedtune_css_alloc,
        .css_free       = schedtune_css_free,
        .legacy_cftypes = files,
        .early_init     = 1,
};

#endif /* CONFIG_CGROUP_SCHEDTUNE */

int
sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write,
                               void __user *buffer, size_t *lenp,
                               loff_t *ppos)
{
        int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);