2015-06-23 09:17:54 +01:00
|
|
|
#include <linux/cgroup.h>
|
|
|
|
#include <linux/err.h>
|
2016-01-12 18:12:13 +00:00
|
|
|
#include <linux/kernel.h>
|
2015-06-23 09:17:54 +01:00
|
|
|
#include <linux/percpu.h>
|
|
|
|
#include <linux/printk.h>
|
2016-01-12 18:12:13 +00:00
|
|
|
#include <linux/reciprocal_div.h>
|
2015-07-07 15:33:20 +01:00
|
|
|
#include <linux/rcupdate.h>
|
2015-06-23 09:17:54 +01:00
|
|
|
#include <linux/slab.h>
|
|
|
|
|
2015-06-22 13:49:07 +01:00
|
|
|
#include <trace/events/sched.h>
|
|
|
|
|
2015-06-22 18:11:44 +01:00
|
|
|
#include "sched.h"
|
|
|
|
|
|
|
|
/*
 * System-wide boost value.
 * In this file it is written by boost_write() for the root SchedTune group
 * (CONFIG_CGROUP_SCHEDTUNE only) and read by sysctl_sched_cfs_boost_handler()
 * to derive the P-E space threshold indexes.
 * NOTE(review): presumably a percentage in [0..100] - confirm against the
 * sysctl table bounds.
 */
unsigned int sysctl_sched_cfs_boost __read_mostly;
|
|
|
|
|
2016-01-12 18:12:13 +00:00
|
|
|
/*
|
|
|
|
* System energy normalization constants
|
|
|
|
*/
|
|
|
|
static struct target_nrg {
|
|
|
|
unsigned long min_power;
|
|
|
|
unsigned long max_power;
|
|
|
|
struct reciprocal_value rdiv;
|
|
|
|
} schedtune_target_nrg;
|
|
|
|
|
|
|
|
/* Performance Boost region (B) threshold params */
|
|
|
|
/*
 * Index into threshold_gains[] for the Performance Boost (B) region;
 * recomputed by sysctl_sched_cfs_boost_handler() on each sysctl write.
 */
static int perf_boost_idx;
|
|
|
|
|
|
|
|
/* Performance Constraint region (C) threshold params */
|
|
|
|
/*
 * Index into threshold_gains[] for the Performance Constraint (C) region;
 * recomputed by sysctl_sched_cfs_boost_handler() on each sysctl write.
 */
static int perf_constrain_idx;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Performance-Energy (P-E) Space thresholds constants
|
|
|
|
*/
|
|
|
|
struct threshold_params {
	int nrg_gain;	/* weight multiplied by the capacity delta */
	int cap_gain;	/* weight multiplied by the energy delta */
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* System specific P-E space thresholds constants
|
|
|
|
*/
|
|
|
|
static struct threshold_params
|
|
|
|
threshold_gains[] = {
|
|
|
|
{ 0, 4 }, /* >= 0% */
|
|
|
|
{ 0, 4 }, /* >= 10% */
|
|
|
|
{ 1, 4 }, /* >= 20% */
|
|
|
|
{ 2, 4 }, /* >= 30% */
|
|
|
|
{ 3, 4 }, /* >= 40% */
|
|
|
|
{ 4, 3 }, /* >= 50% */
|
|
|
|
{ 4, 2 }, /* >= 60% */
|
|
|
|
{ 4, 1 }, /* >= 70% */
|
|
|
|
{ 4, 0 }, /* >= 80% */
|
|
|
|
{ 4, 0 } /* >= 90% */
|
|
|
|
};
|
|
|
|
|
|
|
|
static int
|
|
|
|
__schedtune_accept_deltas(int nrg_delta, int cap_delta,
|
|
|
|
int perf_boost_idx, int perf_constrain_idx)
|
|
|
|
{
|
|
|
|
int payoff = -INT_MAX;
|
|
|
|
|
|
|
|
/* Performance Boost (B) region */
|
|
|
|
if (nrg_delta > 0 && cap_delta > 0) {
|
|
|
|
/*
|
|
|
|
* Evaluate "Performance Boost" vs "Energy Increase"
|
|
|
|
* payoff criteria:
|
|
|
|
* cap_delta / nrg_delta < cap_gain / nrg_gain
|
|
|
|
* which is:
|
|
|
|
* nrg_delta * cap_gain > cap_delta * nrg_gain
|
|
|
|
*/
|
|
|
|
payoff = nrg_delta * threshold_gains[perf_boost_idx].cap_gain;
|
|
|
|
payoff -= cap_delta * threshold_gains[perf_boost_idx].nrg_gain;
|
2016-01-20 14:06:05 +00:00
|
|
|
|
|
|
|
trace_sched_tune_filter(
|
|
|
|
nrg_delta, cap_delta,
|
|
|
|
threshold_gains[perf_boost_idx].nrg_gain,
|
|
|
|
threshold_gains[perf_boost_idx].cap_gain,
|
|
|
|
payoff, 8);
|
|
|
|
|
2016-01-12 18:12:13 +00:00
|
|
|
return payoff;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Performance Constraint (C) region */
|
|
|
|
if (nrg_delta < 0 && cap_delta < 0) {
|
|
|
|
/*
|
|
|
|
* Evaluate "Performance Boost" vs "Energy Increase"
|
|
|
|
* payoff criteria:
|
|
|
|
* cap_delta / nrg_delta > cap_gain / nrg_gain
|
|
|
|
* which is:
|
|
|
|
* cap_delta * nrg_gain > nrg_delta * cap_gain
|
|
|
|
*/
|
|
|
|
payoff = cap_delta * threshold_gains[perf_constrain_idx].nrg_gain;
|
|
|
|
payoff -= nrg_delta * threshold_gains[perf_constrain_idx].cap_gain;
|
2016-01-20 14:06:05 +00:00
|
|
|
|
|
|
|
trace_sched_tune_filter(
|
|
|
|
nrg_delta, cap_delta,
|
|
|
|
threshold_gains[perf_constrain_idx].nrg_gain,
|
|
|
|
threshold_gains[perf_constrain_idx].cap_gain,
|
|
|
|
payoff, 6);
|
|
|
|
|
2016-01-12 18:12:13 +00:00
|
|
|
return payoff;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Default: reject schedule candidate */
|
|
|
|
return payoff;
|
|
|
|
}
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
#ifdef CONFIG_CGROUP_SCHEDTUNE
|
|
|
|
|
|
|
|
/*
|
|
|
|
* EAS scheduler tunables for task groups.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* SchedTune tunables for a group of tasks */
|
|
|
|
struct schedtune {
|
|
|
|
/* SchedTune CGroup subsystem */
|
|
|
|
struct cgroup_subsys_state css;
|
|
|
|
|
|
|
|
/* Boost group allocated ID */
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
/* Boost value for tasks on that SchedTune CGroup */
|
|
|
|
int boost;
|
|
|
|
|
2016-01-12 18:12:13 +00:00
|
|
|
/* Performance Boost (B) region threshold params */
|
|
|
|
int perf_boost_idx;
|
|
|
|
|
|
|
|
/* Performance Constraint (C) region threshold params */
|
|
|
|
int perf_constrain_idx;
|
2015-06-23 09:17:54 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
|
|
|
|
{
|
|
|
|
return css ? container_of(css, struct schedtune, css) : NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct schedtune *task_schedtune(struct task_struct *tsk)
|
|
|
|
{
|
|
|
|
return css_st(task_css(tsk, schedtune_cgrp_id));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct schedtune *parent_st(struct schedtune *st)
|
|
|
|
{
|
|
|
|
return css_st(st->css.parent);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SchedTune root control group
|
|
|
|
* The root control group is used to define a system-wide boosting tuning,
|
|
|
|
* which is applied to all tasks in the system.
|
|
|
|
* Task specific boost tuning could be specified by creating and
|
|
|
|
* configuring a child control group under the root one.
|
|
|
|
* By default, system-wide boosting is disabled, i.e. no boosting is applied
|
|
|
|
* to tasks which are not in a child control group.
|
|
|
|
*/
|
|
|
|
static struct schedtune
|
|
|
|
root_schedtune = {
|
|
|
|
.boost = 0,
|
2016-01-12 18:12:13 +00:00
|
|
|
.perf_boost_idx = 0,
|
|
|
|
.perf_constrain_idx = 0,
|
2015-06-23 09:17:54 +01:00
|
|
|
};
|
|
|
|
|
2016-01-12 18:12:13 +00:00
|
|
|
int
|
|
|
|
schedtune_accept_deltas(int nrg_delta, int cap_delta,
|
|
|
|
struct task_struct *task)
|
|
|
|
{
|
|
|
|
struct schedtune *ct;
|
|
|
|
int perf_boost_idx;
|
|
|
|
int perf_constrain_idx;
|
|
|
|
|
|
|
|
/* Optimal (O) region */
|
2016-01-20 14:06:05 +00:00
|
|
|
if (nrg_delta < 0 && cap_delta > 0) {
|
|
|
|
trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0);
|
2016-01-12 18:12:13 +00:00
|
|
|
return INT_MAX;
|
2016-01-20 14:06:05 +00:00
|
|
|
}
|
2016-01-12 18:12:13 +00:00
|
|
|
|
|
|
|
/* Suboptimal (S) region */
|
2016-01-20 14:06:05 +00:00
|
|
|
if (nrg_delta > 0 && cap_delta < 0) {
|
|
|
|
trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5);
|
2016-01-12 18:12:13 +00:00
|
|
|
return -INT_MAX;
|
2016-01-20 14:06:05 +00:00
|
|
|
}
|
2016-01-12 18:12:13 +00:00
|
|
|
|
|
|
|
/* Get task specific perf Boost/Constraints indexes */
|
|
|
|
rcu_read_lock();
|
|
|
|
ct = task_schedtune(task);
|
|
|
|
perf_boost_idx = ct->perf_boost_idx;
|
|
|
|
perf_constrain_idx = ct->perf_constrain_idx;
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
return __schedtune_accept_deltas(nrg_delta, cap_delta,
|
|
|
|
perf_boost_idx, perf_constrain_idx);
|
|
|
|
}
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
/*
|
|
|
|
* Maximum number of boost groups to support
|
|
|
|
* When per-task boosting is used we still allow only limited number of
|
|
|
|
* boost groups for two main reasons:
|
|
|
|
* 1. on a real system we usually have only few classes of workloads which
|
|
|
|
* make sense to boost with different values (e.g. background vs foreground
|
|
|
|
* tasks, interactive vs low-priority tasks)
|
|
|
|
* 2. a limited number allows for a simpler and more memory/time efficient
|
|
|
|
* implementation especially for the computation of the per-CPU boost
|
|
|
|
* value
|
|
|
|
*/
|
|
|
|
/*
 * Sizes allocated_group[] and each per-CPU boost_groups.group[] array;
 * slot 0 of allocated_group[] is taken by the root group.
 */
#define BOOSTGROUPS_COUNT 4
|
|
|
|
|
|
|
|
/* Array of configured boostgroups */
|
|
|
|
static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
|
|
|
|
&root_schedtune,
|
|
|
|
NULL,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* SchedTune boost groups
|
|
|
|
* Keep track of all the boost groups which impact on CPU, for example when a
|
|
|
|
* CPU has two RUNNABLE tasks belonging to two different boost groups and thus
|
|
|
|
* likely with different boost values.
|
|
|
|
* Since on each system we expect only a limited number of boost groups, here
|
|
|
|
* we use a simple array to keep track of the metrics required to compute the
|
|
|
|
* maximum per-CPU boosting value.
|
|
|
|
*/
|
|
|
|
struct boost_groups {
|
|
|
|
/* Maximum boost value for all RUNNABLE tasks on a CPU */
|
|
|
|
unsigned boost_max;
|
|
|
|
struct {
|
|
|
|
/* The boost for tasks on that boost group */
|
|
|
|
unsigned boost;
|
|
|
|
/* Count of RUNNABLE tasks on that boost group */
|
|
|
|
unsigned tasks;
|
|
|
|
} group[BOOSTGROUPS_COUNT];
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Boost groups affecting each CPU in the system */
|
|
|
|
/* Zeroed for every possible CPU by schedtune_init() */
DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);
|
|
|
|
|
2016-01-14 12:31:35 +00:00
|
|
|
static void
|
|
|
|
schedtune_cpu_update(int cpu)
|
|
|
|
{
|
|
|
|
struct boost_groups *bg;
|
|
|
|
unsigned boost_max;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
|
|
|
|
/* The root boost group is always active */
|
|
|
|
boost_max = bg->group[0].boost;
|
|
|
|
for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) {
|
|
|
|
/*
|
|
|
|
* A boost group affects a CPU only if it has
|
|
|
|
* RUNNABLE tasks on that CPU
|
|
|
|
*/
|
|
|
|
if (bg->group[idx].tasks == 0)
|
|
|
|
continue;
|
|
|
|
boost_max = max(boost_max, bg->group[idx].boost);
|
|
|
|
}
|
|
|
|
|
|
|
|
bg->boost_max = boost_max;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
schedtune_boostgroup_update(int idx, int boost)
|
|
|
|
{
|
|
|
|
struct boost_groups *bg;
|
|
|
|
int cur_boost_max;
|
|
|
|
int old_boost;
|
|
|
|
int cpu;
|
|
|
|
|
|
|
|
/* Update per CPU boost groups */
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
|
|
bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Keep track of current boost values to compute the per CPU
|
|
|
|
* maximum only when it has been affected by the new value of
|
|
|
|
* the updated boost group
|
|
|
|
*/
|
|
|
|
cur_boost_max = bg->boost_max;
|
|
|
|
old_boost = bg->group[idx].boost;
|
|
|
|
|
|
|
|
/* Update the boost value of this boost group */
|
|
|
|
bg->group[idx].boost = boost;
|
|
|
|
|
|
|
|
/* Check if this update increase current max */
|
|
|
|
if (boost > cur_boost_max && bg->group[idx].tasks) {
|
|
|
|
bg->boost_max = boost;
|
2015-06-24 15:36:08 +01:00
|
|
|
trace_sched_tune_boostgroup_update(cpu, 1, bg->boost_max);
|
2016-01-14 12:31:35 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check if this update has decreased current max */
|
2015-06-24 15:36:08 +01:00
|
|
|
if (cur_boost_max == old_boost && old_boost > boost) {
|
2016-01-14 12:31:35 +00:00
|
|
|
schedtune_cpu_update(cpu);
|
2015-06-24 15:36:08 +01:00
|
|
|
trace_sched_tune_boostgroup_update(cpu, -1, bg->boost_max);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
trace_sched_tune_boostgroup_update(cpu, 0, bg->boost_max);
|
2016-01-14 12:31:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-07-07 15:33:20 +01:00
|
|
|
static inline void
|
|
|
|
schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count)
|
|
|
|
{
|
|
|
|
struct boost_groups *bg;
|
|
|
|
int tasks;
|
|
|
|
|
|
|
|
bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
|
|
|
|
/* Update boosted tasks count while avoiding to make it negative */
|
|
|
|
if (task_count < 0 && bg->group[idx].tasks <= -task_count)
|
|
|
|
bg->group[idx].tasks = 0;
|
|
|
|
else
|
|
|
|
bg->group[idx].tasks += task_count;
|
|
|
|
|
|
|
|
/* Boost group activation or deactivation on that RQ */
|
|
|
|
tasks = bg->group[idx].tasks;
|
|
|
|
if (tasks == 1 || tasks == 0)
|
|
|
|
schedtune_cpu_update(cpu);
|
2015-06-24 15:36:08 +01:00
|
|
|
|
|
|
|
trace_sched_tune_tasks_update(p, cpu, tasks, idx,
|
|
|
|
bg->group[idx].boost, bg->boost_max);
|
|
|
|
|
2015-07-07 15:33:20 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* NOTE: This function must be called while holding the lock on the CPU RQ
|
|
|
|
*/
|
|
|
|
void schedtune_enqueue_task(struct task_struct *p, int cpu)
|
|
|
|
{
|
|
|
|
struct schedtune *st;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When a task is marked PF_EXITING by do_exit() it's going to be
|
|
|
|
* dequeued and enqueued multiple times in the exit path.
|
|
|
|
* Thus we avoid any further update, since we do not want to change
|
|
|
|
* CPU boosting while the task is exiting.
|
|
|
|
*/
|
|
|
|
if (p->flags & PF_EXITING)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Get task boost group */
|
|
|
|
rcu_read_lock();
|
|
|
|
st = task_schedtune(p);
|
|
|
|
idx = st->idx;
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
schedtune_tasks_update(p, cpu, idx, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* NOTE: This function must be called while holding the lock on the CPU RQ
|
|
|
|
*/
|
|
|
|
void schedtune_dequeue_task(struct task_struct *p, int cpu)
|
|
|
|
{
|
|
|
|
struct schedtune *st;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When a task is marked PF_EXITING by do_exit() it's going to be
|
|
|
|
* dequeued and enqueued multiple times in the exit path.
|
|
|
|
* Thus we avoid any further update, since we do not want to change
|
|
|
|
* CPU boosting while the task is exiting.
|
|
|
|
* The last dequeue will be done by cgroup exit() callback.
|
|
|
|
*/
|
|
|
|
if (p->flags & PF_EXITING)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Get task boost group */
|
|
|
|
rcu_read_lock();
|
|
|
|
st = task_schedtune(p);
|
|
|
|
idx = st->idx;
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
schedtune_tasks_update(p, cpu, idx, -1);
|
|
|
|
}
|
|
|
|
|
|
|
|
int schedtune_cpu_boost(int cpu)
|
|
|
|
{
|
|
|
|
struct boost_groups *bg;
|
|
|
|
|
|
|
|
bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
return bg->boost_max;
|
|
|
|
}
|
|
|
|
|
sched/fair: add boosted task utilization
The task utilization signal, which is derived from PELT signals and
properly scaled to be architecture and frequency invariant, is used by
EAS as an estimation of the task requirements in terms of CPU bandwidth.
When the energy aware scheduler is in use, this signal affects the CPU
selection. Thus, a convenient way to bias that decision, which is also
little intrusive, is to boost the task utilization signal each time it
is required to support them.
This patch introduces the new function:
boosted_task_util(task)
which returns a boosted value for the utilization of the specified task.
The margin added to the original utilization is:
1. computed based on the "boosting strategy" in use
2. proportional to boost value defined either by the sysctl interface,
when global boosting is in use, or the "taskgroup" value, when
per-task boosting is enabled.
The boosted signal is used by EAS
a. transparently, via its integration into the task_fits() function
b. explicitly, in the energy-aware wakeup path
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
2016-01-14 18:31:53 +00:00
|
|
|
int schedtune_task_boost(struct task_struct *p)
|
|
|
|
{
|
|
|
|
struct schedtune *st;
|
|
|
|
int task_boost;
|
|
|
|
|
|
|
|
/* Get task boost value */
|
|
|
|
rcu_read_lock();
|
|
|
|
st = task_schedtune(p);
|
|
|
|
task_boost = st->boost;
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
return task_boost;
|
|
|
|
}
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
static u64
|
|
|
|
boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
|
|
|
|
{
|
|
|
|
struct schedtune *st = css_st(css);
|
|
|
|
|
|
|
|
return st->boost;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
|
|
|
|
u64 boost)
|
|
|
|
{
|
|
|
|
struct schedtune *st = css_st(css);
|
|
|
|
|
|
|
|
if (boost < 0 || boost > 100)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
st->boost = boost;
|
|
|
|
if (css == &root_schedtune.css)
|
|
|
|
sysctl_sched_cfs_boost = boost;
|
|
|
|
|
2016-01-14 12:31:35 +00:00
|
|
|
/* Update CPU boost */
|
|
|
|
schedtune_boostgroup_update(st->idx, st->boost);
|
|
|
|
|
2015-06-22 13:49:07 +01:00
|
|
|
trace_sched_tune_config(st->boost);
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct cftype files[] = {
|
|
|
|
{
|
|
|
|
.name = "boost",
|
|
|
|
.read_u64 = boost_read,
|
|
|
|
.write_u64 = boost_write,
|
|
|
|
},
|
|
|
|
{ } /* terminate */
|
|
|
|
};
|
|
|
|
|
|
|
|
static int
|
|
|
|
schedtune_boostgroup_init(struct schedtune *st)
|
|
|
|
{
|
2016-01-14 12:31:35 +00:00
|
|
|
struct boost_groups *bg;
|
|
|
|
int cpu;
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
/* Keep track of allocated boost groups */
|
|
|
|
allocated_group[st->idx] = st;
|
|
|
|
|
2016-01-14 12:31:35 +00:00
|
|
|
/* Initialize the per CPU boost groups */
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
|
|
bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
bg->group[st->idx].boost = 0;
|
|
|
|
bg->group[st->idx].tasks = 0;
|
|
|
|
}
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
schedtune_init(void)
|
|
|
|
{
|
|
|
|
struct boost_groups *bg;
|
|
|
|
int cpu;
|
|
|
|
|
|
|
|
/* Initialize the per CPU boost groups */
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
|
|
bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
memset(bg, 0, sizeof(struct boost_groups));
|
|
|
|
}
|
|
|
|
|
|
|
|
pr_info(" schedtune configured to support %d boost groups\n",
|
|
|
|
BOOSTGROUPS_COUNT);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct cgroup_subsys_state *
|
|
|
|
schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
|
|
|
|
{
|
|
|
|
struct schedtune *st;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
if (!parent_css) {
|
|
|
|
schedtune_init();
|
|
|
|
return &root_schedtune.css;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allow only single level hierachies */
|
|
|
|
if (parent_css != &root_schedtune.css) {
|
|
|
|
pr_err("Nested SchedTune boosting groups not allowed\n");
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allow only a limited number of boosting groups */
|
|
|
|
for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx)
|
|
|
|
if (!allocated_group[idx])
|
|
|
|
break;
|
|
|
|
if (idx == BOOSTGROUPS_COUNT) {
|
|
|
|
pr_err("Trying to create more than %d SchedTune boosting groups\n",
|
|
|
|
BOOSTGROUPS_COUNT);
|
|
|
|
return ERR_PTR(-ENOSPC);
|
|
|
|
}
|
|
|
|
|
|
|
|
st = kzalloc(sizeof(*st), GFP_KERNEL);
|
|
|
|
if (!st)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* Initialize per CPUs boost group support */
|
|
|
|
st->idx = idx;
|
|
|
|
if (schedtune_boostgroup_init(st))
|
|
|
|
goto release;
|
|
|
|
|
|
|
|
return &st->css;
|
|
|
|
|
|
|
|
release:
|
|
|
|
kfree(st);
|
|
|
|
out:
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
schedtune_boostgroup_release(struct schedtune *st)
|
|
|
|
{
|
2016-01-14 12:31:35 +00:00
|
|
|
/* Reset this boost group */
|
|
|
|
schedtune_boostgroup_update(st->idx, 0);
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
/* Keep track of allocated boost groups */
|
|
|
|
allocated_group[st->idx] = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* css_free callback: release the boost group slot, then free the group */
static void
schedtune_css_free(struct cgroup_subsys_state *css)
{
	struct schedtune *group = css_st(css);

	schedtune_boostgroup_release(group);
	kfree(group);
}
|
|
|
|
|
|
|
|
struct cgroup_subsys schedtune_cgrp_subsys = {
|
|
|
|
.css_alloc = schedtune_css_alloc,
|
|
|
|
.css_free = schedtune_css_free,
|
|
|
|
.legacy_cftypes = files,
|
|
|
|
.early_init = 1,
|
|
|
|
};
|
|
|
|
|
2016-01-12 18:12:13 +00:00
|
|
|
#else /* CONFIG_CGROUP_SCHEDTUNE */
|
|
|
|
|
|
|
|
int
|
|
|
|
schedtune_accept_deltas(int nrg_delta, int cap_delta,
|
|
|
|
struct task_struct *task)
|
|
|
|
{
|
|
|
|
/* Optimal (O) region */
|
2016-01-20 14:06:05 +00:00
|
|
|
if (nrg_delta < 0 && cap_delta > 0) {
|
|
|
|
trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0);
|
2016-01-12 18:12:13 +00:00
|
|
|
return INT_MAX;
|
2016-01-20 14:06:05 +00:00
|
|
|
}
|
2016-01-12 18:12:13 +00:00
|
|
|
|
|
|
|
/* Suboptimal (S) region */
|
2016-01-20 14:06:05 +00:00
|
|
|
if (nrg_delta > 0 && cap_delta < 0) {
|
|
|
|
trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5);
|
2016-01-12 18:12:13 +00:00
|
|
|
return -INT_MAX;
|
2016-01-20 14:06:05 +00:00
|
|
|
}
|
2016-01-12 18:12:13 +00:00
|
|
|
|
|
|
|
return __schedtune_accept_deltas(nrg_delta, cap_delta,
|
|
|
|
perf_boost_idx, perf_constrain_idx);
|
|
|
|
}
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
#endif /* CONFIG_CGROUP_SCHEDTUNE */
|
|
|
|
|
2015-06-22 18:11:44 +01:00
|
|
|
int
|
|
|
|
sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write,
|
|
|
|
void __user *buffer, size_t *lenp,
|
|
|
|
loff_t *ppos)
|
|
|
|
{
|
|
|
|
int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
|
|
|
|
|
|
|
if (ret || !write)
|
|
|
|
return ret;
|
|
|
|
|
2016-01-12 18:12:13 +00:00
|
|
|
/* Performance Boost (B) region threshold params */
|
|
|
|
perf_boost_idx = sysctl_sched_cfs_boost;
|
|
|
|
perf_boost_idx /= 10;
|
|
|
|
|
|
|
|
/* Performance Constraint (C) region threshold params */
|
|
|
|
perf_constrain_idx = 100 - sysctl_sched_cfs_boost;
|
|
|
|
perf_constrain_idx /= 10;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* System energy normalization
|
|
|
|
* Returns the normalized value, with the sign of energy_diff and a magnitude in the range [0..SCHED_LOAD_SCALE],
|
|
|
|
* corresponding to the specified energy variation.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
schedtune_normalize_energy(int energy_diff)
|
|
|
|
{
|
|
|
|
u32 normalized_nrg;
|
|
|
|
int max_delta;
|
|
|
|
|
|
|
|
#ifdef CONFIG_SCHED_DEBUG
|
|
|
|
/* Check for boundaries */
|
|
|
|
max_delta = schedtune_target_nrg.max_power;
|
|
|
|
max_delta -= schedtune_target_nrg.min_power;
|
|
|
|
WARN_ON(abs(energy_diff) >= max_delta);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Do scaling using positive numbers to increase the range */
|
|
|
|
normalized_nrg = (energy_diff < 0) ? -energy_diff : energy_diff;
|
|
|
|
|
|
|
|
/* Scale by energy magnitude */
|
|
|
|
normalized_nrg <<= SCHED_LOAD_SHIFT;
|
|
|
|
|
|
|
|
/* Normalize on max energy for target platform */
|
|
|
|
normalized_nrg = reciprocal_divide(
|
|
|
|
normalized_nrg, schedtune_target_nrg.rdiv);
|
|
|
|
|
|
|
|
return (energy_diff < 0) ? -normalized_nrg : normalized_nrg;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_SCHED_DEBUG
|
|
|
|
static void
|
|
|
|
schedtune_test_nrg(unsigned long delta_pwr)
|
|
|
|
{
|
|
|
|
unsigned long test_delta_pwr;
|
|
|
|
unsigned long test_norm_pwr;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check normalization constants using some constant system
|
|
|
|
* energy values
|
|
|
|
*/
|
|
|
|
pr_info("schedtune: verify normalization constants...\n");
|
|
|
|
for (idx = 0; idx < 6; ++idx) {
|
|
|
|
test_delta_pwr = delta_pwr >> idx;
|
|
|
|
|
|
|
|
/* Normalize on max energy for target platform */
|
|
|
|
test_norm_pwr = reciprocal_divide(
|
|
|
|
test_delta_pwr << SCHED_LOAD_SHIFT,
|
|
|
|
schedtune_target_nrg.rdiv);
|
|
|
|
|
|
|
|
pr_info("schedtune: max_pwr/2^%d: %4lu => norm_pwr: %5lu\n",
|
|
|
|
idx, test_delta_pwr, test_norm_pwr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
#define schedtune_test_nrg(delta_pwr)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Compute the min/max power consumption of a cluster and all its CPUs
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
schedtune_add_cluster_nrg(
|
|
|
|
struct sched_domain *sd,
|
|
|
|
struct sched_group *sg,
|
|
|
|
struct target_nrg *ste)
|
|
|
|
{
|
|
|
|
struct sched_domain *sd2;
|
|
|
|
struct sched_group *sg2;
|
|
|
|
|
|
|
|
struct cpumask *cluster_cpus;
|
|
|
|
char str[32];
|
|
|
|
|
|
|
|
unsigned long min_pwr;
|
|
|
|
unsigned long max_pwr;
|
|
|
|
int cpu;
|
|
|
|
|
|
|
|
/* Get Cluster energy using EM data for the first CPU */
|
|
|
|
cluster_cpus = sched_group_cpus(sg);
|
|
|
|
snprintf(str, 32, "CLUSTER[%*pbl]",
|
|
|
|
cpumask_pr_args(cluster_cpus));
|
|
|
|
|
|
|
|
min_pwr = sg->sge->idle_states[sg->sge->nr_idle_states - 1].power;
|
|
|
|
max_pwr = sg->sge->cap_states[sg->sge->nr_cap_states - 1].power;
|
|
|
|
pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
|
|
|
|
str, min_pwr, max_pwr);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Keep track of this cluster's energy in the computation of the
|
|
|
|
* overall system energy
|
|
|
|
*/
|
|
|
|
ste->min_power += min_pwr;
|
|
|
|
ste->max_power += max_pwr;
|
|
|
|
|
|
|
|
/* Get CPU energy using EM data for each CPU in the group */
|
|
|
|
for_each_cpu(cpu, cluster_cpus) {
|
|
|
|
/* Get a SD view for the specific CPU */
|
|
|
|
for_each_domain(cpu, sd2) {
|
|
|
|
/* Get the CPU group */
|
|
|
|
sg2 = sd2->groups;
|
|
|
|
min_pwr = sg2->sge->idle_states[sg2->sge->nr_idle_states - 1].power;
|
|
|
|
max_pwr = sg2->sge->cap_states[sg2->sge->nr_cap_states - 1].power;
|
|
|
|
|
|
|
|
ste->min_power += min_pwr;
|
|
|
|
ste->max_power += max_pwr;
|
|
|
|
|
|
|
|
snprintf(str, 32, "CPU[%d]", cpu);
|
|
|
|
pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
|
|
|
|
str, min_pwr, max_pwr);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Assume we have EM data only at the CPU and
|
|
|
|
* the upper CLUSTER level
|
|
|
|
*/
|
|
|
|
BUG_ON(!cpumask_equal(
|
|
|
|
sched_group_cpus(sg),
|
|
|
|
sched_group_cpus(sd2->parent->groups)
|
|
|
|
));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize the constants required to compute normalized energy.
|
|
|
|
* The values of these constants depend on the EM data for the specific
|
|
|
|
* target system and topology.
|
|
|
|
* Thus, this function is expected to be called by the code
|
|
|
|
* that binds the EM to the topology information.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
schedtune_init_late(void)
|
|
|
|
{
|
|
|
|
struct target_nrg *ste = &schedtune_target_nrg;
|
|
|
|
unsigned long delta_pwr = 0;
|
|
|
|
struct sched_domain *sd;
|
|
|
|
struct sched_group *sg;
|
|
|
|
|
|
|
|
pr_info("schedtune: init normalization constants...\n");
|
|
|
|
ste->max_power = 0;
|
|
|
|
ste->min_power = 0;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When EAS is in use, we always have a pointer to the highest SD
|
|
|
|
* which provides EM data.
|
|
|
|
*/
|
|
|
|
sd = rcu_dereference(per_cpu(sd_ea, cpumask_first(cpu_online_mask)));
|
|
|
|
if (!sd) {
|
|
|
|
pr_info("schedtune: no energy model data\n");
|
|
|
|
goto nodata;
|
|
|
|
}
|
|
|
|
|
|
|
|
sg = sd->groups;
|
|
|
|
do {
|
|
|
|
schedtune_add_cluster_nrg(sd, sg, ste);
|
|
|
|
} while (sg = sg->next, sg != sd->groups);
|
|
|
|
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
|
|
|
|
"SYSTEM", ste->min_power, ste->max_power);
|
|
|
|
|
|
|
|
/* Compute normalization constants */
|
|
|
|
delta_pwr = ste->max_power - ste->min_power;
|
|
|
|
ste->rdiv = reciprocal_value(delta_pwr);
|
|
|
|
pr_info("schedtune: using normalization constants mul: %u sh1: %u sh2: %u\n",
|
|
|
|
ste->rdiv.m, ste->rdiv.sh1, ste->rdiv.sh2);
|
|
|
|
|
|
|
|
schedtune_test_nrg(delta_pwr);
|
2015-06-22 18:11:44 +01:00
|
|
|
return 0;
|
2016-01-12 18:12:13 +00:00
|
|
|
|
|
|
|
nodata:
|
|
|
|
rcu_read_unlock();
|
|
|
|
return -EINVAL;
|
2015-06-22 18:11:44 +01:00
|
|
|
}
|
2016-01-12 18:12:13 +00:00
|
|
|
/* Register schedtune_init_late() as a late initcall */
late_initcall(schedtune_init_late);
|