sched/fair: add tunable to force selection at cpu granularity

EAS assumes that clusters with smaller capacity cores are more
energy-efficient. This may not be true on non-big-little devices,
so EAS can make incorrect cluster selections when finding a CPU
to wake. The "sched_is_big_little" hint can be used to cause a
cpu-based selection instead of cluster-based selection.

This change also incorporates the sync hint enable patch:

EAS did not honour synchronous wakeup hints, so a new sysctl is
created to ask EAS to use this information when selecting a CPU.
The control is called "sched_sync_hint_enable".
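
As a usage sketch (not part of the patch itself): with the kern_table
entries added below, both knobs should show up under /proc/sys/kernel/,
so a device without a big.LITTLE topology could keep cpu-granular
selection and honour sync hints roughly as follows. The write_sysctl()
helper is purely illustrative:

	#include <stdio.h>

	/* Write a single unsigned value to a procfs sysctl file. */
	static int write_sysctl(const char *path, unsigned int val)
	{
		FILE *f = fopen(path, "w");

		if (!f)
			return -1;
		fprintf(f, "%u\n", val);
		return fclose(f);
	}

	int main(void)
	{
		/* 0 = not big.LITTLE: select at cpu granularity (the default). */
		write_sysctl("/proc/sys/kernel/sched_is_big_little", 0);
		/* 1 = honour synchronous wakeup hints (the default). */
		write_sysctl("/proc/sys/kernel/sched_sync_hint_enable", 1);
		return 0;
	}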

Also contains:

EAS: sched/fair: for SMP bias toward idle core with capacity

For SMP devices, bias wakeup placement towards idle cores that have
spare capacity rather than busy cpus that need a higher OPP.

eas: favor idle cpus for boosted tasks

BUG: 29533997
BUG: 29512132
Change-Id: I0cc9a1b1b88fb52916f18bf2d25715bdc3634f9c
Signed-off-by: Juri Lelli <juri.lelli@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>

eas/sched/fair: Favoring busy cpus with low OPPs

BUG: 29533997
BUG: 29512132
Change-Id: I9305b3239698d64278db715a2e277ea0bb4ece79

Signed-off-by: Juri Lelli <juri.lelli@arm.com>
Author:     Juri Lelli <juri.lelli@arm.com>, 2016-07-29 14:04:11 +01:00
Committer:  John Stultz
Commit:     4a5e890ec6 (parent 2e9abbc942)
3 changed files with 167 additions and 40 deletions

@@ -39,6 +39,8 @@ extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
+extern unsigned int sysctl_sched_is_big_little;
+extern unsigned int sysctl_sched_sync_hint_enable;
 extern unsigned int sysctl_sched_cstate_aware;
 
 enum sched_tunable_scaling {

@@ -51,7 +51,10 @@
 unsigned int sysctl_sched_latency = 6000000ULL;
 unsigned int normalized_sysctl_sched_latency = 6000000ULL;
+
+unsigned int sysctl_sched_is_big_little = 0;
+unsigned int sysctl_sched_sync_hint_enable = 1;
 unsigned int sysctl_sched_cstate_aware = 1;
 
 /*
  * The initial- and re-scaling of tunables is configurable
  * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
@@ -5534,7 +5537,97 @@ done:
 	return target;
 }
 
-static int energy_aware_wake_cpu(struct task_struct *p, int target)
+static inline int find_best_target(struct task_struct *p)
+{
+	int i, boosted;
+	int target_cpu = -1;
+	int target_capacity = 0;
+	int backup_capacity = 0;
+	int idle_cpu = -1;
+	int best_idle_cstate = INT_MAX;
+	int backup_cpu = -1;
+	unsigned long task_util_boosted, new_util;
+
+	/*
+	 * Favor 1) busy cpu with most capacity at current OPP
+	 *       2) idle_cpu with capacity at current OPP
+	 *       3) busy cpu with capacity at higher OPP
+	 */
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+	boosted = schedtune_task_boost(p);
+#else
+	boosted = 0;
+#endif
+	task_util_boosted = boosted_task_util(p);
+	for_each_cpu(i, tsk_cpus_allowed(p)) {
+		int cur_capacity = capacity_curr_of(i);
+		struct rq *rq = cpu_rq(i);
+		int idle_idx = idle_get_state_idx(rq);
+
+		/*
+		 * p's blocked utilization is still accounted for on prev_cpu
+		 * so prev_cpu will receive a negative bias due to the double
+		 * accounting. However, the blocked utilization may be zero.
+		 */
+		new_util = cpu_util(i) + task_util_boosted;
+
+		/*
+		 * Ensure minimum capacity to grant the required boost.
+		 * The target CPU can be already at a capacity level higher
+		 * than the one required to boost the task.
+		 */
+		if (new_util > capacity_orig_of(i))
+			continue;
+
+		/*
+		 * For boosted tasks we favor idle cpus unconditionally to
+		 * improve latency.
+		 */
+		if (idle_idx >= 0 && boosted) {
+			if (idle_cpu < 0 ||
+				(sysctl_sched_cstate_aware &&
+				 best_idle_cstate > idle_idx)) {
+				best_idle_cstate = idle_idx;
+				idle_cpu = i;
+			}
+			continue;
+		}
+
+		if (new_util < cur_capacity) {
+			if (cpu_rq(i)->nr_running) {
+				if (target_capacity == 0 ||
+					target_capacity > cur_capacity) {
+					/* busy CPU with most capacity at current OPP */
+					target_cpu = i;
+					target_capacity = cur_capacity;
+				}
+			} else if (!boosted) {
+				if (idle_cpu < 0 ||
+					(sysctl_sched_cstate_aware &&
+					 best_idle_cstate > idle_idx)) {
+					best_idle_cstate = idle_idx;
+					idle_cpu = i;
+				}
+			}
+		} else if (backup_capacity == 0 ||
+				backup_capacity > cur_capacity) {
+			/* first busy CPU with capacity at higher OPP */
+			backup_capacity = cur_capacity;
+			backup_cpu = i;
+		}
+	}
+
+	if (!boosted && target_cpu < 0) {
+		target_cpu = idle_cpu >= 0 ? idle_cpu : backup_cpu;
+	}
+
+	if (boosted && idle_cpu >= 0)
+		target_cpu = idle_cpu;
+	return target_cpu;
+}
+
+static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync)
 {
 	struct sched_domain *sd;
 	struct sched_group *sg, *sg_target;
@@ -5542,6 +5635,14 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target)
 	int target_cpu = task_cpu(p);
 	int i;
 
+	if (sysctl_sched_sync_hint_enable && sync) {
+		int cpu = smp_processor_id();
+		cpumask_t search_cpus;
+		cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask);
+		if (cpumask_test_cpu(cpu, &search_cpus))
+			return cpu;
+	}
+
 	sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
 
 	if (!sd)
@@ -5550,50 +5651,60 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target)
 	sg = sd->groups;
 	sg_target = sg;
 
-	/*
-	 * Find group with sufficient capacity. We only get here if no cpu is
-	 * overutilized. We may end up overutilizing a cpu by adding the task,
-	 * but that should not be any worse than select_idle_sibling().
-	 * load_balance() should sort it out later as we get above the tipping
-	 * point.
-	 */
-	do {
-		/* Assuming all cpus are the same in group */
-		int max_cap_cpu = group_first_cpu(sg);
+	if (sysctl_sched_is_big_little) {
 
 		/*
-		 * Assume smaller max capacity means more energy-efficient.
-		 * Ideally we should query the energy model for the right
-		 * answer but it easily ends up in an exhaustive search.
+		 * Find group with sufficient capacity. We only get here if no cpu is
+		 * overutilized. We may end up overutilizing a cpu by adding the task,
+		 * but that should not be any worse than select_idle_sibling().
+		 * load_balance() should sort it out later as we get above the tipping
+		 * point.
 		 */
-		if (capacity_of(max_cap_cpu) < target_max_cap &&
-		    task_fits_max(p, max_cap_cpu)) {
-			sg_target = sg;
-			target_max_cap = capacity_of(max_cap_cpu);
-		}
-	} while (sg = sg->next, sg != sd->groups);
+		do {
+			/* Assuming all cpus are the same in group */
+			int max_cap_cpu = group_first_cpu(sg);
 
-	/* Find cpu with sufficient capacity */
-	for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
+			/*
+			 * Assume smaller max capacity means more energy-efficient.
+			 * Ideally we should query the energy model for the right
+			 * answer but it easily ends up in an exhaustive search.
+			 */
+			if (capacity_of(max_cap_cpu) < target_max_cap &&
+			    task_fits_max(p, max_cap_cpu)) {
+				sg_target = sg;
+				target_max_cap = capacity_of(max_cap_cpu);
+			}
+		} while (sg = sg->next, sg != sd->groups);
+
+		/* Find cpu with sufficient capacity */
+		for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
+			/*
+			 * p's blocked utilization is still accounted for on prev_cpu
+			 * so prev_cpu will receive a negative bias due to the double
+			 * accounting. However, the blocked utilization may be zero.
+			 */
+			int new_util = cpu_util(i) + boosted_task_util(p);
+
+			if (new_util > capacity_orig_of(i))
+				continue;
+
+			if (new_util < capacity_curr_of(i)) {
+				target_cpu = i;
+				if (cpu_rq(i)->nr_running)
+					break;
+			}
+
+			/* cpu has capacity at higher OPP, keep it as fallback */
+			if (target_cpu == task_cpu(p))
+				target_cpu = i;
+		}
+	} else {
 		/*
-		 * p's blocked utilization is still accounted for on prev_cpu
-		 * so prev_cpu will receive a negative bias due to the double
-		 * accounting. However, the blocked utilization may be zero.
+		 * Find a cpu with sufficient capacity
 		 */
-		int new_util = cpu_util(i) + boosted_task_util(p);
-
-		if (new_util > capacity_orig_of(i))
-			continue;
-
-		if (new_util < capacity_curr_of(i)) {
-			target_cpu = i;
-			if (cpu_rq(i)->nr_running)
-				break;
-		}
-
-		/* cpu has capacity at higher OPP, keep it as fallback */
-		if (target_cpu == task_cpu(p))
-			target_cpu = i;
+		int tmp_target = find_best_target(p);
+		if (tmp_target >= 0)
+			target_cpu = tmp_target;
 	}
 
 	if (target_cpu != task_cpu(p)) {
@@ -5670,7 +5781,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 
 	if (!sd) {
 		if (energy_aware() && !cpu_rq(cpu)->rd->overutilized)
-			new_cpu = energy_aware_wake_cpu(p, prev_cpu);
+			new_cpu = energy_aware_wake_cpu(p, prev_cpu, sync);
 		else if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
 			new_cpu = select_idle_sibling(p, new_cpu);
 

@@ -303,6 +303,20 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &min_sched_granularity_ns,
 		.extra2		= &max_sched_granularity_ns,
 	},
+	{
+		.procname	= "sched_is_big_little",
+		.data		= &sysctl_sched_is_big_little,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_sync_hint_enable",
+		.data		= &sysctl_sched_sync_hint_enable,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "sched_cstate_aware",
 		.data		= &sysctl_sched_cstate_aware,