sched/fair: add tunable to force selection at cpu granularity
EAS assumes that clusters with smaller capacity cores are more energy-efficient. This may not be true on non-big-little devices, so EAS can make incorrect cluster selections when finding a CPU to wake. The "sched_is_big_little" hint can be used to cause a cpu-based selection instead of cluster-based selection. This change incorporates the addition of the sync-hint-enable patch: EAS did not honour synchronous wakeup hints, so a new sysctl is created to ask EAS to use this information when selecting a CPU. The control is called "sched_sync_hint_enable". Also contains: EAS: sched/fair: for SMP, bias toward idle core with capacity — for SMP devices, on wakeup bias towards idle cores that have capacity vs. busy devices that need a higher OPP. eas: favor idle cpus for boosted tasks. BUG: 29533997 BUG: 29512132 Change-Id: I0cc9a1b1b88fb52916f18bf2d25715bdc3634f9c Signed-off-by: Juri Lelli <juri.lelli@arm.com> Signed-off-by: Srinath Sridharan <srinathsr@google.com> eas/sched/fair: Favoring busy cpus with low OPPs. BUG: 29533997 BUG: 29512132 Change-Id: I9305b3239698d64278db715a2e277ea0bb4ece79 Signed-off-by: Juri Lelli <juri.lelli@arm.com>
This commit is contained in:
parent
2e9abbc942
commit
4a5e890ec6
3 changed files with 167 additions and 40 deletions
|
@ -39,6 +39,8 @@ extern unsigned int sysctl_sched_latency;
|
|||
extern unsigned int sysctl_sched_min_granularity;
|
||||
extern unsigned int sysctl_sched_wakeup_granularity;
|
||||
extern unsigned int sysctl_sched_child_runs_first;
|
||||
extern unsigned int sysctl_sched_is_big_little;
|
||||
extern unsigned int sysctl_sched_sync_hint_enable;
|
||||
extern unsigned int sysctl_sched_cstate_aware;
|
||||
|
||||
enum sched_tunable_scaling {
|
||||
|
|
|
@ -51,7 +51,10 @@
|
|||
unsigned int sysctl_sched_latency = 6000000ULL;
|
||||
unsigned int normalized_sysctl_sched_latency = 6000000ULL;
|
||||
|
||||
unsigned int sysctl_sched_is_big_little = 0;
|
||||
unsigned int sysctl_sched_sync_hint_enable = 1;
|
||||
unsigned int sysctl_sched_cstate_aware = 1;
|
||||
|
||||
/*
|
||||
* The initial- and re-scaling of tunables is configurable
|
||||
* (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
|
||||
|
@ -5534,7 +5537,97 @@ done:
|
|||
return target;
|
||||
}
|
||||
|
||||
static int energy_aware_wake_cpu(struct task_struct *p, int target)
|
||||
static inline int find_best_target(struct task_struct *p)
|
||||
{
|
||||
int i, boosted;
|
||||
int target_cpu = -1;
|
||||
int target_capacity = 0;
|
||||
int backup_capacity = 0;
|
||||
int idle_cpu = -1;
|
||||
int best_idle_cstate = INT_MAX;
|
||||
int backup_cpu = -1;
|
||||
unsigned long task_util_boosted, new_util;
|
||||
|
||||
/*
|
||||
* Favor 1) busy cpu with most capacity at current OPP
|
||||
* 2) idle_cpu with capacity at current OPP
|
||||
* 3) busy cpu with capacity at higher OPP
|
||||
*/
|
||||
#ifdef CONFIG_CGROUP_SCHEDTUNE
|
||||
boosted = schedtune_task_boost(p);
|
||||
#else
|
||||
boosted = 0;
|
||||
#endif
|
||||
task_util_boosted = boosted_task_util(p);
|
||||
for_each_cpu(i, tsk_cpus_allowed(p)) {
|
||||
int cur_capacity = capacity_curr_of(i);
|
||||
struct rq *rq = cpu_rq(i);
|
||||
int idle_idx = idle_get_state_idx(rq);
|
||||
|
||||
/*
|
||||
* p's blocked utilization is still accounted for on prev_cpu
|
||||
* so prev_cpu will receive a negative bias due to the double
|
||||
* accounting. However, the blocked utilization may be zero.
|
||||
*/
|
||||
new_util = cpu_util(i) + task_util_boosted;
|
||||
|
||||
/*
|
||||
* Ensure minimum capacity to grant the required boost.
|
||||
* The target CPU can be already at a capacity level higher
|
||||
* than the one required to boost the task.
|
||||
*/
|
||||
|
||||
if (new_util > capacity_orig_of(i))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* For boosted tasks we favor idle cpus unconditionally to
|
||||
* improve latency.
|
||||
*/
|
||||
if (idle_idx >= 0 && boosted) {
|
||||
if (idle_cpu < 0 ||
|
||||
(sysctl_sched_cstate_aware &&
|
||||
best_idle_cstate > idle_idx)) {
|
||||
best_idle_cstate = idle_idx;
|
||||
idle_cpu = i;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (new_util < cur_capacity) {
|
||||
if (cpu_rq(i)->nr_running) {
|
||||
if (target_capacity == 0 ||
|
||||
target_capacity > cur_capacity) {
|
||||
/* busy CPU with most capacity at current OPP */
|
||||
target_cpu = i;
|
||||
target_capacity = cur_capacity;
|
||||
}
|
||||
} else if (!boosted) {
|
||||
if (idle_cpu < 0 ||
|
||||
(sysctl_sched_cstate_aware &&
|
||||
best_idle_cstate > idle_idx)) {
|
||||
best_idle_cstate = idle_idx;
|
||||
idle_cpu = i;
|
||||
}
|
||||
}
|
||||
} else if (backup_capacity == 0 ||
|
||||
backup_capacity > cur_capacity) {
|
||||
/* first busy CPU with capacity at higher OPP */
|
||||
backup_capacity = cur_capacity;
|
||||
backup_cpu = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (!boosted && target_cpu < 0) {
|
||||
target_cpu = idle_cpu >= 0 ? idle_cpu : backup_cpu;
|
||||
}
|
||||
|
||||
if (boosted && idle_cpu >= 0)
|
||||
target_cpu = idle_cpu;
|
||||
return target_cpu;
|
||||
}
|
||||
|
||||
static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync)
|
||||
{
|
||||
struct sched_domain *sd;
|
||||
struct sched_group *sg, *sg_target;
|
||||
|
@ -5542,6 +5635,14 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target)
|
|||
int target_cpu = task_cpu(p);
|
||||
int i;
|
||||
|
||||
if (sysctl_sched_sync_hint_enable && sync) {
|
||||
int cpu = smp_processor_id();
|
||||
cpumask_t search_cpus;
|
||||
cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask);
|
||||
if (cpumask_test_cpu(cpu, &search_cpus))
|
||||
return cpu;
|
||||
}
|
||||
|
||||
sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
|
||||
|
||||
if (!sd)
|
||||
|
@ -5550,50 +5651,60 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target)
|
|||
sg = sd->groups;
|
||||
sg_target = sg;
|
||||
|
||||
/*
|
||||
* Find group with sufficient capacity. We only get here if no cpu is
|
||||
* overutilized. We may end up overutilizing a cpu by adding the task,
|
||||
* but that should not be any worse than select_idle_sibling().
|
||||
* load_balance() should sort it out later as we get above the tipping
|
||||
* point.
|
||||
*/
|
||||
do {
|
||||
/* Assuming all cpus are the same in group */
|
||||
int max_cap_cpu = group_first_cpu(sg);
|
||||
if (sysctl_sched_is_big_little) {
|
||||
|
||||
/*
|
||||
* Assume smaller max capacity means more energy-efficient.
|
||||
* Ideally we should query the energy model for the right
|
||||
* answer but it easily ends up in an exhaustive search.
|
||||
* Find group with sufficient capacity. We only get here if no cpu is
|
||||
* overutilized. We may end up overutilizing a cpu by adding the task,
|
||||
* but that should not be any worse than select_idle_sibling().
|
||||
* load_balance() should sort it out later as we get above the tipping
|
||||
* point.
|
||||
*/
|
||||
if (capacity_of(max_cap_cpu) < target_max_cap &&
|
||||
task_fits_max(p, max_cap_cpu)) {
|
||||
sg_target = sg;
|
||||
target_max_cap = capacity_of(max_cap_cpu);
|
||||
}
|
||||
} while (sg = sg->next, sg != sd->groups);
|
||||
do {
|
||||
/* Assuming all cpus are the same in group */
|
||||
int max_cap_cpu = group_first_cpu(sg);
|
||||
|
||||
/* Find cpu with sufficient capacity */
|
||||
for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
|
||||
/*
|
||||
* Assume smaller max capacity means more energy-efficient.
|
||||
* Ideally we should query the energy model for the right
|
||||
* answer but it easily ends up in an exhaustive search.
|
||||
*/
|
||||
if (capacity_of(max_cap_cpu) < target_max_cap &&
|
||||
task_fits_max(p, max_cap_cpu)) {
|
||||
sg_target = sg;
|
||||
target_max_cap = capacity_of(max_cap_cpu);
|
||||
}
|
||||
} while (sg = sg->next, sg != sd->groups);
|
||||
|
||||
/* Find cpu with sufficient capacity */
|
||||
for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
|
||||
/*
|
||||
* p's blocked utilization is still accounted for on prev_cpu
|
||||
* so prev_cpu will receive a negative bias due to the double
|
||||
* accounting. However, the blocked utilization may be zero.
|
||||
*/
|
||||
int new_util = cpu_util(i) + boosted_task_util(p);
|
||||
|
||||
if (new_util > capacity_orig_of(i))
|
||||
continue;
|
||||
|
||||
if (new_util < capacity_curr_of(i)) {
|
||||
target_cpu = i;
|
||||
if (cpu_rq(i)->nr_running)
|
||||
break;
|
||||
}
|
||||
|
||||
/* cpu has capacity at higher OPP, keep it as fallback */
|
||||
if (target_cpu == task_cpu(p))
|
||||
target_cpu = i;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* p's blocked utilization is still accounted for on prev_cpu
|
||||
* so prev_cpu will receive a negative bias due to the double
|
||||
* accounting. However, the blocked utilization may be zero.
|
||||
* Find a cpu with sufficient capacity
|
||||
*/
|
||||
int new_util = cpu_util(i) + boosted_task_util(p);
|
||||
|
||||
if (new_util > capacity_orig_of(i))
|
||||
continue;
|
||||
|
||||
if (new_util < capacity_curr_of(i)) {
|
||||
target_cpu = i;
|
||||
if (cpu_rq(i)->nr_running)
|
||||
break;
|
||||
}
|
||||
|
||||
/* cpu has capacity at higher OPP, keep it as fallback */
|
||||
if (target_cpu == task_cpu(p))
|
||||
target_cpu = i;
|
||||
int tmp_target = find_best_target(p);
|
||||
if (tmp_target >= 0)
|
||||
target_cpu = tmp_target;
|
||||
}
|
||||
|
||||
if (target_cpu != task_cpu(p)) {
|
||||
|
@ -5670,7 +5781,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
|
|||
|
||||
if (!sd) {
|
||||
if (energy_aware() && !cpu_rq(cpu)->rd->overutilized)
|
||||
new_cpu = energy_aware_wake_cpu(p, prev_cpu);
|
||||
new_cpu = energy_aware_wake_cpu(p, prev_cpu, sync);
|
||||
else if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
|
||||
new_cpu = select_idle_sibling(p, new_cpu);
|
||||
|
||||
|
|
|
@ -303,6 +303,20 @@ static struct ctl_table kern_table[] = {
|
|||
.extra1 = &min_sched_granularity_ns,
|
||||
.extra2 = &max_sched_granularity_ns,
|
||||
},
|
||||
{
|
||||
.procname = "sched_is_big_little",
|
||||
.data = &sysctl_sched_is_big_little,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "sched_sync_hint_enable",
|
||||
.data = &sysctl_sched_sync_hint_enable,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "sched_cstate_aware",
|
||||
.data = &sysctl_sched_cstate_aware,
|
||||
|
|
Loading…
Add table
Reference in a new issue