sched: add migration load change notifier for frequency guidance

When a task moves between CPUs in two different frequency domains,
the cpufreq governor may wish to immediately adjust the frequency
of both the source and destination CPUs of the migrating task.
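
For illustration, a governor could subscribe to the new notifier chain
roughly as follows. This is a minimal sketch, not part of this patch:
only load_alert_notifier_head and its payload (the CPU number cast to a
pointer) come from the change below; the callback name, notifier block,
and registration site are hypothetical.

#include <linux/notifier.h>
#include <linux/sched.h>

/* Hypothetical governor callback. 'data' carries the CPU whose load
 * changed, cast to a pointer, matching what set_task_cpu() passes. */
static int gov_load_alert_cb(struct notifier_block *nb,
			     unsigned long action, void *data)
{
	int cpu = (int)(long)data;

	/* Re-evaluate the frequency of 'cpu' here, e.g. by kicking
	 * the governor's worker thread. */
	pr_debug("load alert on cpu %d\n", cpu);
	return NOTIFY_OK;
}

static struct notifier_block gov_load_alert_nb = {
	.notifier_call = gov_load_alert_cb,
};

/* Hypothetical init hook: subscribe to load alerts. */
static void gov_subscribe_load_alerts(void)
{
	atomic_notifier_chain_register(&load_alert_notifier_head,
				       &gov_load_alert_nb);
}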

A tunable is provided to establish how large a task's load must be
for its migration to be considered "significant" enough to warrant
notifying cpufreq.

Also fix a bug that caused load not to be accounted properly
during wakeup migrations.

Change-Id: Ie8f6b1cc4d43a602840dac18590b42a81327c95a
Signed-off-by: Steve Muckle <smuckle@codeaurora.org>
[rameezmustafa@codeaurora.org: Add double rq locking for set_task_cpu()]
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
Steve Muckle, 2014-05-06 18:05:50 -07:00 (committed by David Keitel)
parent e640249dba, commit f469bce8e2
6 changed files with 78 additions and 7 deletions

include/linux/sched.h

@@ -3171,6 +3171,8 @@ struct migration_notify_data {
 	int load;
 };
 
+extern struct atomic_notifier_head load_alert_notifier_head;
+
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);

include/linux/sched/sysctl.h

@@ -47,6 +47,8 @@ extern unsigned int sysctl_sched_window_stats_policy;
 extern unsigned int sysctl_sched_init_task_load_pct;
 #endif
 
+extern unsigned int sysctl_sched_task_migrate_notify_pct;
+
 #ifdef CONFIG_SCHED_HMP
 extern unsigned int sysctl_sched_enable_hmp_task_placement;
 extern unsigned int sysctl_sched_mostly_idle_nr_run;
@@ -87,6 +89,9 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
 		loff_t *ppos);
 #endif
 
+extern int sched_migrate_notify_proc_handler(struct ctl_table *table,
+	int write, void __user *buffer, size_t *lenp, loff_t *ppos);
+
 extern int sched_hmp_proc_update_handler(struct ctl_table *table,
 	int write, void __user *buffer, size_t *lenp, loff_t *ppos);

kernel/sched/core.c

@@ -92,6 +92,7 @@
 #include <trace/events/sched.h>
 
 ATOMIC_NOTIFIER_HEAD(migration_notifier_head);
+ATOMIC_NOTIFIER_HEAD(load_alert_notifier_head);
 
 DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -1095,6 +1096,29 @@ unsigned int __read_mostly sched_use_pelt;
 unsigned int max_possible_efficiency = 1024;
 unsigned int min_possible_efficiency = 1024;
 
+__read_mostly unsigned int sysctl_sched_task_migrate_notify_pct = 25;
+unsigned int sched_task_migrate_notify;
+
+int sched_migrate_notify_proc_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+	unsigned int *data = (unsigned int *)table->data;
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (ret || !write)
+		return ret;
+
+	if (*data > 100)
+		return -EINVAL;
+
+	sched_task_migrate_notify = div64_u64((u64)*data *
+					      (u64)max_task_load(), 100);
+
+	return 0;
+}
+
 /*
  * Called when new window is starting for a task, to record cpu usage over
  * recently concluded window(s). Normally 'samples' should be 1. It can be > 1
@@ -1687,21 +1711,46 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	perf_event_task_migrate(p);
 
 #if defined(CONFIG_SCHED_FREQ_INPUT) || defined(CONFIG_SCHED_HMP)
-	if (p->on_rq) {
+	if (p->on_rq || p->state == TASK_WAKING) {
 		struct rq *src_rq = task_rq(p);
 		struct rq *dest_rq = cpu_rq(new_cpu);
 
-		p->on_rq = 0; /* Fixme */
-		update_task_ravg(p, task_rq(p), 0, sched_clock());
-		p->on_rq = 1; /* Fixme */
+		/* In the wakeup case the task has already had
+		 * its statisics updated (and the RQ is not locked). */
+		if (p->state != TASK_WAKING) {
+			p->on_rq = 0; /* todo */
+			update_task_ravg(p, task_rq(p), 0,
+					 sched_clock());
+			p->on_rq = 1; /* todo */
+		}
+
+		if (p->state == TASK_WAKING)
+			double_rq_lock(src_rq, dest_rq);
 
 		update_task_ravg(dest_rq->curr, dest_rq,
-				 1, sched_clock());
+				1, sched_clock());
 		src_rq->curr_runnable_sum -= p->ravg.sum;
 		src_rq->prev_runnable_sum -= p->ravg.prev_window;
 		dest_rq->curr_runnable_sum += p->ravg.sum;
 		dest_rq->prev_runnable_sum += p->ravg.prev_window;
+
+		if (p->state == TASK_WAKING)
+			double_rq_unlock(src_rq, dest_rq);
+
+		/* Is p->ravg.prev_window significant? Trigger a load
+		   alert notifier if so. */
+		if (p->ravg.prev_window > sched_task_migrate_notify &&
+		    !cpumask_test_cpu(new_cpu,
+				      &src_rq->freq_domain_cpumask)) {
+			atomic_notifier_call_chain(
+				&load_alert_notifier_head, 0,
+				(void *)(long)task_cpu(p));
+			atomic_notifier_call_chain(
+				&load_alert_notifier_head, 0,
+				(void *)(long)new_cpu);
+		}
 	}
 #endif
@@ -7899,6 +7948,8 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
 		return 0;
 
 	for_each_cpu(i, policy->related_cpus) {
+		cpumask_copy(&cpu_rq(i)->freq_domain_cpumask,
+			     policy->related_cpus);
 		cpu_rq(i)->min_freq = policy->min;
 		cpu_rq(i)->max_freq = policy->max;
 		cpu_rq(i)->max_possible_freq = policy->cpuinfo.max_freq;

kernel/sched/fair.c

@@ -2551,7 +2551,7 @@ static inline unsigned int task_load(struct task_struct *p)
 	return p->ravg.demand;
 }
 
-static inline unsigned int max_task_load(void)
+unsigned int max_task_load(void)
 {
 	if (sched_use_pelt)
 		return LOAD_AVG_MAX;
@@ -6442,7 +6442,9 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 	deactivate_task(env->src_rq, p, 0);
 	p->on_rq = TASK_ON_RQ_MIGRATING;
+	double_lock_balance(env->src_rq, env->dst_rq);
 	set_task_cpu(p, env->dst_cpu);
+	double_unlock_balance(env->src_rq, env->dst_rq);
 }
 
 /*

kernel/sched/sched.h

@@ -650,6 +650,8 @@ struct rq {
 	 * max_possible_freq = maximum supported by hardware
 	 */
 	unsigned int cur_freq, max_freq, min_freq, max_possible_freq;
+	struct cpumask freq_domain_cpumask;
+
 	u64 cumulative_runnable_avg;
 	int efficiency; /* Differentiate cpus with different IPC capability */
 	int load_scale_factor;
@@ -961,7 +963,7 @@ static inline u64 scale_task_load(u64 load, int cpu)
 	return load;
 }
 #endif
-
+unsigned int max_task_load(void);
 
 static inline void
 inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)

kernel/sysctl.c

@@ -292,6 +292,15 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_SCHED_FREQ_INPUT
+	{
+		.procname	= "sched_task_migrate_notify",
+		.data		= &sysctl_sched_task_migrate_notify_pct,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_migrate_notify_proc_handler,
+	},
+#endif
 #if defined(CONFIG_SCHED_FREQ_INPUT) || defined(CONFIG_SCHED_HMP)
 	{
 		.procname	= "sched_window_stats_policy",