sched: add migration load change notifier for frequency guidance

When a task moves between CPUs in two different frequency domains,
the cpufreq governor may wish to immediately modify the frequency
of both the source and destination CPUs of the migrating task.

A tunable is provided to establish how large a task's load must be
before it is considered "significant" enough to warrant notifying
cpufreq.

Also fix a bug that would cause load to not be accounted properly
during wakeup migrations.

Change-Id: Ie8f6b1cc4d43a602840dac18590b42a81327c95a
Signed-off-by: Steve Muckle <smuckle@codeaurora.org>
[rameezmustafa@codeaurora.org: Add double rq locking for set_task_cpu()]
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
commit f469bce8e2 (parent e640249dba)
Authored by Steve Muckle on 2014-05-06 18:05:50 -0700; committed by David Keitel
6 changed files with 78 additions and 7 deletions
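
For reference, below is a minimal sketch of a consumer of the new notifier
chain; a cpufreq governor would register a notifier block and re-evaluate the
affected CPU's frequency when called. Everything named example_* is
hypothetical and not part of this patch; the only patch-provided pieces used
are load_alert_notifier_head and the convention that the CPU number is passed
as the notifier data pointer.

/*
 * Hypothetical consumer sketch -- not part of this patch.
 */
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/printk.h>
#include <linux/sched.h>

static int example_load_alert(struct notifier_block *nb,
                              unsigned long action, void *data)
{
        /* set_task_cpu() passes the CPU id as the data pointer */
        int cpu = (int)(long)data;

        /* A real governor would re-evaluate this CPU's frequency here. */
        pr_debug("significant task load migrated to/from CPU%d\n", cpu);

        return NOTIFY_OK;
}

static struct notifier_block example_load_alert_nb = {
        .notifier_call = example_load_alert,
};

static int __init example_load_alert_init(void)
{
        return atomic_notifier_chain_register(&load_alert_notifier_head,
                                              &example_load_alert_nb);
}
late_initcall(example_load_alert_init);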

diff --git a/include/linux/sched.h b/include/linux/sched.h
@@ -3171,6 +3171,8 @@ struct migration_notify_data {
         int load;
 };
 
+extern struct atomic_notifier_head load_alert_notifier_head;
+
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
@@ -47,6 +47,8 @@ extern unsigned int sysctl_sched_window_stats_policy;
 extern unsigned int sysctl_sched_init_task_load_pct;
 #endif
 
+extern unsigned int sysctl_sched_task_migrate_notify_pct;
+
 #ifdef CONFIG_SCHED_HMP
 extern unsigned int sysctl_sched_enable_hmp_task_placement;
 extern unsigned int sysctl_sched_mostly_idle_nr_run;
@@ -87,6 +89,9 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
                loff_t *ppos);
 #endif
 
+extern int sched_migrate_notify_proc_handler(struct ctl_table *table,
+               int write, void __user *buffer, size_t *lenp, loff_t *ppos);
+
 extern int sched_hmp_proc_update_handler(struct ctl_table *table,
                int write, void __user *buffer, size_t *lenp, loff_t *ppos);

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
@@ -92,6 +92,7 @@
 #include <trace/events/sched.h>
 
 ATOMIC_NOTIFIER_HEAD(migration_notifier_head);
+ATOMIC_NOTIFIER_HEAD(load_alert_notifier_head);
 
 DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -1095,6 +1096,29 @@ unsigned int __read_mostly sched_use_pelt;
 unsigned int max_possible_efficiency = 1024;
 unsigned int min_possible_efficiency = 1024;
 
+__read_mostly unsigned int sysctl_sched_task_migrate_notify_pct = 25;
+unsigned int sched_task_migrate_notify;
+
+int sched_migrate_notify_proc_handler(struct ctl_table *table, int write,
+                                      void __user *buffer, size_t *lenp,
+                                      loff_t *ppos)
+{
+        int ret;
+        unsigned int *data = (unsigned int *)table->data;
+
+        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+        if (ret || !write)
+                return ret;
+
+        if (*data > 100)
+                return -EINVAL;
+
+        sched_task_migrate_notify = div64_u64((u64)*data *
+                                              (u64)max_task_load(), 100);
+
+        return 0;
+}
+
 /*
  * Called when new window is starting for a task, to record cpu usage over
  * recently concluded window(s). Normally 'samples' should be 1. It can be > 1
@@ -1687,21 +1711,46 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
                perf_event_task_migrate(p);
 
 #if defined(CONFIG_SCHED_FREQ_INPUT) || defined(CONFIG_SCHED_HMP)
-       if (p->on_rq) {
+       if (p->on_rq || p->state == TASK_WAKING) {
                struct rq *src_rq = task_rq(p);
                struct rq *dest_rq = cpu_rq(new_cpu);
 
-               p->on_rq = 0; /* Fixme */
-               update_task_ravg(p, task_rq(p), 0, sched_clock());
-               p->on_rq = 1; /* Fixme */
+               /* In the wakeup case the task has already had
+                * its statistics updated (and the RQ is not locked). */
+               if (p->state != TASK_WAKING) {
+                       p->on_rq = 0; /* todo */
+                       update_task_ravg(p, task_rq(p), 0,
+                                        sched_clock());
+                       p->on_rq = 1; /* todo */
+               }
+
+               if (p->state == TASK_WAKING)
+                       double_rq_lock(src_rq, dest_rq);
 
                update_task_ravg(dest_rq->curr, dest_rq,
                                 1, sched_clock());
 
                src_rq->curr_runnable_sum -= p->ravg.sum;
                src_rq->prev_runnable_sum -= p->ravg.prev_window;
                dest_rq->curr_runnable_sum += p->ravg.sum;
                dest_rq->prev_runnable_sum += p->ravg.prev_window;
+
+               if (p->state == TASK_WAKING)
+                       double_rq_unlock(src_rq, dest_rq);
+
+               /* Is p->ravg.prev_window significant? Trigger a load
+                * alert notifier if so. */
+               if (p->ravg.prev_window > sched_task_migrate_notify &&
+                   !cpumask_test_cpu(new_cpu,
+                                     &src_rq->freq_domain_cpumask)) {
+                       atomic_notifier_call_chain(
+                               &load_alert_notifier_head, 0,
+                               (void *)(long)task_cpu(p));
+                       atomic_notifier_call_chain(
+                               &load_alert_notifier_head, 0,
+                               (void *)(long)new_cpu);
+               }
        }
 #endif
@@ -7899,6 +7948,8 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
                return 0;
 
        for_each_cpu(i, policy->related_cpus) {
+               cpumask_copy(&cpu_rq(i)->freq_domain_cpumask,
+                            policy->related_cpus);
                cpu_rq(i)->min_freq = policy->min;
                cpu_rq(i)->max_freq = policy->max;
                cpu_rq(i)->max_possible_freq = policy->cpuinfo.max_freq;
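
A worked example of the threshold math (the window size here is
hypothetical): sched_migrate_notify_proc_handler() converts the percentage
into an absolute load once at sysctl-write time, so set_task_cpu() only
performs a single comparison on the migration path. If max_task_load() were
10,000,000 (a 10 ms window expressed in nanoseconds), the default
sysctl_sched_task_migrate_notify_pct of 25 would give
sched_task_migrate_notify = div64_u64(25 * 10,000,000, 100) = 2,500,000. A
task then triggers the notifiers only when its prev_window demand exceeds
that value and the migration crosses a freq_domain_cpumask boundary (the mask
is filled in from policy->related_cpus in cpufreq_notifier_policy() above),
in which case both the source and destination CPUs are signalled.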

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
@@ -2551,7 +2551,7 @@ static inline unsigned int task_load(struct task_struct *p)
        return p->ravg.demand;
 }
 
-static inline unsigned int max_task_load(void)
+unsigned int max_task_load(void)
 {
        if (sched_use_pelt)
                return LOAD_AVG_MAX;
@@ -6442,7 +6442,9 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
        deactivate_task(env->src_rq, p, 0);
        p->on_rq = TASK_ON_RQ_MIGRATING;
+       double_lock_balance(env->src_rq, env->dst_rq);
        set_task_cpu(p, env->dst_cpu);
+       double_unlock_balance(env->src_rq, env->dst_rq);
 }
 
 /*
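
The locking added around set_task_cpu() here mirrors the wakeup path:
set_task_cpu() now modifies curr_runnable_sum/prev_runnable_sum on both
runqueues, and the load-balance path enters detach_task() holding only
env->src_rq's lock, so double_lock_balance() is presumably needed to also pin
env->dst_rq (the TASK_WAKING case instead takes both locks itself via
double_rq_lock()).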

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
@@ -650,6 +650,8 @@ struct rq {
         * max_possible_freq = maximum supported by hardware
         */
        unsigned int cur_freq, max_freq, min_freq, max_possible_freq;
+       struct cpumask freq_domain_cpumask;
+
        u64 cumulative_runnable_avg;
        int efficiency; /* Differentiate cpus with different IPC capability */
        int load_scale_factor;
@@ -961,7 +963,7 @@ static inline u64 scale_task_load(u64 load, int cpu)
        return load;
 }
 #endif
 
+unsigned int max_task_load(void);
 
 static inline void
 inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
@@ -292,6 +292,15 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+#ifdef CONFIG_SCHED_FREQ_INPUT
+       {
+               .procname       = "sched_task_migrate_notify",
+               .data           = &sysctl_sched_task_migrate_notify_pct,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = sched_migrate_notify_proc_handler,
+       },
+#endif
 #if defined(CONFIG_SCHED_FREQ_INPUT) || defined(CONFIG_SCHED_HMP)
        {
                .procname       = "sched_window_stats_policy",