diff --git a/include/linux/sched.h b/include/linux/sched.h
index 763eb0312130..2d2a94575eaa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3171,6 +3171,8 @@ struct migration_notify_data {
 	int load;
 };
 
+extern struct atomic_notifier_head load_alert_notifier_head;
+
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 66a978ca7a65..b65ee06f80c9 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -47,6 +47,8 @@ extern unsigned int sysctl_sched_window_stats_policy;
 extern unsigned int sysctl_sched_init_task_load_pct;
 #endif
 
+extern unsigned int sysctl_sched_task_migrate_notify_pct;
+
 #ifdef CONFIG_SCHED_HMP
 extern unsigned int sysctl_sched_enable_hmp_task_placement;
 extern unsigned int sysctl_sched_mostly_idle_nr_run;
@@ -87,6 +89,9 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
 		loff_t *ppos);
 #endif
 
+extern int sched_migrate_notify_proc_handler(struct ctl_table *table,
+		int write, void __user *buffer, size_t *lenp, loff_t *ppos);
+
 extern int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d2b7d83fbd90..a96e2225755a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -92,6 +92,7 @@
 #include
 
 ATOMIC_NOTIFIER_HEAD(migration_notifier_head);
+ATOMIC_NOTIFIER_HEAD(load_alert_notifier_head);
 
 DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -1095,6 +1096,29 @@ unsigned int __read_mostly sched_use_pelt;
 unsigned int max_possible_efficiency = 1024;
 unsigned int min_possible_efficiency = 1024;
 
+__read_mostly unsigned int sysctl_sched_task_migrate_notify_pct = 25;
+unsigned int sched_task_migrate_notify;
+
+int sched_migrate_notify_proc_handler(struct ctl_table *table, int write,
+				      void __user *buffer, size_t *lenp,
+				      loff_t *ppos)
+{
+	int ret;
+	unsigned int *data = (unsigned int *)table->data;
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (ret || !write)
+		return ret;
+
+	if (*data > 100)
+		return -EINVAL;
+
+	sched_task_migrate_notify = div64_u64((u64)*data *
+					      (u64)max_task_load(), 100);
+
+	return 0;
+}
+
 /*
  * Called when new window is starting for a task, to record cpu usage over
  * recently concluded window(s). Normally 'samples' should be 1. It can be > 1
@@ -1687,21 +1711,46 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		perf_event_task_migrate(p);
 
 #if defined(CONFIG_SCHED_FREQ_INPUT) || defined(CONFIG_SCHED_HMP)
-	if (p->on_rq) {
+	if (p->on_rq || p->state == TASK_WAKING) {
 		struct rq *src_rq = task_rq(p);
 		struct rq *dest_rq = cpu_rq(new_cpu);
 
-		p->on_rq = 0;	/* Fixme */
-		update_task_ravg(p, task_rq(p), 0, sched_clock());
-		p->on_rq = 1;	/* Fixme */
+		/* In the wakeup case the task has already had
+		 * its statistics updated (and the RQ is not locked).
+		 */
+		if (p->state != TASK_WAKING) {
+			p->on_rq = 0;	/* todo */
+			update_task_ravg(p, task_rq(p), 0,
+					 sched_clock());
+			p->on_rq = 1;	/* todo */
+		}
+
+		if (p->state == TASK_WAKING)
+			double_rq_lock(src_rq, dest_rq);
+
 		update_task_ravg(dest_rq->curr, dest_rq,
-					1, sched_clock());
+				 1, sched_clock());
 
 		src_rq->curr_runnable_sum -= p->ravg.sum;
 		src_rq->prev_runnable_sum -= p->ravg.prev_window;
 		dest_rq->curr_runnable_sum += p->ravg.sum;
 		dest_rq->prev_runnable_sum += p->ravg.prev_window;
+
+		if (p->state == TASK_WAKING)
+			double_rq_unlock(src_rq, dest_rq);
+
+		/* Is p->ravg.prev_window significant? Trigger a load
+		 * alert notifier if so. */
+		if (p->ravg.prev_window > sched_task_migrate_notify &&
+		    !cpumask_test_cpu(new_cpu,
+				      &src_rq->freq_domain_cpumask)) {
+			atomic_notifier_call_chain(
+				&load_alert_notifier_head, 0,
+				(void *)(long)task_cpu(p));
+			atomic_notifier_call_chain(
+				&load_alert_notifier_head, 0,
+				(void *)(long)new_cpu);
+		}
 	}
 #endif
 
@@ -7899,6 +7948,8 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
 		return 0;
 
 	for_each_cpu(i, policy->related_cpus) {
+		cpumask_copy(&cpu_rq(i)->freq_domain_cpumask,
+			     policy->related_cpus);
 		cpu_rq(i)->min_freq = policy->min;
 		cpu_rq(i)->max_freq = policy->max;
 		cpu_rq(i)->max_possible_freq = policy->cpuinfo.max_freq;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b04af1c436cc..6fe51274c748 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2551,7 +2551,7 @@ static inline unsigned int task_load(struct task_struct *p)
 	return p->ravg.demand;
 }
 
-static inline unsigned int max_task_load(void)
+unsigned int max_task_load(void)
 {
 	if (sched_use_pelt)
 		return LOAD_AVG_MAX;
@@ -6442,7 +6442,9 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 
 	deactivate_task(env->src_rq, p, 0);
 	p->on_rq = TASK_ON_RQ_MIGRATING;
+	double_lock_balance(env->src_rq, env->dst_rq);
 	set_task_cpu(p, env->dst_cpu);
+	double_unlock_balance(env->src_rq, env->dst_rq);
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c5d593ba30f2..a0d35bbc2626 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -650,6 +650,8 @@ struct rq {
 	 * max_possible_freq = maximum supported by hardware
 	 */
 	unsigned int cur_freq, max_freq, min_freq, max_possible_freq;
+	struct cpumask freq_domain_cpumask;
+
 	u64 cumulative_runnable_avg;
 	int efficiency; /* Differentiate cpus with different IPC capability */
 	int load_scale_factor;
@@ -961,7 +963,7 @@ static inline u64 scale_task_load(u64 load, int cpu)
 	return load;
 }
 #endif
-
+unsigned int max_task_load(void);
 static inline void inc_cumulative_runnable_avg(struct rq *rq,
 						struct task_struct *p)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 06fe2f6591e7..4560a50a4558 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -292,6 +292,15 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_SCHED_FREQ_INPUT
+	{
+		.procname	= "sched_task_migrate_notify",
+		.data		= &sysctl_sched_task_migrate_notify_pct,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_migrate_notify_proc_handler,
+	},
+#endif
 #if defined(CONFIG_SCHED_FREQ_INPUT) || defined(CONFIG_SCHED_HMP)
 	{
 		.procname	= "sched_window_stats_policy",
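
For reviewers, a minimal sketch of how a client (e.g. a cpufreq governor) might consume the new chain; it is not part of this patch, and the names load_alert_cb, load_alert_nb and load_alert_client_init are hypothetical. Per the set_task_cpu() hunk above, 'action' is always 0 and 'data' carries the affected CPU number. The chain fires from the scheduler hot path, potentially with runqueue locks held, so the callback must not sleep and should defer any real work:

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/printk.h>
#include <linux/sched.h>

/* Hypothetical built-in consumer of load_alert_notifier_head. */
static int load_alert_cb(struct notifier_block *nb, unsigned long action,
			 void *data)
{
	int cpu = (long)data;	/* CPU that gained or lost the load */

	/* Atomic context: defer real work, e.g. kick a governor worker. */
	pr_debug("load alert on cpu %d\n", cpu);

	return NOTIFY_OK;
}

static struct notifier_block load_alert_nb = {
	.notifier_call = load_alert_cb,
};

static int __init load_alert_client_init(void)
{
	return atomic_notifier_chain_register(&load_alert_notifier_head,
					      &load_alert_nb);
}
core_initcall(load_alert_client_init);

The trigger threshold itself is runtime-tunable: writing, say, 10 to /proc/sys/kernel/sched_task_migrate_notify sets sched_task_migrate_notify to 10% of max_task_load() (default 25).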