sched: Avoid frequent migration of running task
Power values for cpus can drop quite considerably when they go idle. As a result, the best choice for running a single task in a cluster can vary quite rapidly. As the task keeps hopping cpus, other cpus go idle and start being seen as more favorable targets for running a task, leading to the task migrating almost every scheduler tick!

Prevent this by keeping track of when a task started running on a cpu and allowing task migration in the tick path (migration_needed()) on account of energy efficiency reasons only if the task has run sufficiently long (as determined by the sysctl_sched_min_runtime variable).

Note that currently the sysctl_sched_min_runtime setting is considered only in the scheduler_tick()->migration_needed() path and not in the idle_balance() path. In other words, a task could still be migrated to another cpu which did an idle_balance(). This limitation should not affect the high-frequency migrations seen typically (when a single high-demand task runs on a high-performance cpu).

CRs-Fixed: 756570
Change-Id: I96413b7a81b623193c3bbcec6f3fa9dfec367d99
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
[joonwoop@codeaurora.org: fixed conflict in set_task_cpu() and __schedule().]
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
parent d1b240ccc7
commit 29a412dffa

6 changed files with 49 additions and 0 deletions
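The mechanism is small enough to show in isolation. Below is a minimal, userspace-compilable sketch of the gate this patch adds; sysctl_sched_min_runtime, run_start, and the 200 ms default come from the patch itself, while the struct task mock, the migration_allowed() name, and the main() harness are invented here for illustration (the real check lives in lower_power_cpu_available(), shown in the diff below).

	#include <stdint.h>
	#include <stdio.h>

	/* Mirrors the patch's default: 200 ms, expressed in nanoseconds. */
	static unsigned int sysctl_sched_min_runtime = 200000000;

	/* Stand-in for the kernel's per-task state; only the field this patch adds. */
	struct task {
		uint64_t run_start;	/* ns timestamp when the task began running */
	};

	/*
	 * Core of the change: a power-motivated tick-path migration is allowed
	 * only once the task has been on its cpu for sysctl_sched_min_runtime
	 * nanoseconds. 'now' plays the role of sched_clock().
	 */
	static int migration_allowed(const struct task *p, uint64_t now)
	{
		uint64_t delta = now - p->run_start;

		if (delta < sysctl_sched_min_runtime)
			return 0;	/* ran too briefly: stay put */
		return 1;
	}

	int main(void)
	{
		struct task t = { .run_start = 1000000000ULL };	/* started at t = 1 s */

		printf("%d\n", migration_allowed(&t, 1050000000ULL)); /* 50 ms later: 0 */
		printf("%d\n", migration_allowed(&t, 1300000000ULL)); /* 300 ms later: 1 */
		return 0;
	}

With the default setting and a 10 ms tick (CONFIG_HZ=100), this defers power-driven tick-path migration for roughly 20 consecutive ticks.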
@@ -1250,6 +1250,24 @@ Non-small tasks will prefer to wake up on idle CPUs if this tunable is set to 1.
 If the tunable is set to 0, non-small tasks will prefer to wake up on mostly
 idle CPUs which are not completely idle, increasing task packing behavior.
 
+** 7.24 sched_min_runtime
+
+Appears at: /proc/sys/kernel/sched_min_runtime
+
+Default value: 200000000 (200ms)
+
+This tunable helps avoid frequent migration of a task on account of
+energy-awareness. During scheduler tick, a check is made (in migration_needed())
+whether the running task needs to be migrated to a "better" cpu, which could
+either offer better performance or power. When deciding to migrate a task on
+account of power, we want to avoid "frequent" migration of the task (say every
+tick), which could add more overhead for comparatively little gain. A task's
+'run_start' attribute is set when it starts running on a cpu. This information
+is used in migration_needed() to avoid "frequent" migrations. Once a task has
+been associated with a cpu (in either running or runnable state) for more than
+'sched_min_runtime' ns, it is considered eligible for migration in the tick path
+on account of energy awareness reasons.
+
 =========================
 8. HMP SCHEDULER TRACE POINTS
 =========================
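Because the tunable is exposed through procfs, it can be changed at runtime. Here is a minimal sketch of doing so from C, assuming root privileges and a kernel carrying this patch; the 100 ms value is an arbitrary example, and the path comes from the documentation above.

	#include <stdio.h>

	/* Lower the migration gate from the 200 ms default to 100 ms (in ns). */
	int main(void)
	{
		FILE *f = fopen("/proc/sys/kernel/sched_min_runtime", "w");

		if (!f) {
			perror("fopen");	/* needs root and this patch applied */
			return 1;
		}
		fprintf(f, "%u\n", 100000000u);
		fclose(f);
		return 0;
	}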
@@ -1453,6 +1453,7 @@ struct task_struct {
 	 * of this task
 	 */
 	u32 init_load_pct;
+	u64 run_start;
 #endif
 #ifdef CONFIG_CGROUP_SCHED
 	struct task_group *sched_task_group;
@@ -48,6 +48,7 @@ extern unsigned int sysctl_sched_cpu_high_irqload;
 extern unsigned int sysctl_sched_freq_account_wait_time;
 extern unsigned int sysctl_sched_migration_fixup;
 extern unsigned int sysctl_sched_heavy_task_pct;
+extern unsigned int sysctl_sched_min_runtime;
 
 #if defined(CONFIG_SCHED_FREQ_INPUT) || defined(CONFIG_SCHED_HMP)
 extern unsigned int sysctl_sched_init_task_load_pct;
@@ -2538,6 +2538,16 @@ static void restore_orig_mark_start(struct task_struct *p, u64 mark_start)
 	p->ravg.mark_start = mark_start;
 }
 
+/*
+ * Note down when task started running on a cpu. This information will be handy
+ * to avoid "too" frequent task migrations for a running task on account of
+ * power.
+ */
+static inline void note_run_start(struct task_struct *p, u64 wallclock)
+{
+	p->run_start = wallclock;
+}
+
 #else /* CONFIG_SCHED_HMP */
 
 static inline void fixup_busy_time(struct task_struct *p, int new_cpu) { }
@@ -2569,6 +2579,8 @@ restore_orig_mark_start(struct task_struct *p, u64 mark_start)
 {
 }
 
+static inline void note_run_start(struct task_struct *p, u64 wallclock) { }
+
 #endif /* CONFIG_SCHED_HMP */
 
 #ifdef CONFIG_SMP
@@ -2834,6 +2846,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 
 	trace_sched_migrate_task(p, new_cpu, pct_task_load(p));
 
+	note_run_start(p, -1);
+
 	if (task_cpu(p) != new_cpu) {
 		if (p->sched_class->migrate_task_rq)
 			p->sched_class->migrate_task_rq(p);
@@ -4772,6 +4786,7 @@ static void __sched notrace __schedule(bool preempt)
 			prev->state = TASK_RUNNING;
 		} else {
 			deactivate_task(rq, prev, DEQUEUE_SLEEP);
+			note_run_start(prev, -1);
 			prev->on_rq = 0;
 
 			/*
@@ -4800,6 +4815,7 @@ static void __sched notrace __schedule(bool preempt)
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
 	rq->clock_skip_update = 0;
+	note_run_start(next, wallclock);
 
 	BUG_ON(task_cpu(next) != cpu_of(rq));
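Taken together, the set_task_cpu() and __schedule() hunks define the full lifecycle of run_start: it is stamped with wallclock when a task is picked to run, and invalidated with -1 on migration or when the task is dequeued to sleep. The wraparound note in the sketch below is my reading of the u64 arithmetic, not something the patch states explicitly.

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Lifecycle of p->run_start implied by the hunks above:
	 *   __schedule():   note_run_start(next, wallclock)  -- task starts running
	 *   set_task_cpu(): note_run_start(p, -1)            -- invalidated on migration
	 *   __schedule():   note_run_start(prev, -1)         -- invalidated on sleep
	 */
	int main(void)
	{
		uint64_t run_start = (uint64_t)-1;   /* as stored by note_run_start(p, -1) */
		uint64_t now = 5000000000ULL;        /* pretend sched_clock() returned 5 s */

		/* Unsigned subtraction wraps to now + 1, far above any plausible
		 * sched_min_runtime, so a task with an invalidated run_start always
		 * passes the delta check rather than being throttled. */
		printf("delta = %llu ns\n", (unsigned long long)(now - run_start));
		return 0;
	}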
@@ -2543,6 +2543,8 @@ unsigned int __read_mostly sched_init_task_load_pelt;
 unsigned int __read_mostly sched_init_task_load_windows;
 unsigned int __read_mostly sysctl_sched_init_task_load_pct = 15;
 
+unsigned int __read_mostly sysctl_sched_min_runtime = 200000000; /* 200 ms */
+
 static inline unsigned int task_load(struct task_struct *p)
 {
 	if (sched_use_pelt)
@@ -3602,6 +3604,10 @@ static int lower_power_cpu_available(struct task_struct *p, int cpu)
 	int i;
 	int lowest_power_cpu = task_cpu(p);
 	int lowest_power = power_cost(p, task_cpu(p));
+	u64 delta = sched_clock() - p->run_start;
+
+	if (delta < sysctl_sched_min_runtime)
+		return 0;
 
 	/* Is a lower-powered idle CPU available which will fit this task? */
 	for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) {
@@ -372,6 +372,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_hmp_proc_update_handler,
 	},
+	{
+		.procname	= "sched_min_runtime",
+		.data		= &sysctl_sched_min_runtime,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "sched_spill_load",
 		.data		= &sysctl_sched_spill_load_pct,
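For context on the kern_table entry above: on kernels of this vintage a module can register an equivalent entry under /proc/sys/kernel with register_sysctl_table(). The sketch below follows that API as I understand it; the demo_* names and the standalone-module framing are invented here, not part of the patch.

	#include <linux/module.h>
	#include <linux/sysctl.h>

	static unsigned int demo_min_runtime = 200000000;	/* stand-in variable */
	static struct ctl_table_header *demo_header;

	static struct ctl_table demo_table[] = {
		{
			.procname	= "demo_min_runtime",
			.data		= &demo_min_runtime,
			.maxlen		= sizeof(unsigned int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ }	/* table is zero-terminated */
	};

	static struct ctl_table demo_kern_dir[] = {
		{
			.procname	= "kernel",
			.mode		= 0555,
			.child		= demo_table,
		},
		{ }
	};

	static int __init demo_init(void)
	{
		demo_header = register_sysctl_table(demo_kern_dir);
		return demo_header ? 0 : -ENOMEM;
	}

	static void __exit demo_exit(void)
	{
		unregister_sysctl_table(demo_header);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");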