sched: Ensure proper task migration when a CPU is isolated

migrate_tasks() migrates all tasks off a CPU by using pick_next_task().
This works in the hotplug case because we force-migrate every single
task, allowing pick_next_task() to return a new task on every loop
iteration. In the isolation case, however, task migration is not
guaranteed, which causes pick_next_task() to keep returning the same
task over and over again until we terminate the loop without having
migrated all the tasks that were supposed to be migrated.

Fix the above problem by temporarily dequeuing tasks that are pinned
and marking them with TASK_ON_RQ_MIGRATING. This not only allows
pick_next_task() to properly walk the runqueue but also prevents any
migrations or changes in affinity for the dequeued tasks. Once we are
done with migrating all possible tasks, we re-enqueue all the dequeued
tasks.
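
In condensed form (a sketch only, not the literal code: locking, the
num_pinned_kthreads accounting and the destination-CPU selection from
the diff below are omitted), the fixed migrate_tasks() loop becomes:

	LIST_HEAD(tasks);

	for (;;) {
		if (rq->nr_running == 1)	/* only this thread left */
			break;

		next = pick_next_task(rq, &fake_task);

		if (!migrate_pinned_tasks && (next->flags & PF_KTHREAD) &&
		    !cpumask_intersects(&avail_cpus, &next->cpus_allowed)) {
			/*
			 * Temporarily dequeue the pinned task and mark it
			 * as migrating; pick_next_task() then moves on and
			 * nobody can migrate it or change its affinity.
			 */
			detach_one_task(next, rq, &tasks);
			continue;
		}

		/* ... migrate 'next' to a CPU in avail_cpus ... */
	}

	/* Re-enqueue everything that was temporarily dequeued. */
	attach_tasks(&tasks, rq);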

While at it, ensure consistent ordering between task de-activation and
setting the TASK_ON_RQ_MIGRATING flag across all scheduling classes.
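
Concretely, every call site now follows the same pattern (illustrated
here with generic src_rq/dst_rq/dst_cpu names rather than the per-class
variables used in the actual hunks):

	/* Dequeue: mark the task as migrating first, then dequeue it. */
	p->on_rq = TASK_ON_RQ_MIGRATING;
	deactivate_task(src_rq, p, 0);

	set_task_cpu(p, dst_cpu);

	/* Enqueue: put the task on the new runqueue first, then mark it
	 * as queued. */
	activate_task(dst_rq, p, 0);
	p->on_rq = TASK_ON_RQ_QUEUED;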

Change-Id: Id06151a8e34edab49ac76b4bffd50c132f0b792f
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
Syed Rameez Mustafa 2016-12-07 17:00:27 -08:00
parent 368fecd7df
commit 6e24ba90a2
2 changed files with 44 additions and 10 deletions

kernel/sched/core.c

@@ -5454,6 +5454,37 @@ static struct task_struct fake_task = {
 	.sched_class = &fake_sched_class,
 };
+/*
+ * Remove a task from the runqueue and pretend that it's migrating. This
+ * should prevent migrations for the detached task and disallow further
+ * changes to tsk_cpus_allowed.
+ */
+static void
+detach_one_task(struct task_struct *p, struct rq *rq, struct list_head *tasks)
+{
+	lockdep_assert_held(&rq->lock);
+	p->on_rq = TASK_ON_RQ_MIGRATING;
+	deactivate_task(rq, p, 0);
+	list_add(&p->se.group_node, tasks);
+}
+static void attach_tasks(struct list_head *tasks, struct rq *rq)
+{
+	struct task_struct *p;
+	lockdep_assert_held(&rq->lock);
+	while (!list_empty(tasks)) {
+		p = list_first_entry(tasks, struct task_struct, se.group_node);
+		list_del_init(&p->se.group_node);
+		BUG_ON(task_rq(p) != rq);
+		activate_task(rq, p, 0);
+		p->on_rq = TASK_ON_RQ_QUEUED;
+	}
+}
 /*
  * Migrate all tasks (not pinned if pinned argument say so) from the rq,
  * sleeping tasks will be migrated by try_to_wake_up()->select_task_rq().
@@ -5468,6 +5499,7 @@ static void migrate_tasks(struct rq *dead_rq, bool migrate_pinned_tasks)
 	struct task_struct *next, *stop = rq->stop;
 	int dest_cpu;
 	unsigned int num_pinned_kthreads = 1; /* this thread */
+	LIST_HEAD(tasks);
 	cpumask_t avail_cpus;
 	cpumask_andnot(&avail_cpus, cpu_online_mask, cpu_isolated_mask);
@@ -5492,12 +5524,10 @@ static void migrate_tasks(struct rq *dead_rq, bool migrate_pinned_tasks)
 	for (;;) {
 		/*
-		 * There's this thread running + pinned threads, bail when
-		 * that's the only remaining threads.
+		 * There's this thread running, bail when that's the only
+		 * remaining thread.
 		 */
-		if ((migrate_pinned_tasks && rq->nr_running == 1) ||
-		    (!migrate_pinned_tasks &&
-		     rq->nr_running <= num_pinned_kthreads))
+		if (rq->nr_running == 1)
 			break;
 		/*
@@ -5510,8 +5540,9 @@ static void migrate_tasks(struct rq *dead_rq, bool migrate_pinned_tasks)
 		if (!migrate_pinned_tasks && next->flags & PF_KTHREAD &&
 		    !cpumask_intersects(&avail_cpus, &next->cpus_allowed)) {
-			lockdep_unpin_lock(&rq->lock);
+			detach_one_task(next, rq, &tasks);
 			num_pinned_kthreads += 1;
+			lockdep_unpin_lock(&rq->lock);
 			continue;
 		}
@@ -5559,6 +5590,9 @@ static void migrate_tasks(struct rq *dead_rq, bool migrate_pinned_tasks)
 	}
 	rq->stop = stop;
+	if (num_pinned_kthreads > 1)
+		attach_tasks(&tasks, rq);
 }
 static void set_rq_online(struct rq *rq);

kernel/sched/rt.c

@@ -1970,11 +1970,11 @@ retry:
 		goto retry;
 	}
-	deactivate_task(rq, next_task, 0);
 	next_task->on_rq = TASK_ON_RQ_MIGRATING;
+	deactivate_task(rq, next_task, 0);
 	set_task_cpu(next_task, lowest_rq->cpu);
-	next_task->on_rq = TASK_ON_RQ_QUEUED;
 	activate_task(lowest_rq, next_task, 0);
+	next_task->on_rq = TASK_ON_RQ_QUEUED;
 	ret = 1;
 	resched_curr(lowest_rq);
@@ -2226,11 +2226,11 @@ static void pull_rt_task(struct rq *this_rq)
 			resched = true;
-			deactivate_task(src_rq, p, 0);
 			p->on_rq = TASK_ON_RQ_MIGRATING;
+			deactivate_task(src_rq, p, 0);
 			set_task_cpu(p, this_cpu);
-			p->on_rq = TASK_ON_RQ_QUEUED;
 			activate_task(this_rq, p, 0);
+			p->on_rq = TASK_ON_RQ_QUEUED;
 			/*
 			 * We continue with the search, just in
 			 * case there's an even higher prio task