sched: Ensure proper task migration when a CPU is isolated

migrate_tasks() migrates all tasks off a CPU by using pick_next_task().
This works in the hotplug case because we force-migrate every single
task, allowing pick_next_task() to return a new task on every loop
iteration. In the isolation case, however, task migration is not
guaranteed, which causes pick_next_task() to keep returning the same
task over and over again until we terminate the loop without having
migrated all the tasks that were supposed to be migrated.

Fix the above problem by temporarily dequeuing tasks that are pinned
and marking them with TASK_ON_RQ_MIGRATING. This not only allows
pick_next_task() to properly walk the runqueue but also prevents any
migrations or changes in affinity for the dequeued tasks. Once we are
done with migrating all possible tasks, we re-enqueue all the dequeued
tasks.
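
In condensed form (a sketch only, not the literal code: locking, the
num_pinned_kthreads accounting and the destination-CPU selection from
the diff below are omitted), the fixed migrate_tasks() loop becomes:

	LIST_HEAD(tasks);

	for (;;) {
		if (rq->nr_running == 1)	/* only this thread left */
			break;

		next = pick_next_task(rq, &fake_task);

		if (!migrate_pinned_tasks && (next->flags & PF_KTHREAD) &&
		    !cpumask_intersects(&avail_cpus, &next->cpus_allowed)) {
			/*
			 * Temporarily dequeue the pinned task and mark it
			 * as migrating; pick_next_task() then moves on and
			 * nobody can migrate it or change its affinity.
			 */
			detach_one_task(next, rq, &tasks);
			continue;
		}

		/* ... migrate 'next' to a CPU in avail_cpus ... */
	}

	/* Re-enqueue everything that was temporarily dequeued. */
	attach_tasks(&tasks, rq);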

While at it, ensure consistent ordering between task de-activation and
setting the TASK_ON_RQ_MIGRATING flag across all scheduling classes.
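
Concretely, every call site now follows the same pattern (illustrated
here with generic src_rq/dst_rq/dst_cpu names rather than the per-class
variables used in the actual hunks):

	/* Dequeue: mark the task as migrating first, then dequeue it. */
	p->on_rq = TASK_ON_RQ_MIGRATING;
	deactivate_task(src_rq, p, 0);

	set_task_cpu(p, dst_cpu);

	/* Enqueue: put the task on the new runqueue first, then mark it
	 * as queued. */
	activate_task(dst_rq, p, 0);
	p->on_rq = TASK_ON_RQ_QUEUED;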

Change-Id: Id06151a8e34edab49ac76b4bffd50c132f0b792f
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
Syed Rameez Mustafa 2016-12-07 17:00:27 -08:00
parent 368fecd7df
commit 6e24ba90a2
2 changed files with 44 additions and 10 deletions

kernel/sched/core.c

@@ -5454,6 +5454,37 @@ static struct task_struct fake_task = {
 	.sched_class = &fake_sched_class,
 };
+/*
+ * Remove a task from the runqueue and pretend that it's migrating. This
+ * should prevent migrations for the detached task and disallow further
+ * changes to tsk_cpus_allowed.
+ */
+static void
+detach_one_task(struct task_struct *p, struct rq *rq, struct list_head *tasks)
+{
+	lockdep_assert_held(&rq->lock);
+	p->on_rq = TASK_ON_RQ_MIGRATING;
+	deactivate_task(rq, p, 0);
+	list_add(&p->se.group_node, tasks);
+}
+static void attach_tasks(struct list_head *tasks, struct rq *rq)
+{
+	struct task_struct *p;
+	lockdep_assert_held(&rq->lock);
+	while (!list_empty(tasks)) {
+		p = list_first_entry(tasks, struct task_struct, se.group_node);
+		list_del_init(&p->se.group_node);
+		BUG_ON(task_rq(p) != rq);
+		activate_task(rq, p, 0);
+		p->on_rq = TASK_ON_RQ_QUEUED;
+	}
+}
 /*
  * Migrate all tasks (not pinned if pinned argument say so) from the rq,
  * sleeping tasks will be migrated by try_to_wake_up()->select_task_rq().
@@ -5468,6 +5499,7 @@ static void migrate_tasks(struct rq *dead_rq, bool migrate_pinned_tasks)
 	struct task_struct *next, *stop = rq->stop;
 	int dest_cpu;
 	unsigned int num_pinned_kthreads = 1; /* this thread */
+	LIST_HEAD(tasks);
 	cpumask_t avail_cpus;
 	cpumask_andnot(&avail_cpus, cpu_online_mask, cpu_isolated_mask);
@@ -5492,12 +5524,10 @@ static void migrate_tasks(struct rq *dead_rq, bool migrate_pinned_tasks)
 	for (;;) {
 		/*
-		 * There's this thread running + pinned threads, bail when
-		 * that's the only remaining threads.
+		 * There's this thread running, bail when that's the only
+		 * remaining thread.
 		 */
-		if ((migrate_pinned_tasks && rq->nr_running == 1) ||
-		    (!migrate_pinned_tasks &&
-		     rq->nr_running <= num_pinned_kthreads))
+		if (rq->nr_running == 1)
 			break;
 		/*
@@ -5510,8 +5540,9 @@ static void migrate_tasks(struct rq *dead_rq, bool migrate_pinned_tasks)
 		if (!migrate_pinned_tasks && next->flags & PF_KTHREAD &&
 		    !cpumask_intersects(&avail_cpus, &next->cpus_allowed)) {
-			lockdep_unpin_lock(&rq->lock);
+			detach_one_task(next, rq, &tasks);
 			num_pinned_kthreads += 1;
+			lockdep_unpin_lock(&rq->lock);
 			continue;
 		}
@@ -5559,6 +5590,9 @@ static void migrate_tasks(struct rq *dead_rq, bool migrate_pinned_tasks)
 	}
 	rq->stop = stop;
+	if (num_pinned_kthreads > 1)
+		attach_tasks(&tasks, rq);
 }
 static void set_rq_online(struct rq *rq);

kernel/sched/rt.c

@@ -1970,11 +1970,11 @@ retry:
 		goto retry;
 	}
-	deactivate_task(rq, next_task, 0);
 	next_task->on_rq = TASK_ON_RQ_MIGRATING;
+	deactivate_task(rq, next_task, 0);
 	set_task_cpu(next_task, lowest_rq->cpu);
-	next_task->on_rq = TASK_ON_RQ_QUEUED;
 	activate_task(lowest_rq, next_task, 0);
+	next_task->on_rq = TASK_ON_RQ_QUEUED;
 	ret = 1;
 	resched_curr(lowest_rq);
@@ -2226,11 +2226,11 @@ static void pull_rt_task(struct rq *this_rq)
 			resched = true;
-			deactivate_task(src_rq, p, 0);
 			p->on_rq = TASK_ON_RQ_MIGRATING;
+			deactivate_task(src_rq, p, 0);
 			set_task_cpu(p, this_cpu);
-			p->on_rq = TASK_ON_RQ_QUEUED;
 			activate_task(this_rq, p, 0);
+			p->on_rq = TASK_ON_RQ_QUEUED;
 			/*
 			 * We continue with the search, just in
 			 * case there's an even higher prio task