sched: Fix race condition with active balance

There is a race condition between checking whether an active load
balance request is pending and clearing that request. A cpu can have an
active load balance request set and the corresponding work queued but
not yet executed. Before the queued work runs, cpu isolation may clear
the request flag; the load balancer or the scheduler tick can then try
to start another active load balance. This queues the same active load
balance work item twice, which shows up as reported list corruption.
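
The interleaving can be demonstrated outside the kernel with a few
stand-in types. The sketch below is not kernel code: struct rq is
reduced to the active_balance flag plus the work node that the balancer
hands to the per-cpu stopper, list_add_tail() is a minimal
re-implementation, and the isolation path is modeled as a bare store
that clears the flag. Linking the same node twice leaves it pointing at
itself, which is the corruption that gets reported.

/*
 * Standalone sketch of the double-queue race (not kernel code).  The
 * names mirror the scheduler, but the types are minimal stand-ins.
 */
#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

static void list_add_tail(struct list_head *entry, struct list_head *head)
{
        /* Linking a node that is already on a list corrupts next/prev. */
        entry->next = head;
        entry->prev = head->prev;
        head->prev->next = entry;
        head->prev = entry;
}

struct rq {
        int active_balance;             /* "request pending" flag */
        struct list_head balance_work;  /* work node handed to the stopper */
};

/* Per-cpu stopper work list (stands in for the stopper's work queue). */
static struct list_head stopper_list = { &stopper_list, &stopper_list };

/* Balancer path: queue an active balance request at most once. */
static void queue_active_balance(struct rq *busiest)
{
        if (!busiest->active_balance) {
                busiest->active_balance = 1;
                list_add_tail(&busiest->balance_work, &stopper_list);
        }
}

int main(void)
{
        struct rq busiest = { 0, { NULL, NULL } };

        queue_active_balance(&busiest); /* load balancer queues the work */
        busiest.active_balance = 0;     /* isolation clears the flag before
                                           the stopper has run */
        queue_active_balance(&busiest); /* tick queues the same node again */

        /* The node now points at itself: the stopper list is corrupted. */
        printf("work node self-linked: %s\n",
               busiest.balance_work.next == &busiest.balance_work ?
               "yes" : "no");
        return 0;
}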

Fix this by moving the clearing of the request into the stopper thread
and by ensuring that load balancing never tries to queue a request on
an already isolated cpu.
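
A second standalone sketch (again stand-in names, not kernel code)
shows why this closes the race: the per-cpu stopper runs its queued
work in FIFO order, so a clear issued from the isolation stopper work
can only execute after any active balance work queued before it, and
the balance paths refuse to queue new work for an isolated cpu.

/*
 * Sketch of the fixed flow (stand-in names, not kernel code).  The
 * per-cpu stopper is modeled as a FIFO of callbacks that run in order.
 */
#include <stdio.h>

static int active_balance;      /* rq->active_balance stand-in */
static int isolated;            /* cpu_isolated(cpu) stand-in */

typedef void (*stop_fn)(void);
static stop_fn stopper_fifo[8]; /* per-cpu stopper work, run in order */
static int nr_queued;

static void active_balance_stop(void)  /* the queued balance work */
{
        printf("push tasks away, then clear the request\n");
        active_balance = 0;
}

static void isolation_stop(void)       /* do_isolation_work_cpu_stop() */
{
        printf("isolate the cpu and clear any stale request\n");
        active_balance = 0;             /* clear_hmp_request() runs here */
        isolated = 1;
}

/* Balancer/tick path after the fix. */
static void try_queue_active_balance(void)
{
        if (!active_balance && !isolated) {
                active_balance = 1;
                stopper_fifo[nr_queued++] = active_balance_stop;
        }
}

int main(void)
{
        try_queue_active_balance();     /* request queued once */
        stopper_fifo[nr_queued++] = isolation_stop; /* isolation requested */
        try_queue_active_balance();     /* no-op: request still pending */

        for (int i = 0; i < nr_queued; i++) /* stopper thread: FIFO order */
                stopper_fifo[i]();

        try_queue_active_balance();     /* no-op: the cpu is now isolated */
        return 0;
}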

Change-Id: I5c900d2ee161fa692d66e3e66012398869715662
Signed-off-by: Olav Haugan <ohaugan@codeaurora.org>
Author: Olav Haugan
Date:   2016-11-01 17:30:36 -07:00
Commit: 411a978bce (parent: 85d7e134cc)

2 changed files with 31 additions and 10 deletions

kernel/sched/core.c

@@ -1912,7 +1912,7 @@ void scheduler_ipi(void)
         /*
          * Check if someone kicked us for doing the nohz idle load balance.
          */
-        if (unlikely(got_nohz_idle_kick())) {
+        if (unlikely(got_nohz_idle_kick()) && !cpu_isolated(cpu)) {
                 this_rq()->idle_balance = 1;
                 raise_softirq_irqoff(SCHED_SOFTIRQ);
         }
@@ -5570,7 +5570,6 @@ static void set_rq_offline(struct rq *rq);
 int do_isolation_work_cpu_stop(void *data)
 {
-        unsigned long flags;
         unsigned int cpu = smp_processor_id();
         struct rq *rq = cpu_rq(cpu);
@@ -5578,9 +5577,12 @@ int do_isolation_work_cpu_stop(void *data)
         irq_migrate_all_off_this_cpu();
         local_irq_disable();
+        sched_ttwu_pending();
         /* Update our root-domain */
-        raw_spin_lock_irqsave(&rq->lock, flags);
+        raw_spin_lock(&rq->lock);
         if (rq->rd) {
                 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -5588,13 +5590,16 @@ int do_isolation_work_cpu_stop(void *data)
         }
         migrate_tasks(rq, false);
-        raw_spin_unlock_irqrestore(&rq->lock, flags);
+        raw_spin_unlock(&rq->lock);
         /*
          * We might have been in tickless state. Clear NOHZ flags to avoid
          * us being kicked for helping out with balancing
          */
         nohz_balance_clear_nohz_mask(cpu);
+        clear_hmp_request(cpu);
         local_irq_enable();
         return 0;
 }
@@ -5703,7 +5708,6 @@ int sched_isolate_cpu(int cpu)
         migrate_sync_cpu(cpu, cpumask_first(&avail_cpus));
         stop_cpus(cpumask_of(cpu), do_isolation_work_cpu_stop, 0);
-        clear_hmp_request(cpu);
         calc_load_migrate(rq);
         update_max_interval();
         sched_update_group_capacities(cpu);

kernel/sched/fair.c

@@ -8121,8 +8121,11 @@ static struct rq *find_busiest_queue_hmp(struct lb_env *env,
         int max_nr_big = 0, nr_big;
         bool find_big = !!(env->flags & LBF_BIG_TASK_ACTIVE_BALANCE);
         int i;
+        cpumask_t cpus;
-        for_each_cpu(i, sched_group_cpus(group)) {
+        cpumask_andnot(&cpus, sched_group_cpus(group), cpu_isolated_mask);
+        for_each_cpu(i, &cpus) {
                 struct rq *rq = cpu_rq(i);
                 u64 cumulative_runnable_avg =
                         rq->hmp_stats.cumulative_runnable_avg;
@@ -8285,6 +8288,15 @@ static int need_active_balance(struct lb_env *env)
                         sd->cache_nice_tries + NEED_ACTIVE_BALANCE_THRESHOLD);
 }
+static int group_balance_cpu_not_isolated(struct sched_group *sg)
+{
+        cpumask_t cpus;
+        cpumask_and(&cpus, sched_group_cpus(sg), sched_group_mask(sg));
+        cpumask_andnot(&cpus, &cpus, cpu_isolated_mask);
+        return cpumask_first(&cpus);
+}
+
 static int should_we_balance(struct lb_env *env)
 {
         struct sched_group *sg = env->sd->groups;
@@ -8302,7 +8314,8 @@ static int should_we_balance(struct lb_env *env)
         sg_mask = sched_group_mask(sg);
         /* Try to find first idle cpu */
         for_each_cpu_and(cpu, sg_cpus, env->cpus) {
-                if (!cpumask_test_cpu(cpu, sg_mask) || !idle_cpu(cpu))
+                if (!cpumask_test_cpu(cpu, sg_mask) || !idle_cpu(cpu) ||
+                    cpu_isolated(cpu))
                         continue;
                 balance_cpu = cpu;
@@ -8310,7 +8323,7 @@ static int should_we_balance(struct lb_env *env)
         }
         if (balance_cpu == -1)
-                balance_cpu = group_balance_cpu(sg);
+                balance_cpu = group_balance_cpu_not_isolated(sg);
         /*
          * First idle cpu or the first cpu(busiest) in this sched group
@@ -8530,7 +8543,8 @@ no_move:
                  * ->active_balance_work. Once set, it's cleared
                  * only after active load balance is finished.
                  */
-                if (!busiest->active_balance) {
+                if (!busiest->active_balance &&
+                    !cpu_isolated(cpu_of(busiest))) {
                         busiest->active_balance = 1;
                         busiest->push_cpu = this_cpu;
                         active_balance = 1;
@@ -9198,12 +9212,15 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
         /* Earliest time when we have to do rebalance again */
         unsigned long next_balance = jiffies + 60*HZ;
         int update_next_balance = 0;
+        cpumask_t cpus;
         if (idle != CPU_IDLE ||
             !test_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)))
                 goto end;
-        for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
+        cpumask_andnot(&cpus, nohz.idle_cpus_mask, cpu_isolated_mask);
+        for_each_cpu(balance_cpu, &cpus) {
                 if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
                         continue;