UPSTREAM: sched/core: Fix group_entity's share update

The update of the share of a cfs_rq is done when its load_avg is updated
but before the group_entity's load_avg has been updated for the past time
slot. This generates wrong load_avg accounting which can be significant
when small tasks are involved in the scheduling.

Let's take the example of a task "a" that is dequeued from its task group A:
   root
  (cfs_rq)
    \
    (se)
     A
    (cfs_rq)
      \
      (se)
       a

Task "a" was the only task in task group A, which becomes idle when "a" is
dequeued.

We have the sequence:

- dequeue_entity a->se
    - update_load_avg(a->se)
    - dequeue_entity_load_avg(A->cfs_rq, a->se)
    - update_cfs_shares(A->cfs_rq)
        A->cfs_rq->load.weight == 0
        A->se->load.weight is updated with the new share (0 in this case)
- dequeue_entity A->se
    - update_load_avg(A->se), but its weight is now zero, so the last time
      slot (up to a tick) will be accounted with a weight of 0 instead of
      its real weight during the time slot. The last time slot will be
      accounted as an idle one whereas it was a running one.

If the running time of task a is short enough that no tick happens when it
runs, all running time of group entity A->se will be accounted as idle
time.

Instead, we should update the share of a cfs_rq (in fact the weight of its
group entity) only after having updated the load_avg of the group_entity.

update_cfs_shares() now takes the sched_entity as a parameter instead of the
cfs_rq, and the weight of the group_entity is updated only once its load_avg
has been synced with current time.

Change-Id: Id6ce3be1767b44b444ce2a77ed1ba063e57c0664
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: pjt@google.com
Link: http://lkml.kernel.org/r/1482335426-7664-1-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 89ee048f3cc796db6f26906c6bef4edf0bee70fd)
[minor cherry pick stuff]
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
This commit is contained in:
Vincent Guittot 2016-12-21 16:50:26 +01:00 committed by Andres Oportus
parent baaa21b59b
commit e62a1ca36b

View file

@ -2583,16 +2583,20 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
static inline int throttled_hierarchy(struct cfs_rq *cfs_rq); static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
static void update_cfs_shares(struct cfs_rq *cfs_rq) static void update_cfs_shares(struct sched_entity *se)
{ {
struct cfs_rq *cfs_rq = group_cfs_rq(se);
struct task_group *tg; struct task_group *tg;
struct sched_entity *se;
long shares; long shares;
tg = cfs_rq->tg; if (!cfs_rq)
se = tg->se[cpu_of(rq_of(cfs_rq))];
if (!se || throttled_hierarchy(cfs_rq))
return; return;
if (throttled_hierarchy(cfs_rq))
return;
tg = cfs_rq->tg;
#ifndef CONFIG_SMP #ifndef CONFIG_SMP
if (likely(se->load.weight == tg->shares)) if (likely(se->load.weight == tg->shares))
return; return;
@ -2601,8 +2605,9 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq)
reweight_entity(cfs_rq_of(se), se, shares); reweight_entity(cfs_rq_of(se), se, shares);
} }
#else /* CONFIG_FAIR_GROUP_SCHED */ #else /* CONFIG_FAIR_GROUP_SCHED */
static inline void update_cfs_shares(struct cfs_rq *cfs_rq) static inline void update_cfs_shares(struct sched_entity *se)
{ {
} }
#endif /* CONFIG_FAIR_GROUP_SCHED */ #endif /* CONFIG_FAIR_GROUP_SCHED */
@ -3499,8 +3504,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_curr(cfs_rq); update_curr(cfs_rq);
update_load_avg(se, UPDATE_TG); update_load_avg(se, UPDATE_TG);
enqueue_entity_load_avg(cfs_rq, se); enqueue_entity_load_avg(cfs_rq, se);
update_cfs_shares(se);
account_entity_enqueue(cfs_rq, se); account_entity_enqueue(cfs_rq, se);
update_cfs_shares(cfs_rq);
if (flags & ENQUEUE_WAKEUP) { if (flags & ENQUEUE_WAKEUP) {
place_entity(cfs_rq, se, 0); place_entity(cfs_rq, se, 0);
@ -3573,6 +3578,15 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* Update run-time statistics of the 'current'. * Update run-time statistics of the 'current'.
*/ */
update_curr(cfs_rq); update_curr(cfs_rq);
/*
* When dequeuing a sched_entity, we must:
* - Update loads to have both entity and cfs_rq synced with now.
* - Subtract its load from the cfs_rq->runnable_avg.
* - Subtract its previous weight from cfs_rq->load.weight.
* - For group entity, update its weight to reflect the new share
* of its group cfs_rq.
*/
update_load_avg(se, UPDATE_TG); update_load_avg(se, UPDATE_TG);
dequeue_entity_load_avg(cfs_rq, se); dequeue_entity_load_avg(cfs_rq, se);
@ -3609,7 +3623,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
return_cfs_rq_runtime(cfs_rq); return_cfs_rq_runtime(cfs_rq);
update_min_vruntime(cfs_rq); update_min_vruntime(cfs_rq);
update_cfs_shares(cfs_rq); update_cfs_shares(se);
} }
/* /*
@ -3781,7 +3795,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
* Ensure that runnable average is periodically updated. * Ensure that runnable average is periodically updated.
*/ */
update_load_avg(curr, UPDATE_TG); update_load_avg(curr, UPDATE_TG);
update_cfs_shares(cfs_rq); update_cfs_shares(curr);
#ifdef CONFIG_SCHED_HRTICK #ifdef CONFIG_SCHED_HRTICK
/* /*
@ -4710,7 +4724,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
break; break;
update_load_avg(se, UPDATE_TG); update_load_avg(se, UPDATE_TG);
update_cfs_shares(cfs_rq); update_cfs_shares(se);
} }
if (!se) if (!se)
@ -4812,7 +4826,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
break; break;
update_load_avg(se, UPDATE_TG); update_load_avg(se, UPDATE_TG);
update_cfs_shares(cfs_rq); update_cfs_shares(se);
} }
if (!se) if (!se)
@ -9920,8 +9934,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
/* Possible calls to update_curr() need rq clock */ /* Possible calls to update_curr() need rq clock */
update_rq_clock(rq); update_rq_clock(rq);
for_each_sched_entity(se) for_each_sched_entity(se) {
update_cfs_shares(group_cfs_rq(se)); update_load_avg(se, UPDATE_TG);
update_cfs_shares(se);
}
raw_spin_unlock_irqrestore(&rq->lock, flags); raw_spin_unlock_irqrestore(&rq->lock, flags);
} }