UPSTREAM: sched/fair: Propagate load during synchronous attach/detach · e875665411 - evie/android_kernel_oneplus_msm8998 - Gay Catgirls Forgejo: gay catgirls having sex

evie/android_kernel_oneplus_msm8998

UPSTREAM: sched/fair: Propagate load during synchronous attach/detach

When a task moves from/to a cfs_rq, we set a flag which is then used to
propagate the change at the parent level (sched_entity and cfs_rq) during
the next update. If the cfs_rq is throttled, the flag will stay pending
until the cfs_rq is unthrottled.

For propagating the utilization, we copy the utilization of group cfs_rq to
the sched_entity.

For propagating the load, we have to take into account the load of the
whole task group in order to evaluate the load of the sched_entity.
Similarly to what was done before the rewrite of PELT, we add a correction
factor in case the task group's load is greater than its share, so that it
contributes the same load as a task of equal weight.

Change-Id: Id34a9888484716961c9027299c0b4d82881a39d1
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Morten.Rasmussen@arm.com
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bsegall@google.com
Cc: kernellwp@gmail.com
Cc: pjt@google.com
Cc: yuyang.du@intel.com
Link: http://lkml.kernel.org/r/1478598827-32372-5-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 09a43ace1f986b003c118fdf6ddf1fd685692d49)
Signed-off-by: Chris Redpath <chris.redpath@arm.com>

This commit is contained in:

Vincent Guittot

2016-11-08 10:53:45 +01:00

• committed by

Andres Oportus

parent 8370e07d82

commit e875665411

2 changed files with 240 additions and 1 deletions

									
										240

kernel/sched/fair.c
									
										View file
										
				@ -2828,6 +2828,26 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,

					return decayed;

				}

				/*

				 * add_positive - add a signed value to a variable, clamping at zero.

				 * @_ptr: pointer to the variable to update

				 * @_val: amount to add; may be negative

				 *

				 * Signed add and clamp on underflow: when @_val is negative and the sum

				 * ends up larger than the value that was read, 0 is stored instead.

				 *

				 * Explicitly do a load-store to ensure the intermediate value never hits

				 * memory. This allows lockless observations without ever seeing the negative

				 * values.

				 *

				 * Both arguments are evaluated exactly once (they are copied into locals).

				 */

				#define add_positive(_ptr, _val) do {                           \

					typeof(_ptr) ptr = (_ptr);                              \

					typeof(_val) val = (_val);                              \

					typeof(*ptr) res, var = READ_ONCE(*ptr);                \

												\

					res = var + val;                                        \

												\

					if (val < 0 && res > var)                               \

						res = 0;                                        \

												\

					WRITE_ONCE(*ptr, res);                                  \

				} while (0)

				#ifdef CONFIG_FAIR_GROUP_SCHED

				/**

				 * update_tg_load_avg - update the tg's load avg

				@ -2849,14 +2869,196 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)

				{

					long delta = cfs_rq->avg.load_avg - cfs_rq->tg_load_avg_contrib;

					/*

					 * No need to update load_avg for root_task_group as it is not used.

					 */

					if (cfs_rq->tg == &root_task_group)

						return;

					if (force || abs(delta) > cfs_rq->tg_load_avg_contrib / 64) {

						atomic_long_add(delta, &cfs_rq->tg->load_avg);

						cfs_rq->tg_load_avg_contrib = cfs_rq->avg.load_avg;

					}

				}

				/*

				 * Called within set_task_rq() right before setting a task's cpu. The

				 * caller only guarantees p->pi_lock is held; no other assumptions,

				 * including the state of rq->lock, should be made.

				 *

				 * Runs __update_load_avg() on @se's average with @prev's last update

				 * time as "now", then stamps @se with @next's last update time. Does

				 * nothing when the ATTACH_AGE_LOAD feature is off, when @se has no

				 * last_update_time yet, or when @prev is NULL.

				 */

				void set_task_rq_fair(struct sched_entity *se,

						      struct cfs_rq *prev, struct cfs_rq *next)

				{

					if (!sched_feat(ATTACH_AGE_LOAD))

						return;

					/*

					 * We are supposed to update the task to "current" time, so that it is

					 * up to date and ready to go to the new CPU/cfs_rq. But we have

					 * difficulty in getting what the current time is, so simply throw away

					 * the out-of-date time. This will result in the wakee task being less

					 * decayed, but giving the wakee more load does not sound bad.

					 */

					if (se->avg.last_update_time && prev) {

						u64 p_last_update_time;

						u64 n_last_update_time;

				#ifndef CONFIG_64BIT

						u64 p_last_update_time_copy;

						u64 n_last_update_time_copy;

						/*

						 * Without CONFIG_64BIT, re-read until each last_update_time

						 * matches its load_last_update_time_copy (presumably because

						 * a u64 cannot be loaded in one access there - confirm).

						 */

						do {

							p_last_update_time_copy = prev->load_last_update_time_copy;

							n_last_update_time_copy = next->load_last_update_time_copy;

							smp_rmb();

							p_last_update_time = prev->avg.last_update_time;

							n_last_update_time = next->avg.last_update_time;

						} while (p_last_update_time != p_last_update_time_copy ||

							 n_last_update_time != n_last_update_time_copy);

				#else

						p_last_update_time = prev->avg.last_update_time;

						n_last_update_time = next->avg.last_update_time;

				#endif

						/* Bring @se up to @prev's timeline, then move it to @next's */

						__update_load_avg(p_last_update_time, cpu_of(rq_of(prev)),

								  &se->avg, 0, 0, NULL);

						se->avg.last_update_time = n_last_update_time;

					}

				}

				/*
				 * Sync a group entity's utilization with that of the cfs_rq it owns
				 * (group_cfs_rq(se)) and apply the difference to @cfs_rq.
				 */
				static inline void
				update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se)
				{
					struct cfs_rq *child_rq = group_cfs_rq(se);
					long diff = child_rq->avg.util_avg - se->avg.util_avg;

					if (diff) {
						/* The entity takes over the utilization of its group cfs_rq */
						se->avg.util_avg = child_rq->avg.util_avg;
						se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;

						/* Fold the difference into @cfs_rq and rebuild its sum */
						add_positive(&cfs_rq->avg.util_avg, diff);
						cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
					}
				}

				/*

				 * Take into account change of load of a child task group.

				 *

				 * @cfs_rq: cfs_rq whose load_avg (and, if @se is on_rq, runnable_load_avg)

				 *          receives the change

				 * @se:     group sched_entity; its new load is derived from the load of

				 *          group_cfs_rq(se), scaled into tg->shares when the task group's

				 *          total load exceeds its shares

				 */

				static inline void

				update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se)

				{

					struct cfs_rq *gcfs_rq = group_cfs_rq(se);

					long delta, load = gcfs_rq->avg.load_avg;

					/*

					 * If the load of group cfs_rq is null, the load of the

					 * sched_entity will also be null so we can skip the formula

					 */

					if (load) {

						long tg_load;

						/* Get tg's load and ensure tg_load > 0 */

						tg_load = atomic_long_read(&gcfs_rq->tg->load_avg) + 1;

						/*

						 * Ensure tg_load >= load and that it reflects the current load:

						 * swap gcfs_rq's last published contribution for its current load.

						 */

						tg_load -= gcfs_rq->tg_load_avg_contrib;

						tg_load += load;

						/*

						 * We need to compute a correction term in the case that the

						 * task group is consuming more CPU than a task of equal

						 * weight. A task with a weight equal to tg->shares will have

						 * a load less than or equal to scale_load_down(tg->shares).

						 * Similarly, the sched_entities that represent the task group

						 * at parent level can't have a load higher than

						 * scale_load_down(tg->shares). And the sum of the sched_entities'

						 * load must be <= scale_load_down(tg->shares).

						 */

						if (tg_load > scale_load_down(gcfs_rq->tg->shares)) {

							/* Scale gcfs_rq's load into tg's shares */

							load *= scale_load_down(gcfs_rq->tg->shares);

							load /= tg_load;

						}

					}

					delta = load - se->avg.load_avg;

					/* Nothing to update */

					if (!delta)

						return;

					/* Set new sched_entity's load */

					se->avg.load_avg = load;

					se->avg.load_sum = se->avg.load_avg * LOAD_AVG_MAX;

					/* Update parent cfs_rq load */

					add_positive(&cfs_rq->avg.load_avg, delta);

					cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * LOAD_AVG_MAX;

					/*

					 * If the sched_entity is already enqueued, we also have to update the

					 * runnable load avg.

					 */

					if (se->on_rq) {

						/* Update parent cfs_rq runnable_load_avg */

						add_positive(&cfs_rq->runnable_load_avg, delta);

						cfs_rq->runnable_load_sum = cfs_rq->runnable_load_avg * LOAD_AVG_MAX;

					}

				}

				/* Flag @cfs_rq as having a load/utilization change still to be propagated. */
				static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq)
				{
					cfs_rq->propagate_avg = 1UL;
				}

				/*
				 * Consume the pending-propagation flag of the cfs_rq owned by @se.
				 * Returns 1 if the flag was set (it is cleared), 0 otherwise.
				 */
				static inline int test_and_clear_tg_cfs_propagate(struct sched_entity *se)
				{
					struct cfs_rq *gcfs_rq = group_cfs_rq(se);
					int pending = gcfs_rq->propagate_avg != 0;

					if (pending)
						gcfs_rq->propagate_avg = 0;

					return pending;
				}

				/*
				 * Propagate a pending change from the cfs_rq owned by a group entity to
				 * the entity itself and to the cfs_rq it belongs to.
				 *
				 * Returns 1 when a change was propagated, 0 for task entities or when
				 * nothing was pending.
				 */
				static inline int propagate_entity_load_avg(struct sched_entity *se)
				{
					struct cfs_rq *parent_rq;

					/* Task entities and groups without a pending flag have nothing to do */
					if (entity_is_task(se) || !test_and_clear_tg_cfs_propagate(se))
						return 0;

					parent_rq = cfs_rq_of(se);

					/* cfs_rq_of(se) is about to change as well: flag it in turn */
					set_tg_cfs_propagate(parent_rq);

					update_tg_cfs_util(parent_rq, se);
					update_tg_cfs_load(parent_rq, se);

					return 1;
				}

				#else /* CONFIG_FAIR_GROUP_SCHED */

				/*

				 * Stubs for the other branch of the CONFIG_FAIR_GROUP_SCHED conditional:

				 * the update/flag helpers do nothing and propagate_entity_load_avg()

				 * always returns 0.

				 */

				static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}

				static inline int propagate_entity_load_avg(struct sched_entity *se)

				{

					return 0;

				}

				static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {}

				#endif /* CONFIG_FAIR_GROUP_SCHED */

				static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)

				@ -2968,6 +3170,7 @@ static inline void update_load_avg(struct sched_entity *se, int flags)

					struct cfs_rq *cfs_rq = cfs_rq_of(se);

					u64 now = cfs_rq_clock_task(cfs_rq);

					int cpu = cpu_of(rq_of(cfs_rq));

					int decayed;

					/*

					 * Track task load average for carrying it to new CPU after migrated, and

				@ -2979,7 +3182,10 @@ static inline void update_load_avg(struct sched_entity *se, int flags)

							  cfs_rq->curr == se, NULL);

					}

					if (update_cfs_rq_load_avg(now, cfs_rq, true) && (flags & UPDATE_TG))

					decayed  = update_cfs_rq_load_avg(now, cfs_rq, true);

					decayed |= propagate_entity_load_avg(se);

					if (decayed && (flags & UPDATE_TG))

						update_tg_load_avg(cfs_rq, 0);

					if (entity_is_task(se))

				@ -3001,6 +3207,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s

					cfs_rq->avg.load_sum += se->avg.load_sum;

					cfs_rq->avg.util_avg += se->avg.util_avg;

					cfs_rq->avg.util_sum += se->avg.util_sum;

					set_tg_cfs_propagate(cfs_rq);

					cfs_rq_util_change(cfs_rq);

				}

				@ -3020,6 +3227,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s

					sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);

					sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);

					sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);

					set_tg_cfs_propagate(cfs_rq);

					cfs_rq_util_change(cfs_rq);

				}

				@ -9408,6 +9616,31 @@ static inline bool vruntime_normalized(struct task_struct *p)

					return false;

				}

				#ifdef CONFIG_FAIR_GROUP_SCHED
				/*
				 * Make a change to @se visible further up the task-group tree: starting
				 * at se->parent, call update_load_avg() with UPDATE_TG on each entity
				 * visited by for_each_sched_entity(), stopping at the first one whose
				 * cfs_rq is throttled.
				 */
				static void propagate_entity_cfs_rq(struct sched_entity *se)
				{
					/* Begin one level above @se */
					se = se->parent;

					for_each_sched_entity(se) {
						if (cfs_rq_throttled(cfs_rq_of(se)))
							break;

						update_load_avg(se, UPDATE_TG);
					}
				}
				#else
				/* No-op when CONFIG_FAIR_GROUP_SCHED is not set */
				static void propagate_entity_cfs_rq(struct sched_entity *se) { }
				#endif

				static void detach_entity_cfs_rq(struct sched_entity *se)

				{

					struct cfs_rq *cfs_rq = cfs_rq_of(se);

				@ -9416,6 +9649,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se)

					update_load_avg(se, 0);

					detach_entity_load_avg(cfs_rq, se);

					update_tg_load_avg(cfs_rq, false);

					propagate_entity_cfs_rq(se);

				}

				static void attach_entity_cfs_rq(struct sched_entity *se)

				@ -9434,6 +9668,7 @@ static void attach_entity_cfs_rq(struct sched_entity *se)

					update_load_avg(se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);

					attach_entity_load_avg(cfs_rq, se);

					update_tg_load_avg(cfs_rq, false);

					propagate_entity_cfs_rq(se);

				}

				static void detach_task_cfs_rq(struct task_struct *p)

				@ -9512,6 +9747,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)

					cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;

				#endif

				#ifdef CONFIG_SMP

				#ifdef CONFIG_FAIR_GROUP_SCHED

					cfs_rq->propagate_avg = 0;

				#endif

					atomic_long_set(&cfs_rq->removed_load_avg, 0);

					atomic_long_set(&cfs_rq->removed_util_avg, 0);

				#endif

									
										1

kernel/sched/sched.h
									
										View file
										
				@ -376,6 +376,7 @@ struct cfs_rq {

					unsigned long runnable_load_avg;

				#ifdef CONFIG_FAIR_GROUP_SCHED

					unsigned long tg_load_avg_contrib;

					unsigned long propagate_avg;

				#endif

					atomic_long_t removed_load_avg, removed_util_avg;

				#ifndef CONFIG_64BIT