Merge "sched/hmp: Enhance co-location and scheduler boost features"

Authored by Linux Build Service Account on 2016-11-18 01:54:54 -08:00; committed by Gerrit - the friendly Code Review server
commit 54e5bae2ed
11 changed files with 800 additions and 385 deletions


@ -53,6 +53,8 @@ extern unsigned int sysctl_sched_spill_nr_run;
extern unsigned int sysctl_sched_spill_load_pct;
extern unsigned int sysctl_sched_upmigrate_pct;
extern unsigned int sysctl_sched_downmigrate_pct;
extern unsigned int sysctl_sched_group_upmigrate_pct;
extern unsigned int sysctl_sched_group_downmigrate_pct;
extern unsigned int sysctl_early_detection_duration;
extern unsigned int sysctl_sched_boost;
extern unsigned int sysctl_sched_small_wakee_task_load_pct;


@ -133,6 +133,7 @@ TRACE_EVENT(sched_task_load,
__field( u32, flags )
__field( int, best_cpu )
__field( u64, latency )
__field( int, grp_id )
),
TP_fast_assign(
@ -148,12 +149,13 @@ TRACE_EVENT(sched_task_load,
__entry->latency = p->state == TASK_WAKING ?
sched_ktime_clock() -
p->ravg.mark_start : 0;
__entry->grp_id = p->grp ? p->grp->id : 0;
),
TP_printk("%d (%s): demand=%u boost=%d reason=%d sync=%d need_idle=%d flags=%x best_cpu=%d latency=%llu",
TP_printk("%d (%s): demand=%u boost=%d reason=%d sync=%d need_idle=%d flags=%x grp=%d best_cpu=%d latency=%llu",
__entry->pid, __entry->comm, __entry->demand,
__entry->boost, __entry->reason, __entry->sync,
__entry->need_idle, __entry->flags,
__entry->need_idle, __entry->flags, __entry->grp_id,
__entry->best_cpu, __entry->latency)
);
@ -164,9 +166,12 @@ TRACE_EVENT(sched_set_preferred_cluster,
TP_ARGS(grp, total_demand),
TP_STRUCT__entry(
__field( int, id )
__field( u64, demand )
__field( int, cluster_first_cpu )
__field( int, id )
__field( u64, demand )
__field( int, cluster_first_cpu )
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field(unsigned int, task_demand )
),
TP_fast_assign(
@ -245,19 +250,19 @@ DEFINE_EVENT(sched_cpu_load, sched_cpu_load_cgroup,
TRACE_EVENT(sched_set_boost,
TP_PROTO(int ref_count),
TP_PROTO(int type),
TP_ARGS(ref_count),
TP_ARGS(type),
TP_STRUCT__entry(
__field(unsigned int, ref_count )
__field(int, type )
),
TP_fast_assign(
__entry->ref_count = ref_count;
__entry->type = type;
),
TP_printk("ref_count=%d", __entry->ref_count)
TP_printk("type %d", __entry->type)
);
#if defined(CREATE_TRACE_POINTS) && defined(CONFIG_SCHED_HMP)


@ -15,7 +15,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
obj-y += wait.o completion.o idle.o sched_avg.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
obj-$(CONFIG_SCHED_HMP) += hmp.o
obj-$(CONFIG_SCHED_HMP) += hmp.o boost.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o

kernel/sched/boost.c (new file, 226 lines)

@ -0,0 +1,226 @@
/* Copyright (c) 2012-2016, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "sched.h"
#include <linux/of.h>
#include <linux/sched/core_ctl.h>
#include <trace/events/sched.h>
/*
* Scheduler boost is a mechanism to temporarily place tasks on CPUs
* with higher capacity than those where a task would normally have
* ended up given its load characteristics. Any entity enabling
* boost is responsible for disabling it as well.
*/
unsigned int sysctl_sched_boost;
static enum sched_boost_policy boost_policy;
static enum sched_boost_policy boost_policy_dt = SCHED_BOOST_NONE;
static DEFINE_MUTEX(boost_mutex);
static unsigned int freq_aggr_threshold_backup;
static inline void boost_kick(int cpu)
{
struct rq *rq = cpu_rq(cpu);
if (!test_and_set_bit(BOOST_KICK, &rq->hmp_flags))
smp_send_reschedule(cpu);
}
static void boost_kick_cpus(void)
{
int i;
struct cpumask kick_mask;
if (boost_policy != SCHED_BOOST_ON_BIG)
return;
cpumask_andnot(&kick_mask, cpu_online_mask, cpu_isolated_mask);
for_each_cpu(i, &kick_mask) {
if (cpu_capacity(i) != max_capacity)
boost_kick(i);
}
}
int got_boost_kick(void)
{
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
return test_bit(BOOST_KICK, &rq->hmp_flags);
}
void clear_boost_kick(int cpu)
{
struct rq *rq = cpu_rq(cpu);
clear_bit(BOOST_KICK, &rq->hmp_flags);
}
/*
* Scheduler boost type and boost policy might at first seem unrelated;
* however, there exists a connection between them that will allow us
* to use them interchangeably during placement decisions. We'll explain
* the connection here in one possible way so that the implications are
* clear when looking at placement policies.
*
* When policy = SCHED_BOOST_NONE, type is either none or RESTRAINED
* When policy = SCHED_BOOST_ON_ALL or SCHED_BOOST_ON_BIG, type can
* neither be none nor RESTRAINED.
*/
static void set_boost_policy(int type)
{
if (type == SCHED_BOOST_NONE || type == RESTRAINED_BOOST) {
boost_policy = SCHED_BOOST_NONE;
return;
}
if (boost_policy_dt) {
boost_policy = boost_policy_dt;
return;
}
if (min_possible_efficiency != max_possible_efficiency) {
boost_policy = SCHED_BOOST_ON_BIG;
return;
}
boost_policy = SCHED_BOOST_ON_ALL;
}
enum sched_boost_policy sched_boost_policy(void)
{
return boost_policy;
}
static bool verify_boost_params(int old_val, int new_val)
{
/*
* Boost can only be turned on or off. There is no possibility of
* switching from one boost type to another or to set the same
* kind of boost several times.
*/
return !(!!old_val == !!new_val);
}
static void _sched_set_boost(int old_val, int type)
{
switch (type) {
case NO_BOOST:
if (old_val == FULL_THROTTLE_BOOST)
core_ctl_set_boost(false);
else if (old_val == CONSERVATIVE_BOOST)
restore_cgroup_boost_settings();
else
update_freq_aggregate_threshold(
freq_aggr_threshold_backup);
break;
case FULL_THROTTLE_BOOST:
core_ctl_set_boost(true);
boost_kick_cpus();
break;
case CONSERVATIVE_BOOST:
update_cgroup_boost_settings();
boost_kick_cpus();
break;
case RESTRAINED_BOOST:
freq_aggr_threshold_backup =
update_freq_aggregate_threshold(1);
break;
default:
WARN_ON(1);
return;
}
set_boost_policy(type);
sysctl_sched_boost = type;
trace_sched_set_boost(type);
}
void sched_boost_parse_dt(void)
{
struct device_node *sn;
const char *boost_policy;
if (!sched_enable_hmp)
return;
sn = of_find_node_by_path("/sched-hmp");
if (!sn)
return;
if (!of_property_read_string(sn, "boost-policy", &boost_policy)) {
if (!strcmp(boost_policy, "boost-on-big"))
boost_policy_dt = SCHED_BOOST_ON_BIG;
else if (!strcmp(boost_policy, "boost-on-all"))
boost_policy_dt = SCHED_BOOST_ON_ALL;
}
}
int sched_set_boost(int type)
{
int ret = 0;
if (!sched_enable_hmp)
return -EINVAL;
mutex_lock(&boost_mutex);
if (verify_boost_params(sysctl_sched_boost, type))
_sched_set_boost(sysctl_sched_boost, type);
else
ret = -EINVAL;
mutex_unlock(&boost_mutex);
return ret;
}
int sched_boost_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
unsigned int *data = (unsigned int *)table->data;
unsigned int old_val;
if (!sched_enable_hmp)
return -EINVAL;
mutex_lock(&boost_mutex);
old_val = *data;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
goto done;
if (verify_boost_params(old_val, *data)) {
_sched_set_boost(old_val, *data);
} else {
*data = old_val;
ret = -EINVAL;
}
done:
mutex_unlock(&boost_mutex);
return ret;
}
int sched_boost(void)
{
return sysctl_sched_boost;
}
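
The new boost.c above replaces the old reference-counted boost with four discrete modes and only allows switching between "off" and one boost type at a time. Below is a minimal standalone sketch (not kernel code; the helper names and the big.LITTLE assumption are mine) of the transition rule enforced by verify_boost_params() and the type-to-policy mapping in set_boost_policy():

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the boost types introduced by the patch (values as in sched.h). */
enum { NO_BOOST, FULL_THROTTLE_BOOST, CONSERVATIVE_BOOST, RESTRAINED_BOOST };
enum policy { BOOST_NONE, BOOST_ON_BIG, BOOST_ON_ALL };

/*
 * Same rule as verify_boost_params(): only off->on or on->off is allowed,
 * so switching directly from one boost type to another is rejected.
 */
static bool transition_ok(int old_val, int new_val)
{
        return !!old_val != !!new_val;
}

/*
 * Same shape as set_boost_policy(): NO_BOOST and RESTRAINED_BOOST imply no
 * placement bias; for the other types a big.LITTLE system is assumed here,
 * so the policy becomes BOOST_ON_BIG (the kernel also honours a DT override).
 */
static enum policy policy_for(int type)
{
        if (type == NO_BOOST || type == RESTRAINED_BOOST)
                return BOOST_NONE;
        return BOOST_ON_BIG;
}

int main(void)
{
        printf("NO_BOOST -> CONSERVATIVE_BOOST allowed: %d\n",
               transition_ok(NO_BOOST, CONSERVATIVE_BOOST));          /* 1 */
        printf("FULL_THROTTLE -> RESTRAINED allowed:    %d\n",
               transition_ok(FULL_THROTTLE_BOOST, RESTRAINED_BOOST)); /* 0 */
        printf("policy for RESTRAINED_BOOST:            %d\n",
               policy_for(RESTRAINED_BOOST));                  /* BOOST_NONE */
        return 0;
}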


@ -7846,7 +7846,6 @@ void __init sched_init_smp(void)
hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
update_cluster_topology();
init_sched_hmp_boost_policy();
init_hrtick();
@ -7895,7 +7894,7 @@ void __init sched_init(void)
BUG_ON(num_possible_cpus() > BITS_PER_LONG);
sched_hmp_parse_dt();
sched_boost_parse_dt();
init_clusters();
#ifdef CONFIG_FAIR_GROUP_SCHED


@ -2596,6 +2596,7 @@ static u32 __compute_runnable_contrib(u64 n)
#define SBC_FLAG_COLOC_CLUSTER 0x10000
#define SBC_FLAG_WAKER_CLUSTER 0x20000
#define SBC_FLAG_BACKUP_CLUSTER 0x40000
#define SBC_FLAG_BOOST_CLUSTER 0x80000
struct cpu_select_env {
struct task_struct *p;
@ -2605,7 +2606,7 @@ struct cpu_select_env {
u8 need_waker_cluster:1;
u8 sync:1;
u8 ignore_prev_cpu:1;
enum sched_boost_type boost_type;
enum sched_boost_policy boost_policy;
int prev_cpu;
DECLARE_BITMAP(candidate_list, NR_CPUS);
DECLARE_BITMAP(backup_list, NR_CPUS);
@ -2705,10 +2706,38 @@ select_least_power_cluster(struct cpu_select_env *env)
struct sched_cluster *cluster;
if (env->rtg) {
env->task_load = scale_load_to_cpu(task_load(env->p),
cluster_first_cpu(env->rtg->preferred_cluster));
env->sbc_best_cluster_flag |= SBC_FLAG_COLOC_CLUSTER;
return env->rtg->preferred_cluster;
int cpu = cluster_first_cpu(env->rtg->preferred_cluster);
env->task_load = scale_load_to_cpu(task_load(env->p), cpu);
if (task_load_will_fit(env->p, env->task_load,
cpu, env->boost_policy)) {
env->sbc_best_cluster_flag |= SBC_FLAG_COLOC_CLUSTER;
if (env->boost_policy == SCHED_BOOST_NONE)
return env->rtg->preferred_cluster;
for_each_sched_cluster(cluster) {
if (cluster != env->rtg->preferred_cluster) {
__set_bit(cluster->id,
env->backup_list);
__clear_bit(cluster->id,
env->candidate_list);
}
}
return env->rtg->preferred_cluster;
}
/*
* Since the task load does not fit on the preferred
* cluster anymore, pretend that the task does not
* have any preferred cluster. This allows the waking
* task to get the appropriate CPU it needs as per the
* non co-location placement policy without having to
* wait until the preferred cluster is updated.
*/
env->rtg = NULL;
}
for_each_sched_cluster(cluster) {
@ -2718,7 +2747,7 @@ select_least_power_cluster(struct cpu_select_env *env)
env->task_load = scale_load_to_cpu(task_load(env->p),
cpu);
if (task_load_will_fit(env->p, env->task_load, cpu,
env->boost_type))
env->boost_policy))
return cluster;
__set_bit(cluster->id, env->backup_list);
@ -2961,7 +2990,14 @@ static void find_best_cpu_in_cluster(struct sched_cluster *c,
update_spare_capacity(stats, env, i, c->capacity,
env->cpu_load);
if (env->boost_type == SCHED_BOOST_ON_ALL ||
/*
* need_idle takes precedence over sched boost, but when both
* are set, the idlest CPU among all clusters is selected
* when boost_policy = BOOST_ON_ALL, whereas the idlest CPU in
* the big cluster is selected when boost_policy = BOOST_ON_BIG.
*/
if ((!env->need_idle &&
env->boost_policy != SCHED_BOOST_NONE) ||
env->need_waker_cluster ||
sched_cpu_high_irqload(i) ||
spill_threshold_crossed(env, cpu_rq(i)))
@ -3005,7 +3041,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
struct task_struct *task = env->p;
struct sched_cluster *cluster;
if (env->boost_type != SCHED_BOOST_NONE || env->reason ||
if (env->boost_policy != SCHED_BOOST_NONE || env->reason ||
!task->ravg.mark_start ||
env->need_idle || !sched_short_sleep_task_threshold)
return false;
@ -3034,7 +3070,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
cluster = cpu_rq(prev_cpu)->cluster;
if (!task_load_will_fit(task, env->task_load, prev_cpu,
sched_boost_type())) {
sched_boost_policy())) {
__set_bit(cluster->id, env->backup_list);
__clear_bit(cluster->id, env->candidate_list);
@ -3056,7 +3092,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
static inline bool
wake_to_waker_cluster(struct cpu_select_env *env)
{
return env->boost_type == SCHED_BOOST_NONE &&
return env->boost_policy == SCHED_BOOST_NONE &&
!env->need_idle && !env->reason && env->sync &&
task_load(current) > sched_big_waker_task_load &&
task_load(env->p) < sched_small_wakee_task_load;
@ -3098,7 +3134,6 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
.reason = reason,
.need_idle = wake_to_idle(p),
.need_waker_cluster = 0,
.boost_type = sched_boost_type(),
.sync = sync,
.prev_cpu = target,
.ignore_prev_cpu = 0,
@ -3107,6 +3142,9 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
.sbc_best_cluster_flag = 0,
};
env.boost_policy = task_sched_boost(p) ?
sched_boost_policy() : SCHED_BOOST_NONE;
bitmap_copy(env.candidate_list, all_cluster_ids, NR_CPUS);
bitmap_zero(env.backup_list, NR_CPUS);
@ -3178,12 +3216,23 @@ retry:
sbc_flag |= env.sbc_best_flag;
target = stats.best_cpu;
} else {
if (env.rtg) {
if (env.rtg && env.boost_policy == SCHED_BOOST_NONE) {
env.rtg = NULL;
goto retry;
}
find_backup_cluster(&env, &stats);
/*
* With boost_policy == SCHED_BOOST_ON_BIG, we reach here with
* backup_list = little cluster, candidate_list = none and
* stats->best_capacity_cpu points to the best spare capacity
* CPU among the CPUs in the big cluster.
*/
if (env.boost_policy == SCHED_BOOST_ON_BIG &&
stats.best_capacity_cpu >= 0)
sbc_flag |= SBC_FLAG_BOOST_CLUSTER;
else
find_backup_cluster(&env, &stats);
if (stats.best_capacity_cpu >= 0) {
target = stats.best_capacity_cpu;
sbc_flag |= SBC_FLAG_BEST_CAP_CPU;
@ -3193,8 +3242,8 @@ retry:
out:
sbc_flag |= env.sbc_best_cluster_flag;
rcu_read_unlock();
trace_sched_task_load(p, sched_boost(), env.reason, env.sync,
env.need_idle, sbc_flag, target);
trace_sched_task_load(p, sched_boost_policy() && task_sched_boost(p),
env.reason, env.sync, env.need_idle, sbc_flag, target);
return target;
}
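
select_best_cpu() now resolves a per-task effective boost policy: the global policy only applies if the task's schedtune group has sched boost enabled. A standalone sketch of that resolution (the function name here is illustrative, not kernel API):

#include <stdbool.h>
#include <stdio.h>

enum sched_boost_policy { SCHED_BOOST_NONE, SCHED_BOOST_ON_BIG, SCHED_BOOST_ON_ALL };

/*
 * Mirrors: env.boost_policy = task_sched_boost(p) ?
 *                             sched_boost_policy() : SCHED_BOOST_NONE;
 * A task whose schedtune group has sched boost disabled is placed as if no
 * boost were active, even while a global boost is in effect.
 */
static enum sched_boost_policy
effective_boost_policy(bool task_boost_enabled, enum sched_boost_policy global)
{
        return task_boost_enabled ? global : SCHED_BOOST_NONE;
}

int main(void)
{
        printf("%d\n", effective_boost_policy(true, SCHED_BOOST_ON_BIG));  /* 1 */
        printf("%d\n", effective_boost_policy(false, SCHED_BOOST_ON_BIG)); /* 0 */
        return 0;
}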
@ -3402,11 +3451,9 @@ static inline int migration_needed(struct task_struct *p, int cpu)
if (task_will_be_throttled(p))
return 0;
if (sched_boost_type() == SCHED_BOOST_ON_BIG) {
if (cpu_capacity(cpu) != max_capacity)
return UP_MIGRATION;
return 0;
}
if (sched_boost_policy() == SCHED_BOOST_ON_BIG &&
cpu_capacity(cpu) != max_capacity && task_sched_boost(p))
return UP_MIGRATION;
if (sched_cpu_high_irqload(cpu))
return IRQLOAD_MIGRATION;
@ -3420,7 +3467,7 @@ static inline int migration_needed(struct task_struct *p, int cpu)
return DOWN_MIGRATION;
}
if (!grp && !task_will_fit(p, cpu)) {
if (!task_will_fit(p, cpu)) {
rcu_read_unlock();
return UP_MIGRATION;
}
@ -6648,10 +6695,7 @@ enum fbq_type { regular, remote, all };
#define LBF_NEED_BREAK 0x02
#define LBF_DST_PINNED 0x04
#define LBF_SOME_PINNED 0x08
#define LBF_SCHED_BOOST_ACTIVE_BALANCE 0x40
#define LBF_BIG_TASK_ACTIVE_BALANCE 0x80
#define LBF_HMP_ACTIVE_BALANCE (LBF_SCHED_BOOST_ACTIVE_BALANCE | \
LBF_BIG_TASK_ACTIVE_BALANCE)
#define LBF_IGNORE_BIG_TASKS 0x100
#define LBF_IGNORE_PREFERRED_CLUSTER_TASKS 0x200
#define LBF_MOVED_RELATED_THREAD_GROUP_TASK 0x400
@ -6682,6 +6726,7 @@ struct lb_env {
enum fbq_type fbq_type;
struct list_head tasks;
enum sched_boost_policy boost_policy;
};
/*
@ -6826,9 +6871,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
/* Record that we found at least one task that could run on dst_cpu */
env->flags &= ~LBF_ALL_PINNED;
if (cpu_capacity(env->dst_cpu) > cpu_capacity(env->src_cpu) &&
nr_big_tasks(env->src_rq) && !is_big_task(p))
return 0;
if (cpu_capacity(env->dst_cpu) > cpu_capacity(env->src_cpu)) {
if (nr_big_tasks(env->src_rq) && !is_big_task(p))
return 0;
if (env->boost_policy == SCHED_BOOST_ON_BIG &&
!task_sched_boost(p))
return 0;
}
twf = task_will_fit(p, env->dst_cpu);
@ -6951,8 +7001,7 @@ static int detach_tasks(struct lb_env *env)
if (env->imbalance <= 0)
return 0;
if (cpu_capacity(env->dst_cpu) < cpu_capacity(env->src_cpu) &&
!sched_boost())
if (cpu_capacity(env->dst_cpu) < cpu_capacity(env->src_cpu))
env->flags |= LBF_IGNORE_BIG_TASKS;
else if (!same_cluster(env->dst_cpu, env->src_cpu))
env->flags |= LBF_IGNORE_PREFERRED_CLUSTER_TASKS;
@ -7255,8 +7304,10 @@ bail_inter_cluster_balance(struct lb_env *env, struct sd_lb_stats *sds)
int local_capacity, busiest_capacity;
int local_pwr_cost, busiest_pwr_cost;
int nr_cpus;
int boost = sched_boost();
if (!sysctl_sched_restrict_cluster_spill || sched_boost())
if (!sysctl_sched_restrict_cluster_spill ||
boost == FULL_THROTTLE_BOOST || boost == CONSERVATIVE_BOOST)
return 0;
local_cpu = group_first_cpu(sds->local);
@ -7628,11 +7679,6 @@ static bool update_sd_pick_busiest_active_balance(struct lb_env *env,
{
if (env->idle != CPU_NOT_IDLE &&
cpu_capacity(env->dst_cpu) > group_rq_capacity(sg)) {
if (sched_boost() && !sds->busiest && sgs->sum_nr_running) {
env->flags |= LBF_SCHED_BOOST_ACTIVE_BALANCE;
return true;
}
if (sgs->sum_nr_big_tasks >
sds->busiest_stat.sum_nr_big_tasks) {
env->flags |= LBF_BIG_TASK_ACTIVE_BALANCE;
@ -8045,7 +8091,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
if (!sds.busiest || busiest->sum_nr_running == 0)
goto out_balanced;
if (env->flags & LBF_HMP_ACTIVE_BALANCE)
if (env->flags & LBF_BIG_TASK_ACTIVE_BALANCE)
goto force_balance;
if (bail_inter_cluster_balance(env, &sds))
@ -8257,7 +8303,7 @@ static int need_active_balance(struct lb_env *env)
{
struct sched_domain *sd = env->sd;
if (env->flags & LBF_HMP_ACTIVE_BALANCE)
if (env->flags & LBF_BIG_TASK_ACTIVE_BALANCE)
return 1;
if (env->idle == CPU_NEWLY_IDLE) {
@ -8348,20 +8394,21 @@ static int load_balance(int this_cpu, struct rq *this_rq,
struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask);
struct lb_env env = {
.sd = sd,
.dst_cpu = this_cpu,
.dst_rq = this_rq,
.dst_grpmask = sched_group_cpus(sd->groups),
.idle = idle,
.loop_break = sched_nr_migrate_break,
.cpus = cpus,
.fbq_type = all,
.tasks = LIST_HEAD_INIT(env.tasks),
.imbalance = 0,
.flags = 0,
.loop = 0,
.sd = sd,
.dst_cpu = this_cpu,
.dst_rq = this_rq,
.dst_grpmask = sched_group_cpus(sd->groups),
.idle = idle,
.loop_break = sched_nr_migrate_break,
.cpus = cpus,
.fbq_type = all,
.tasks = LIST_HEAD_INIT(env.tasks),
.imbalance = 0,
.flags = 0,
.loop = 0,
.busiest_nr_running = 0,
.busiest_grp_capacity = 0,
.boost_policy = sched_boost_policy(),
};
/*
@ -8510,7 +8557,7 @@ more_balance:
no_move:
if (!ld_moved) {
if (!(env.flags & LBF_HMP_ACTIVE_BALANCE))
if (!(env.flags & LBF_BIG_TASK_ACTIVE_BALANCE))
schedstat_inc(sd, lb_failed[idle]);
/*
@ -8520,7 +8567,7 @@ no_move:
* excessive cache_hot migrations and active balances.
*/
if (idle != CPU_NEWLY_IDLE &&
!(env.flags & LBF_HMP_ACTIVE_BALANCE))
!(env.flags & LBF_BIG_TASK_ACTIVE_BALANCE))
sd->nr_balance_failed++;
if (need_active_balance(&env)) {
@ -8797,6 +8844,7 @@ static int active_load_balance_cpu_stop(void *data)
.busiest_grp_capacity = 0,
.flags = 0,
.loop = 0,
.boost_policy = sched_boost_policy(),
};
bool moved = false;
@ -9272,7 +9320,8 @@ static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type)
if (rq->nr_running < 2)
return 0;
if (!sysctl_sched_restrict_cluster_spill || sched_boost())
if (!sysctl_sched_restrict_cluster_spill ||
sched_boost_policy() == SCHED_BOOST_ON_ALL)
return 1;
if (cpu_max_power_cost(cpu) == max_power_cost)


@ -17,8 +17,6 @@
#include <linux/cpufreq.h>
#include <linux/list_sort.h>
#include <linux/syscore_ops.h>
#include <linux/of.h>
#include <linux/sched/core_ctl.h>
#include "sched.h"
@ -231,52 +229,6 @@ fail:
return ret;
}
/*
* It is possible that CPUs of the same micro architecture can have slight
* differences in efficiency due to other factors like cache size. The
* BOOST_ON_BIG policy may not be optimal for such systems. The required
* boost policy can be specified via device tree to handle this.
*/
static int __read_mostly sched_boost_policy = SCHED_BOOST_NONE;
/*
* This should be called after clusters are populated and
* the respective efficiency values are initialized.
*/
void init_sched_hmp_boost_policy(void)
{
/*
* Initialize the boost type here if it is not passed from
* device tree.
*/
if (sched_boost_policy == SCHED_BOOST_NONE) {
if (max_possible_efficiency != min_possible_efficiency)
sched_boost_policy = SCHED_BOOST_ON_BIG;
else
sched_boost_policy = SCHED_BOOST_ON_ALL;
}
}
void sched_hmp_parse_dt(void)
{
struct device_node *sn;
const char *boost_policy;
if (!sched_enable_hmp)
return;
sn = of_find_node_by_path("/sched-hmp");
if (!sn)
return;
if (!of_property_read_string(sn, "boost-policy", &boost_policy)) {
if (!strcmp(boost_policy, "boost-on-big"))
sched_boost_policy = SCHED_BOOST_ON_BIG;
else if (!strcmp(boost_policy, "boost-on-all"))
sched_boost_policy = SCHED_BOOST_ON_ALL;
}
}
unsigned int max_possible_efficiency = 1;
unsigned int min_possible_efficiency = UINT_MAX;
@ -680,29 +632,6 @@ int __init set_sched_enable_hmp(char *str)
early_param("sched_enable_hmp", set_sched_enable_hmp);
int got_boost_kick(void)
{
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
return test_bit(BOOST_KICK, &rq->hmp_flags);
}
inline void clear_boost_kick(int cpu)
{
struct rq *rq = cpu_rq(cpu);
clear_bit(BOOST_KICK, &rq->hmp_flags);
}
inline void boost_kick(int cpu)
{
struct rq *rq = cpu_rq(cpu);
if (!test_and_set_bit(BOOST_KICK, &rq->hmp_flags))
smp_send_reschedule(cpu);
}
/* Clear any HMP scheduler related requests pending from or on cpu */
void clear_hmp_request(int cpu)
{
@ -840,6 +769,9 @@ min_max_possible_capacity = 1024; /* min(rq->max_possible_capacity) */
/* Window size (in ns) */
__read_mostly unsigned int sched_ravg_window = MIN_SCHED_RAVG_WINDOW;
/* Maximum allowed threshold before freq aggregation must be enabled */
#define MAX_FREQ_AGGR_THRESH 1000
/* Temporarily disable window-stats activity on all cpus */
unsigned int __read_mostly sched_disable_window_stats;
@ -919,8 +851,8 @@ static const unsigned int top_tasks_bitmap_size =
* C1 busy time = 5 + 5 + 6 = 16ms
*
*/
static __read_mostly unsigned int sched_freq_aggregate;
__read_mostly unsigned int sysctl_sched_freq_aggregate;
static __read_mostly unsigned int sched_freq_aggregate = 1;
__read_mostly unsigned int sysctl_sched_freq_aggregate = 1;
unsigned int __read_mostly sysctl_sched_freq_aggregate_threshold_pct;
static unsigned int __read_mostly sched_freq_aggregate_threshold;
@ -937,14 +869,6 @@ unsigned int max_task_load(void)
/* Use this knob to turn on or off HMP-aware task placement logic */
unsigned int __read_mostly sched_enable_hmp;
/*
* Scheduler boost is a mechanism to temporarily place tasks on CPUs
* with higher capacity than those where a task would have normally
* ended up with their load characteristics. Any entity enabling
* boost is responsible for disabling it as well.
*/
unsigned int sysctl_sched_boost;
/* A cpu can no longer accommodate more tasks if:
*
* rq->nr_running > sysctl_sched_spill_nr_run ||
@ -995,6 +919,21 @@ unsigned int __read_mostly sysctl_sched_upmigrate_pct = 80;
unsigned int __read_mostly sched_downmigrate;
unsigned int __read_mostly sysctl_sched_downmigrate_pct = 60;
/*
* Task groups whose aggregate demand on a cpu is more than
* sched_group_upmigrate need to be up-migrated if possible.
*/
unsigned int __read_mostly sched_group_upmigrate;
unsigned int __read_mostly sysctl_sched_group_upmigrate_pct = 100;
/*
* Task groups, once up-migrated, will need to drop their aggregate
* demand to less than sched_group_downmigrate before they are "down"
* migrated.
*/
unsigned int __read_mostly sched_group_downmigrate;
unsigned int __read_mostly sysctl_sched_group_downmigrate_pct = 95;
/*
* The load scale factor of a CPU gets boosted when its max frequency
* is restricted due to which the tasks are migrating to higher capacity
@ -1017,33 +956,46 @@ sched_long_cpu_selection_threshold = 100 * NSEC_PER_MSEC;
unsigned int __read_mostly sysctl_sched_restrict_cluster_spill;
void update_up_down_migrate(void)
static void
_update_up_down_migrate(unsigned int *up_migrate, unsigned int *down_migrate)
{
unsigned int up_migrate = pct_to_real(sysctl_sched_upmigrate_pct);
unsigned int down_migrate = pct_to_real(sysctl_sched_downmigrate_pct);
unsigned int delta;
if (up_down_migrate_scale_factor == 1024)
goto done;
return;
delta = up_migrate - down_migrate;
delta = *up_migrate - *down_migrate;
up_migrate /= NSEC_PER_USEC;
up_migrate *= up_down_migrate_scale_factor;
up_migrate >>= 10;
up_migrate *= NSEC_PER_USEC;
*up_migrate /= NSEC_PER_USEC;
*up_migrate *= up_down_migrate_scale_factor;
*up_migrate >>= 10;
*up_migrate *= NSEC_PER_USEC;
up_migrate = min(up_migrate, sched_ravg_window);
*up_migrate = min(*up_migrate, sched_ravg_window);
down_migrate /= NSEC_PER_USEC;
down_migrate *= up_down_migrate_scale_factor;
down_migrate >>= 10;
down_migrate *= NSEC_PER_USEC;
*down_migrate /= NSEC_PER_USEC;
*down_migrate *= up_down_migrate_scale_factor;
*down_migrate >>= 10;
*down_migrate *= NSEC_PER_USEC;
down_migrate = min(down_migrate, up_migrate - delta);
done:
*down_migrate = min(*down_migrate, *up_migrate - delta);
}
static void update_up_down_migrate(void)
{
unsigned int up_migrate = pct_to_real(sysctl_sched_upmigrate_pct);
unsigned int down_migrate = pct_to_real(sysctl_sched_downmigrate_pct);
_update_up_down_migrate(&up_migrate, &down_migrate);
sched_upmigrate = up_migrate;
sched_downmigrate = down_migrate;
up_migrate = pct_to_real(sysctl_sched_group_upmigrate_pct);
down_migrate = pct_to_real(sysctl_sched_group_downmigrate_pct);
_update_up_down_migrate(&up_migrate, &down_migrate);
sched_group_upmigrate = up_migrate;
sched_group_downmigrate = down_migrate;
}
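
update_up_down_migrate() now runs the same scaling over both the per-task and the new per-group thresholds. A standalone sketch of the scaling arithmetic (the tunable values below are illustrative, not the patch defaults):

#include <stdio.h>

#define NSEC_PER_USEC 1000U

/* Illustrative stand-ins for the kernel tunables (not the real defaults). */
static unsigned int sched_ravg_window = 20000000;        /* 20 ms window, in ns */
static unsigned int up_down_migrate_scale_factor = 1280; /* >1024 scales up */

static unsigned int min_u(unsigned int a, unsigned int b)
{
        return a < b ? a : b;
}

/*
 * Mirrors _update_up_down_migrate(): scale both thresholds by factor/1024,
 * clamp "up" to the window and keep "down" at least the original delta
 * below "up".
 */
static void scale_thresholds(unsigned int *up, unsigned int *down)
{
        unsigned int delta;

        if (up_down_migrate_scale_factor == 1024)
                return;

        delta = *up - *down;

        *up /= NSEC_PER_USEC;
        *up *= up_down_migrate_scale_factor;
        *up >>= 10;
        *up *= NSEC_PER_USEC;
        *up = min_u(*up, sched_ravg_window);

        *down /= NSEC_PER_USEC;
        *down *= up_down_migrate_scale_factor;
        *down >>= 10;
        *down *= NSEC_PER_USEC;
        *down = min_u(*down, *up - delta);
}

int main(void)
{
        unsigned int up = 16000000, down = 12000000; /* e.g. 80% / 60% of the window */

        scale_thresholds(&up, &down);
        printf("scaled: up=%u ns, down=%u ns\n", up, down);
        return 0;
}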
void set_hmp_defaults(void)
@ -1134,82 +1086,6 @@ u64 cpu_load_sync(int cpu, int sync)
return scale_load_to_cpu(cpu_cravg_sync(cpu, sync), cpu);
}
static int boost_refcount;
static DEFINE_SPINLOCK(boost_lock);
static DEFINE_MUTEX(boost_mutex);
static void boost_kick_cpus(void)
{
int i;
for_each_online_cpu(i) {
if (cpu_capacity(i) != max_capacity)
boost_kick(i);
}
}
int sched_boost(void)
{
return boost_refcount > 0;
}
int sched_set_boost(int enable)
{
unsigned long flags;
int ret = 0;
int old_refcount;
if (!sched_enable_hmp)
return -EINVAL;
spin_lock_irqsave(&boost_lock, flags);
old_refcount = boost_refcount;
if (enable == 1) {
boost_refcount++;
} else if (!enable) {
if (boost_refcount >= 1)
boost_refcount--;
else
ret = -EINVAL;
} else {
ret = -EINVAL;
}
if (!old_refcount && boost_refcount)
boost_kick_cpus();
if (boost_refcount <= 1)
core_ctl_set_boost(boost_refcount == 1);
trace_sched_set_boost(boost_refcount);
spin_unlock_irqrestore(&boost_lock, flags);
return ret;
}
int sched_boost_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
mutex_lock(&boost_mutex);
if (!write)
sysctl_sched_boost = sched_boost();
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret || !write)
goto done;
ret = (sysctl_sched_boost <= 1) ?
sched_set_boost(sysctl_sched_boost) : -EINVAL;
done:
mutex_unlock(&boost_mutex);
return ret;
}
/*
* Task will fit on a cpu if its bandwidth consumption on that cpu
* will be less than sched_upmigrate. A big task that was previously
@ -1219,60 +1095,63 @@ done:
* tasks with load close to the upmigrate threshold
*/
int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu,
enum sched_boost_type boost_type)
enum sched_boost_policy boost_policy)
{
int upmigrate;
int upmigrate = sched_upmigrate;
if (cpu_capacity(cpu) == max_capacity)
return 1;
if (boost_type != SCHED_BOOST_ON_BIG) {
if (cpu_capacity(task_cpu(p)) > cpu_capacity(cpu))
upmigrate = sched_downmigrate;
if (boost_policy != SCHED_BOOST_ON_BIG) {
if (task_nice(p) > SCHED_UPMIGRATE_MIN_NICE ||
upmigrate_discouraged(p))
return 1;
upmigrate = sched_upmigrate;
if (cpu_capacity(task_cpu(p)) > cpu_capacity(cpu))
upmigrate = sched_downmigrate;
if (task_load < upmigrate)
return 1;
} else {
if (task_sched_boost(p) || task_load >= upmigrate)
return 0;
return 1;
}
return 0;
}
enum sched_boost_type sched_boost_type(void)
{
if (sched_boost())
return sched_boost_policy;
return SCHED_BOOST_NONE;
}
int task_will_fit(struct task_struct *p, int cpu)
{
u64 tload = scale_load_to_cpu(task_load(p), cpu);
return task_load_will_fit(p, tload, cpu, sched_boost_type());
return task_load_will_fit(p, tload, cpu, sched_boost_policy());
}
int group_will_fit(struct sched_cluster *cluster,
struct related_thread_group *grp, u64 demand)
static int
group_will_fit(struct sched_cluster *cluster, struct related_thread_group *grp,
u64 demand, bool group_boost)
{
int cpu = cluster_first_cpu(cluster);
int prev_capacity = 0;
unsigned int threshold = sched_upmigrate;
unsigned int threshold = sched_group_upmigrate;
u64 load;
if (cluster->capacity == max_capacity)
return 1;
if (group_boost)
return 0;
if (!demand)
return 1;
if (grp->preferred_cluster)
prev_capacity = grp->preferred_cluster->capacity;
if (cluster->capacity < prev_capacity)
threshold = sched_downmigrate;
threshold = sched_group_downmigrate;
load = scale_load_to_cpu(demand, cpu);
if (load < threshold)
@ -1495,6 +1374,23 @@ void post_big_task_count_change(const struct cpumask *cpus)
DEFINE_MUTEX(policy_mutex);
unsigned int update_freq_aggregate_threshold(unsigned int threshold)
{
unsigned int old_threshold;
mutex_lock(&policy_mutex);
old_threshold = sysctl_sched_freq_aggregate_threshold_pct;
sysctl_sched_freq_aggregate_threshold_pct = threshold;
sched_freq_aggregate_threshold =
pct_to_real(sysctl_sched_freq_aggregate_threshold_pct);
mutex_unlock(&policy_mutex);
return old_threshold;
}
static inline int invalid_value_freq_input(unsigned int *data)
{
if (data == &sysctl_sched_freq_aggregate)
@ -1578,7 +1474,9 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
if (write && (old_val == *data))
goto done;
if (sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct) {
if (sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct ||
sysctl_sched_group_downmigrate_pct >
sysctl_sched_group_upmigrate_pct) {
*data = old_val;
ret = -EINVAL;
goto done;
@ -3110,37 +3008,9 @@ static void reset_all_task_stats(void)
{
struct task_struct *g, *p;
read_lock(&tasklist_lock);
do_each_thread(g, p) {
raw_spin_lock_irq(&p->pi_lock);
reset_task_stats(p);
raw_spin_unlock_irq(&p->pi_lock);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
}
static void disable_window_stats(void)
{
unsigned long flags;
int i;
local_irq_save(flags);
for_each_possible_cpu(i)
raw_spin_lock(&cpu_rq(i)->lock);
sched_disable_window_stats = 1;
for_each_possible_cpu(i)
raw_spin_unlock(&cpu_rq(i)->lock);
local_irq_restore(flags);
}
/* Called with all cpu's rq->lock held */
static void enable_window_stats(void)
{
sched_disable_window_stats = 0;
}
enum reset_reason_code {
@ -3166,16 +3036,21 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
unsigned int old = 0, new = 0;
struct related_thread_group *grp;
local_irq_save(flags);
read_lock(&tasklist_lock);
read_lock(&related_thread_group_lock);
disable_window_stats();
/* Taking all runqueue locks prevents race with sched_exit(). */
for_each_possible_cpu(cpu)
raw_spin_lock(&cpu_rq(cpu)->lock);
sched_disable_window_stats = 1;
reset_all_task_stats();
local_irq_save(flags);
for_each_possible_cpu(cpu)
raw_spin_lock(&cpu_rq(cpu)->lock);
read_unlock(&tasklist_lock);
list_for_each_entry(grp, &related_thread_groups, list) {
int j;
@ -3196,7 +3071,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
sched_load_granule = sched_ravg_window / NUM_LOAD_INDICES;
}
enable_window_stats();
sched_disable_window_stats = 0;
for_each_possible_cpu(cpu) {
struct rq *rq = cpu_rq(cpu);
@ -3239,10 +3114,10 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
for_each_possible_cpu(cpu)
raw_spin_unlock(&cpu_rq(cpu)->lock);
local_irq_restore(flags);
read_unlock(&related_thread_group_lock);
local_irq_restore(flags);
trace_sched_reset_all_window_stats(window_start, window_size,
sched_ktime_clock() - start_ts, reason, old, new);
}
@ -3824,13 +3699,13 @@ static void check_for_up_down_migrate_update(const struct cpumask *cpus)
}
/* Return cluster which can offer required capacity for group */
static struct sched_cluster *
best_cluster(struct related_thread_group *grp, u64 total_demand)
static struct sched_cluster *best_cluster(struct related_thread_group *grp,
u64 total_demand, bool group_boost)
{
struct sched_cluster *cluster = NULL;
for_each_sched_cluster(cluster) {
if (group_will_fit(cluster, grp, total_demand))
if (group_will_fit(cluster, grp, total_demand, group_boost))
return cluster;
}
@ -3841,6 +3716,9 @@ static void _set_preferred_cluster(struct related_thread_group *grp)
{
struct task_struct *p;
u64 combined_demand = 0;
bool boost_on_big = sched_boost_policy() == SCHED_BOOST_ON_BIG;
bool group_boost = false;
u64 wallclock;
if (!sysctl_sched_enable_colocation) {
grp->last_update = sched_ktime_clock();
@ -3848,31 +3726,43 @@ static void _set_preferred_cluster(struct related_thread_group *grp)
return;
}
if (list_empty(&grp->tasks))
return;
wallclock = sched_ktime_clock();
/*
* Wakeup of two or more related tasks could race with each other and
* result in multiple calls to _set_preferred_cluster being issued
* at the same time. Avoid the overhead of rechecking the preferred
* cluster in such cases.
*/
if (sched_ktime_clock() - grp->last_update < sched_ravg_window / 10)
if (wallclock - grp->last_update < sched_ravg_window / 10)
return;
list_for_each_entry(p, &grp->tasks, grp_list)
list_for_each_entry(p, &grp->tasks, grp_list) {
if (boost_on_big && task_sched_boost(p)) {
group_boost = true;
break;
}
if (p->ravg.mark_start < wallclock -
(sched_ravg_window * sched_ravg_hist_size))
continue;
combined_demand += p->ravg.demand;
grp->preferred_cluster = best_cluster(grp, combined_demand);
}
grp->preferred_cluster = best_cluster(grp,
combined_demand, group_boost);
grp->last_update = sched_ktime_clock();
trace_sched_set_preferred_cluster(grp, combined_demand);
}
void set_preferred_cluster(struct related_thread_group *grp)
{
/*
* Prevent possible deadlock with update_children(). Not updating
* the preferred cluster once is not a big deal.
*/
if (!raw_spin_trylock(&grp->lock))
return;
raw_spin_lock(&grp->lock);
_set_preferred_cluster(grp);
raw_spin_unlock(&grp->lock);
}
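
_set_preferred_cluster() sums the recent demand of the group members, and best_cluster() picks the first cluster where group_will_fit() succeeds, using sched_group_downmigrate as the threshold when the candidate cluster is smaller than the current preference. A standalone sketch of that hysteresis (threshold and capacity values are illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative thresholds in scaled-load units (not the patch defaults). */
static unsigned int sched_group_upmigrate = 100;
static unsigned int sched_group_downmigrate = 95;

/*
 * Mirrors the group_will_fit() decision: when the candidate cluster has
 * less capacity than the group's current preferred cluster, the lower
 * downmigrate threshold applies, so a group that has moved up does not
 * bounce back on a small dip in demand.
 */
static bool group_fits(unsigned int load, unsigned int cluster_capacity,
                       unsigned int preferred_capacity)
{
        unsigned int threshold = sched_group_upmigrate;

        if (cluster_capacity < preferred_capacity)
                threshold = sched_group_downmigrate;

        return load < threshold;
}

int main(void)
{
        /* Demand 110 no longer fits the little cluster: up-migrate. */
        printf("%d\n", group_fits(110, 512, 512));  /* 0 */
        /* After moving up, demand drops to 97: still >= 95, stay on big. */
        printf("%d\n", group_fits(97, 512, 1024));  /* 0 */
        /* Only below the downmigrate threshold does little fit again. */
        printf("%d\n", group_fits(90, 512, 1024));  /* 1 */
        return 0;
}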
@ -3880,6 +3770,8 @@ void set_preferred_cluster(struct related_thread_group *grp)
#define ADD_TASK 0
#define REM_TASK 1
#define DEFAULT_CGROUP_COLOC_ID 1
static inline void free_group_cputime(struct related_thread_group *grp)
{
free_percpu(grp->cpu_time);
@ -4116,64 +4008,19 @@ static void free_related_thread_group(struct rcu_head *rcu)
kfree(grp);
}
/*
* The thread group for a task can change while we are here. However,
* add_new_task_to_grp() will take care of any tasks that we miss here.
* When a parent exits, and a child thread is simultaneously exiting,
* sched_set_group_id() will synchronize those operations.
*/
static void update_children(struct task_struct *leader,
struct related_thread_group *grp, int event)
{
struct task_struct *child;
struct rq *rq;
unsigned long flags;
if (!thread_group_leader(leader))
return;
if (event == ADD_TASK && !sysctl_sched_enable_thread_grouping)
return;
if (thread_group_empty(leader))
return;
child = next_thread(leader);
do {
rq = task_rq_lock(child, &flags);
if (event == REM_TASK && child->grp && grp == child->grp) {
transfer_busy_time(rq, grp, child, event);
list_del_init(&child->grp_list);
rcu_assign_pointer(child->grp, NULL);
} else if (event == ADD_TASK && !child->grp) {
transfer_busy_time(rq, grp, child, event);
list_add(&child->grp_list, &grp->tasks);
rcu_assign_pointer(child->grp, grp);
}
task_rq_unlock(rq, child, &flags);
} while_each_thread(leader, child);
}
static void remove_task_from_group(struct task_struct *p)
{
struct related_thread_group *grp = p->grp;
struct rq *rq;
int empty_group = 1;
unsigned long flags;
raw_spin_lock(&grp->lock);
rq = task_rq_lock(p, &flags);
rq = __task_rq_lock(p);
transfer_busy_time(rq, p->grp, p, REM_TASK);
list_del_init(&p->grp_list);
rcu_assign_pointer(p->grp, NULL);
task_rq_unlock(rq, p, &flags);
update_children(p, grp, REM_TASK);
__task_rq_unlock(rq);
if (!list_empty(&grp->tasks)) {
empty_group = 0;
@ -4182,7 +4029,8 @@ static void remove_task_from_group(struct task_struct *p)
raw_spin_unlock(&grp->lock);
if (empty_group) {
/* Reserved groups cannot be destroyed */
if (empty_group && grp->id != DEFAULT_CGROUP_COLOC_ID) {
list_del(&grp->list);
call_rcu(&grp->rcu, free_related_thread_group);
}
@ -4192,7 +4040,6 @@ static int
add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
{
struct rq *rq;
unsigned long flags;
raw_spin_lock(&grp->lock);
@ -4200,13 +4047,11 @@ add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
* Change p->grp under rq->lock. Will prevent races with read-side
* reference of p->grp in various hot-paths
*/
rq = task_rq_lock(p, &flags);
rq = __task_rq_lock(p);
transfer_busy_time(rq, grp, p, ADD_TASK);
list_add(&p->grp_list, &grp->tasks);
rcu_assign_pointer(p->grp, grp);
task_rq_unlock(rq, p, &flags);
update_children(p, grp, ADD_TASK);
__task_rq_unlock(rq);
_set_preferred_cluster(grp);
@ -4219,23 +4064,33 @@ void add_new_task_to_grp(struct task_struct *new)
{
unsigned long flags;
struct related_thread_group *grp;
struct task_struct *parent;
struct task_struct *leader = new->group_leader;
unsigned int leader_grp_id = sched_get_group_id(leader);
if (!sysctl_sched_enable_thread_grouping)
if (!sysctl_sched_enable_thread_grouping &&
leader_grp_id != DEFAULT_CGROUP_COLOC_ID)
return;
if (thread_group_leader(new))
return;
parent = new->group_leader;
if (leader_grp_id == DEFAULT_CGROUP_COLOC_ID) {
if (!same_schedtune(new, leader))
return;
}
write_lock_irqsave(&related_thread_group_lock, flags);
rcu_read_lock();
grp = task_related_thread_group(parent);
grp = task_related_thread_group(leader);
rcu_read_unlock();
/* Its possible that update_children() already added us to the group */
/*
* It's possible that someone already added the new task to the
* group. A leader's thread group is updated prior to calling
* this function. It's also possible that the leader has exited
* the group. In either case, there is nothing else to do.
*/
if (!grp || new->grp) {
write_unlock_irqrestore(&related_thread_group_lock, flags);
return;
@ -4250,14 +4105,55 @@ void add_new_task_to_grp(struct task_struct *new)
write_unlock_irqrestore(&related_thread_group_lock, flags);
}
#if defined(CONFIG_SCHED_TUNE) && defined(CONFIG_CGROUP_SCHEDTUNE)
/*
* We create a default colocation group at boot. There is no need to
* synchronize tasks between cgroups at creation time because the
* correct cgroup hierarchy is not available at boot. Therefore cgroup
* colocation is turned off by default even though the colocation group
* itself has been allocated. Furthermore, this colocation group cannot
* be destroyed once it has been created. All of this has been done as
* part of runtime optimizations.
*
* The job of synchronizing tasks to the colocation group is done when
* the colocation flag in the cgroup is turned on.
*/
static int __init create_default_coloc_group(void)
{
struct related_thread_group *grp = NULL;
unsigned long flags;
grp = alloc_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
if (IS_ERR(grp)) {
WARN_ON(1);
return -ENOMEM;
}
write_lock_irqsave(&related_thread_group_lock, flags);
list_add(&grp->list, &related_thread_groups);
write_unlock_irqrestore(&related_thread_group_lock, flags);
update_freq_aggregate_threshold(MAX_FREQ_AGGR_THRESH);
return 0;
}
late_initcall(create_default_coloc_group);
int sync_cgroup_colocation(struct task_struct *p, bool insert)
{
unsigned int grp_id = insert ? DEFAULT_CGROUP_COLOC_ID : 0;
return sched_set_group_id(p, grp_id);
}
#endif
int sched_set_group_id(struct task_struct *p, unsigned int group_id)
{
int rc = 0;
unsigned long flags;
struct related_thread_group *grp = NULL;
/* Prevents tasks from exiting while we are managing groups. */
write_lock_irqsave(&related_thread_group_lock, flags);
raw_spin_lock_irqsave(&p->pi_lock, flags);
write_lock(&related_thread_group_lock);
/* Switching from one group to another directly is not permitted */
if ((current != p && p->flags & PF_EXITING) ||
@ -4272,6 +4168,12 @@ int sched_set_group_id(struct task_struct *p, unsigned int group_id)
grp = lookup_related_thread_group(group_id);
if (!grp) {
/* This is a reserved id */
if (group_id == DEFAULT_CGROUP_COLOC_ID) {
rc = -EINVAL;
goto done;
}
grp = alloc_related_thread_group(group_id);
if (IS_ERR(grp)) {
rc = -ENOMEM;
@ -4281,10 +4183,10 @@ int sched_set_group_id(struct task_struct *p, unsigned int group_id)
list_add(&grp->list, &related_thread_groups);
}
BUG_ON(!grp);
rc = add_task_to_group(p, grp);
done:
write_unlock_irqrestore(&related_thread_group_lock, flags);
write_unlock(&related_thread_group_lock);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
return rc;
}
@ -4529,7 +4431,7 @@ bool early_detection_notify(struct rq *rq, u64 wallclock)
struct task_struct *p;
int loop_max = 10;
if (!sched_boost() || !rq->cfs.h_nr_running)
if (sched_boost_policy() == SCHED_BOOST_NONE || !rq->cfs.h_nr_running)
return 0;
rq->ed_task = NULL;


@ -1677,8 +1677,13 @@ static int find_lowest_rq_hmp(struct task_struct *task)
int prev_cpu = task_cpu(task);
u64 cpu_load, min_load = ULLONG_MAX;
int i;
int restrict_cluster = sched_boost() ? 0 :
sysctl_sched_restrict_cluster_spill;
int restrict_cluster;
int boost_on_big;
boost_on_big = sched_boost() == FULL_THROTTLE_BOOST &&
sched_boost_policy() == SCHED_BOOST_ON_BIG;
restrict_cluster = sysctl_sched_restrict_cluster_spill;
/* Make sure the mask is initialized first */
if (unlikely(!lowest_mask))
@ -1697,6 +1702,9 @@ static int find_lowest_rq_hmp(struct task_struct *task)
*/
for_each_sched_cluster(cluster) {
if (boost_on_big && cluster->capacity != max_possible_capacity)
continue;
cpumask_and(&candidate_mask, &cluster->cpus, lowest_mask);
cpumask_andnot(&candidate_mask, &candidate_mask,
cpu_isolated_mask);


@ -1061,8 +1061,6 @@ extern unsigned int max_load_scale_factor;
extern unsigned int max_possible_capacity;
extern unsigned int min_max_possible_capacity;
extern unsigned int max_power_cost;
extern unsigned int sched_upmigrate;
extern unsigned int sched_downmigrate;
extern unsigned int sched_init_task_load_windows;
extern unsigned int up_down_migrate_scale_factor;
extern unsigned int sysctl_sched_restrict_cluster_spill;
@ -1106,18 +1104,23 @@ extern void sched_account_irqstart(int cpu, struct task_struct *curr,
u64 wallclock);
extern unsigned int cpu_temp(int cpu);
extern unsigned int nr_eligible_big_tasks(int cpu);
extern void update_up_down_migrate(void);
extern int update_preferred_cluster(struct related_thread_group *grp,
struct task_struct *p, u32 old_load);
extern void set_preferred_cluster(struct related_thread_group *grp);
extern void add_new_task_to_grp(struct task_struct *new);
extern unsigned int update_freq_aggregate_threshold(unsigned int threshold);
enum sched_boost_type {
enum sched_boost_policy {
SCHED_BOOST_NONE,
SCHED_BOOST_ON_BIG,
SCHED_BOOST_ON_ALL,
};
#define NO_BOOST 0
#define FULL_THROTTLE_BOOST 1
#define CONSERVATIVE_BOOST 2
#define RESTRAINED_BOOST 3
static inline struct sched_cluster *cpu_cluster(int cpu)
{
return cpu_rq(cpu)->cluster;
@ -1387,14 +1390,11 @@ extern void set_hmp_defaults(void);
extern int power_delta_exceeded(unsigned int cpu_cost, unsigned int base_cost);
extern unsigned int power_cost(int cpu, u64 demand);
extern void reset_all_window_stats(u64 window_start, unsigned int window_size);
extern void boost_kick(int cpu);
extern int sched_boost(void);
extern int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu,
enum sched_boost_type boost_type);
extern enum sched_boost_type sched_boost_type(void);
enum sched_boost_policy boost_policy);
extern enum sched_boost_policy sched_boost_policy(void);
extern int task_will_fit(struct task_struct *p, int cpu);
extern int group_will_fit(struct sched_cluster *cluster,
struct related_thread_group *grp, u64 demand);
extern u64 cpu_load(int cpu);
extern u64 cpu_load_sync(int cpu, int sync);
extern int preferred_cluster(struct sched_cluster *cluster,
@ -1422,10 +1422,32 @@ extern u64 cpu_upmigrate_discourage_read_u64(struct cgroup_subsys_state *css,
struct cftype *cft);
extern int cpu_upmigrate_discourage_write_u64(struct cgroup_subsys_state *css,
struct cftype *cft, u64 upmigrate_discourage);
extern void sched_hmp_parse_dt(void);
extern void init_sched_hmp_boost_policy(void);
extern void sched_boost_parse_dt(void);
extern void clear_top_tasks_bitmap(unsigned long *bitmap);
#if defined(CONFIG_SCHED_TUNE) && defined(CONFIG_CGROUP_SCHEDTUNE)
extern bool task_sched_boost(struct task_struct *p);
extern int sync_cgroup_colocation(struct task_struct *p, bool insert);
extern bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2);
extern void update_cgroup_boost_settings(void);
extern void restore_cgroup_boost_settings(void);
#else
static inline bool
same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2)
{
return true;
}
static inline bool task_sched_boost(struct task_struct *p)
{
return true;
}
static inline void update_cgroup_boost_settings(void) { }
static inline void restore_cgroup_boost_settings(void) { }
#endif
#else /* CONFIG_SCHED_HMP */
struct hmp_sched_stats;
@ -1615,8 +1637,7 @@ static inline void post_big_task_count_change(void) { }
static inline void set_hmp_defaults(void) { }
static inline void clear_reserved(int cpu) { }
static inline void sched_hmp_parse_dt(void) {}
static inline void init_sched_hmp_boost_policy(void) {}
static inline void sched_boost_parse_dt(void) {}
#define trace_sched_cpu_load(...)
#define trace_sched_cpu_load_lb(...)


@ -25,6 +25,33 @@ struct schedtune {
/* Boost value for tasks on that SchedTune CGroup */
int boost;
#ifdef CONFIG_SCHED_HMP
/* Toggle ability to override sched boost enabled */
bool sched_boost_no_override;
/*
* Controls whether a cgroup is eligible for sched boost or not. This
* can temporarily be disabled by the kernel based on the no_override
* flag above.
*/
bool sched_boost_enabled;
/*
* This tracks the default value of sched_boost_enabled and is used to
* restore the value following any temporary changes to that flag.
*/
bool sched_boost_enabled_backup;
/*
* Controls whether tasks of this cgroup should be colocated with each
* other and tasks of other cgroups that have the same flag turned on.
*/
bool colocate;
/* Controls whether further updates are allowed to the colocate flag */
bool colocate_update_disabled;
#endif
};
static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
@ -54,6 +81,13 @@ static inline struct schedtune *parent_st(struct schedtune *st)
static struct schedtune
root_schedtune = {
.boost = 0,
#ifdef CONFIG_SCHED_HMP
.sched_boost_no_override = false,
.sched_boost_enabled = true,
.sched_boost_enabled_backup = true,
.colocate = false,
.colocate_update_disabled = false,
#endif
};
/*
@ -97,6 +131,121 @@ struct boost_groups {
/* Boost groups affecting each CPU in the system */
DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);
#ifdef CONFIG_SCHED_HMP
static inline void init_sched_boost(struct schedtune *st)
{
st->sched_boost_no_override = false;
st->sched_boost_enabled = true;
st->sched_boost_enabled_backup = st->sched_boost_enabled;
st->colocate = false;
st->colocate_update_disabled = false;
}
bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2)
{
return task_schedtune(tsk1) == task_schedtune(tsk2);
}
void update_cgroup_boost_settings(void)
{
int i;
for (i = 0; i < BOOSTGROUPS_COUNT; i++) {
if (!allocated_group[i])
break;
if (allocated_group[i]->sched_boost_no_override)
continue;
allocated_group[i]->sched_boost_enabled = false;
}
}
void restore_cgroup_boost_settings(void)
{
int i;
for (i = 0; i < BOOSTGROUPS_COUNT; i++) {
if (!allocated_group[i])
break;
allocated_group[i]->sched_boost_enabled =
allocated_group[i]->sched_boost_enabled_backup;
}
}
bool task_sched_boost(struct task_struct *p)
{
struct schedtune *st = task_schedtune(p);
return st->sched_boost_enabled;
}
static u64
sched_boost_override_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct schedtune *st = css_st(css);
return st->sched_boost_no_override;
}
static int sched_boost_override_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 override)
{
struct schedtune *st = css_st(css);
st->sched_boost_no_override = !!override;
return 0;
}
static u64 sched_boost_enabled_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct schedtune *st = css_st(css);
return st->sched_boost_enabled;
}
static int sched_boost_enabled_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 enable)
{
struct schedtune *st = css_st(css);
st->sched_boost_enabled = !!enable;
st->sched_boost_enabled_backup = st->sched_boost_enabled;
return 0;
}
static u64 sched_colocate_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct schedtune *st = css_st(css);
return st->colocate;
}
static int sched_colocate_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 colocate)
{
struct schedtune *st = css_st(css);
if (st->colocate_update_disabled)
return -EPERM;
st->colocate = !!colocate;
st->colocate_update_disabled = true;
return 0;
}
#else /* CONFIG_SCHED_HMP */
static inline void init_sched_boost(struct schedtune *st) { }
#endif /* CONFIG_SCHED_HMP */
static u64
boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
@ -121,12 +270,45 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
return 0;
}
static void schedtune_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *css;
struct schedtune *st;
bool colocate;
cgroup_taskset_first(tset, &css);
st = css_st(css);
colocate = st->colocate;
cgroup_taskset_for_each(task, css, tset)
sync_cgroup_colocation(task, colocate);
}
static struct cftype files[] = {
{
.name = "boost",
.read_u64 = boost_read,
.write_u64 = boost_write,
},
#ifdef CONFIG_SCHED_HMP
{
.name = "sched_boost_no_override",
.read_u64 = sched_boost_override_read,
.write_u64 = sched_boost_override_write,
},
{
.name = "sched_boost_enabled",
.read_u64 = sched_boost_enabled_read,
.write_u64 = sched_boost_enabled_write,
},
{
.name = "colocate",
.read_u64 = sched_colocate_read,
.write_u64 = sched_colocate_write,
},
#endif
{ } /* terminate */
};
@ -189,6 +371,7 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
/* Initialize per CPUs boost group support */
st->idx = idx;
init_sched_boost(st);
if (schedtune_boostgroup_init(st))
goto release;
@ -222,6 +405,7 @@ struct cgroup_subsys schedtune_cgrp_subsys = {
.legacy_cftypes = files,
.early_init = 1,
.allow_attach = subsys_cgroup_allow_attach,
.attach = schedtune_attach,
};
#endif /* CONFIG_CGROUP_SCHEDTUNE */
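
The schedtune changes above add three per-cgroup attributes. A user-space sketch of setting them (the /dev/stune mount point and the top-app group are assumptions; the file names follow the cftype entries with the usual controller prefix):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_attr(const char *path, const char *val)
{
        int fd = open(path, O_WRONLY);

        if (fd < 0) {
                perror(path);
                return -1;
        }
        if (write(fd, val, strlen(val)) < 0)
                perror("write");
        close(fd);
        return 0;
}

int main(void)
{
        /*
         * Colocate tasks of this group in the default colocation group.
         * The patch makes this a one-shot setting: later writes get -EPERM.
         */
        write_attr("/dev/stune/top-app/schedtune.colocate", "1");
        /* Allow tasks of this group to honour a global sched boost. */
        write_attr("/dev/stune/top-app/schedtune.sched_boost_enabled", "1");
        return 0;
}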


@ -124,6 +124,7 @@ static int __maybe_unused neg_one = -1;
static int zero;
static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static int __maybe_unused three = 3;
static int __maybe_unused four = 4;
static unsigned long one_ul = 1;
static int one_hundred = 100;
@ -376,6 +377,22 @@ static struct ctl_table kern_table[] = {
.extra1 = &zero,
.extra2 = &one_hundred,
},
{
.procname = "sched_group_upmigrate",
.data = &sysctl_sched_group_upmigrate_pct,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_hmp_proc_update_handler,
.extra1 = &zero,
},
{
.procname = "sched_group_downmigrate",
.data = &sysctl_sched_group_downmigrate_pct,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_hmp_proc_update_handler,
.extra1 = &zero,
},
{
.procname = "sched_init_task_load",
.data = &sysctl_sched_init_task_load_pct,
@ -487,6 +504,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_boost_handler,
.extra1 = &zero,
.extra2 = &three,
},
#endif /* CONFIG_SCHED_HMP */
#ifdef CONFIG_SCHED_DEBUG
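
With the extra1/extra2 bounds added above, the boost sysctl accepts the four boost types (0-3). A user-space sketch of driving it through procfs (assuming the entry is named sched_boost under the kernel sysctl table, hence /proc/sys/kernel/sched_boost):

#include <stdio.h>

#define NO_BOOST            0
#define FULL_THROTTLE_BOOST 1
#define CONSERVATIVE_BOOST  2
#define RESTRAINED_BOOST    3

static int set_sched_boost(int type)
{
        FILE *f = fopen("/proc/sys/kernel/sched_boost", "w");

        if (!f)
                return -1;
        fprintf(f, "%d\n", type);
        return fclose(f);
}

int main(void)
{
        /*
         * Boost has to be switched off before another type can be selected;
         * direct type-to-type transitions are rejected with -EINVAL.
         */
        set_sched_boost(CONSERVATIVE_BOOST);
        set_sched_boost(NO_BOOST);
        set_sched_boost(RESTRAINED_BOOST);
        return 0;
}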