diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 6848454c5447..5d0899df64ff 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -53,6 +53,8 @@ extern unsigned int sysctl_sched_spill_nr_run; extern unsigned int sysctl_sched_spill_load_pct; extern unsigned int sysctl_sched_upmigrate_pct; extern unsigned int sysctl_sched_downmigrate_pct; +extern unsigned int sysctl_sched_group_upmigrate_pct; +extern unsigned int sysctl_sched_group_downmigrate_pct; extern unsigned int sysctl_early_detection_duration; extern unsigned int sysctl_sched_boost; extern unsigned int sysctl_sched_small_wakee_task_load_pct; diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 7778ff3947de..72bbed9ad5db 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -133,6 +133,7 @@ TRACE_EVENT(sched_task_load, __field( u32, flags ) __field( int, best_cpu ) __field( u64, latency ) + __field( int, grp_id ) ), TP_fast_assign( @@ -148,12 +149,13 @@ TRACE_EVENT(sched_task_load, __entry->latency = p->state == TASK_WAKING ? sched_ktime_clock() - p->ravg.mark_start : 0; + __entry->grp_id = p->grp ? p->grp->id : 0; ), - TP_printk("%d (%s): demand=%u boost=%d reason=%d sync=%d need_idle=%d flags=%x best_cpu=%d latency=%llu", + TP_printk("%d (%s): demand=%u boost=%d reason=%d sync=%d need_idle=%d flags=%x grp=%d best_cpu=%d latency=%llu", __entry->pid, __entry->comm, __entry->demand, __entry->boost, __entry->reason, __entry->sync, - __entry->need_idle, __entry->flags, + __entry->need_idle, __entry->flags, __entry->grp_id, __entry->best_cpu, __entry->latency) ); @@ -164,9 +166,12 @@ TRACE_EVENT(sched_set_preferred_cluster, TP_ARGS(grp, total_demand), TP_STRUCT__entry( - __field( int, id ) - __field( u64, demand ) - __field( int, cluster_first_cpu ) + __field( int, id ) + __field( u64, demand ) + __field( int, cluster_first_cpu ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field(unsigned int, task_demand ) ), TP_fast_assign( @@ -245,19 +250,19 @@ DEFINE_EVENT(sched_cpu_load, sched_cpu_load_cgroup, TRACE_EVENT(sched_set_boost, - TP_PROTO(int ref_count), + TP_PROTO(int type), - TP_ARGS(ref_count), + TP_ARGS(type), TP_STRUCT__entry( - __field(unsigned int, ref_count ) + __field(int, type ) ), TP_fast_assign( - __entry->ref_count = ref_count; + __entry->type = type; ), - TP_printk("ref_count=%d", __entry->ref_count) + TP_printk("type %d", __entry->type) ); #if defined(CREATE_TRACE_POINTS) && defined(CONFIG_SCHED_HMP) diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 7d0d34c53e08..7c0382a3eace 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -15,7 +15,7 @@ obj-y += core.o loadavg.o clock.o cputime.o obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o obj-y += wait.o completion.o idle.o sched_avg.o obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o -obj-$(CONFIG_SCHED_HMP) += hmp.o +obj-$(CONFIG_SCHED_HMP) += hmp.o boost.o obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o diff --git a/kernel/sched/boost.c b/kernel/sched/boost.c new file mode 100644 index 000000000000..fcfda385b74a --- /dev/null +++ b/kernel/sched/boost.c @@ -0,0 +1,226 @@ +/* Copyright (c) 2012-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "sched.h" +#include +#include +#include + +/* + * Scheduler boost is a mechanism to temporarily place tasks on CPUs + * with higher capacity than those where a task would have normally + * ended up with their load characteristics. Any entity enabling + * boost is responsible for disabling it as well. + */ + +unsigned int sysctl_sched_boost; +static enum sched_boost_policy boost_policy; +static enum sched_boost_policy boost_policy_dt = SCHED_BOOST_NONE; +static DEFINE_MUTEX(boost_mutex); +static unsigned int freq_aggr_threshold_backup; + +static inline void boost_kick(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + + if (!test_and_set_bit(BOOST_KICK, &rq->hmp_flags)) + smp_send_reschedule(cpu); +} + +static void boost_kick_cpus(void) +{ + int i; + struct cpumask kick_mask; + + if (boost_policy != SCHED_BOOST_ON_BIG) + return; + + cpumask_andnot(&kick_mask, cpu_online_mask, cpu_isolated_mask); + + for_each_cpu(i, &kick_mask) { + if (cpu_capacity(i) != max_capacity) + boost_kick(i); + } +} + +int got_boost_kick(void) +{ + int cpu = smp_processor_id(); + struct rq *rq = cpu_rq(cpu); + + return test_bit(BOOST_KICK, &rq->hmp_flags); +} + +void clear_boost_kick(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + + clear_bit(BOOST_KICK, &rq->hmp_flags); +} + +/* + * Scheduler boost type and boost policy might at first seem unrelated; + * however, there exists a connection between them that will allow us + * to use them interchangeably during placement decisions. We'll explain + * the connection here in one possible way so that the implications are + * clear when looking at placement policies. + * + * When policy = SCHED_BOOST_NONE, type is either none or RESTRAINED. + * When policy = SCHED_BOOST_ON_ALL or SCHED_BOOST_ON_BIG, type can + * neither be none nor RESTRAINED. + */ +static void set_boost_policy(int type) +{ + if (type == SCHED_BOOST_NONE || type == RESTRAINED_BOOST) { + boost_policy = SCHED_BOOST_NONE; + return; + } + + if (boost_policy_dt) { + boost_policy = boost_policy_dt; + return; + } + + if (min_possible_efficiency != max_possible_efficiency) { + boost_policy = SCHED_BOOST_ON_BIG; + return; + } + + boost_policy = SCHED_BOOST_ON_ALL; +} + +enum sched_boost_policy sched_boost_policy(void) +{ + return boost_policy; +} + +static bool verify_boost_params(int old_val, int new_val) +{ + /* + * Boost can only be turned on or off. There is no possibility of + * switching from one boost type to another or of setting the same + * kind of boost several times.
+ */ + return !(!!old_val == !!new_val); +} + +static void _sched_set_boost(int old_val, int type) +{ + switch (type) { + case NO_BOOST: + if (old_val == FULL_THROTTLE_BOOST) + core_ctl_set_boost(false); + else if (old_val == CONSERVATIVE_BOOST) + restore_cgroup_boost_settings(); + else + update_freq_aggregate_threshold( + freq_aggr_threshold_backup); + break; + + case FULL_THROTTLE_BOOST: + core_ctl_set_boost(true); + boost_kick_cpus(); + break; + + case CONSERVATIVE_BOOST: + update_cgroup_boost_settings(); + boost_kick_cpus(); + break; + + case RESTRAINED_BOOST: + freq_aggr_threshold_backup = + update_freq_aggregate_threshold(1); + break; + + default: + WARN_ON(1); + return; + } + + set_boost_policy(type); + sysctl_sched_boost = type; + trace_sched_set_boost(type); +} + +void sched_boost_parse_dt(void) +{ + struct device_node *sn; + const char *boost_policy; + + if (!sched_enable_hmp) + return; + + sn = of_find_node_by_path("/sched-hmp"); + if (!sn) + return; + + if (!of_property_read_string(sn, "boost-policy", &boost_policy)) { + if (!strcmp(boost_policy, "boost-on-big")) + boost_policy_dt = SCHED_BOOST_ON_BIG; + else if (!strcmp(boost_policy, "boost-on-all")) + boost_policy_dt = SCHED_BOOST_ON_ALL; + } +} + +int sched_set_boost(int type) +{ + int ret = 0; + + if (!sched_enable_hmp) + return -EINVAL; + + mutex_lock(&boost_mutex); + + if (verify_boost_params(sysctl_sched_boost, type)) + _sched_set_boost(sysctl_sched_boost, type); + else + ret = -EINVAL; + + mutex_unlock(&boost_mutex); + return ret; +} + +int sched_boost_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + unsigned int *data = (unsigned int *)table->data; + unsigned int old_val; + + if (!sched_enable_hmp) + return -EINVAL; + + mutex_lock(&boost_mutex); + + old_val = *data; + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + + if (ret || !write) + goto done; + + if (verify_boost_params(old_val, *data)) { + _sched_set_boost(old_val, *data); + } else { + *data = old_val; + ret = -EINVAL; + } + +done: + mutex_unlock(&boost_mutex); + return ret; +} + +int sched_boost(void) +{ + return sysctl_sched_boost; +} diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 84563da000cf..a5d101e8a5f2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7846,7 +7846,6 @@ void __init sched_init_smp(void) hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE); update_cluster_topology(); - init_sched_hmp_boost_policy(); init_hrtick(); @@ -7895,7 +7894,7 @@ void __init sched_init(void) BUG_ON(num_possible_cpus() > BITS_PER_LONG); - sched_hmp_parse_dt(); + sched_boost_parse_dt(); init_clusters(); #ifdef CONFIG_FAIR_GROUP_SCHED diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1674b1054f83..3db77aff2433 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2596,6 +2596,7 @@ static u32 __compute_runnable_contrib(u64 n) #define SBC_FLAG_COLOC_CLUSTER 0x10000 #define SBC_FLAG_WAKER_CLUSTER 0x20000 #define SBC_FLAG_BACKUP_CLUSTER 0x40000 +#define SBC_FLAG_BOOST_CLUSTER 0x80000 struct cpu_select_env { struct task_struct *p; @@ -2605,7 +2606,7 @@ struct cpu_select_env { u8 need_waker_cluster:1; u8 sync:1; u8 ignore_prev_cpu:1; - enum sched_boost_type boost_type; + enum sched_boost_policy boost_policy; int prev_cpu; DECLARE_BITMAP(candidate_list, NR_CPUS); DECLARE_BITMAP(backup_list, NR_CPUS); @@ -2705,10 +2706,38 @@ select_least_power_cluster(struct cpu_select_env *env) struct sched_cluster *cluster; if (env->rtg) { - env->task_load = 
scale_load_to_cpu(task_load(env->p), - cluster_first_cpu(env->rtg->preferred_cluster)); - env->sbc_best_cluster_flag |= SBC_FLAG_COLOC_CLUSTER; - return env->rtg->preferred_cluster; + int cpu = cluster_first_cpu(env->rtg->preferred_cluster); + + env->task_load = scale_load_to_cpu(task_load(env->p), cpu); + + if (task_load_will_fit(env->p, env->task_load, + cpu, env->boost_policy)) { + env->sbc_best_cluster_flag |= SBC_FLAG_COLOC_CLUSTER; + + if (env->boost_policy == SCHED_BOOST_NONE) + return env->rtg->preferred_cluster; + + for_each_sched_cluster(cluster) { + if (cluster != env->rtg->preferred_cluster) { + __set_bit(cluster->id, + env->backup_list); + __clear_bit(cluster->id, + env->candidate_list); + } + } + + return env->rtg->preferred_cluster; + } + + /* + * Since the task load does not fit on the preferred + * cluster anymore, pretend that the task does not + * have any preferred cluster. This allows the waking + * task to get the appropriate CPU it needs as per the + * non co-location placement policy without having to + * wait until the preferred cluster is updated. + */ + env->rtg = NULL; } for_each_sched_cluster(cluster) { @@ -2718,7 +2747,7 @@ select_least_power_cluster(struct cpu_select_env *env) env->task_load = scale_load_to_cpu(task_load(env->p), cpu); if (task_load_will_fit(env->p, env->task_load, cpu, - env->boost_type) + env->boost_policy)) return cluster; __set_bit(cluster->id, env->backup_list); @@ -2961,7 +2990,14 @@ static void find_best_cpu_in_cluster(struct sched_cluster *c, update_spare_capacity(stats, env, i, c->capacity, env->cpu_load); - if (env->boost_type == SCHED_BOOST_ON_ALL || + /* + * need_idle takes precedence over sched boost, but when both + * are set, the idlest CPU within all the clusters is selected + * when boost_policy = BOOST_ON_ALL, whereas the idlest CPU in the + * big cluster is selected with boost_policy = BOOST_ON_BIG.
+ */ + if ((!env->need_idle && + env->boost_policy != SCHED_BOOST_NONE) || env->need_waker_cluster || sched_cpu_high_irqload(i) || spill_threshold_crossed(env, cpu_rq(i))) @@ -3005,7 +3041,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats) struct task_struct *task = env->p; struct sched_cluster *cluster; - if (env->boost_type != SCHED_BOOST_NONE || env->reason || + if (env->boost_policy != SCHED_BOOST_NONE || env->reason || !task->ravg.mark_start || env->need_idle || !sched_short_sleep_task_threshold) return false; @@ -3034,7 +3070,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats) cluster = cpu_rq(prev_cpu)->cluster; if (!task_load_will_fit(task, env->task_load, prev_cpu, - sched_boost_type())) { + sched_boost_policy())) { __set_bit(cluster->id, env->backup_list); __clear_bit(cluster->id, env->candidate_list); @@ -3056,7 +3092,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats) static inline bool wake_to_waker_cluster(struct cpu_select_env *env) { - return env->boost_type == SCHED_BOOST_NONE && + return env->boost_policy == SCHED_BOOST_NONE && !env->need_idle && !env->reason && env->sync && task_load(current) > sched_big_waker_task_load && task_load(env->p) < sched_small_wakee_task_load; @@ -3098,7 +3134,6 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, .reason = reason, .need_idle = wake_to_idle(p), .need_waker_cluster = 0, - .boost_type = sched_boost_type(), .sync = sync, .prev_cpu = target, .ignore_prev_cpu = 0, @@ -3107,6 +3142,9 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, .sbc_best_cluster_flag = 0, }; + env.boost_policy = task_sched_boost(p) ? + sched_boost_policy() : SCHED_BOOST_NONE; + bitmap_copy(env.candidate_list, all_cluster_ids, NR_CPUS); bitmap_zero(env.backup_list, NR_CPUS); @@ -3178,12 +3216,23 @@ retry: sbc_flag |= env.sbc_best_flag; target = stats.best_cpu; } else { - if (env.rtg) { + if (env.rtg && env.boost_policy == SCHED_BOOST_NONE) { env.rtg = NULL; goto retry; } - find_backup_cluster(&env, &stats); + /* + * With boost_policy == SCHED_BOOST_ON_BIG, we reach here with + * backup_list = little cluster, candidate_list = none and + * stats->best_capacity_cpu points the best spare capacity + * CPU among the CPUs in the big cluster. 
+ */ + if (env.boost_policy == SCHED_BOOST_ON_BIG && + stats.best_capacity_cpu >= 0) + sbc_flag |= SBC_FLAG_BOOST_CLUSTER; + else + find_backup_cluster(&env, &stats); + if (stats.best_capacity_cpu >= 0) { target = stats.best_capacity_cpu; sbc_flag |= SBC_FLAG_BEST_CAP_CPU; @@ -3193,8 +3242,8 @@ retry: out: sbc_flag |= env.sbc_best_cluster_flag; rcu_read_unlock(); - trace_sched_task_load(p, sched_boost(), env.reason, env.sync, - env.need_idle, sbc_flag, target); + trace_sched_task_load(p, sched_boost_policy() && task_sched_boost(p), + env.reason, env.sync, env.need_idle, sbc_flag, target); return target; } @@ -3402,11 +3451,9 @@ static inline int migration_needed(struct task_struct *p, int cpu) if (task_will_be_throttled(p)) return 0; - if (sched_boost_type() == SCHED_BOOST_ON_BIG) { - if (cpu_capacity(cpu) != max_capacity) - return UP_MIGRATION; - return 0; - } + if (sched_boost_policy() == SCHED_BOOST_ON_BIG && + cpu_capacity(cpu) != max_capacity && task_sched_boost(p)) + return UP_MIGRATION; if (sched_cpu_high_irqload(cpu)) return IRQLOAD_MIGRATION; @@ -3420,7 +3467,7 @@ static inline int migration_needed(struct task_struct *p, int cpu) return DOWN_MIGRATION; } - if (!grp && !task_will_fit(p, cpu)) { + if (!task_will_fit(p, cpu)) { rcu_read_unlock(); return UP_MIGRATION; } @@ -6648,10 +6695,7 @@ enum fbq_type { regular, remote, all }; #define LBF_NEED_BREAK 0x02 #define LBF_DST_PINNED 0x04 #define LBF_SOME_PINNED 0x08 -#define LBF_SCHED_BOOST_ACTIVE_BALANCE 0x40 #define LBF_BIG_TASK_ACTIVE_BALANCE 0x80 -#define LBF_HMP_ACTIVE_BALANCE (LBF_SCHED_BOOST_ACTIVE_BALANCE | \ - LBF_BIG_TASK_ACTIVE_BALANCE) #define LBF_IGNORE_BIG_TASKS 0x100 #define LBF_IGNORE_PREFERRED_CLUSTER_TASKS 0x200 #define LBF_MOVED_RELATED_THREAD_GROUP_TASK 0x400 @@ -6682,6 +6726,7 @@ struct lb_env { enum fbq_type fbq_type; struct list_head tasks; + enum sched_boost_policy boost_policy; }; /* @@ -6826,9 +6871,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* Record that we found atleast one task that could run on dst_cpu */ env->flags &= ~LBF_ALL_PINNED; - if (cpu_capacity(env->dst_cpu) > cpu_capacity(env->src_cpu) && - nr_big_tasks(env->src_rq) && !is_big_task(p)) - return 0; + if (cpu_capacity(env->dst_cpu) > cpu_capacity(env->src_cpu)) { + if (nr_big_tasks(env->src_rq) && !is_big_task(p)) + return 0; + + if (env->boost_policy == SCHED_BOOST_ON_BIG && + !task_sched_boost(p)) + return 0; + } twf = task_will_fit(p, env->dst_cpu); @@ -6951,8 +7001,7 @@ static int detach_tasks(struct lb_env *env) if (env->imbalance <= 0) return 0; - if (cpu_capacity(env->dst_cpu) < cpu_capacity(env->src_cpu) && - !sched_boost()) + if (cpu_capacity(env->dst_cpu) < cpu_capacity(env->src_cpu)) env->flags |= LBF_IGNORE_BIG_TASKS; else if (!same_cluster(env->dst_cpu, env->src_cpu)) env->flags |= LBF_IGNORE_PREFERRED_CLUSTER_TASKS; @@ -7255,8 +7304,10 @@ bail_inter_cluster_balance(struct lb_env *env, struct sd_lb_stats *sds) int local_capacity, busiest_capacity; int local_pwr_cost, busiest_pwr_cost; int nr_cpus; + int boost = sched_boost(); - if (!sysctl_sched_restrict_cluster_spill || sched_boost()) + if (!sysctl_sched_restrict_cluster_spill || + boost == FULL_THROTTLE_BOOST || boost == CONSERVATIVE_BOOST) return 0; local_cpu = group_first_cpu(sds->local); @@ -7628,11 +7679,6 @@ static bool update_sd_pick_busiest_active_balance(struct lb_env *env, { if (env->idle != CPU_NOT_IDLE && cpu_capacity(env->dst_cpu) > group_rq_capacity(sg)) { - if (sched_boost() && !sds->busiest && sgs->sum_nr_running) { - env->flags |= 
LBF_SCHED_BOOST_ACTIVE_BALANCE; - return true; - } - if (sgs->sum_nr_big_tasks > sds->busiest_stat.sum_nr_big_tasks) { env->flags |= LBF_BIG_TASK_ACTIVE_BALANCE; @@ -8045,7 +8091,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) if (!sds.busiest || busiest->sum_nr_running == 0) goto out_balanced; - if (env->flags & LBF_HMP_ACTIVE_BALANCE) + if (env->flags & LBF_BIG_TASK_ACTIVE_BALANCE) goto force_balance; if (bail_inter_cluster_balance(env, &sds)) @@ -8257,7 +8303,7 @@ static int need_active_balance(struct lb_env *env) { struct sched_domain *sd = env->sd; - if (env->flags & LBF_HMP_ACTIVE_BALANCE) + if (env->flags & LBF_BIG_TASK_ACTIVE_BALANCE) return 1; if (env->idle == CPU_NEWLY_IDLE) { @@ -8348,20 +8394,21 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask); struct lb_env env = { - .sd = sd, - .dst_cpu = this_cpu, - .dst_rq = this_rq, - .dst_grpmask = sched_group_cpus(sd->groups), - .idle = idle, - .loop_break = sched_nr_migrate_break, - .cpus = cpus, - .fbq_type = all, - .tasks = LIST_HEAD_INIT(env.tasks), - .imbalance = 0, - .flags = 0, - .loop = 0, + .sd = sd, + .dst_cpu = this_cpu, + .dst_rq = this_rq, + .dst_grpmask = sched_group_cpus(sd->groups), + .idle = idle, + .loop_break = sched_nr_migrate_break, + .cpus = cpus, + .fbq_type = all, + .tasks = LIST_HEAD_INIT(env.tasks), + .imbalance = 0, + .flags = 0, + .loop = 0, .busiest_nr_running = 0, .busiest_grp_capacity = 0, + .boost_policy = sched_boost_policy(), }; /* @@ -8510,7 +8557,7 @@ more_balance: no_move: if (!ld_moved) { - if (!(env.flags & LBF_HMP_ACTIVE_BALANCE)) + if (!(env.flags & LBF_BIG_TASK_ACTIVE_BALANCE)) schedstat_inc(sd, lb_failed[idle]); /* @@ -8520,7 +8567,7 @@ no_move: * excessive cache_hot migrations and active balances. */ if (idle != CPU_NEWLY_IDLE && - !(env.flags & LBF_HMP_ACTIVE_BALANCE)) + !(env.flags & LBF_BIG_TASK_ACTIVE_BALANCE)) sd->nr_balance_failed++; if (need_active_balance(&env)) { @@ -8797,6 +8844,7 @@ static int active_load_balance_cpu_stop(void *data) .busiest_grp_capacity = 0, .flags = 0, .loop = 0, + .boost_policy = sched_boost_policy(), }; bool moved = false; @@ -9272,7 +9320,8 @@ static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type) if (rq->nr_running < 2) return 0; - if (!sysctl_sched_restrict_cluster_spill || sched_boost()) + if (!sysctl_sched_restrict_cluster_spill || + sched_boost_policy() == SCHED_BOOST_ON_ALL) return 1; if (cpu_max_power_cost(cpu) == max_power_cost) diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c index 30391aae0822..968a41e0e81e 100644 --- a/kernel/sched/hmp.c +++ b/kernel/sched/hmp.c @@ -17,8 +17,6 @@ #include #include #include -#include -#include #include "sched.h" @@ -231,52 +229,6 @@ fail: return ret; } -/* - * It is possible that CPUs of the same micro architecture can have slight - * difference in the efficiency due to other factors like cache size. The - * BOOST_ON_BIG policy may not be optimial for such systems. The required - * boost policy can be specified via device tree to handle this. - */ -static int __read_mostly sched_boost_policy = SCHED_BOOST_NONE; - -/* - * This should be called after clusters are populated and - * the respective efficiency values are initialized. - */ -void init_sched_hmp_boost_policy(void) -{ - /* - * Initialize the boost type here if it is not passed from - * device tree. 
- */ - if (sched_boost_policy == SCHED_BOOST_NONE) { - if (max_possible_efficiency != min_possible_efficiency) - sched_boost_policy = SCHED_BOOST_ON_BIG; - else - sched_boost_policy = SCHED_BOOST_ON_ALL; - } -} - -void sched_hmp_parse_dt(void) -{ - struct device_node *sn; - const char *boost_policy; - - if (!sched_enable_hmp) - return; - - sn = of_find_node_by_path("/sched-hmp"); - if (!sn) - return; - - if (!of_property_read_string(sn, "boost-policy", &boost_policy)) { - if (!strcmp(boost_policy, "boost-on-big")) - sched_boost_policy = SCHED_BOOST_ON_BIG; - else if (!strcmp(boost_policy, "boost-on-all")) - sched_boost_policy = SCHED_BOOST_ON_ALL; - } -} - unsigned int max_possible_efficiency = 1; unsigned int min_possible_efficiency = UINT_MAX; @@ -680,29 +632,6 @@ int __init set_sched_enable_hmp(char *str) early_param("sched_enable_hmp", set_sched_enable_hmp); -int got_boost_kick(void) -{ - int cpu = smp_processor_id(); - struct rq *rq = cpu_rq(cpu); - - return test_bit(BOOST_KICK, &rq->hmp_flags); -} - -inline void clear_boost_kick(int cpu) -{ - struct rq *rq = cpu_rq(cpu); - - clear_bit(BOOST_KICK, &rq->hmp_flags); -} - -inline void boost_kick(int cpu) -{ - struct rq *rq = cpu_rq(cpu); - - if (!test_and_set_bit(BOOST_KICK, &rq->hmp_flags)) - smp_send_reschedule(cpu); -} - /* Clear any HMP scheduler related requests pending from or on cpu */ void clear_hmp_request(int cpu) { @@ -840,6 +769,9 @@ min_max_possible_capacity = 1024; /* min(rq->max_possible_capacity) */ /* Window size (in ns) */ __read_mostly unsigned int sched_ravg_window = MIN_SCHED_RAVG_WINDOW; +/* Maximum allowed threshold before freq aggregation must be enabled */ +#define MAX_FREQ_AGGR_THRESH 1000 + /* Temporarily disable window-stats activity on all cpus */ unsigned int __read_mostly sched_disable_window_stats; @@ -919,8 +851,8 @@ static const unsigned int top_tasks_bitmap_size = * C1 busy time = 5 + 5 + 6 = 16ms * */ -static __read_mostly unsigned int sched_freq_aggregate; -__read_mostly unsigned int sysctl_sched_freq_aggregate; +static __read_mostly unsigned int sched_freq_aggregate = 1; +__read_mostly unsigned int sysctl_sched_freq_aggregate = 1; unsigned int __read_mostly sysctl_sched_freq_aggregate_threshold_pct; static unsigned int __read_mostly sched_freq_aggregate_threshold; @@ -937,14 +869,6 @@ unsigned int max_task_load(void) /* Use this knob to turn on or off HMP-aware task placement logic */ unsigned int __read_mostly sched_enable_hmp; -/* - * Scheduler boost is a mechanism to temporarily place tasks on CPUs - * with higher capacity than those where a task would have normally - * ended up with their load characteristics. Any entity enabling - * boost is responsible for disabling it as well. - */ -unsigned int sysctl_sched_boost; - /* A cpu can no longer accommodate more tasks if: * * rq->nr_running > sysctl_sched_spill_nr_run || @@ -995,6 +919,21 @@ unsigned int __read_mostly sysctl_sched_upmigrate_pct = 80; unsigned int __read_mostly sched_downmigrate; unsigned int __read_mostly sysctl_sched_downmigrate_pct = 60; +/* + * Task groups whose aggregate demand on a cpu is more than + * sched_group_upmigrate need to be up-migrated if possible. + */ +unsigned int __read_mostly sched_group_upmigrate; +unsigned int __read_mostly sysctl_sched_group_upmigrate_pct = 100; + +/* + * Task groups, once up-migrated, will need to drop their aggregate + * demand to less than sched_group_downmigrate before they are "down" + * migrated. 
+ */ +unsigned int __read_mostly sched_group_downmigrate; +unsigned int __read_mostly sysctl_sched_group_downmigrate_pct = 95; + /* * The load scale factor of a CPU gets boosted when its max frequency * is restricted due to which the tasks are migrating to higher capacity @@ -1017,33 +956,46 @@ sched_long_cpu_selection_threshold = 100 * NSEC_PER_MSEC; unsigned int __read_mostly sysctl_sched_restrict_cluster_spill; -void update_up_down_migrate(void) +static void +_update_up_down_migrate(unsigned int *up_migrate, unsigned int *down_migrate) { - unsigned int up_migrate = pct_to_real(sysctl_sched_upmigrate_pct); - unsigned int down_migrate = pct_to_real(sysctl_sched_downmigrate_pct); unsigned int delta; if (up_down_migrate_scale_factor == 1024) - goto done; + return; - delta = up_migrate - down_migrate; + delta = *up_migrate - *down_migrate; - up_migrate /= NSEC_PER_USEC; - up_migrate *= up_down_migrate_scale_factor; - up_migrate >>= 10; - up_migrate *= NSEC_PER_USEC; + *up_migrate /= NSEC_PER_USEC; + *up_migrate *= up_down_migrate_scale_factor; + *up_migrate >>= 10; + *up_migrate *= NSEC_PER_USEC; - up_migrate = min(up_migrate, sched_ravg_window); + *up_migrate = min(*up_migrate, sched_ravg_window); - down_migrate /= NSEC_PER_USEC; - down_migrate *= up_down_migrate_scale_factor; - down_migrate >>= 10; - down_migrate *= NSEC_PER_USEC; + *down_migrate /= NSEC_PER_USEC; + *down_migrate *= up_down_migrate_scale_factor; + *down_migrate >>= 10; + *down_migrate *= NSEC_PER_USEC; - down_migrate = min(down_migrate, up_migrate - delta); -done: + *down_migrate = min(*down_migrate, *up_migrate - delta); +} + +static void update_up_down_migrate(void) +{ + unsigned int up_migrate = pct_to_real(sysctl_sched_upmigrate_pct); + unsigned int down_migrate = pct_to_real(sysctl_sched_downmigrate_pct); + + _update_up_down_migrate(&up_migrate, &down_migrate); sched_upmigrate = up_migrate; sched_downmigrate = down_migrate; + + up_migrate = pct_to_real(sysctl_sched_group_upmigrate_pct); + down_migrate = pct_to_real(sysctl_sched_group_downmigrate_pct); + + _update_up_down_migrate(&up_migrate, &down_migrate); + sched_group_upmigrate = up_migrate; + sched_group_downmigrate = down_migrate; } void set_hmp_defaults(void) @@ -1134,82 +1086,6 @@ u64 cpu_load_sync(int cpu, int sync) return scale_load_to_cpu(cpu_cravg_sync(cpu, sync), cpu); } -static int boost_refcount; -static DEFINE_SPINLOCK(boost_lock); -static DEFINE_MUTEX(boost_mutex); - -static void boost_kick_cpus(void) -{ - int i; - - for_each_online_cpu(i) { - if (cpu_capacity(i) != max_capacity) - boost_kick(i); - } -} - -int sched_boost(void) -{ - return boost_refcount > 0; -} - -int sched_set_boost(int enable) -{ - unsigned long flags; - int ret = 0; - int old_refcount; - - if (!sched_enable_hmp) - return -EINVAL; - - spin_lock_irqsave(&boost_lock, flags); - - old_refcount = boost_refcount; - - if (enable == 1) { - boost_refcount++; - } else if (!enable) { - if (boost_refcount >= 1) - boost_refcount--; - else - ret = -EINVAL; - } else { - ret = -EINVAL; - } - - if (!old_refcount && boost_refcount) - boost_kick_cpus(); - - if (boost_refcount <= 1) - core_ctl_set_boost(boost_refcount == 1); - trace_sched_set_boost(boost_refcount); - spin_unlock_irqrestore(&boost_lock, flags); - - return ret; -} - -int sched_boost_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) -{ - int ret; - - mutex_lock(&boost_mutex); - if (!write) - sysctl_sched_boost = sched_boost(); - - ret = proc_dointvec(table, write, buffer, lenp, ppos); - if 
(ret || !write) - goto done; - - ret = (sysctl_sched_boost <= 1) ? - sched_set_boost(sysctl_sched_boost) : -EINVAL; - -done: - mutex_unlock(&boost_mutex); - return ret; -} - /* * Task will fit on a cpu if it's bandwidth consumption on that cpu * will be less than sched_upmigrate. A big task that was previously @@ -1219,60 +1095,63 @@ done: * tasks with load close to the upmigrate threshold */ int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu, - enum sched_boost_type boost_type) + enum sched_boost_policy boost_policy) { - int upmigrate; + int upmigrate = sched_upmigrate; if (cpu_capacity(cpu) == max_capacity) return 1; - if (boost_type != SCHED_BOOST_ON_BIG) { + if (cpu_capacity(task_cpu(p)) > cpu_capacity(cpu)) + upmigrate = sched_downmigrate; + + if (boost_policy != SCHED_BOOST_ON_BIG) { if (task_nice(p) > SCHED_UPMIGRATE_MIN_NICE || upmigrate_discouraged(p)) return 1; - upmigrate = sched_upmigrate; - if (cpu_capacity(task_cpu(p)) > cpu_capacity(cpu)) - upmigrate = sched_downmigrate; - if (task_load < upmigrate) return 1; + } else { + if (task_sched_boost(p) || task_load >= upmigrate) + return 0; + + return 1; } return 0; } -enum sched_boost_type sched_boost_type(void) -{ - if (sched_boost()) - return sched_boost_policy; - - return SCHED_BOOST_NONE; -} - int task_will_fit(struct task_struct *p, int cpu) { u64 tload = scale_load_to_cpu(task_load(p), cpu); - return task_load_will_fit(p, tload, cpu, sched_boost_type()); + return task_load_will_fit(p, tload, cpu, sched_boost_policy()); } -int group_will_fit(struct sched_cluster *cluster, - struct related_thread_group *grp, u64 demand) +static int +group_will_fit(struct sched_cluster *cluster, struct related_thread_group *grp, + u64 demand, bool group_boost) { int cpu = cluster_first_cpu(cluster); int prev_capacity = 0; - unsigned int threshold = sched_upmigrate; + unsigned int threshold = sched_group_upmigrate; u64 load; if (cluster->capacity == max_capacity) return 1; + if (group_boost) + return 0; + + if (!demand) + return 1; + if (grp->preferred_cluster) prev_capacity = grp->preferred_cluster->capacity; if (cluster->capacity < prev_capacity) - threshold = sched_downmigrate; + threshold = sched_group_downmigrate; load = scale_load_to_cpu(demand, cpu); if (load < threshold) @@ -1495,6 +1374,23 @@ void post_big_task_count_change(const struct cpumask *cpus) DEFINE_MUTEX(policy_mutex); +unsigned int update_freq_aggregate_threshold(unsigned int threshold) +{ + unsigned int old_threshold; + + mutex_lock(&policy_mutex); + + old_threshold = sysctl_sched_freq_aggregate_threshold_pct; + + sysctl_sched_freq_aggregate_threshold_pct = threshold; + sched_freq_aggregate_threshold = + pct_to_real(sysctl_sched_freq_aggregate_threshold_pct); + + mutex_unlock(&policy_mutex); + + return old_threshold; +} + static inline int invalid_value_freq_input(unsigned int *data) { if (data == &sysctl_sched_freq_aggregate) @@ -1578,7 +1474,9 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write, if (write && (old_val == *data)) goto done; - if (sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct) { + if (sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct || + sysctl_sched_group_downmigrate_pct > + sysctl_sched_group_upmigrate_pct) { *data = old_val; ret = -EINVAL; goto done; @@ -3110,37 +3008,9 @@ static void reset_all_task_stats(void) { struct task_struct *g, *p; - read_lock(&tasklist_lock); do_each_thread(g, p) { - raw_spin_lock_irq(&p->pi_lock); reset_task_stats(p); - raw_spin_unlock_irq(&p->pi_lock); } 
while_each_thread(g, p); - read_unlock(&tasklist_lock); -} - -static void disable_window_stats(void) -{ - unsigned long flags; - int i; - - local_irq_save(flags); - for_each_possible_cpu(i) - raw_spin_lock(&cpu_rq(i)->lock); - - sched_disable_window_stats = 1; - - for_each_possible_cpu(i) - raw_spin_unlock(&cpu_rq(i)->lock); - - local_irq_restore(flags); -} - -/* Called with all cpu's rq->lock held */ -static void enable_window_stats(void) -{ - sched_disable_window_stats = 0; - } enum reset_reason_code { @@ -3166,16 +3036,21 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size) unsigned int old = 0, new = 0; struct related_thread_group *grp; + local_irq_save(flags); + + read_lock(&tasklist_lock); + read_lock(&related_thread_group_lock); - disable_window_stats(); + /* Taking all runqueue locks prevents race with sched_exit(). */ + for_each_possible_cpu(cpu) + raw_spin_lock(&cpu_rq(cpu)->lock); + + sched_disable_window_stats = 1; reset_all_task_stats(); - local_irq_save(flags); - - for_each_possible_cpu(cpu) - raw_spin_lock(&cpu_rq(cpu)->lock); + read_unlock(&tasklist_lock); list_for_each_entry(grp, &related_thread_groups, list) { int j; @@ -3196,7 +3071,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size) sched_load_granule = sched_ravg_window / NUM_LOAD_INDICES; } - enable_window_stats(); + sched_disable_window_stats = 0; for_each_possible_cpu(cpu) { struct rq *rq = cpu_rq(cpu); @@ -3239,10 +3114,10 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size) for_each_possible_cpu(cpu) raw_spin_unlock(&cpu_rq(cpu)->lock); - local_irq_restore(flags); - read_unlock(&related_thread_group_lock); + local_irq_restore(flags); + trace_sched_reset_all_window_stats(window_start, window_size, sched_ktime_clock() - start_ts, reason, old, new); } @@ -3824,13 +3699,13 @@ static void check_for_up_down_migrate_update(const struct cpumask *cpus) } /* Return cluster which can offer required capacity for group */ -static struct sched_cluster * -best_cluster(struct related_thread_group *grp, u64 total_demand) +static struct sched_cluster *best_cluster(struct related_thread_group *grp, + u64 total_demand, bool group_boost) { struct sched_cluster *cluster = NULL; for_each_sched_cluster(cluster) { - if (group_will_fit(cluster, grp, total_demand)) + if (group_will_fit(cluster, grp, total_demand, group_boost)) return cluster; } @@ -3841,6 +3716,9 @@ static void _set_preferred_cluster(struct related_thread_group *grp) { struct task_struct *p; u64 combined_demand = 0; + bool boost_on_big = sched_boost_policy() == SCHED_BOOST_ON_BIG; + bool group_boost = false; + u64 wallclock; if (!sysctl_sched_enable_colocation) { grp->last_update = sched_ktime_clock(); @@ -3848,31 +3726,43 @@ static void _set_preferred_cluster(struct related_thread_group *grp) return; } + if (list_empty(&grp->tasks)) + return; + + wallclock = sched_ktime_clock(); + /* * wakeup of two or more related tasks could race with each other and * could result in multiple calls to _set_preferred_cluster being issued * at same time. 
Avoid overhead in such cases of rechecking preferred * cluster */ - if (sched_ktime_clock() - grp->last_update < sched_ravg_window / 10) + if (wallclock - grp->last_update < sched_ravg_window / 10) return; - list_for_each_entry(p, &grp->tasks, grp_list) + list_for_each_entry(p, &grp->tasks, grp_list) { + if (boost_on_big && task_sched_boost(p)) { + group_boost = true; + break; + } + + if (p->ravg.mark_start < wallclock - + (sched_ravg_window * sched_ravg_hist_size)) + continue; + combined_demand += p->ravg.demand; - grp->preferred_cluster = best_cluster(grp, combined_demand); + } + + grp->preferred_cluster = best_cluster(grp, + combined_demand, group_boost); grp->last_update = sched_ktime_clock(); trace_sched_set_preferred_cluster(grp, combined_demand); } void set_preferred_cluster(struct related_thread_group *grp) { - /* - * Prevent possible deadlock with update_children(). Not updating - * the preferred cluster once is not a big deal. - */ - if (!raw_spin_trylock(&grp->lock)) - return; + raw_spin_lock(&grp->lock); _set_preferred_cluster(grp); raw_spin_unlock(&grp->lock); } @@ -3880,6 +3770,8 @@ void set_preferred_cluster(struct related_thread_group *grp) #define ADD_TASK 0 #define REM_TASK 1 +#define DEFAULT_CGROUP_COLOC_ID 1 + static inline void free_group_cputime(struct related_thread_group *grp) { free_percpu(grp->cpu_time); @@ -4116,64 +4008,19 @@ static void free_related_thread_group(struct rcu_head *rcu) kfree(grp); } -/* - * The thread group for a task can change while we are here. However, - * add_new_task_to_grp() will take care of any tasks that we miss here. - * When a parent exits, and a child thread is simultaneously exiting, - * sched_set_group_id() will synchronize those operations. - */ -static void update_children(struct task_struct *leader, - struct related_thread_group *grp, int event) -{ - struct task_struct *child; - struct rq *rq; - unsigned long flags; - - if (!thread_group_leader(leader)) - return; - - if (event == ADD_TASK && !sysctl_sched_enable_thread_grouping) - return; - - if (thread_group_empty(leader)) - return; - - child = next_thread(leader); - - do { - rq = task_rq_lock(child, &flags); - - if (event == REM_TASK && child->grp && grp == child->grp) { - transfer_busy_time(rq, grp, child, event); - list_del_init(&child->grp_list); - rcu_assign_pointer(child->grp, NULL); - } else if (event == ADD_TASK && !child->grp) { - transfer_busy_time(rq, grp, child, event); - list_add(&child->grp_list, &grp->tasks); - rcu_assign_pointer(child->grp, grp); - } - - task_rq_unlock(rq, child, &flags); - } while_each_thread(leader, child); - -} - static void remove_task_from_group(struct task_struct *p) { struct related_thread_group *grp = p->grp; struct rq *rq; int empty_group = 1; - unsigned long flags; raw_spin_lock(&grp->lock); - rq = task_rq_lock(p, &flags); + rq = __task_rq_lock(p); transfer_busy_time(rq, p->grp, p, REM_TASK); list_del_init(&p->grp_list); rcu_assign_pointer(p->grp, NULL); - task_rq_unlock(rq, p, &flags); - - update_children(p, grp, REM_TASK); + __task_rq_unlock(rq); if (!list_empty(&grp->tasks)) { empty_group = 0; @@ -4182,7 +4029,8 @@ static void remove_task_from_group(struct task_struct *p) raw_spin_unlock(&grp->lock); - if (empty_group) { + /* Reserved groups cannot be destroyed */ + if (empty_group && grp->id != DEFAULT_CGROUP_COLOC_ID) { list_del(&grp->list); call_rcu(&grp->rcu, free_related_thread_group); } @@ -4192,7 +4040,6 @@ static int add_task_to_group(struct task_struct *p, struct related_thread_group *grp) { struct rq *rq; - unsigned long 
flags; raw_spin_lock(&grp->lock); @@ -4200,13 +4047,11 @@ add_task_to_group(struct task_struct *p, struct related_thread_group *grp) * Change p->grp under rq->lock. Will prevent races with read-side * reference of p->grp in various hot-paths */ - rq = task_rq_lock(p, &flags); + rq = __task_rq_lock(p); transfer_busy_time(rq, grp, p, ADD_TASK); list_add(&p->grp_list, &grp->tasks); rcu_assign_pointer(p->grp, grp); - task_rq_unlock(rq, p, &flags); - - update_children(p, grp, ADD_TASK); + __task_rq_unlock(rq); _set_preferred_cluster(grp); @@ -4219,23 +4064,33 @@ void add_new_task_to_grp(struct task_struct *new) { unsigned long flags; struct related_thread_group *grp; - struct task_struct *parent; + struct task_struct *leader = new->group_leader; + unsigned int leader_grp_id = sched_get_group_id(leader); - if (!sysctl_sched_enable_thread_grouping) + if (!sysctl_sched_enable_thread_grouping && + leader_grp_id != DEFAULT_CGROUP_COLOC_ID) return; if (thread_group_leader(new)) return; - parent = new->group_leader; + if (leader_grp_id == DEFAULT_CGROUP_COLOC_ID) { + if (!same_schedtune(new, leader)) + return; + } write_lock_irqsave(&related_thread_group_lock, flags); rcu_read_lock(); - grp = task_related_thread_group(parent); + grp = task_related_thread_group(leader); rcu_read_unlock(); - /* Its possible that update_children() already added us to the group */ + /* + * It's possible that someone already added the new task to the + * group. A leader's thread group is updated prior to calling + * this function. It's also possible that the leader has exited + * the group. In either case, there is nothing else to do. + */ if (!grp || new->grp) { write_unlock_irqrestore(&related_thread_group_lock, flags); return; @@ -4250,14 +4105,55 @@ void add_new_task_to_grp(struct task_struct *new) write_unlock_irqrestore(&related_thread_group_lock, flags); } +#if defined(CONFIG_SCHED_TUNE) && defined(CONFIG_CGROUP_SCHEDTUNE) +/* + * We create a default colocation group at boot. There is no need to + * synchronize tasks between cgroups at creation time because the + * correct cgroup hierarchy is not available at boot. Therefore cgroup + * colocation is turned off by default even though the colocation group + * itself has been allocated. Furthermore, this colocation group cannot + * be destroyed once it has been created. All of this is done as part + * of runtime optimizations. + * + * The job of synchronizing tasks to the colocation group is done when + * the colocation flag in the cgroup is turned on. + */ +static int __init create_default_coloc_group(void) +{ + struct related_thread_group *grp = NULL; + unsigned long flags; + + grp = alloc_related_thread_group(DEFAULT_CGROUP_COLOC_ID); + if (IS_ERR(grp)) { + WARN_ON(1); + return -ENOMEM; + } + + write_lock_irqsave(&related_thread_group_lock, flags); + list_add(&grp->list, &related_thread_groups); + write_unlock_irqrestore(&related_thread_group_lock, flags); + + update_freq_aggregate_threshold(MAX_FREQ_AGGR_THRESH); + return 0; +} +late_initcall(create_default_coloc_group); + +int sync_cgroup_colocation(struct task_struct *p, bool insert) +{ + unsigned int grp_id = insert ? DEFAULT_CGROUP_COLOC_ID : 0; + + return sched_set_group_id(p, grp_id); +} +#endif + int sched_set_group_id(struct task_struct *p, unsigned int group_id) { int rc = 0; unsigned long flags; struct related_thread_group *grp = NULL; - /* Prevents tasks from exiting while we are managing groups.
*/ - write_lock_irqsave(&related_thread_group_lock, flags); + raw_spin_lock_irqsave(&p->pi_lock, flags); + write_lock(&related_thread_group_lock); /* Switching from one group to another directly is not permitted */ if ((current != p && p->flags & PF_EXITING) || @@ -4272,6 +4168,12 @@ int sched_set_group_id(struct task_struct *p, unsigned int group_id) grp = lookup_related_thread_group(group_id); if (!grp) { + /* This is a reserved id */ + if (group_id == DEFAULT_CGROUP_COLOC_ID) { + rc = -EINVAL; + goto done; + } + grp = alloc_related_thread_group(group_id); if (IS_ERR(grp)) { rc = -ENOMEM; @@ -4281,10 +4183,10 @@ int sched_set_group_id(struct task_struct *p, unsigned int group_id) list_add(&grp->list, &related_thread_groups); } - BUG_ON(!grp); rc = add_task_to_group(p, grp); done: - write_unlock_irqrestore(&related_thread_group_lock, flags); + write_unlock(&related_thread_group_lock); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); return rc; } @@ -4529,7 +4431,7 @@ bool early_detection_notify(struct rq *rq, u64 wallclock) struct task_struct *p; int loop_max = 10; - if (!sched_boost() || !rq->cfs.h_nr_running) + if (sched_boost_policy() == SCHED_BOOST_NONE || !rq->cfs.h_nr_running) return 0; rq->ed_task = NULL; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index ba4403e910d8..12a04f30ef77 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1677,8 +1677,13 @@ static int find_lowest_rq_hmp(struct task_struct *task) int prev_cpu = task_cpu(task); u64 cpu_load, min_load = ULLONG_MAX; int i; - int restrict_cluster = sched_boost() ? 0 : - sysctl_sched_restrict_cluster_spill; + int restrict_cluster; + int boost_on_big; + + boost_on_big = sched_boost() == FULL_THROTTLE_BOOST && + sched_boost_policy() == SCHED_BOOST_ON_BIG; + + restrict_cluster = sysctl_sched_restrict_cluster_spill; /* Make sure the mask is initialized first */ if (unlikely(!lowest_mask)) @@ -1697,6 +1702,9 @@ static int find_lowest_rq_hmp(struct task_struct *task) */ for_each_sched_cluster(cluster) { + if (boost_on_big && cluster->capacity != max_possible_capacity) + continue; + cpumask_and(&candidate_mask, &cluster->cpus, lowest_mask); cpumask_andnot(&candidate_mask, &candidate_mask, cpu_isolated_mask); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4289bf6cd642..30838bb9b442 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1061,8 +1061,6 @@ extern unsigned int max_load_scale_factor; extern unsigned int max_possible_capacity; extern unsigned int min_max_possible_capacity; extern unsigned int max_power_cost; -extern unsigned int sched_upmigrate; -extern unsigned int sched_downmigrate; extern unsigned int sched_init_task_load_windows; extern unsigned int up_down_migrate_scale_factor; extern unsigned int sysctl_sched_restrict_cluster_spill; @@ -1106,18 +1104,23 @@ extern void sched_account_irqstart(int cpu, struct task_struct *curr, u64 wallclock); extern unsigned int cpu_temp(int cpu); extern unsigned int nr_eligible_big_tasks(int cpu); -extern void update_up_down_migrate(void); extern int update_preferred_cluster(struct related_thread_group *grp, struct task_struct *p, u32 old_load); extern void set_preferred_cluster(struct related_thread_group *grp); extern void add_new_task_to_grp(struct task_struct *new); +extern unsigned int update_freq_aggregate_threshold(unsigned int threshold); -enum sched_boost_type { +enum sched_boost_policy { SCHED_BOOST_NONE, SCHED_BOOST_ON_BIG, SCHED_BOOST_ON_ALL, }; +#define NO_BOOST 0 +#define FULL_THROTTLE_BOOST 1 +#define CONSERVATIVE_BOOST 2 +#define 
RESTRAINED_BOOST 3 + static inline struct sched_cluster *cpu_cluster(int cpu) { return cpu_rq(cpu)->cluster; } @@ -1387,14 +1390,11 @@ extern void set_hmp_defaults(void); extern int power_delta_exceeded(unsigned int cpu_cost, unsigned int base_cost); extern unsigned int power_cost(int cpu, u64 demand); extern void reset_all_window_stats(u64 window_start, unsigned int window_size); -extern void boost_kick(int cpu); extern int sched_boost(void); extern int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu, - enum sched_boost_type boost_type); -extern enum sched_boost_type sched_boost_type(void); + enum sched_boost_policy boost_policy); +extern enum sched_boost_policy sched_boost_policy(void); extern int task_will_fit(struct task_struct *p, int cpu); -extern int group_will_fit(struct sched_cluster *cluster, - struct related_thread_group *grp, u64 demand); extern u64 cpu_load(int cpu); extern u64 cpu_load_sync(int cpu, int sync); extern int preferred_cluster(struct sched_cluster *cluster, @@ -1422,10 +1422,32 @@ extern u64 cpu_upmigrate_discourage_read_u64(struct cgroup_subsys_state *css, struct cftype *cft); extern int cpu_upmigrate_discourage_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, u64 upmigrate_discourage); -extern void sched_hmp_parse_dt(void); -extern void init_sched_hmp_boost_policy(void); +extern void sched_boost_parse_dt(void); extern void clear_top_tasks_bitmap(unsigned long *bitmap); +#if defined(CONFIG_SCHED_TUNE) && defined(CONFIG_CGROUP_SCHEDTUNE) +extern bool task_sched_boost(struct task_struct *p); +extern int sync_cgroup_colocation(struct task_struct *p, bool insert); +extern bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2); +extern void update_cgroup_boost_settings(void); +extern void restore_cgroup_boost_settings(void); + +#else +static inline bool +same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2) +{ + return true; +} + +static inline bool task_sched_boost(struct task_struct *p) +{ + return true; +} + +static inline void update_cgroup_boost_settings(void) { } +static inline void restore_cgroup_boost_settings(void) { } +#endif + #else /* CONFIG_SCHED_HMP */ struct hmp_sched_stats; @@ -1615,8 +1637,7 @@ static inline void post_big_task_count_change(void) { } static inline void set_hmp_defaults(void) { } static inline void clear_reserved(int cpu) { } -static inline void sched_hmp_parse_dt(void) {} -static inline void init_sched_hmp_boost_policy(void) {} +static inline void sched_boost_parse_dt(void) {} #define trace_sched_cpu_load(...) #define trace_sched_cpu_load_lb(...) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 4f8182302e5e..ee2af8e0b5ce 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -25,6 +25,33 @@ struct schedtune { /* Boost value for tasks on that SchedTune CGroup */ int boost; +#ifdef CONFIG_SCHED_HMP + /* Toggle ability to override sched boost enabled */ + bool sched_boost_no_override; + + /* + * Controls whether a cgroup is eligible for sched boost or not. This + * can temporarily be disabled by the kernel based on the no_override + * flag above. + */ + bool sched_boost_enabled; + + /* + * This tracks the default value of sched_boost_enabled and is used to + * restore the value following any temporary changes to that flag. + */ + bool sched_boost_enabled_backup; + + /* + * Controls whether tasks of this cgroup should be colocated with each + * other and tasks of other cgroups that have the same flag turned on.
+ */ + bool colocate; + + /* Controls whether further updates are allowed to the colocate flag */ + bool colocate_update_disabled; +#endif + }; static inline struct schedtune *css_st(struct cgroup_subsys_state *css) @@ -54,6 +81,13 @@ static inline struct schedtune *parent_st(struct schedtune *st) static struct schedtune root_schedtune = { .boost = 0, +#ifdef CONFIG_SCHED_HMP + .sched_boost_no_override = false, + .sched_boost_enabled = true, + .sched_boost_enabled_backup = true, + .colocate = false, + .colocate_update_disabled = false, +#endif }; /* @@ -97,6 +131,121 @@ struct boost_groups { /* Boost groups affecting each CPU in the system */ DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups); +#ifdef CONFIG_SCHED_HMP +static inline void init_sched_boost(struct schedtune *st) +{ + st->sched_boost_no_override = false; + st->sched_boost_enabled = true; + st->sched_boost_enabled_backup = st->sched_boost_enabled; + st->colocate = false; + st->colocate_update_disabled = false; +} + +bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2) +{ + return task_schedtune(tsk1) == task_schedtune(tsk2); +} + +void update_cgroup_boost_settings(void) +{ + int i; + + for (i = 0; i < BOOSTGROUPS_COUNT; i++) { + if (!allocated_group[i]) + break; + + if (allocated_group[i]->sched_boost_no_override) + continue; + + allocated_group[i]->sched_boost_enabled = false; + } +} + +void restore_cgroup_boost_settings(void) +{ + int i; + + for (i = 0; i < BOOSTGROUPS_COUNT; i++) { + if (!allocated_group[i]) + break; + + allocated_group[i]->sched_boost_enabled = + allocated_group[i]->sched_boost_enabled_backup; + } +} + +bool task_sched_boost(struct task_struct *p) +{ + struct schedtune *st = task_schedtune(p); + + return st->sched_boost_enabled; +} + +static u64 +sched_boost_override_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->sched_boost_no_override; +} + +static int sched_boost_override_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 override) +{ + struct schedtune *st = css_st(css); + + st->sched_boost_no_override = !!override; + + return 0; +} + +static u64 sched_boost_enabled_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->sched_boost_enabled; +} + +static int sched_boost_enabled_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 enable) +{ + struct schedtune *st = css_st(css); + + st->sched_boost_enabled = !!enable; + st->sched_boost_enabled_backup = st->sched_boost_enabled; + + return 0; +} + +static u64 sched_colocate_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->colocate; +} + +static int sched_colocate_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 colocate) +{ + struct schedtune *st = css_st(css); + + if (st->colocate_update_disabled) + return -EPERM; + + st->colocate = !!colocate; + st->colocate_update_disabled = true; + return 0; +} + +#else /* CONFIG_SCHED_HMP */ + +static inline void init_sched_boost(struct schedtune *st) { } + +#endif /* CONFIG_SCHED_HMP */ + static u64 boost_read(struct cgroup_subsys_state *css, struct cftype *cft) { @@ -121,12 +270,45 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft, return 0; } +static void schedtune_attach(struct cgroup_taskset *tset) +{ + struct task_struct *task; + struct cgroup_subsys_state *css; + struct schedtune *st; + bool colocate; + + cgroup_taskset_first(tset, &css); 
+ st = css_st(css); + + colocate = st->colocate; + + cgroup_taskset_for_each(task, css, tset) + sync_cgroup_colocation(task, colocate); +} + static struct cftype files[] = { { .name = "boost", .read_u64 = boost_read, .write_u64 = boost_write, }, +#ifdef CONFIG_SCHED_HMP + { + .name = "sched_boost_no_override", + .read_u64 = sched_boost_override_read, + .write_u64 = sched_boost_override_write, + }, + { + .name = "sched_boost_enabled", + .read_u64 = sched_boost_enabled_read, + .write_u64 = sched_boost_enabled_write, + }, + { + .name = "colocate", + .read_u64 = sched_colocate_read, + .write_u64 = sched_colocate_write, + }, +#endif { } /* terminate */ }; @@ -189,6 +371,7 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css) /* Initialize per CPUs boost group support */ st->idx = idx; + init_sched_boost(st); if (schedtune_boostgroup_init(st)) goto release; @@ -222,6 +405,7 @@ struct cgroup_subsys schedtune_cgrp_subsys = { .legacy_cftypes = files, .early_init = 1, .allow_attach = subsys_cgroup_allow_attach, + .attach = schedtune_attach, }; #endif /* CONFIG_CGROUP_SCHEDTUNE */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 574316f1c344..b7cbd7940f7b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -124,6 +124,7 @@ static int __maybe_unused neg_one = -1; static int zero; static int __maybe_unused one = 1; static int __maybe_unused two = 2; +static int __maybe_unused three = 3; static int __maybe_unused four = 4; static unsigned long one_ul = 1; static int one_hundred = 100; @@ -376,6 +377,22 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, + { + .procname = "sched_group_upmigrate", + .data = &sysctl_sched_group_upmigrate_pct, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sched_hmp_proc_update_handler, + .extra1 = &zero, + }, + { + .procname = "sched_group_downmigrate", + .data = &sysctl_sched_group_downmigrate_pct, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sched_hmp_proc_update_handler, + .extra1 = &zero, + }, { .procname = "sched_init_task_load", .data = &sysctl_sched_init_task_load_pct, @@ -487,6 +504,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = sched_boost_handler, + .extra1 = &zero, + .extra2 = &three, }, #endif /* CONFIG_SCHED_HMP */ #ifdef CONFIG_SCHED_DEBUG
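Note on the new boost interface: sysctl_sched_boost now takes four discrete values (NO_BOOST=0, FULL_THROTTLE_BOOST=1, CONSERVATIVE_BOOST=2, RESTRAINED_BOOST=3), and verify_boost_params() only accepts transitions between the off state and one of the boost types; on-to-on or off-to-off writes fail with -EINVAL. The following is a minimal host-side sketch of that rule, not kernel code; it simply mirrors the expression used in boost.c above.

/* Illustration only: mirrors verify_boost_params() from kernel/sched/boost.c. */
#include <stdio.h>
#include <stdbool.h>

enum { NO_BOOST, FULL_THROTTLE_BOOST, CONSERVATIVE_BOOST, RESTRAINED_BOOST };

static bool verify_boost_params(int old_val, int new_val)
{
	/* only off <-> on transitions are permitted */
	return !(!!old_val == !!new_val);
}

int main(void)
{
	int cases[][2] = {
		{ NO_BOOST, FULL_THROTTLE_BOOST },		/* accepted */
		{ NO_BOOST, RESTRAINED_BOOST },			/* accepted */
		{ CONSERVATIVE_BOOST, NO_BOOST },		/* accepted */
		{ FULL_THROTTLE_BOOST, CONSERVATIVE_BOOST },	/* rejected */
		{ NO_BOOST, NO_BOOST },				/* rejected */
	};
	unsigned int i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		printf("%d -> %d: %s\n", cases[i][0], cases[i][1],
		       verify_boost_params(cases[i][0], cases[i][1]) ?
		       "accepted" : "rejected (-EINVAL)");
	return 0;
}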
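Entering CONSERVATIVE_BOOST clears sched_boost_enabled for every schedtune group that has not opted out through sched_boost_no_override, and returning to NO_BOOST restores the saved defaults. Below is a compact model of that bookkeeping with made-up group names; it parallels update_cgroup_boost_settings()/restore_cgroup_boost_settings() but is only a sketch under those assumptions.

/* Host-side model of the per-cgroup boost override bookkeeping. */
#include <stdbool.h>
#include <stdio.h>

struct st_model {
	const char *name;			/* hypothetical cgroup names */
	bool sched_boost_no_override;
	bool sched_boost_enabled;
	bool sched_boost_enabled_backup;
};

static struct st_model groups[] = {
	{ "top-app",    true,  true, true },
	{ "background", false, true, true },
};

#define NR_GROUPS (sizeof(groups) / sizeof(groups[0]))

static void enter_conservative_boost(void)	/* like update_cgroup_boost_settings() */
{
	unsigned int i;

	for (i = 0; i < NR_GROUPS; i++)
		if (!groups[i].sched_boost_no_override)
			groups[i].sched_boost_enabled = false;
}

static void exit_boost(void)			/* like restore_cgroup_boost_settings() */
{
	unsigned int i;

	for (i = 0; i < NR_GROUPS; i++)
		groups[i].sched_boost_enabled =
			groups[i].sched_boost_enabled_backup;
}

int main(void)
{
	enter_conservative_boost();
	printf("conservative boost: top-app=%d background=%d\n",
	       groups[0].sched_boost_enabled, groups[1].sched_boost_enabled);
	exit_boost();
	printf("boost off:          top-app=%d background=%d\n",
	       groups[0].sched_boost_enabled, groups[1].sched_boost_enabled);
	return 0;
}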
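The new sched_group_upmigrate/sched_group_downmigrate pair gives colocated groups their own hysteresis band, separate from the per-task thresholds: group_will_fit() compares the group's scaled demand against the lower threshold only when considering a smaller cluster than the current preferred one. The sketch below models that decision; the flat threshold units and helper names are simplifications for illustration, not the kernel's pct_to_real()/scale_load_to_cpu() arithmetic.

/* Simplified model of the group placement hysteresis in group_will_fit(). */
#include <stdbool.h>
#include <stdio.h>

struct cluster_model { unsigned int capacity; };

static const unsigned int max_capacity = 1024;
static unsigned int group_upmigrate = 100;	/* illustrative units */
static unsigned int group_downmigrate = 95;

static bool group_fits(const struct cluster_model *cl,
		       const struct cluster_model *prev,
		       unsigned long long demand, bool group_boost)
{
	unsigned int threshold = group_upmigrate;

	if (cl->capacity == max_capacity)
		return true;		/* biggest cluster always fits */
	if (group_boost)
		return false;		/* a boosted member forces the big cluster */
	if (!demand)
		return true;		/* idle group can stay where it is */
	if (prev && cl->capacity < prev->capacity)
		threshold = group_downmigrate;	/* hysteresis on the way down */

	return demand < threshold;
}

int main(void)
{
	struct cluster_model little = { 512 }, big = { 1024 };

	/* demand of 97 stays on the little cluster only if it is already there */
	printf("little, coming from big:    %d\n", group_fits(&little, &big, 97, false));
	printf("little, already preferred:  %d\n", group_fits(&little, &little, 97, false));
	printf("big:                        %d\n", group_fits(&big, &little, 97, false));
	return 0;
}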
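_set_preferred_cluster() now skips group members whose window statistics are stale (mark_start older than sched_ravg_window * sched_ravg_hist_size) and treats the whole group as boosted as soon as one member is sched-boosted while boost-on-big is in effect. The sketch below walks through that aggregation with invented numbers; the field names follow the patch, the constants are purely illustrative.

/* Sketch of the demand aggregation done by _set_preferred_cluster(). */
#include <stdbool.h>
#include <stdio.h>

struct member_model {
	unsigned long long demand;	/* p->ravg.demand */
	unsigned long long mark_start;	/* p->ravg.mark_start */
	bool sched_boosted;		/* task_sched_boost(p) */
};

int main(void)
{
	const unsigned long long window = 20000000ULL;	/* 20ms, illustrative */
	const unsigned int hist_size = 5;
	const unsigned long long now = 100 * window;	/* stand-in wallclock */
	const bool boost_on_big = false;

	struct member_model m[] = {
		{ 8000000ULL, now - window,      false },	/* counted */
		{ 6000000ULL, now - 50 * window, false },	/* stale, skipped */
		{ 1000000ULL, now - 2 * window,  false },	/* counted */
	};
	unsigned long long combined = 0;
	bool group_boost = false;
	unsigned int i;

	for (i = 0; i < sizeof(m) / sizeof(m[0]); i++) {
		if (boost_on_big && m[i].sched_boosted) {
			group_boost = true;
			break;
		}
		if (m[i].mark_start < now - window * hist_size)
			continue;	/* ignore members not seen recently */
		combined += m[i].demand;
	}
	printf("combined demand=%llu group_boost=%d\n", combined, group_boost);
	return 0;
}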
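Finally, the schedtune.colocate attribute ties a cgroup to the reserved colocation group (DEFAULT_CGROUP_COLOC_ID): tasks attached to the cgroup after the flag is set are synced into group id 1 by schedtune_attach()/sync_cgroup_colocation(), and the flag can be written only once because colocate_update_disabled latches. A hedged user-space example follows; the cgroup mount point and group name are assumptions, only the attribute name comes from the patch.

/* Example only: enable colocation for one schedtune cgroup. */
#include <stdio.h>

int main(void)
{
	/* Mount point and group name are assumed; adjust for the target system. */
	const char *path = "/dev/stune/top-app/schedtune.colocate";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/*
	 * Tasks attached to this cgroup from now on are placed in the reserved
	 * colocation group. A second write is expected to fail because the
	 * flag can only be updated once.
	 */
	if (fputs("1\n", f) == EOF)
		perror("write");
	fclose(f);
	return 0;
}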