diff --git a/Documentation/scheduler/sched-hmp.txt b/Documentation/scheduler/sched-hmp.txt
index 091d49ea80cf..766c01d321b5 100644
--- a/Documentation/scheduler/sched-hmp.txt
+++ b/Documentation/scheduler/sched-hmp.txt
@@ -726,6 +726,16 @@ d. /proc/sys/kernel/sched_select_prev_cpu_us
 Default value of sched_select_prev_cpu_us is 2000 (2ms). This can be
 turned off by setting it to 0.
 
+e. /proc/sys/kernel/sched_short_burst_ns
+ This threshold controls whether a task is considered "short-burst" or
+ not. "short-burst" tasks are eligible for packing to avoid the overhead
+ associated with waking up an idle CPU. "non-idle" CPUs that are not
+ loaded with IRQs and can accommodate the waking task without exceeding
+ spill limits are considered. Ties are broken by load, followed by the
+ previous CPU. This tunable does not affect cluster selection; it only
+ affects CPU selection within a given cluster. Packing is skipped for
+ tasks that are eligible for "wake-up-idle" and "boost".
+
 **** 5.2.4 Wakeup Logic for Task "p"
 
 Wakeup task placement logic is as follows:
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 5d0899df64ff..e4aff5e6e17f 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -68,6 +68,7 @@ extern unsigned int sysctl_sched_freq_aggregate;
 extern unsigned int sysctl_sched_enable_thread_grouping;
 extern unsigned int sysctl_sched_freq_aggregate_threshold_pct;
 extern unsigned int sysctl_sched_prefer_sync_wakee_to_waker;
+extern unsigned int sysctl_sched_short_burst;
 
 #else /* CONFIG_SCHED_HMP */
 
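For illustration only (not part of the patch): a minimal user-space sketch of
exercising the new tunable once a CONFIG_SCHED_HMP kernel with this change is
running. The 5 ms value is an arbitrary example threshold, not a recommended
setting, and writing the file requires root.

        /* Illustrative only: write an example threshold (5 ms, in nanoseconds)
         * to the new sysctl and read it back. Error handling kept minimal.
         */
        #include <stdio.h>
        #include <stdlib.h>

        int main(void)
        {
                const char *path = "/proc/sys/kernel/sched_short_burst_ns";
                unsigned int val = 0;
                FILE *f;

                f = fopen(path, "w");
                if (!f || fprintf(f, "%u\n", 5000000U) < 0) {   /* 5 ms */
                        perror(path);
                        return EXIT_FAILURE;
                }
                fclose(f);

                f = fopen(path, "r");
                if (!f || fscanf(f, "%u", &val) != 1) {
                        perror(path);
                        return EXIT_FAILURE;
                }
                fclose(f);

                printf("sched_short_burst_ns = %u\n", val);
                return 0;
        }

Setting the value to 0 disables packing entirely, since no avg_burst can be
below a zero threshold.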
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3db77aff2433..95b961dc7b14 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2591,6 +2591,7 @@ static u32 __compute_runnable_contrib(u64 n)
 #define SBC_FLAG_CSTATE_LOAD		0x100
 #define SBC_FLAG_BEST_SIBLING		0x200
 #define SBC_FLAG_WAKER_CPU		0x400
+#define SBC_FLAG_PACK_TASK		0x800
 
 /* Cluster selection flag */
 #define SBC_FLAG_COLOC_CLUSTER		0x10000
@@ -2607,6 +2608,7 @@ struct cpu_select_env {
 	u8 sync:1;
 	u8 ignore_prev_cpu:1;
 	enum sched_boost_policy boost_policy;
+	u8 pack_task:1;
 	int prev_cpu;
 	DECLARE_BITMAP(candidate_list, NR_CPUS);
 	DECLARE_BITMAP(backup_list, NR_CPUS);
@@ -2958,8 +2960,17 @@ static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
 {
 	int cpu_cost;
 
-	cpu_cost = power_cost(cpu, task_load(env->p) +
+	/*
+	 * We try to find the least loaded *busy* CPU irrespective
+	 * of the power cost.
+	 */
+	if (env->pack_task)
+		cpu_cost = cpu_min_power_cost(cpu);
+
+	else
+		cpu_cost = power_cost(cpu, task_load(env->p) +
 				cpu_cravg_sync(cpu, env->sync));
+
 	if (cpu_cost <= stats->min_cost)
 		__update_cluster_stats(cpu, stats, env, cpu_cost);
 }
@@ -3034,6 +3045,15 @@ static inline int wake_to_idle(struct task_struct *p)
 	       (p->flags & PF_WAKE_UP_IDLE) || sysctl_sched_wake_to_idle;
 }
 
+static inline bool env_has_special_flags(struct cpu_select_env *env)
+{
+	if (env->need_idle || env->boost_policy != SCHED_BOOST_NONE ||
+	    env->reason)
+		return true;
+
+	return false;
+}
+
 static inline bool
 bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
 {
@@ -3041,9 +3061,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
 	struct task_struct *task = env->p;
 	struct sched_cluster *cluster;
 
-	if (env->boost_policy != SCHED_BOOST_NONE || env->reason ||
-	    !task->ravg.mark_start ||
-	    env->need_idle || !sched_short_sleep_task_threshold)
+	if (!task->ravg.mark_start || !sched_short_sleep_task_threshold)
 		return false;
 
 	prev_cpu = env->prev_cpu;
@@ -3092,8 +3110,7 @@
 
 static inline bool wake_to_waker_cluster(struct cpu_select_env *env)
 {
-	return env->boost_policy == SCHED_BOOST_NONE &&
-	       !env->need_idle && !env->reason && env->sync &&
+	return env->sync &&
 	       task_load(current) > sched_big_waker_task_load &&
 	       task_load(env->p) < sched_small_wakee_task_load;
 }
@@ -3118,7 +3135,6 @@ cluster_allowed(struct task_struct *p, struct sched_cluster *cluster)
 	return !cpumask_empty(&tmp_mask);
 }
 
-
 /* return cheapest cpu that can fit this task */
 static int select_best_cpu(struct task_struct *p, int target, int reason,
 			   int sync)
@@ -3128,6 +3144,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 	struct related_thread_group *grp;
 	unsigned int sbc_flag = 0;
 	int cpu = raw_smp_processor_id();
+	bool special;
 
 	struct cpu_select_env env = {
 		.p = p,
@@ -3140,6 +3157,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 		.rtg = NULL,
 		.sbc_best_flag = 0,
 		.sbc_best_cluster_flag = 0,
+		.pack_task = false,
 	};
 
 	env.boost_policy = task_sched_boost(p) ?
@@ -3149,6 +3167,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 	bitmap_zero(env.backup_list, NR_CPUS);
 
 	init_cluster_cpu_stats(&stats);
+	special = env_has_special_flags(&env);
 
 	rcu_read_lock();
 
@@ -3160,7 +3179,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 			clear_bit(pref_cluster->id, env.candidate_list);
 		else
 			env.rtg = grp;
-	} else {
+	} else if (!special) {
 		cluster = cpu_rq(cpu)->cluster;
 		if (wake_to_waker_cluster(&env)) {
 			if (bias_to_waker_cpu(p, cpu)) {
@@ -3181,6 +3200,10 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 		}
 	}
 
+	if (!special && is_short_burst_task(p)) {
+		env.pack_task = true;
+		sbc_flag = SBC_FLAG_PACK_TASK;
+	}
 retry:
 	cluster = select_least_power_cluster(&env);
 
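For illustration only (not part of the patch): a compact, stand-alone model of
the gating logic this change adds to select_best_cpu(). The struct, the helper
names and the 50 us threshold below are simplified stand-ins, not the kernel's
types; the point is only that packing applies when no "special" condition
(need_idle, boost, a non-wakeup reason) is set and the wakee's average burst is
below the threshold, in which case candidate CPUs are compared by the cluster's
minimum power cost instead of the task-size-dependent power cost.

        /* Stand-alone model (not kernel code) of the packing eligibility check. */
        #include <stdbool.h>
        #include <stdio.h>

        struct env_model {
                bool need_idle;                 /* wake-to-idle requested       */
                bool boosted;                   /* any sched_boost policy active */
                bool reason;                    /* non-wakeup placement reason  */
                unsigned long long avg_burst;   /* ns, mirrors p->ravg.avg_burst */
        };

        static unsigned long long short_burst_ns = 50000;       /* example threshold */

        static bool has_special_flags(const struct env_model *env)
        {
                return env->need_idle || env->boosted || env->reason;
        }

        static bool pack_eligible(const struct env_model *env)
        {
                /* Mirrors: !special && is_short_burst_task(p) */
                return !has_special_flags(env) && env->avg_burst < short_burst_ns;
        }

        int main(void)
        {
                struct env_model wakee = { .avg_burst = 20000 };

                printf("pack short-burst wakee: %s\n",
                       pack_eligible(&wakee) ? "yes" : "no");
                wakee.boosted = true;
                printf("pack when boosted:      %s\n",
                       pack_eligible(&wakee) ? "yes" : "no");
                return 0;
        }

This mirrors why the boost/need_idle/reason checks could be hoisted out of
bias_to_prev_cpu() and wake_to_waker_cluster(): once "special" is computed up
front, those paths are only reached when it is false.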
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index b2f3013bfe31..95e618ee1124 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -961,6 +961,13 @@ sched_long_cpu_selection_threshold = 100 * NSEC_PER_MSEC;
 
 unsigned int __read_mostly sysctl_sched_restrict_cluster_spill;
 
+/*
+ * Scheduler tries to avoid waking up idle CPUs for tasks running
+ * in short bursts. If the task average burst is less than
+ * sysctl_sched_short_burst nanoseconds, it is eligible for packing.
+ */
+unsigned int __read_mostly sysctl_sched_short_burst;
+
 static void
 _update_up_down_migrate(unsigned int *up_migrate, unsigned int *down_migrate)
 {
@@ -1553,7 +1560,13 @@ void init_new_task_load(struct task_struct *p, bool idle_task)
 	memset(&p->ravg, 0, sizeof(struct ravg));
 	p->cpu_cycles = 0;
 	p->ravg.curr_burst = 0;
-	p->ravg.avg_burst = 0;
+	/*
+	 * Initialize the avg_burst to twice the threshold, so that
+	 * a task would not be classified as short burst right away
+	 * after fork. It takes at least 6 sleep-wakeup cycles for
+	 * the avg_burst to go below the threshold.
+	 */
+	p->ravg.avg_burst = 2 * (u64)sysctl_sched_short_burst;
 
 	p->ravg.curr_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_KERNEL);
 	p->ravg.prev_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_KERNEL);
@@ -2987,6 +3000,8 @@ void reset_task_stats(struct task_struct *p)
 	p->ravg.curr_window_cpu = curr_window_ptr;
 	p->ravg.prev_window_cpu = prev_window_ptr;
 
+	p->ravg.avg_burst = 2 * (u64)sysctl_sched_short_burst;
+
 	/* Retain EXITING_TASK marker */
 	p->ravg.sum_history[0] = sum;
 }
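For illustration only (not part of the patch): the "at least 6 sleep-wakeup
cycles" remark can be sanity-checked numerically if one assumes avg_burst is
maintained with the scheduler's usual 1/8-weight running average,
avg += (sample - avg) / 8. That update rule is an assumption here, it is not
shown in this diff.

        /* Assumes an update_avg()-style EWMA: avg += (sample - avg) / 8.
         * Starting from 2 * threshold and feeding near-zero bursts, the average
         * first drops below the threshold on the 6th update: 2 * (7/8)^6 ~= 0.90.
         */
        #include <stdio.h>

        int main(void)
        {
                const double threshold = 1.0;   /* normalized sched_short_burst_ns */
                double avg = 2.0 * threshold;   /* init value used by this patch  */
                int cycle = 0;

                while (avg >= threshold) {
                        avg += (0.0 - avg) / 8.0;       /* burst of ~0 ns this cycle */
                        cycle++;
                }
                printf("below threshold after %d cycles (avg = %.3f)\n", cycle, avg);
                return 0;
        }

Under that assumption the program prints 6, matching the comment: a freshly
forked task has to behave as short-burst for several wakeups before it starts
being packed, which filters out one-off short sleeps.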
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 52edd6b158ed..624bededfb85 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1679,6 +1679,7 @@ static int find_lowest_rq_hmp(struct task_struct *task)
 	int i;
 	int restrict_cluster;
 	int boost_on_big;
+	int pack_task, wakeup_latency, least_wakeup_latency = INT_MAX;
 
 	boost_on_big = sched_boost() == FULL_THROTTLE_BOOST &&
 		       sched_boost_policy() == SCHED_BOOST_ON_BIG;
@@ -1695,6 +1696,8 @@ static int find_lowest_rq_hmp(struct task_struct *task)
 	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
 		return best_cpu; /* No targets found */
 
+	pack_task = is_short_burst_task(task);
+
 	/*
 	 * At this point we have built a mask of cpus representing the
 	 * lowest priority tasks in the system.  Now we want to elect
@@ -1720,6 +1723,20 @@ static int find_lowest_rq_hmp(struct task_struct *task)
 			if (!restrict_cluster)
 				cpu_load = scale_load_to_cpu(cpu_load, i);
 
+			if (pack_task) {
+				wakeup_latency = cpu_rq(i)->wakeup_latency;
+
+				if (wakeup_latency > least_wakeup_latency)
+					continue;
+
+				if (wakeup_latency < least_wakeup_latency) {
+					least_wakeup_latency = wakeup_latency;
+					min_load = cpu_load;
+					best_cpu = i;
+					continue;
+				}
+			}
+
 			if (cpu_load < min_load ||
 				(cpu_load == min_load &&
 				(i == prev_cpu || (best_cpu != prev_cpu &&
@@ -1728,6 +1745,7 @@
 				best_cpu = i;
 			}
 		}
+
 		if (restrict_cluster && best_cpu != -1)
 			break;
 	}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6b9f11d9a47c..b9a109e5ef94 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1188,6 +1188,11 @@ static inline int cpu_max_power_cost(int cpu)
 	return cpu_rq(cpu)->cluster->max_power_cost;
 }
 
+static inline int cpu_min_power_cost(int cpu)
+{
+	return cpu_rq(cpu)->cluster->min_power_cost;
+}
+
 static inline u32 cpu_cycles_to_freq(u64 cycles, u32 period)
 {
 	return div64_u64(cycles, period);
 }
@@ -1385,6 +1390,11 @@ static inline u64 cpu_cravg_sync(int cpu, int sync)
 	return load;
 }
 
+static inline bool is_short_burst_task(struct task_struct *p)
+{
+	return p->ravg.avg_burst < sysctl_sched_short_burst;
+}
+
 extern void check_for_migration(struct rq *rq, struct task_struct *p);
 extern void pre_big_task_count_change(const struct cpumask *cpus);
 extern void post_big_task_count_change(const struct cpumask *cpus);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b7cbd7940f7b..d4682d0cdeb1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -507,6 +507,13 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &three,
 	},
+	{
+		.procname	= "sched_short_burst_ns",
+		.data		= &sysctl_sched_short_burst,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #endif	/* CONFIG_SCHED_HMP */
 #ifdef CONFIG_SCHED_DEBUG
 	{
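For illustration only (not part of the patch): a stand-alone sketch of the
per-CPU scan that find_lowest_rq_hmp() gains for packed RT tasks. Among the
candidate CPUs it prefers the one with the smallest wakeup_latency (roughly,
the cheapest idle-state exit), and falls back to load only when latencies are
equal; the prev_cpu and cluster-spill tie-breaks of the real code are omitted,
and the per-CPU arrays below are made-up sample data.

        /* Stand-alone sketch (sample data, not kernel structures) of the RT
         * packing scan: lowest wakeup latency first, load breaks latency ties.
         */
        #include <limits.h>
        #include <stdio.h>

        int main(void)
        {
                /* hypothetical per-CPU snapshot */
                const int wakeup_latency[] = { 40, 0, 0, 40 };  /* cost to leave idle */
                const unsigned int load[]  = { 10, 60, 30, 5 };
                const int ncpu = 4;

                int best_cpu = -1, least_latency = INT_MAX;
                unsigned int min_load = UINT_MAX;

                for (int i = 0; i < ncpu; i++) {
                        if (wakeup_latency[i] > least_latency)
                                continue;
                        if (wakeup_latency[i] < least_latency || load[i] < min_load) {
                                least_latency = wakeup_latency[i];
                                min_load = load[i];
                                best_cpu = i;
                        }
                }
                printf("best_cpu = %d\n", best_cpu);     /* CPU 2: lowest latency, lighter load */
                return 0;
        }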