Merge "sched: Avoid waking idle cpu for short-burst tasks"

commit e26b0777dc
7 changed files with 93 additions and 9 deletions
@@ -726,6 +726,16 @@ d. /proc/sys/kernel/sched_select_prev_cpu_us
 Default value of sched_select_prev_cpu_us is 2000 (2ms). This can be
 turned off by setting it to 0.
 
+e. /proc/sys/kernel/sched_short_burst_ns
+
+This threshold controls whether a task is considered "short-burst".
+"short-burst" tasks are eligible for packing, which avoids the overhead
+of waking up an idle CPU. Only "non-idle" CPUs that are not loaded with
+IRQs and can accommodate the waking task without exceeding spill limits
+are considered. Ties are broken by load, then by previous CPU. This
+tunable does not affect cluster selection; it only affects CPU selection
+within a given cluster. Packing is skipped for tasks that are eligible
+for "wake-up-idle" and "boost".
 
 **** 5.2.4 Wakeup Logic for Task "p"
 
 Wakeup task placement logic is as follows:
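The documentation paragraph above compresses the whole packing rule. For orientation before the code hunks, here is a stand-alone sketch of that rule; all names (struct cpu_state, spill_limit, and so on) are hypothetical placeholders, not the kernel's types, and the real gating is spread across select_best_cpu() and update_cluster_stats() below.

#include <stdbool.h>

struct cpu_state {
	int id;
	bool idle;		/* packing never wakes an idle CPU */
	bool irq_loaded;	/* CPUs busy servicing IRQs are skipped */
	unsigned long load;
	unsigned long spill_limit;
};

/* Pick a packing target within one cluster, or -1 to fall back. */
static int pick_packing_cpu(const struct cpu_state *cpus, int n,
			    unsigned long task_load, int prev_cpu)
{
	int best = -1;

	for (int i = 0; i < n; i++) {
		const struct cpu_state *c = &cpus[i];

		if (c->idle || c->irq_loaded ||
		    c->load + task_load > c->spill_limit)
			continue;

		/* ties broken by load, then by previous CPU */
		if (best < 0 || c->load < cpus[best].load ||
		    (c->load == cpus[best].load && c->id == prev_cpu))
			best = i;
	}

	return best;
}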
@@ -72,6 +72,7 @@ extern unsigned int sysctl_sched_freq_aggregate;
 extern unsigned int sysctl_sched_enable_thread_grouping;
 extern unsigned int sysctl_sched_freq_aggregate_threshold_pct;
 extern unsigned int sysctl_sched_prefer_sync_wakee_to_waker;
+extern unsigned int sysctl_sched_short_burst;
 
 #else /* CONFIG_SCHED_HMP */
@@ -2613,6 +2613,7 @@ static u32 __compute_runnable_contrib(u64 n)
 #define SBC_FLAG_CSTATE_LOAD		0x100
 #define SBC_FLAG_BEST_SIBLING		0x200
 #define SBC_FLAG_WAKER_CPU		0x400
+#define SBC_FLAG_PACK_TASK		0x800
 
 /* Cluster selection flag */
 #define SBC_FLAG_COLOC_CLUSTER		0x10000
@@ -2629,6 +2630,7 @@ struct cpu_select_env {
 	u8 sync:1;
 	u8 ignore_prev_cpu:1;
 	enum sched_boost_policy boost_policy;
+	u8 pack_task:1;
 	int prev_cpu;
 	DECLARE_BITMAP(candidate_list, NR_CPUS);
 	DECLARE_BITMAP(backup_list, NR_CPUS);
@@ -2980,8 +2982,17 @@ static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
 {
 	int cpu_cost;
 
-	cpu_cost = power_cost(cpu, task_load(env->p) +
-			      cpu_cravg_sync(cpu, env->sync));
+	/*
+	 * We try to find the least loaded *busy* CPU irrespective
+	 * of the power cost.
+	 */
+	if (env->pack_task)
+		cpu_cost = cpu_min_power_cost(cpu);
+
+	else
+		cpu_cost = power_cost(cpu, task_load(env->p) +
+			      cpu_cravg_sync(cpu, env->sync));
+
 	if (cpu_cost <= stats->min_cost)
 		__update_cluster_stats(cpu, stats, env, cpu_cost);
 }
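Why a constant cost is enough here: within one cluster, cpu_min_power_cost() returns the same cluster-wide minimum for every CPU, so the cpu_cost <= stats->min_cost test passes for every candidate and __update_cluster_stats() is left to break the tie, presumably on load, which is exactly the "least loaded busy CPU" the new comment asks for. A toy version of that degenerate comparison (names hypothetical, not the kernel's):

#include <limits.h>

struct pick {
	int min_cost;		/* seeded to INT_MAX before the scan */
	unsigned long min_load;
	int best_cpu;
};

static void consider(struct pick *s, int cpu, int cost, unsigned long load)
{
	if (cost > s->min_cost)
		return;
	/* equal cost (the packing case): least loaded CPU wins */
	if (cost < s->min_cost || load < s->min_load) {
		s->min_cost = cost;
		s->min_load = load;
		s->best_cpu = cpu;
	}
}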
@@ -3056,6 +3067,15 @@ static inline int wake_to_idle(struct task_struct *p)
 		(p->flags & PF_WAKE_UP_IDLE) || sysctl_sched_wake_to_idle;
 }
 
+static inline bool env_has_special_flags(struct cpu_select_env *env)
+{
+	if (env->need_idle || env->boost_policy != SCHED_BOOST_NONE ||
+	    env->reason)
+		return true;
+
+	return false;
+}
+
 static inline bool
 bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
 {
@@ -3063,9 +3083,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
 	struct task_struct *task = env->p;
 	struct sched_cluster *cluster;
 
-	if (env->boost_policy != SCHED_BOOST_NONE || env->reason ||
-	    !task->ravg.mark_start ||
-	    env->need_idle || !sched_short_sleep_task_threshold)
+	if (!task->ravg.mark_start || !sched_short_sleep_task_threshold)
 		return false;
 
 	prev_cpu = env->prev_cpu;
@@ -3114,8 +3132,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
 static inline bool
 wake_to_waker_cluster(struct cpu_select_env *env)
 {
-	return env->boost_policy == SCHED_BOOST_NONE &&
-	       !env->need_idle && !env->reason && env->sync &&
+	return env->sync &&
 	       task_load(current) > sched_big_waker_task_load &&
 	       task_load(env->p) < sched_small_wakee_task_load;
 }
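The surviving condition reads: on a synchronous wakeup, a heavy waker waking a light task keeps the wakee near the waker's cluster, on the theory that the wakee runs briefly and benefits from sharing cache with the waker. The dropped checks did not disappear; they were hoisted into env_has_special_flags() above. A stand-alone rendering with made-up threshold values (the kernel derives the two load thresholds from tunables not shown in this diff):

#include <stdbool.h>

/* hypothetical values, in the scheduler's scaled-load units */
static const unsigned long big_waker_load = 200;
static const unsigned long small_wakee_load = 20;

static bool wake_to_waker_cluster_demo(bool sync_wakeup,
				       unsigned long waker_load,
				       unsigned long wakee_load)
{
	return sync_wakeup &&
	       waker_load > big_waker_load &&
	       wakee_load < small_wakee_load;
}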
@@ -3140,7 +3157,6 @@ cluster_allowed(struct task_struct *p, struct sched_cluster *cluster)
 	return !cpumask_empty(&tmp_mask);
 }
 
-
 /* return cheapest cpu that can fit this task */
 static int select_best_cpu(struct task_struct *p, int target, int reason,
 			   int sync)
@@ -3150,6 +3166,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 	struct related_thread_group *grp;
 	unsigned int sbc_flag = 0;
 	int cpu = raw_smp_processor_id();
+	bool special;
 
 	struct cpu_select_env env = {
 		.p = p,
@@ -3162,6 +3179,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 		.rtg = NULL,
 		.sbc_best_flag = 0,
 		.sbc_best_cluster_flag = 0,
+		.pack_task = false,
 	};
 
 	env.boost_policy = task_sched_boost(p) ?
@@ -3171,6 +3189,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 	bitmap_zero(env.backup_list, NR_CPUS);
 
 	init_cluster_cpu_stats(&stats);
+	special = env_has_special_flags(&env);
 
 	rcu_read_lock();
@@ -3182,7 +3201,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 			clear_bit(pref_cluster->id, env.candidate_list);
 		else
 			env.rtg = grp;
-	} else {
+	} else if (!special) {
 		cluster = cpu_rq(cpu)->cluster;
 		if (wake_to_waker_cluster(&env)) {
 			if (bias_to_waker_cpu(p, cpu)) {
@@ -3203,6 +3222,10 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 		}
 	}
 
+	if (!special && is_short_burst_task(p)) {
+		env.pack_task = true;
+		sbc_flag = SBC_FLAG_PACK_TASK;
+	}
 retry:
 	cluster = select_least_power_cluster(&env);
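Taking the select_best_cpu() hunks together: "special" wakeups (need_idle, an active boost, or a non-wakeup placement reason) bypass the waker-cluster bias, the prev-CPU bias, and packing alike, and pack_task is latched only when none of them is set. A condensed model of that ordering, with hypothetical types standing in for the kernel's:

#include <stdbool.h>

struct wake_env {
	bool need_idle;
	bool boosted;
	int reason;		/* nonzero for non-wakeup placement */
	bool pack_task;
};

static bool has_special_flags(const struct wake_env *e)
{
	return e->need_idle || e->boosted || e->reason;
}

/* mirrors the gating order added in this commit */
static void classify_wakeup(struct wake_env *e, bool short_burst)
{
	if (has_special_flags(e))
		return;		/* plain power-aware placement */

	/* the waker-cluster and prev-CPU biases would run here */

	if (short_burst)
		e->pack_task = true;	/* SBC_FLAG_PACK_TASK */
}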
@@ -961,6 +961,13 @@ sched_long_cpu_selection_threshold = 100 * NSEC_PER_MSEC;
 
 unsigned int __read_mostly sysctl_sched_restrict_cluster_spill;
 
+/*
+ * Scheduler tries to avoid waking up idle CPUs for tasks running
+ * in short bursts. If the task average burst is less than
+ * sysctl_sched_short_burst nanoseconds, it is eligible for packing.
+ */
+unsigned int __read_mostly sysctl_sched_short_burst;
+
 static void
 _update_up_down_migrate(unsigned int *up_migrate, unsigned int *down_migrate)
 {
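Note that the new global carries no initializer: sysctl_sched_short_burst defaults to 0, and because the classifier (in the sched.h hunk below) compares an unsigned average with a strict less-than, no task is ever tagged short-burst until an administrator raises the threshold. The feature therefore ships disabled. A two-line check of that edge case:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t avg_burst = 0;		/* even a zero-length burst */
	unsigned int threshold = 0;	/* the compiled-in default */

	/* strict '<' means a zero threshold disables packing */
	assert(!(avg_burst < threshold));
	return 0;
}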
@@ -1553,7 +1560,13 @@ void init_new_task_load(struct task_struct *p, bool idle_task)
 	memset(&p->ravg, 0, sizeof(struct ravg));
 	p->cpu_cycles = 0;
 	p->ravg.curr_burst = 0;
-	p->ravg.avg_burst = 0;
+	/*
+	 * Initialize the avg_burst to twice the threshold, so that
+	 * a task would not be classified as short burst right away
+	 * after fork. It takes at least 6 sleep-wakeup cycles for
+	 * the avg_burst to go below the threshold.
+	 */
+	p->ravg.avg_burst = 2 * (u64)sysctl_sched_short_burst;
 
 	p->ravg.curr_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_KERNEL);
 	p->ravg.prev_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_KERNEL);
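Where the "at least 6 sleep-wakeup cycles" figure comes from: assuming avg_burst is folded with the scheduler's update_avg() helper, which moves the average one eighth of the way toward each new sample (an assumption about code outside this diff), the worst case is a task whose bursts all have length zero. The average then decays from 2*T as 2*T*(7/8)^n, and (7/8)^n first drops below 1/2 at n = 6:

#include <stdio.h>

int main(void)
{
	unsigned long long threshold = 1000000ULL; /* T, e.g. 1 ms */
	unsigned long long avg = 2 * threshold;	   /* fork-time seed */
	int cycles = 0;

	/* update_avg()-style step, sampling zero-length bursts */
	while (avg >= threshold) {
		avg -= avg / 8;
		cycles++;
	}
	printf("short-burst after %d cycles\n", cycles); /* prints 6 */
	return 0;
}

Real bursts are longer than zero, so six cycles is a floor, matching the comment's "at least".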
@@ -2987,6 +3000,8 @@ void reset_task_stats(struct task_struct *p)
 	p->ravg.curr_window_cpu = curr_window_ptr;
 	p->ravg.prev_window_cpu = prev_window_ptr;
 
+	p->ravg.avg_burst = 2 * (u64)sysctl_sched_short_burst;
+
 	/* Retain EXITING_TASK marker */
 	p->ravg.sum_history[0] = sum;
 }
@@ -1723,6 +1723,7 @@ static int find_lowest_rq_hmp(struct task_struct *task)
 	int i;
 	int restrict_cluster;
 	int boost_on_big;
+	int pack_task, wakeup_latency, least_wakeup_latency = INT_MAX;
 
 	boost_on_big = sched_boost() == FULL_THROTTLE_BOOST &&
 			sched_boost_policy() == SCHED_BOOST_ON_BIG;
@@ -1739,6 +1740,8 @@ static int find_lowest_rq_hmp(struct task_struct *task)
 	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
 		return best_cpu; /* No targets found */
 
+	pack_task = is_short_burst_task(task);
+
 	/*
 	 * At this point we have built a mask of cpus representing the
 	 * lowest priority tasks in the system. Now we want to elect
@@ -1764,6 +1767,20 @@ static int find_lowest_rq_hmp(struct task_struct *task)
 			if (!restrict_cluster)
 				cpu_load = scale_load_to_cpu(cpu_load, i);
 
+			if (pack_task) {
+				wakeup_latency = cpu_rq(i)->wakeup_latency;
+
+				if (wakeup_latency > least_wakeup_latency)
+					continue;
+
+				if (wakeup_latency < least_wakeup_latency) {
+					least_wakeup_latency = wakeup_latency;
+					min_load = cpu_load;
+					best_cpu = i;
+					continue;
+				}
+			}
+
 			if (cpu_load < min_load ||
 			    (cpu_load == min_load &&
 			    (i == prev_cpu || (best_cpu != prev_cpu &&
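The three-way latency test above stages the decision: a CPU with strictly higher wakeup latency is skipped outright, a strictly lower one is adopted unconditionally (resetting the load baseline along with best_cpu), and only CPUs tied on wakeup latency fall through to the pre-existing load comparison. Pulled out on its own (a hypothetical helper, not kernel code):

#include <limits.h>

struct rt_pick {
	int least_latency;	/* starts at INT_MAX */
	unsigned long min_load;
	int best_cpu;
};

/* returns 1 when the caller should skip the load comparison */
static int latency_filter(struct rt_pick *s, int cpu, int latency,
			  unsigned long load)
{
	if (latency > s->least_latency)
		return 1;		/* strictly worse: skip CPU */

	if (latency < s->least_latency) {
		s->least_latency = latency;	/* strictly better */
		s->min_load = load;
		s->best_cpu = cpu;
		return 1;
	}

	return 0;	/* tie: break it on load, as before */
}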
@@ -1772,6 +1789,7 @@ static int find_lowest_rq_hmp(struct task_struct *task)
 				best_cpu = i;
 			}
 		}
+
 		if (restrict_cluster && best_cpu != -1)
 			break;
 	}
@@ -1216,6 +1216,11 @@ static inline int cpu_max_power_cost(int cpu)
 	return cpu_rq(cpu)->cluster->max_power_cost;
 }
 
+static inline int cpu_min_power_cost(int cpu)
+{
+	return cpu_rq(cpu)->cluster->min_power_cost;
+}
+
 static inline u32 cpu_cycles_to_freq(u64 cycles, u32 period)
 {
 	return div64_u64(cycles, period);
@@ -1413,6 +1418,11 @@ static inline u64 cpu_cravg_sync(int cpu, int sync)
 	return load;
 }
 
+static inline bool is_short_burst_task(struct task_struct *p)
+{
+	return p->ravg.avg_burst < sysctl_sched_short_burst;
+}
+
 extern void check_for_migration(struct rq *rq, struct task_struct *p);
 extern void pre_big_task_count_change(const struct cpumask *cpus);
 extern void post_big_task_count_change(const struct cpumask *cpus);
@@ -507,6 +507,13 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &three,
 	},
+	{
+		.procname	= "sched_short_burst_ns",
+		.data		= &sysctl_sched_short_burst,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #endif	/* CONFIG_SCHED_HMP */
 #ifdef CONFIG_SCHED_DEBUG
 	{
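With the table entry in place, the tunable appears as /proc/sys/kernel/sched_short_burst_ns: world-readable, root-writable, handled by proc_dointvec. Since the commit leaves the default at 0 (packing off), enabling the feature means writing a nonzero nanosecond threshold; a minimal sketch, using 5000000 (5 ms) purely as an illustrative value the commit itself does not suggest:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/sched_short_burst_ns", "w");

	if (!f) {
		perror("sched_short_burst_ns");
		return 1;
	}

	/* tasks averaging bursts below 5 ms become packable */
	fprintf(f, "%u\n", 5000000u);

	return fclose(f) ? 1 : 0;
}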