Merge "sched: Avoid waking idle cpu for short-burst tasks"
commit e26b0777dc

7 changed files with 93 additions and 9 deletions
@@ -726,6 +726,16 @@ d. /proc/sys/kernel/sched_select_prev_cpu_us
 Default value of sched_select_prev_cpu_us is 2000 (2ms). This can be
 turned off by setting it to 0.
 
+e. /proc/sys/kernel/sched_short_burst_ns
+
+This threshold controls whether a task is considered as "short-burst"
+or not. "short-burst" tasks are eligible for packing to avoid overhead
+associated with waking up an idle CPU. "non-idle" CPUs which are not
+loaded with IRQs and can accommodate the waking task without exceeding
+spill limits are considered. Ties are broken by load, followed by the
+previous CPU. This tunable does not affect cluster selection.
+It only affects CPU selection in a given cluster. This packing is
+skipped for tasks that are eligible for "wake-up-idle" and "boost".
 
 **** 5.2.4 Wakeup Logic for Task "p"
 
 Wakeup task placement logic is as follows:
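The packing rule described in the documentation above can be pictured with a small, self-contained sketch. This is not the kernel's implementation (that lives in select_best_cpu() later in this diff); the cpu_stat structure and the pick_packing_cpu() helper are hypothetical and only illustrate the documented filter (non-idle, not IRQ-loaded, within spill limits) and the tie-break order (least load first, then the previous CPU).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical per-CPU snapshot used only for this illustration. */
struct cpu_stat {
	int cpu;
	bool idle;
	bool irq_loaded;
	bool within_spill_limit;	/* can take the task without spilling */
	uint64_t load;
};

/* Return a packing target per the rule above; -1 means "no candidate,
 * fall back to the normal placement path". */
static int pick_packing_cpu(const struct cpu_stat *stats, int n, int prev_cpu)
{
	uint64_t best_load = UINT64_MAX;
	int best = -1;

	for (int i = 0; i < n; i++) {
		const struct cpu_stat *s = &stats[i];

		/* Only non-idle CPUs that are not loaded with IRQs and
		 * stay within spill limits are considered. */
		if (s->idle || s->irq_loaded || !s->within_spill_limit)
			continue;

		/* Ties are broken by load, then by the previous CPU. */
		if (s->load < best_load ||
		    (s->load == best_load && s->cpu == prev_cpu)) {
			best_load = s->load;
			best = s->cpu;
		}
	}

	return best;
}

int main(void)
{
	struct cpu_stat snapshot[] = {
		{ .cpu = 0, .idle = true, .load = 0 },
		{ .cpu = 1, .within_spill_limit = true, .load = 300 },
		{ .cpu = 2, .within_spill_limit = true, .load = 300 },
		{ .cpu = 3, .irq_loaded = true, .within_spill_limit = true, .load = 100 },
	};

	/* CPU 2 wins: same load as CPU 1, but it was the previous CPU. */
	printf("packing target: %d\n", pick_packing_cpu(snapshot, 4, 2));
	return 0;
}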
@@ -72,6 +72,7 @@ extern unsigned int sysctl_sched_freq_aggregate;
 extern unsigned int sysctl_sched_enable_thread_grouping;
 extern unsigned int sysctl_sched_freq_aggregate_threshold_pct;
 extern unsigned int sysctl_sched_prefer_sync_wakee_to_waker;
+extern unsigned int sysctl_sched_short_burst;
 
 #else /* CONFIG_SCHED_HMP */
 
@@ -2613,6 +2613,7 @@ static u32 __compute_runnable_contrib(u64 n)
 #define SBC_FLAG_CSTATE_LOAD 0x100
 #define SBC_FLAG_BEST_SIBLING 0x200
 #define SBC_FLAG_WAKER_CPU 0x400
+#define SBC_FLAG_PACK_TASK 0x800
 
 /* Cluster selection flag */
 #define SBC_FLAG_COLOC_CLUSTER 0x10000

@@ -2629,6 +2630,7 @@ struct cpu_select_env {
 	u8 sync:1;
 	u8 ignore_prev_cpu:1;
 	enum sched_boost_policy boost_policy;
+	u8 pack_task:1;
 	int prev_cpu;
 	DECLARE_BITMAP(candidate_list, NR_CPUS);
 	DECLARE_BITMAP(backup_list, NR_CPUS);
@@ -2980,8 +2982,17 @@ static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
 {
 	int cpu_cost;
 
-	cpu_cost = power_cost(cpu, task_load(env->p) +
+	/*
+	 * We try to find the least loaded *busy* CPU irrespective
+	 * of the power cost.
+	 */
+	if (env->pack_task)
+		cpu_cost = cpu_min_power_cost(cpu);
+
+	else
+		cpu_cost = power_cost(cpu, task_load(env->p) +
 				cpu_cravg_sync(cpu, env->sync));
 
 	if (cpu_cost <= stats->min_cost)
 		__update_cluster_stats(cpu, stats, env, cpu_cost);
 }
@@ -3056,6 +3067,15 @@ static inline int wake_to_idle(struct task_struct *p)
 		(p->flags & PF_WAKE_UP_IDLE) || sysctl_sched_wake_to_idle;
 }
 
+static inline bool env_has_special_flags(struct cpu_select_env *env)
+{
+	if (env->need_idle || env->boost_policy != SCHED_BOOST_NONE ||
+	    env->reason)
+		return true;
+
+	return false;
+}
+
 static inline bool
 bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
 {
@@ -3063,9 +3083,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
 	struct task_struct *task = env->p;
 	struct sched_cluster *cluster;
 
-	if (env->boost_policy != SCHED_BOOST_NONE || env->reason ||
-	    !task->ravg.mark_start ||
-	    env->need_idle || !sched_short_sleep_task_threshold)
+	if (!task->ravg.mark_start || !sched_short_sleep_task_threshold)
 		return false;
 
 	prev_cpu = env->prev_cpu;

@@ -3114,8 +3132,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
 static inline bool
 wake_to_waker_cluster(struct cpu_select_env *env)
 {
-	return env->boost_policy == SCHED_BOOST_NONE &&
-	       !env->need_idle && !env->reason && env->sync &&
+	return env->sync &&
 	       task_load(current) > sched_big_waker_task_load &&
 	       task_load(env->p) < sched_small_wakee_task_load;
 }
@@ -3140,7 +3157,6 @@ cluster_allowed(struct task_struct *p, struct sched_cluster *cluster)
 	return !cpumask_empty(&tmp_mask);
 }
 
-
 /* return cheapest cpu that can fit this task */
 static int select_best_cpu(struct task_struct *p, int target, int reason,
 			   int sync)

@@ -3150,6 +3166,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 	struct related_thread_group *grp;
 	unsigned int sbc_flag = 0;
 	int cpu = raw_smp_processor_id();
+	bool special;
 
 	struct cpu_select_env env = {
 		.p = p,
@@ -3162,6 +3179,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 		.rtg = NULL,
 		.sbc_best_flag = 0,
 		.sbc_best_cluster_flag = 0,
+		.pack_task = false,
 	};
 
 	env.boost_policy = task_sched_boost(p) ?

@@ -3171,6 +3189,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 	bitmap_zero(env.backup_list, NR_CPUS);
 
 	init_cluster_cpu_stats(&stats);
+	special = env_has_special_flags(&env);
 
 	rcu_read_lock();
 
@@ -3182,7 +3201,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 			clear_bit(pref_cluster->id, env.candidate_list);
 		else
 			env.rtg = grp;
-	} else {
+	} else if (!special) {
 		cluster = cpu_rq(cpu)->cluster;
 		if (wake_to_waker_cluster(&env)) {
 			if (bias_to_waker_cpu(p, cpu)) {

@@ -3203,6 +3222,10 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 		}
 	}
 
+	if (!special && is_short_burst_task(p)) {
+		env.pack_task = true;
+		sbc_flag = SBC_FLAG_PACK_TASK;
+	}
 retry:
 	cluster = select_least_power_cluster(&env);
 
@@ -961,6 +961,13 @@ sched_long_cpu_selection_threshold = 100 * NSEC_PER_MSEC;
 
 unsigned int __read_mostly sysctl_sched_restrict_cluster_spill;
 
+/*
+ * Scheduler tries to avoid waking up idle CPUs for tasks running
+ * in short bursts. If the task average burst is less than
+ * sysctl_sched_short_burst nanoseconds, it is eligible for packing.
+ */
+unsigned int __read_mostly sysctl_sched_short_burst;
+
 static void
 _update_up_down_migrate(unsigned int *up_migrate, unsigned int *down_migrate)
 {
@@ -1553,7 +1560,13 @@ void init_new_task_load(struct task_struct *p, bool idle_task)
 	memset(&p->ravg, 0, sizeof(struct ravg));
 	p->cpu_cycles = 0;
 	p->ravg.curr_burst = 0;
-	p->ravg.avg_burst = 0;
+	/*
+	 * Initialize the avg_burst to twice the threshold, so that
+	 * a task would not be classified as short burst right away
+	 * after fork. It takes at least 6 sleep-wakeup cycles for
+	 * the avg_burst to go below the threshold.
+	 */
+	p->ravg.avg_burst = 2 * (u64)sysctl_sched_short_burst;
 
 	p->ravg.curr_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_KERNEL);
 	p->ravg.prev_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_KERNEL);
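The "at least 6 sleep-wakeup cycles" figure in the comment above can be checked with a short userspace sketch. It assumes avg_burst is folded in with an EWMA-style update of the form avg += (sample - avg) / 8 on each cycle; that update path is not part of this diff, so the divisor is an assumption. In the best case of zero-length bursts, starting from 2 * threshold, the sixth update is the first to fall below the threshold, since (7/8)^6 ≈ 0.449.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t threshold = 50000000ULL;	/* example threshold: 50 ms in ns */
	uint64_t avg_burst = 2 * threshold;	/* matches init_new_task_load() above */
	int cycles = 0;

	/* Best case: the task runs for ~0 ns between sleeps on every cycle. */
	while (avg_burst >= threshold) {
		int64_t diff = 0 - (int64_t)avg_burst;	/* sample == 0 */

		avg_burst += diff / 8;	/* assumed EWMA-style decay */
		cycles++;
	}

	printf("classified as short-burst after %d cycles\n", cycles);	/* prints 6 */
	return 0;
}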
@@ -2987,6 +3000,8 @@ void reset_task_stats(struct task_struct *p)
 	p->ravg.curr_window_cpu = curr_window_ptr;
 	p->ravg.prev_window_cpu = prev_window_ptr;
 
+	p->ravg.avg_burst = 2 * (u64)sysctl_sched_short_burst;
+
 	/* Retain EXITING_TASK marker */
 	p->ravg.sum_history[0] = sum;
 }
@@ -1723,6 +1723,7 @@ static int find_lowest_rq_hmp(struct task_struct *task)
 	int i;
 	int restrict_cluster;
 	int boost_on_big;
+	int pack_task, wakeup_latency, least_wakeup_latency = INT_MAX;
 
 	boost_on_big = sched_boost() == FULL_THROTTLE_BOOST &&
 			sched_boost_policy() == SCHED_BOOST_ON_BIG;

@@ -1739,6 +1740,8 @@ static int find_lowest_rq_hmp(struct task_struct *task)
 	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
 		return best_cpu; /* No targets found */
 
+	pack_task = is_short_burst_task(task);
+
 	/*
 	 * At this point we have built a mask of cpus representing the
 	 * lowest priority tasks in the system. Now we want to elect
@@ -1764,6 +1767,20 @@ static int find_lowest_rq_hmp(struct task_struct *task)
 			if (!restrict_cluster)
 				cpu_load = scale_load_to_cpu(cpu_load, i);
 
+			if (pack_task) {
+				wakeup_latency = cpu_rq(i)->wakeup_latency;
+
+				if (wakeup_latency > least_wakeup_latency)
+					continue;
+
+				if (wakeup_latency < least_wakeup_latency) {
+					least_wakeup_latency = wakeup_latency;
+					min_load = cpu_load;
+					best_cpu = i;
+					continue;
+				}
+			}
+
 			if (cpu_load < min_load ||
 			    (cpu_load == min_load &&
 			     (i == prev_cpu || (best_cpu != prev_cpu &&

@@ -1772,6 +1789,7 @@ static int find_lowest_rq_hmp(struct task_struct *task)
 				best_cpu = i;
 			}
 		}
+
 		if (restrict_cluster && best_cpu != -1)
 			break;
 	}
@@ -1216,6 +1216,11 @@ static inline int cpu_max_power_cost(int cpu)
 	return cpu_rq(cpu)->cluster->max_power_cost;
 }
 
+static inline int cpu_min_power_cost(int cpu)
+{
+	return cpu_rq(cpu)->cluster->min_power_cost;
+}
+
 static inline u32 cpu_cycles_to_freq(u64 cycles, u32 period)
 {
 	return div64_u64(cycles, period);

@@ -1413,6 +1418,11 @@ static inline u64 cpu_cravg_sync(int cpu, int sync)
 	return load;
 }
 
+static inline bool is_short_burst_task(struct task_struct *p)
+{
+	return p->ravg.avg_burst < sysctl_sched_short_burst;
+}
+
 extern void check_for_migration(struct rq *rq, struct task_struct *p);
 extern void pre_big_task_count_change(const struct cpumask *cpus);
 extern void post_big_task_count_change(const struct cpumask *cpus);
@@ -507,6 +507,13 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &three,
 	},
+	{
+		.procname	= "sched_short_burst_ns",
+		.data		= &sysctl_sched_short_burst,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #endif /* CONFIG_SCHED_HMP */
 #ifdef CONFIG_SCHED_DEBUG
 	{
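To close the loop, here is a minimal userspace sketch of turning the new tunable on at runtime. The 100000 ns value is an arbitrary example, not a recommendation from this patch. Note that sysctl_sched_short_burst is zero-initialized and is_short_burst_task() compares with '<', so packing stays disabled until a non-zero threshold is written here.

#include <stdio.h>

int main(void)
{
	/* Equivalent to: echo 100000 > /proc/sys/kernel/sched_short_burst_ns */
	FILE *f = fopen("/proc/sys/kernel/sched_short_burst_ns", "w");

	if (!f) {
		perror("sched_short_burst_ns");
		return 1;
	}

	fprintf(f, "%u\n", 100000u);	/* threshold in nanoseconds (example value) */
	fclose(f);
	return 0;
}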