Merge "sched: Avoid waking idle cpu for short-burst tasks"

Linux Build Service Account 2016-12-19 17:04:54 -08:00 committed by Gerrit - the friendly Code Review server
commit e26b0777dc
7 changed files with 93 additions and 9 deletions

View file

@@ -726,6 +726,16 @@ d. /proc/sys/kernel/sched_select_prev_cpu_us
Default value of sched_select_prev_cpu_us is 2000 (2ms). This can be
turned off by setting it to 0.
e. /proc/sys/kernel/sched_short_burst_ns
This threshold controls whether a task is considered as "short-burst"
or not. "short-burst" tasks are eligible for packing to avoid overhead
associated with waking up an idle CPU. "non-idle" CPUs which are not
loaded with IRQs and can accommodate the waking task without exceeding
spill limits are considered. The ties are broken with load followed
by previous CPU. This tunable does not affect cluster selection.
It only affects CPU selection in a given cluster. This packing is
skipped for tasks that are eligible for "wake-up-idle" and "boost".
**** 5.2.4 Wakeup Logic for Task "p"
Wakeup task placement logic is as follows:
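
As a rough, illustrative restatement of the tunable described above (a sketch, not kernel code; the field names mirror the helpers added later in this commit, and the default of 0 effectively disables packing since no average burst can be strictly below zero):

    #include <stdbool.h>

    /* Hypothetical, simplified stand-in for the task state this change reads. */
    struct task_sample {
        unsigned long long avg_burst_ns;   /* like p->ravg.avg_burst */
        bool wake_up_idle;                 /* "wake-up-idle" eligible */
        bool boosted;                      /* sched boost applies to the task */
    };

    static unsigned int sched_short_burst_ns;  /* /proc/sys/kernel/sched_short_burst_ns */

    /* Mirrors is_short_burst_task(): strictly below the threshold qualifies. */
    bool is_short_burst(const struct task_sample *t)
    {
        return t->avg_burst_ns < sched_short_burst_ns;
    }

    /* Packing is skipped for wake-to-idle and boosted tasks, as the text says. */
    bool eligible_for_packing(const struct task_sample *t)
    {
        return !t->wake_up_idle && !t->boosted && is_short_burst(t);
    }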

View file

@@ -72,6 +72,7 @@ extern unsigned int sysctl_sched_freq_aggregate;
extern unsigned int sysctl_sched_enable_thread_grouping;
extern unsigned int sysctl_sched_freq_aggregate_threshold_pct;
extern unsigned int sysctl_sched_prefer_sync_wakee_to_waker;
extern unsigned int sysctl_sched_short_burst;
#else /* CONFIG_SCHED_HMP */

View file

@@ -2613,6 +2613,7 @@ static u32 __compute_runnable_contrib(u64 n)
#define SBC_FLAG_CSTATE_LOAD 0x100
#define SBC_FLAG_BEST_SIBLING 0x200
#define SBC_FLAG_WAKER_CPU 0x400
#define SBC_FLAG_PACK_TASK 0x800
/* Cluster selection flag */
#define SBC_FLAG_COLOC_CLUSTER 0x10000
@@ -2629,6 +2630,7 @@ struct cpu_select_env {
u8 sync:1;
u8 ignore_prev_cpu:1;
enum sched_boost_policy boost_policy;
u8 pack_task:1;
int prev_cpu;
DECLARE_BITMAP(candidate_list, NR_CPUS);
DECLARE_BITMAP(backup_list, NR_CPUS);
@@ -2980,8 +2982,17 @@ static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
{
int cpu_cost;
- cpu_cost = power_cost(cpu, task_load(env->p) +
- cpu_cravg_sync(cpu, env->sync));
+ /*
+ * We try to find the least loaded *busy* CPU irrespective
+ * of the power cost.
+ */
+ if (env->pack_task)
+ cpu_cost = cpu_min_power_cost(cpu);
+ else
+ cpu_cost = power_cost(cpu, task_load(env->p) +
+ cpu_cravg_sync(cpu, env->sync));
if (cpu_cost <= stats->min_cost)
__update_cluster_stats(cpu, stats, env, cpu_cost);
}
@@ -3056,6 +3067,15 @@ static inline int wake_to_idle(struct task_struct *p)
(p->flags & PF_WAKE_UP_IDLE) || sysctl_sched_wake_to_idle;
}
static inline bool env_has_special_flags(struct cpu_select_env *env)
{
if (env->need_idle || env->boost_policy != SCHED_BOOST_NONE ||
env->reason)
return true;
return false;
}
static inline bool
bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
{
@@ -3063,9 +3083,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
struct task_struct *task = env->p;
struct sched_cluster *cluster;
- if (env->boost_policy != SCHED_BOOST_NONE || env->reason ||
- !task->ravg.mark_start ||
- env->need_idle || !sched_short_sleep_task_threshold)
+ if (!task->ravg.mark_start || !sched_short_sleep_task_threshold)
return false;
prev_cpu = env->prev_cpu;
@@ -3114,8 +3132,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
static inline bool
wake_to_waker_cluster(struct cpu_select_env *env)
{
- return env->boost_policy == SCHED_BOOST_NONE &&
- !env->need_idle && !env->reason && env->sync &&
+ return env->sync &&
task_load(current) > sched_big_waker_task_load &&
task_load(env->p) < sched_small_wakee_task_load;
}
@@ -3140,7 +3157,6 @@ cluster_allowed(struct task_struct *p, struct sched_cluster *cluster)
return !cpumask_empty(&tmp_mask);
}
/* return cheapest cpu that can fit this task */
static int select_best_cpu(struct task_struct *p, int target, int reason,
int sync)
@@ -3150,6 +3166,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
struct related_thread_group *grp;
unsigned int sbc_flag = 0;
int cpu = raw_smp_processor_id();
bool special;
struct cpu_select_env env = {
.p = p,
@@ -3162,6 +3179,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
.rtg = NULL,
.sbc_best_flag = 0,
.sbc_best_cluster_flag = 0,
.pack_task = false,
};
env.boost_policy = task_sched_boost(p) ?
@@ -3171,6 +3189,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
bitmap_zero(env.backup_list, NR_CPUS);
init_cluster_cpu_stats(&stats);
special = env_has_special_flags(&env);
rcu_read_lock();
@@ -3182,7 +3201,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
clear_bit(pref_cluster->id, env.candidate_list);
else
env.rtg = grp;
- } else {
+ } else if (!special) {
cluster = cpu_rq(cpu)->cluster;
if (wake_to_waker_cluster(&env)) {
if (bias_to_waker_cpu(p, cpu)) {
@@ -3203,6 +3222,10 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
}
}
if (!special && is_short_burst_task(p)) {
env.pack_task = true;
sbc_flag = SBC_FLAG_PACK_TASK;
}
retry:
cluster = select_least_power_cluster(&env);

View file

@@ -961,6 +961,13 @@ sched_long_cpu_selection_threshold = 100 * NSEC_PER_MSEC;
unsigned int __read_mostly sysctl_sched_restrict_cluster_spill;
/*
* Scheduler tries to avoid waking up idle CPUs for tasks running
* in short bursts. If the task average burst is less than
* sysctl_sched_short_burst nanoseconds, it is eligible for packing.
*/
unsigned int __read_mostly sysctl_sched_short_burst;
static void
_update_up_down_migrate(unsigned int *up_migrate, unsigned int *down_migrate)
{
@@ -1553,7 +1560,13 @@ void init_new_task_load(struct task_struct *p, bool idle_task)
memset(&p->ravg, 0, sizeof(struct ravg));
p->cpu_cycles = 0;
p->ravg.curr_burst = 0;
- p->ravg.avg_burst = 0;
+ /*
+ * Initialize the avg_burst to twice the threshold, so that
+ * a task would not be classified as short burst right away
+ * after fork. It takes at least 6 sleep-wakeup cycles for
+ * the avg_burst to go below the threshold.
+ */
+ p->ravg.avg_burst = 2 * (u64)sysctl_sched_short_burst;
p->ravg.curr_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_KERNEL);
p->ravg.prev_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_KERNEL);
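
To see where the "at least 6 sleep-wakeup cycles" figure in the comment above comes from, here is an illustrative calculation. It assumes the burst average is maintained with a 1/8-weight moving average (avg += (sample - avg) / 8) and that every burst sample is effectively zero; the update formula itself is not part of this diff, so treat it as an assumption:

    #include <stdio.h>

    /* Example only, not kernel code. Starting from 2 * threshold, a 1/8-weight
     * average decays as 2T * (7/8)^n with zero-length bursts: after 5 cycles it
     * is still ~1.03T, after 6 cycles ~0.90T, i.e. finally below the threshold. */
    int main(void)
    {
        double threshold = 1.0;              /* normalised sched_short_burst_ns */
        double avg_burst = 2.0 * threshold;  /* init_new_task_load() start value */

        for (int n = 1; n <= 8; n++) {
            avg_burst += (0.0 - avg_burst) / 8.0;   /* zero-length burst sample */
            printf("cycle %d: avg_burst = %.3f -> %s\n", n, avg_burst,
                   avg_burst < threshold ? "short-burst" : "not yet");
        }
        return 0;
    }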
@@ -2987,6 +3000,8 @@ void reset_task_stats(struct task_struct *p)
p->ravg.curr_window_cpu = curr_window_ptr;
p->ravg.prev_window_cpu = prev_window_ptr;
p->ravg.avg_burst = 2 * (u64)sysctl_sched_short_burst;
/* Retain EXITING_TASK marker */
p->ravg.sum_history[0] = sum;
}

View file

@@ -1723,6 +1723,7 @@ static int find_lowest_rq_hmp(struct task_struct *task)
int i;
int restrict_cluster;
int boost_on_big;
int pack_task, wakeup_latency, least_wakeup_latency = INT_MAX;
boost_on_big = sched_boost() == FULL_THROTTLE_BOOST &&
sched_boost_policy() == SCHED_BOOST_ON_BIG;
@@ -1739,6 +1740,8 @@ static int find_lowest_rq_hmp(struct task_struct *task)
if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
return best_cpu; /* No targets found */
pack_task = is_short_burst_task(task);
/*
* At this point we have built a mask of cpus representing the
* lowest priority tasks in the system. Now we want to elect
@@ -1764,6 +1767,20 @@ static int find_lowest_rq_hmp(struct task_struct *task)
if (!restrict_cluster)
cpu_load = scale_load_to_cpu(cpu_load, i);
if (pack_task) {
wakeup_latency = cpu_rq(i)->wakeup_latency;
if (wakeup_latency > least_wakeup_latency)
continue;
if (wakeup_latency < least_wakeup_latency) {
least_wakeup_latency = wakeup_latency;
min_load = cpu_load;
best_cpu = i;
continue;
}
}
if (cpu_load < min_load ||
(cpu_load == min_load &&
(i == prev_cpu || (best_cpu != prev_cpu &&
@@ -1772,6 +1789,7 @@ static int find_lowest_rq_hmp(struct task_struct *task)
best_cpu = i;
}
}

if (restrict_cluster && best_cpu != -1)
break;
}
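
To make the ordering in the RT path above easier to follow, here is a compact, illustrative restatement of the comparison, not kernel code. Names are hypothetical; cpu_rq(i)->wakeup_latency is the per-CPU wakeup cost the hunk reads, and the tie-break is simplified to "previous CPU wins", whereas the real code also weighs cache sharing:

    /*
     * Illustrative only. For a short-burst RT task the primary key is the
     * CPU's wakeup latency; a CPU with higher latency than the current best
     * is skipped, a strictly lower one wins outright, and only CPUs that tie
     * on latency fall back to the usual load / previous-CPU rules.
     */
    struct candidate {
        int cpu;
        int wakeup_latency;     /* cpu_rq(cpu)->wakeup_latency */
        unsigned long load;     /* scaled CPU load */
    };

    /* Returns nonzero if 'c' should replace the current 'best'. */
    int better_for_short_burst(const struct candidate *c,
                               const struct candidate *best,
                               int prev_cpu)
    {
        if (c->wakeup_latency > best->wakeup_latency)
            return 0;                       /* deeper idle: skip */
        if (c->wakeup_latency < best->wakeup_latency)
            return 1;                       /* strictly cheaper wakeup */
        /* equal latency: tie-break on load, then the previous CPU */
        if (c->load < best->load)
            return 1;
        return c->load == best->load && c->cpu == prev_cpu;
    }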

View file

@@ -1216,6 +1216,11 @@ static inline int cpu_max_power_cost(int cpu)
return cpu_rq(cpu)->cluster->max_power_cost;
}
static inline int cpu_min_power_cost(int cpu)
{
return cpu_rq(cpu)->cluster->min_power_cost;
}
static inline u32 cpu_cycles_to_freq(u64 cycles, u32 period)
{
return div64_u64(cycles, period);
@@ -1413,6 +1418,11 @@ static inline u64 cpu_cravg_sync(int cpu, int sync)
return load;
}
static inline bool is_short_burst_task(struct task_struct *p)
{
return p->ravg.avg_burst < sysctl_sched_short_burst;
}
extern void check_for_migration(struct rq *rq, struct task_struct *p);
extern void pre_big_task_count_change(const struct cpumask *cpus);
extern void post_big_task_count_change(const struct cpumask *cpus);

View file

@@ -507,6 +507,13 @@ static struct ctl_table kern_table[] = {
.extra1 = &zero,
.extra2 = &three,
},
{
.procname = "sched_short_burst_ns",
.data = &sysctl_sched_short_burst,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#endif /* CONFIG_SCHED_HMP */
#ifdef CONFIG_SCHED_DEBUG
{
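
For completeness, one way to exercise the new tunable from userspace (a hedged sketch; the path comes from the documentation hunk above, and the 100000 ns value is only an example, not a recommendation):

    #include <stdio.h>

    /*
     * Example only: set the short-burst threshold to 100 us (100000 ns).
     * Requires root; writing 0 (the default) disables packing.
     */
    int main(void)
    {
        FILE *f = fopen("/proc/sys/kernel/sched_short_burst_ns", "w");

        if (!f) {
            perror("sched_short_burst_ns");
            return 1;
        }
        fprintf(f, "%u\n", 100000u);
        fclose(f);
        return 0;
    }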