From 47f7e0415af9fd1078b51a45a7b18701eb7d5177 Mon Sep 17 00:00:00 2001
From: Syed Rameez Mustafa
Date: Wed, 4 Jan 2017 15:56:51 -0800
Subject: [PATCH] sched: Convert the global wake_up_idle flag to a per cluster
 flag

Since clusters can vary significantly in their power and performance
characteristics, there may be a need to have different CPU selection
policies based on which cluster a task is being placed on. For example
the placement policy can be more aggressive in using idle CPUs on
clusters that are power efficient and less aggressive on clusters that
are geared towards performance. Add support for a per cluster
wake_up_idle flag to allow greater flexibility in placement policies.

Change-Id: I18cd3d907cd965db03a13f4655870dc10c07acfe
Signed-off-by: Syed Rameez Mustafa
---
 drivers/base/cpu.c           | 40 ++++++++++++++++++++++++++++++++
 include/linux/sched.h        |  2 ++
 include/linux/sched/sysctl.h |  1 -
 kernel/sched/fair.c          | 45 +++++++++++++++---------------------
 kernel/sched/hmp.c           | 14 +++++++++++
 kernel/sched/sched.h         |  1 +
 kernel/sysctl.c              |  7 ------
 7 files changed, 76 insertions(+), 34 deletions(-)

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index c8bfb6077224..118a4f245ad1 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -306,16 +306,56 @@ static ssize_t __ref store_sched_static_cluster_pwr_cost(struct device *dev,
 	return err;
 }
 
+static ssize_t show_sched_cluster_wake_idle(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	ssize_t rc;
+	int cpuid = cpu->dev.id;
+	unsigned int wake_up_idle;
+
+	wake_up_idle = sched_get_cluster_wake_idle(cpuid);
+
+	rc = scnprintf(buf, PAGE_SIZE-2, "%d\n", wake_up_idle);
+
+	return rc;
+}
+
+static ssize_t __ref store_sched_cluster_wake_idle(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	int err;
+	int cpuid = cpu->dev.id;
+	unsigned int wake_up_idle;
+
+	err = kstrtouint(strstrip((char *)buf), 0, &wake_up_idle);
+	if (err)
+		return err;
+
+	err = sched_set_cluster_wake_idle(cpuid, wake_up_idle);
+
+	if (err >= 0)
+		err = count;
+
+	return err;
+}
+
 static DEVICE_ATTR(sched_static_cpu_pwr_cost, 0644,
 		show_sched_static_cpu_pwr_cost,
 		store_sched_static_cpu_pwr_cost);
 static DEVICE_ATTR(sched_static_cluster_pwr_cost, 0644,
 		show_sched_static_cluster_pwr_cost,
 		store_sched_static_cluster_pwr_cost);
+static DEVICE_ATTR(sched_cluster_wake_up_idle, 0644,
+		show_sched_cluster_wake_idle,
+		store_sched_cluster_wake_idle);
 
 static struct attribute *hmp_sched_cpu_attrs[] = {
 	&dev_attr_sched_static_cpu_pwr_cost.attr,
 	&dev_attr_sched_static_cluster_pwr_cost.attr,
+	&dev_attr_sched_cluster_wake_up_idle.attr,
 	NULL
 };
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 36007d90a678..0d1d21e9f081 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2451,6 +2451,8 @@ extern int sched_set_static_cpu_pwr_cost(int cpu, unsigned int cost);
 extern unsigned int sched_get_static_cpu_pwr_cost(int cpu);
 extern int sched_set_static_cluster_pwr_cost(int cpu, unsigned int cost);
 extern unsigned int sched_get_static_cluster_pwr_cost(int cpu);
+extern int sched_set_cluster_wake_idle(int cpu, unsigned int wake_idle);
+extern unsigned int sched_get_cluster_wake_idle(int cpu);
 extern int sched_update_freq_max_load(const cpumask_t *cpumask);
 extern void sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin,
 					  u32 fmax);
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 0538de6dfb6f..f7aeb14f4d63 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -39,7 +39,6 @@ extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
-extern unsigned int sysctl_sched_wake_to_idle;
 extern unsigned int sysctl_sched_is_big_little;
 extern unsigned int sysctl_sched_sync_hint_enable;
 extern unsigned int sysctl_sched_initial_task_util;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 87538f7d495a..e1c8ec0458b3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -85,14 +85,6 @@ static unsigned int sched_nr_latency = 8;
  */
 unsigned int sysctl_sched_child_runs_first __read_mostly;
 
-/*
- * Controls whether, when SD_SHARE_PKG_RESOURCES is on, if all
- * tasks go to idle CPUs when woken. If this is off, note that the
- * per-task flag PF_WAKE_UP_IDLE can still cause a task to go to an
- * idle CPU upon being woken.
- */
-unsigned int __read_mostly sysctl_sched_wake_to_idle;
-
 /*
  * SCHED_OTHER wake-up granularity.
  * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
@@ -2649,6 +2641,21 @@ struct cluster_cpu_stats {
 	s64 highest_spare_capacity;
 };
 
+/*
+ * Should task be woken to any available idle cpu?
+ *
+ * Waking tasks to idle cpu has mixed implications on both performance and
+ * power. In many cases, scheduler can't estimate correctly impact of using idle
+ * cpus on either performance or power. PF_WAKE_UP_IDLE allows external kernel
+ * module to pass a strong hint to scheduler that the task in question should be
+ * woken to idle cpu, generally to improve performance.
+ */
+static inline int wake_to_idle(struct task_struct *p)
+{
+	return (current->flags & PF_WAKE_UP_IDLE) ||
+	       (p->flags & PF_WAKE_UP_IDLE);
+}
+
 static int spill_threshold_crossed(struct cpu_select_env *env, struct rq *rq)
 {
 	u64 total_load;
@@ -3009,6 +3016,8 @@ static void find_best_cpu_in_cluster(struct sched_cluster *c,
 	if (env->ignore_prev_cpu)
 		cpumask_clear_cpu(env->prev_cpu, &search_cpus);
 
+	env->need_idle = wake_to_idle(env->p) || c->wake_up_idle;
+
 	for_each_cpu(i, &search_cpus) {
 		env->cpu_load = cpu_load_sync(i, env->sync);
 
@@ -3052,21 +3061,6 @@ static inline void init_cluster_cpu_stats(struct cluster_cpu_stats *stats)
 	/* No need to initialize stats->best_load */
 }
 
-/*
- * Should task be woken to any available idle cpu?
- *
- * Waking tasks to idle cpu has mixed implications on both performance and
- * power. In many cases, scheduler can't estimate correctly impact of using idle
- * cpus on either performance or power. PF_WAKE_UP_IDLE allows external kernel
- * module to pass a strong hint to scheduler that the task in question should be
- * woken to idle cpu, generally to improve performance.
- */
-static inline int wake_to_idle(struct task_struct *p)
-{
-	return (current->flags & PF_WAKE_UP_IDLE) ||
-	       (p->flags & PF_WAKE_UP_IDLE) || sysctl_sched_wake_to_idle;
-}
-
 static inline bool env_has_special_flags(struct cpu_select_env *env)
 {
 	if (env->need_idle || env->boost_policy != SCHED_BOOST_NONE ||
@@ -6755,9 +6749,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
 		return i;
 	}
 
-	if (!sysctl_sched_wake_to_idle &&
-	    !(current->flags & PF_WAKE_UP_IDLE) &&
-	    !(p->flags & PF_WAKE_UP_IDLE))
+	if (!(current->flags & PF_WAKE_UP_IDLE) &&
+	    !(p->flags & PF_WAKE_UP_IDLE))
 		return target;
 
 	/*
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index 180e2fcf785b..12fa618a8135 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -377,6 +377,7 @@ struct sched_cluster init_cluster = {
 	.dstate_wakeup_latency	=	0,
 	.exec_scale_factor	=	1024,
 	.notifier_sent		=	0,
+	.wake_up_idle		=	0,
 };
 
 static void update_all_clusters_stats(void)
@@ -677,6 +678,19 @@ unsigned int sched_get_static_cluster_pwr_cost(int cpu)
 	return cpu_rq(cpu)->cluster->static_cluster_pwr_cost;
 }
 
+int sched_set_cluster_wake_idle(int cpu, unsigned int wake_idle)
+{
+	struct sched_cluster *cluster = cpu_rq(cpu)->cluster;
+
+	cluster->wake_up_idle = !!wake_idle;
+	return 0;
+}
+
+unsigned int sched_get_cluster_wake_idle(int cpu)
+{
+	return cpu_rq(cpu)->cluster->wake_up_idle;
+}
+
 /*
  * sched_window_stats_policy and sched_ravg_hist_size have a 'sysctl' copy
  * associated with them. This is required for atomic update of those variables
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a3abdf19ff4c..c110c4aaf2be 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -389,6 +389,7 @@ struct sched_cluster {
 	int dstate, dstate_wakeup_latency, dstate_wakeup_energy;
 	unsigned int static_cluster_pwr_cost;
 	int notifier_sent;
+	bool wake_up_idle;
 };
 
 extern unsigned long all_cluster_ids[];
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 12ea4f09c04b..eced92aa492a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -289,13 +289,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "sched_wake_to_idle",
-		.data		= &sysctl_sched_wake_to_idle,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 #ifdef CONFIG_SCHED_HMP
 	{
 		.procname	= "sched_freq_reporting_policy",