diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index c8bfb6077224..118a4f245ad1 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -306,16 +306,56 @@ static ssize_t __ref store_sched_static_cluster_pwr_cost(struct device *dev,
 	return err;
 }
 
+static ssize_t show_sched_cluster_wake_idle(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	ssize_t rc;
+	int cpuid = cpu->dev.id;
+	unsigned int wake_up_idle;
+
+	wake_up_idle = sched_get_cluster_wake_idle(cpuid);
+
+	rc = scnprintf(buf, PAGE_SIZE-2, "%d\n", wake_up_idle);
+
+	return rc;
+}
+
+static ssize_t __ref store_sched_cluster_wake_idle(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	int err;
+	int cpuid = cpu->dev.id;
+	unsigned int wake_up_idle;
+
+	err = kstrtouint(strstrip((char *)buf), 0, &wake_up_idle);
+	if (err)
+		return err;
+
+	err = sched_set_cluster_wake_idle(cpuid, wake_up_idle);
+
+	if (err >= 0)
+		err = count;
+
+	return err;
+}
+
 static DEVICE_ATTR(sched_static_cpu_pwr_cost, 0644,
 		show_sched_static_cpu_pwr_cost,
 		store_sched_static_cpu_pwr_cost);
 static DEVICE_ATTR(sched_static_cluster_pwr_cost, 0644,
 		show_sched_static_cluster_pwr_cost,
 		store_sched_static_cluster_pwr_cost);
+static DEVICE_ATTR(sched_cluster_wake_up_idle, 0644,
+		show_sched_cluster_wake_idle,
+		store_sched_cluster_wake_idle);
 
 static struct attribute *hmp_sched_cpu_attrs[] = {
 	&dev_attr_sched_static_cpu_pwr_cost.attr,
 	&dev_attr_sched_static_cluster_pwr_cost.attr,
+	&dev_attr_sched_cluster_wake_up_idle.attr,
 	NULL
 };
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 36007d90a678..0d1d21e9f081 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2451,6 +2451,8 @@ extern int sched_set_static_cpu_pwr_cost(int cpu, unsigned int cost);
 extern unsigned int sched_get_static_cpu_pwr_cost(int cpu);
 extern int sched_set_static_cluster_pwr_cost(int cpu, unsigned int cost);
 extern unsigned int sched_get_static_cluster_pwr_cost(int cpu);
+extern int sched_set_cluster_wake_idle(int cpu, unsigned int wake_idle);
+extern unsigned int sched_get_cluster_wake_idle(int cpu);
 extern int sched_update_freq_max_load(const cpumask_t *cpumask);
 extern void sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin,
 					  u32 fmax);
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 0538de6dfb6f..f7aeb14f4d63 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -39,7 +39,6 @@ extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
-extern unsigned int sysctl_sched_wake_to_idle;
 extern unsigned int sysctl_sched_is_big_little;
 extern unsigned int sysctl_sched_sync_hint_enable;
 extern unsigned int sysctl_sched_initial_task_util;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 87538f7d495a..e1c8ec0458b3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -85,14 +85,6 @@ static unsigned int sched_nr_latency = 8;
  */
 unsigned int sysctl_sched_child_runs_first __read_mostly;
 
-/*
- * Controls whether, when SD_SHARE_PKG_RESOURCES is on, if all
- * tasks go to idle CPUs when woken. If this is off, note that the
- * per-task flag PF_WAKE_UP_IDLE can still cause a task to go to an
- * idle CPU upon being woken.
- */
-unsigned int __read_mostly sysctl_sched_wake_to_idle;
-
 /*
  * SCHED_OTHER wake-up granularity.
  * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
@@ -2649,6 +2641,21 @@ struct cluster_cpu_stats {
 	s64 highest_spare_capacity;
 };
 
+/*
+ * Should task be woken to any available idle cpu?
+ *
+ * Waking tasks to idle cpu has mixed implications on both performance and
+ * power. In many cases, scheduler can't estimate correctly impact of using idle
+ * cpus on either performance or power. PF_WAKE_UP_IDLE allows external kernel
+ * module to pass a strong hint to scheduler that the task in question should be
+ * woken to idle cpu, generally to improve performance.
+ */
+static inline int wake_to_idle(struct task_struct *p)
+{
+	return (current->flags & PF_WAKE_UP_IDLE) ||
+		(p->flags & PF_WAKE_UP_IDLE);
+}
+
 static int spill_threshold_crossed(struct cpu_select_env *env, struct rq *rq)
 {
 	u64 total_load;
@@ -3009,6 +3016,8 @@ static void find_best_cpu_in_cluster(struct sched_cluster *c,
 	if (env->ignore_prev_cpu)
 		cpumask_clear_cpu(env->prev_cpu, &search_cpus);
 
+	env->need_idle = wake_to_idle(env->p) || c->wake_up_idle;
+
 	for_each_cpu(i, &search_cpus) {
 		env->cpu_load = cpu_load_sync(i, env->sync);
 
@@ -3052,21 +3061,6 @@ static inline void init_cluster_cpu_stats(struct cluster_cpu_stats *stats)
 	/* No need to initialize stats->best_load */
 }
 
-/*
- * Should task be woken to any available idle cpu?
- *
- * Waking tasks to idle cpu has mixed implications on both performance and
- * power. In many cases, scheduler can't estimate correctly impact of using idle
- * cpus on either performance or power. PF_WAKE_UP_IDLE allows external kernel
- * module to pass a strong hint to scheduler that the task in question should be
- * woken to idle cpu, generally to improve performance.
- */
-static inline int wake_to_idle(struct task_struct *p)
-{
-	return (current->flags & PF_WAKE_UP_IDLE) ||
-		(p->flags & PF_WAKE_UP_IDLE) || sysctl_sched_wake_to_idle;
-}
-
 static inline bool env_has_special_flags(struct cpu_select_env *env)
 {
 	if (env->need_idle || env->boost_policy != SCHED_BOOST_NONE ||
@@ -6755,9 +6749,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
 			return i;
 	}
 
-	if (!sysctl_sched_wake_to_idle &&
-	    !(current->flags & PF_WAKE_UP_IDLE) &&
-	    !(p->flags & PF_WAKE_UP_IDLE))
+	if (!(current->flags & PF_WAKE_UP_IDLE) &&
+	    !(p->flags & PF_WAKE_UP_IDLE))
 		return target;
 
 	/*
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index 180e2fcf785b..12fa618a8135 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -377,6 +377,7 @@ struct sched_cluster init_cluster = {
 	.dstate_wakeup_latency	=	0,
 	.exec_scale_factor	=	1024,
 	.notifier_sent		=	0,
+	.wake_up_idle		=	0,
 };
 
 static void update_all_clusters_stats(void)
@@ -677,6 +678,19 @@ unsigned int sched_get_static_cluster_pwr_cost(int cpu)
 	return cpu_rq(cpu)->cluster->static_cluster_pwr_cost;
 }
 
+int sched_set_cluster_wake_idle(int cpu, unsigned int wake_idle)
+{
+	struct sched_cluster *cluster = cpu_rq(cpu)->cluster;
+
+	cluster->wake_up_idle = !!wake_idle;
+	return 0;
+}
+
+unsigned int sched_get_cluster_wake_idle(int cpu)
+{
+	return cpu_rq(cpu)->cluster->wake_up_idle;
+}
+
 /*
  * sched_window_stats_policy and sched_ravg_hist_size have a 'sysctl' copy
  * associated with them. This is required for atomic update of those variables
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a3abdf19ff4c..c110c4aaf2be 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -389,6 +389,7 @@ struct sched_cluster {
 	int dstate, dstate_wakeup_latency, dstate_wakeup_energy;
 	unsigned int static_cluster_pwr_cost;
 	int notifier_sent;
+	bool wake_up_idle;
 };
 
 extern unsigned long all_cluster_ids[];
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 12ea4f09c04b..eced92aa492a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -289,13 +289,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "sched_wake_to_idle",
-		.data		= &sysctl_sched_wake_to_idle,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 #ifdef CONFIG_SCHED_HMP
 	{
 		.procname	= "sched_freq_reporting_policy",
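
With this change the global /proc/sys/kernel/sched_wake_to_idle tunable is gone; wake-to-idle is instead set per cluster through the new sched_cluster_wake_up_idle attribute registered in drivers/base/cpu.c. Below is a minimal user-space sketch of driving the knob. The sysfs path is an assumption inferred from the per-CPU attribute group (hmp_sched_cpu_attrs), and set_cluster_wake_idle() plus the choice of CPU 4 are hypothetical; verify the path on a target kernel that carries this patch.

#include <stdio.h>

/* Assumed location of the per-CPU attribute added by this patch. */
#define ATTR_FMT "/sys/devices/system/cpu/cpu%d/sched_cluster_wake_up_idle"

/* Hypothetical helper: enable/disable wake-to-idle for the cluster owning @cpu. */
static int set_cluster_wake_idle(int cpu, int enable)
{
	char path[96];
	FILE *f;

	snprintf(path, sizeof(path), ATTR_FMT, cpu);
	f = fopen(path, "w");
	if (!f)
		return -1;
	/* store_sched_cluster_wake_idle() reduces any nonzero value to 1 */
	fprintf(f, "%d\n", !!enable);
	return fclose(f);
}

int main(void)
{
	/* CPU 4 is just an example of a CPU in the cluster of interest. */
	if (set_cluster_wake_idle(4, 1) != 0)
		perror("sched_cluster_wake_up_idle");
	return 0;
}

Writing through any CPU in a cluster affects the whole cluster, since sched_set_cluster_wake_idle() stores the flag on cpu_rq(cpu)->cluster; reading the attribute back returns the cluster-wide value.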
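For in-kernel callers the per-task hint is untouched: wake_to_idle() in fair.c still honours PF_WAKE_UP_IDLE on both the waker and the wakee; only the global sysctl term was dropped. Below is a sketch of how a module running in process context might set the hint on itself, assuming no helper exists in this tree (hint_wake_to_idle() is hypothetical):

#include <linux/sched.h>

/*
 * Hypothetical helper: hint that tasks woken by the current task should be
 * placed on an idle cpu. wake_to_idle() checks PF_WAKE_UP_IDLE on both the
 * waker (current) and the wakee (p), so setting it on either side suffices.
 */
static void hint_wake_to_idle(bool enable)
{
	if (enable)
		current->flags |= PF_WAKE_UP_IDLE;
	else
		current->flags &= ~PF_WAKE_UP_IDLE;
}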