diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index d43c401ff190..91b05e2b8799 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -120,6 +120,15 @@ config CPU_FREQ_DEFAULT_GOV_SCHED cpu frequency using CPU utilization estimates from the scheduler. +config CPU_FREQ_DEFAULT_GOV_SCHEDUTIL + bool "schedutil" + depends on SMP + select CPU_FREQ_GOV_SCHEDUTIL + select CPU_FREQ_GOV_PERFORMANCE + help + Use the 'schedutil' CPUFreq governor by default. If unsure, + have a look at the help section of that governor. The fallback + governor will be 'performance'. endchoice config CPU_FREQ_GOV_PERFORMANCE @@ -228,6 +237,23 @@ config CPU_FREQ_GOV_SCHED If in doubt, say N. +config CPU_FREQ_GOV_SCHEDUTIL + bool "'schedutil' cpufreq policy governor" + depends on CPU_FREQ && SMP + select CPU_FREQ_GOV_ATTR_SET + select IRQ_WORK + help + This governor makes decisions based on the utilization data provided + by the scheduler. It sets the CPU frequency to be proportional to + the utilization/capacity ratio coming from the scheduler. If the + utilization is frequency-invariant, the new frequency is also + proportional to the maximum available frequency. If that is not the + case, it is proportional to the current frequency of the CPU. The + frequency tipping point is at utilization/capacity equal to 80% in + both cases. + + If in doubt, say N. + comment "CPU frequency scaling drivers" config CPUFREQ_DT diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index b02ae1463a15..04e6324fd638 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -1,5 +1,5 @@ # CPUfreq core -obj-$(CONFIG_CPU_FREQ) += cpufreq.o freq_table.o +obj-$(CONFIG_CPU_FREQ) += cpufreq.o freq_table.o cpufreq_governor_attr_set.o # CPUfreq stats obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0ceb0a889ebf..56d12ed4f38c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -531,6 +531,38 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_freq_transition_end); +/** + * cpufreq_driver_resolve_freq - Map a target frequency to a driver-supported + * one. + * @target_freq: target frequency to resolve. + * + * The target to driver frequency mapping is cached in the policy. + * + * Return: Lowest driver-supported frequency greater than or equal to the + * given target_freq, subject to policy (min/max) and driver limitations. + */ +unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + policy->cached_target_freq = target_freq; + + if (cpufreq_driver->target_index) { + int idx, rv; + + rv = cpufreq_frequency_table_target(policy, policy->freq_table, + target_freq, + CPUFREQ_RELATION_L, + &idx); + if (rv) + return target_freq; + policy->cached_resolved_idx = idx; + return policy->freq_table[idx].frequency; + } + + return target_freq; +} +EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); /********************************************************************* * SYSFS INTERFACE * diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c new file mode 100644 index 000000000000..52841f807a7e --- /dev/null +++ b/drivers/cpufreq/cpufreq_governor_attr_set.c @@ -0,0 +1,84 @@ +/* + * Abstract code for CPUFreq governor tunable sysfs attributes. + * + * Copyright (C) 2016, Intel Corporation + * Author: Rafael J. Wysocki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "cpufreq_governor.h" + +static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj) +{ + return container_of(kobj, struct gov_attr_set, kobj); +} + +static inline struct governor_attr *to_gov_attr(struct attribute *attr) +{ + return container_of(attr, struct governor_attr, attr); +} + +static ssize_t governor_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct governor_attr *gattr = to_gov_attr(attr); + + return gattr->show(to_gov_attr_set(kobj), buf); +} + +static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct gov_attr_set *attr_set = to_gov_attr_set(kobj); + struct governor_attr *gattr = to_gov_attr(attr); + int ret; + + mutex_lock(&attr_set->update_lock); + ret = attr_set->usage_count ? gattr->store(attr_set, buf, count) : -EBUSY; + mutex_unlock(&attr_set->update_lock); + return ret; +} + +const struct sysfs_ops governor_sysfs_ops = { + .show = governor_show, + .store = governor_store, +}; +EXPORT_SYMBOL_GPL(governor_sysfs_ops); + +void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node) +{ + INIT_LIST_HEAD(&attr_set->policy_list); + mutex_init(&attr_set->update_lock); + attr_set->usage_count = 1; + list_add(list_node, &attr_set->policy_list); +} +EXPORT_SYMBOL_GPL(gov_attr_set_init); + +void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node) +{ + mutex_lock(&attr_set->update_lock); + attr_set->usage_count++; + list_add(list_node, &attr_set->policy_list); + mutex_unlock(&attr_set->update_lock); +} +EXPORT_SYMBOL_GPL(gov_attr_set_get); + +unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node) +{ + unsigned int count; + + mutex_lock(&attr_set->update_lock); + list_del(list_node); + count = --attr_set->usage_count; + mutex_unlock(&attr_set->update_lock); + if (count) + return count; + + kobject_put(&attr_set->kobj); + mutex_destroy(&attr_set->update_lock); + return 0; +} +EXPORT_SYMBOL_GPL(gov_attr_set_put); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 60571292a802..b144e7445477 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -107,6 +107,22 @@ struct cpufreq_policy { */ struct rw_semaphore rwsem; + + /* + * Fast switch flags: + * - fast_switch_possible should be set by the driver if it can + * guarantee that frequency can be changed on any CPU sharing the + * policy and that the change will affect all of the policy CPUs then. + * - fast_switch_enabled is to be set by governors that support fast + * freqnency switching with the help of cpufreq_enable_fast_switch(). + */ + bool fast_switch_possible; + bool fast_switch_enabled; + + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ + unsigned int cached_target_freq; + int cached_resolved_idx; + /* Synchronization for frequency transitions */ bool transition_ongoing; /* Tracks transition status */ spinlock_t transition_lock; @@ -471,6 +487,8 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation); +unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, + unsigned int target_freq); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); @@ -502,8 +520,42 @@ extern struct cpufreq_governor cpufreq_gov_interactive; #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED) extern struct cpufreq_governor cpufreq_gov_sched; #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_sched) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL) +extern struct cpufreq_governor cpufreq_gov_schedutil; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_schedutil) #endif +static inline void cpufreq_policy_apply_limits(struct cpufreq_policy *policy) +{ + if (policy->max < policy->cur) + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); + else if (policy->min > policy->cur) + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); +} + +/* Governor attribute set */ +struct gov_attr_set { + struct kobject kobj; + struct list_head policy_list; + struct mutex update_lock; + int usage_count; +}; + +/* sysfs ops for cpufreq governors */ +extern const struct sysfs_ops governor_sysfs_ops; + +void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node); +void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node); +unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node); + +/* Governor sysfs attribute */ +struct governor_attr { + struct attribute attr; + ssize_t (*show)(struct gov_attr_set *attr_set, char *buf); + ssize_t (*store)(struct gov_attr_set *attr_set, const char *buf, + size_t count); +}; + /********************************************************************* * FREQUENCY TABLE HELPERS * *********************************************************************/ diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index f2d49474aab1..ca0d94096170 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_SCHED_TUNE) += tune.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o +obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c new file mode 100644 index 000000000000..7e42a1d6d88d --- /dev/null +++ b/kernel/sched/cpufreq_schedutil.c @@ -0,0 +1,601 @@ +/* + * CPUFreq governor based on scheduler-provided CPU utilization data. + * + * Copyright (C) 2016, Intel Corporation + * Author: Rafael J. Wysocki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#include "sched.h" + +/* Stub out fast switch routines present on mainline to reduce the backport + * overhead. */ +#define cpufreq_driver_fast_switch(x, y) 0 +#define cpufreq_enable_fast_switch(x) +#define cpufreq_disable_fast_switch(x) + +struct sugov_tunables { + struct gov_attr_set attr_set; + unsigned int rate_limit_us; +}; + +struct sugov_policy { + struct cpufreq_policy *policy; + + struct sugov_tunables *tunables; + struct list_head tunables_hook; + + raw_spinlock_t update_lock; /* For shared policies */ + u64 last_freq_update_time; + s64 freq_update_delay_ns; + unsigned int next_freq; + + /* The next fields are only needed if fast switch cannot be used. */ + struct irq_work irq_work; + struct work_struct work; + struct mutex work_lock; + bool work_in_progress; + + bool need_freq_update; +}; + +struct sugov_cpu { + struct update_util_data update_util; + struct sugov_policy *sg_policy; + + unsigned int cached_raw_freq; + unsigned long iowait_boost; + unsigned long iowait_boost_max; + u64 last_update; + + /* The fields below are only needed when sharing a policy. */ + unsigned long util; + unsigned long max; + unsigned int flags; +}; + +static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); + +/************************ Governor internals ***********************/ + +static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) +{ + s64 delta_ns; + + if (sg_policy->work_in_progress) + return false; + + if (unlikely(sg_policy->need_freq_update)) { + sg_policy->need_freq_update = false; + /* + * This happens when limits change, so forget the previous + * next_freq value and force an update. + */ + sg_policy->next_freq = UINT_MAX; + return true; + } + + delta_ns = time - sg_policy->last_freq_update_time; + return delta_ns >= sg_policy->freq_update_delay_ns; +} + +static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, + unsigned int next_freq) +{ + struct cpufreq_policy *policy = sg_policy->policy; + + sg_policy->last_freq_update_time = time; + + if (policy->fast_switch_enabled) { + if (sg_policy->next_freq == next_freq) { + trace_cpu_frequency(policy->cur, smp_processor_id()); + return; + } + sg_policy->next_freq = next_freq; + next_freq = cpufreq_driver_fast_switch(policy, next_freq); + if (next_freq == CPUFREQ_ENTRY_INVALID) + return; + + policy->cur = next_freq; + trace_cpu_frequency(next_freq, smp_processor_id()); + } else if (sg_policy->next_freq != next_freq) { + sg_policy->next_freq = next_freq; + sg_policy->work_in_progress = true; + irq_work_queue(&sg_policy->irq_work); + } +} + +/** + * get_next_freq - Compute a new frequency for a given cpufreq policy. + * @sg_cpu: schedutil cpu object to compute the new frequency for. + * @util: Current CPU utilization. + * @max: CPU capacity. + * + * If the utilization is frequency-invariant, choose the new frequency to be + * proportional to it, that is + * + * next_freq = C * max_freq * util / max + * + * Otherwise, approximate the would-be frequency-invariant utilization by + * util_raw * (curr_freq / max_freq) which leads to + * + * next_freq = C * curr_freq * util_raw / max + * + * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8. + * + * The lowest driver-supported frequency which is equal or greater than the raw + * next_freq (as calculated above) is returned, subject to policy min/max and + * cpufreq driver limitations. + */ +static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, + unsigned long max) +{ + struct sugov_policy *sg_policy = sg_cpu->sg_policy; + struct cpufreq_policy *policy = sg_policy->policy; + unsigned int freq = arch_scale_freq_invariant() ? + policy->cpuinfo.max_freq : policy->cur; + + freq = (freq + (freq >> 2)) * util / max; + + if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) + return sg_policy->next_freq; + sg_cpu->cached_raw_freq = freq; + return cpufreq_driver_resolve_freq(policy, freq); +} + +static void sugov_get_util(unsigned long *util, unsigned long *max) +{ + struct rq *rq = this_rq(); + unsigned long cfs_max; + + cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id()); + + *util = min(rq->cfs.avg.util_avg, cfs_max); + *max = cfs_max; +} + +static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, + unsigned int flags) +{ + if (flags & SCHED_CPUFREQ_IOWAIT) { + sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; + } else if (sg_cpu->iowait_boost) { + s64 delta_ns = time - sg_cpu->last_update; + + /* Clear iowait_boost if the CPU apprears to have been idle. */ + if (delta_ns > TICK_NSEC) + sg_cpu->iowait_boost = 0; + } +} + +static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, + unsigned long *max) +{ + unsigned long boost_util = sg_cpu->iowait_boost; + unsigned long boost_max = sg_cpu->iowait_boost_max; + + if (!boost_util) + return; + + if (*util * boost_max < *max * boost_util) { + *util = boost_util; + *max = boost_max; + } + sg_cpu->iowait_boost >>= 1; +} + +static void sugov_update_single(struct update_util_data *hook, u64 time, + unsigned int flags) +{ + struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); + struct sugov_policy *sg_policy = sg_cpu->sg_policy; + struct cpufreq_policy *policy = sg_policy->policy; + unsigned long util, max; + unsigned int next_f; + + sugov_set_iowait_boost(sg_cpu, time, flags); + sg_cpu->last_update = time; + + if (!sugov_should_update_freq(sg_policy, time)) + return; + + if (flags & SCHED_CPUFREQ_RT_DL) { + next_f = policy->cpuinfo.max_freq; + } else { + sugov_get_util(&util, &max); + sugov_iowait_boost(sg_cpu, &util, &max); + next_f = get_next_freq(sg_cpu, util, max); + } + sugov_update_commit(sg_policy, time, next_f); +} + +static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, + unsigned long util, unsigned long max, + unsigned int flags) +{ + struct sugov_policy *sg_policy = sg_cpu->sg_policy; + struct cpufreq_policy *policy = sg_policy->policy; + unsigned int max_f = policy->cpuinfo.max_freq; + u64 last_freq_update_time = sg_policy->last_freq_update_time; + unsigned int j; + + if (flags & SCHED_CPUFREQ_RT_DL) + return max_f; + + sugov_iowait_boost(sg_cpu, &util, &max); + + for_each_cpu(j, policy->cpus) { + struct sugov_cpu *j_sg_cpu; + unsigned long j_util, j_max; + s64 delta_ns; + + if (j == smp_processor_id()) + continue; + + j_sg_cpu = &per_cpu(sugov_cpu, j); + /* + * If the CPU utilization was last updated before the previous + * frequency update and the time elapsed between the last update + * of the CPU utilization and the last frequency update is long + * enough, don't take the CPU into account as it probably is + * idle now (and clear iowait_boost for it). + */ + delta_ns = last_freq_update_time - j_sg_cpu->last_update; + if (delta_ns > TICK_NSEC) { + j_sg_cpu->iowait_boost = 0; + continue; + } + if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) + return max_f; + + j_util = j_sg_cpu->util; + j_max = j_sg_cpu->max; + if (j_util * max > j_max * util) { + util = j_util; + max = j_max; + } + + sugov_iowait_boost(j_sg_cpu, &util, &max); + } + + return get_next_freq(sg_cpu, util, max); +} + +static void sugov_update_shared(struct update_util_data *hook, u64 time, + unsigned int flags) +{ + struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); + struct sugov_policy *sg_policy = sg_cpu->sg_policy; + unsigned long util, max; + unsigned int next_f; + + sugov_get_util(&util, &max); + + raw_spin_lock(&sg_policy->update_lock); + + sg_cpu->util = util; + sg_cpu->max = max; + sg_cpu->flags = flags; + + sugov_set_iowait_boost(sg_cpu, time, flags); + sg_cpu->last_update = time; + + if (sugov_should_update_freq(sg_policy, time)) { + next_f = sugov_next_freq_shared(sg_cpu, util, max, flags); + sugov_update_commit(sg_policy, time, next_f); + } + + raw_spin_unlock(&sg_policy->update_lock); +} + +static void sugov_work(struct work_struct *work) +{ + struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work); + + mutex_lock(&sg_policy->work_lock); + __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq, + CPUFREQ_RELATION_L); + mutex_unlock(&sg_policy->work_lock); + + sg_policy->work_in_progress = false; +} + +static void sugov_irq_work(struct irq_work *irq_work) +{ + struct sugov_policy *sg_policy; + + sg_policy = container_of(irq_work, struct sugov_policy, irq_work); + schedule_work_on(smp_processor_id(), &sg_policy->work); +} + +/************************** sysfs interface ************************/ + +static struct sugov_tunables *global_tunables; +static DEFINE_MUTEX(global_tunables_lock); + +static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set) +{ + return container_of(attr_set, struct sugov_tunables, attr_set); +} + +static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf) +{ + struct sugov_tunables *tunables = to_sugov_tunables(attr_set); + + return sprintf(buf, "%u\n", tunables->rate_limit_us); +} + +static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, + size_t count) +{ + struct sugov_tunables *tunables = to_sugov_tunables(attr_set); + struct sugov_policy *sg_policy; + unsigned int rate_limit_us; + + if (kstrtouint(buf, 10, &rate_limit_us)) + return -EINVAL; + + tunables->rate_limit_us = rate_limit_us; + + list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) + sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC; + + return count; +} + +static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us); + +static struct attribute *sugov_attributes[] = { + &rate_limit_us.attr, + NULL +}; + +static struct kobj_type sugov_tunables_ktype = { + .default_attrs = sugov_attributes, + .sysfs_ops = &governor_sysfs_ops, +}; + +/********************** cpufreq governor interface *********************/ +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL +static +#endif +struct cpufreq_governor cpufreq_gov_schedutil; + +static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy; + + sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL); + if (!sg_policy) + return NULL; + + sg_policy->policy = policy; + init_irq_work(&sg_policy->irq_work, sugov_irq_work); + INIT_WORK(&sg_policy->work, sugov_work); + mutex_init(&sg_policy->work_lock); + raw_spin_lock_init(&sg_policy->update_lock); + return sg_policy; +} + +static void sugov_policy_free(struct sugov_policy *sg_policy) +{ + mutex_destroy(&sg_policy->work_lock); + kfree(sg_policy); +} + +static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) +{ + struct sugov_tunables *tunables; + + tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); + if (tunables) { + gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook); + if (!have_governor_per_policy()) + global_tunables = tunables; + } + return tunables; +} + +static void sugov_tunables_free(struct sugov_tunables *tunables) +{ + if (!have_governor_per_policy()) + global_tunables = NULL; + + kfree(tunables); +} + +static int sugov_init(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy; + struct sugov_tunables *tunables; + unsigned int lat; + int ret = 0; + + /* State should be equivalent to EXIT */ + if (policy->governor_data) + return -EBUSY; + + sg_policy = sugov_policy_alloc(policy); + if (!sg_policy) + return -ENOMEM; + + mutex_lock(&global_tunables_lock); + + if (global_tunables) { + if (WARN_ON(have_governor_per_policy())) { + ret = -EINVAL; + goto free_sg_policy; + } + policy->governor_data = sg_policy; + sg_policy->tunables = global_tunables; + + gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook); + goto out; + } + + tunables = sugov_tunables_alloc(sg_policy); + if (!tunables) { + ret = -ENOMEM; + goto free_sg_policy; + } + + tunables->rate_limit_us = 20 * USEC_PER_MSEC; + lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; + if (lat) + tunables->rate_limit_us *= lat; + + policy->governor_data = sg_policy; + sg_policy->tunables = tunables; + + ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype, + get_governor_parent_kobj(policy), "%s", + cpufreq_gov_schedutil.name); + if (ret) + goto fail; + + out: + mutex_unlock(&global_tunables_lock); + + cpufreq_enable_fast_switch(policy); + return 0; + + fail: + policy->governor_data = NULL; + sugov_tunables_free(tunables); + + free_sg_policy: + mutex_unlock(&global_tunables_lock); + + sugov_policy_free(sg_policy); + pr_err("initialization failed (error %d)\n", ret); + return ret; +} + +static int sugov_exit(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy = policy->governor_data; + struct sugov_tunables *tunables = sg_policy->tunables; + unsigned int count; + + cpufreq_disable_fast_switch(policy); + + mutex_lock(&global_tunables_lock); + + count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); + policy->governor_data = NULL; + if (!count) + sugov_tunables_free(tunables); + + mutex_unlock(&global_tunables_lock); + + sugov_policy_free(sg_policy); + + return 0; +} + +static int sugov_start(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy = policy->governor_data; + unsigned int cpu; + + sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; + sg_policy->last_freq_update_time = 0; + sg_policy->next_freq = UINT_MAX; + sg_policy->work_in_progress = false; + sg_policy->need_freq_update = false; + + for_each_cpu(cpu, policy->cpus) { + struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); + + sg_cpu->sg_policy = sg_policy; + if (policy_is_shared(policy)) { + sg_cpu->util = 0; + sg_cpu->max = 0; + sg_cpu->flags = SCHED_CPUFREQ_RT; + sg_cpu->last_update = 0; + sg_cpu->cached_raw_freq = 0; + sg_cpu->iowait_boost = 0; + sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; + cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, + sugov_update_shared); + } else { + cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, + sugov_update_single); + } + } + return 0; +} + +static int sugov_stop(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy = policy->governor_data; + unsigned int cpu; + + for_each_cpu(cpu, policy->cpus) + cpufreq_remove_update_util_hook(cpu); + + synchronize_sched(); + + irq_work_sync(&sg_policy->irq_work); + cancel_work_sync(&sg_policy->work); + + return 0; +} + +static int sugov_limits(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy = policy->governor_data; + + if (!policy->fast_switch_enabled) { + mutex_lock(&sg_policy->work_lock); + cpufreq_policy_apply_limits(policy); + mutex_unlock(&sg_policy->work_lock); + } + + sg_policy->need_freq_update = true; + + return 0; +} + +static int cpufreq_schedutil_cb(struct cpufreq_policy *policy, + unsigned int event) +{ + switch(event) { + case CPUFREQ_GOV_POLICY_INIT: + return sugov_init(policy); + case CPUFREQ_GOV_POLICY_EXIT: + return sugov_exit(policy); + case CPUFREQ_GOV_START: + return sugov_start(policy); + case CPUFREQ_GOV_STOP: + return sugov_stop(policy); + case CPUFREQ_GOV_LIMITS: + return sugov_limits(policy); + default: + BUG(); + } +} + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL +static +#endif +struct cpufreq_governor cpufreq_gov_schedutil = { + .name = "schedutil", + .governor = cpufreq_schedutil_cb, + .owner = THIS_MODULE, +}; + +static int __init sugov_register(void) +{ + return cpufreq_register_governor(&cpufreq_gov_schedutil); +} +fs_initcall(sugov_register);