sched: walt: Leverage existing helper APIs to apply invariance
There's no need for a separate hierarchy of notifiers, APIs and variables in
walt.c for the purpose of applying frequency and IPC invariance. Let's just
use capacity_curr_of() and get rid of a lot of the infrastructure relating to
capacity, load_scale_factor, etc.

Change-Id: Ia220e2c896373fa535db05bff60f9aa33aefc978
Signed-off-by: Vikram Mulukutla <markivx@codeaurora.org>
commit be832f69a9
parent d6fbbe5e66

4 changed files with 9 additions and 319 deletions
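In rough terms, the patch replaces WALT's two-step scaling (a frequency ratio followed by an IPC/efficiency ratio) with a single multiply by capacity_curr_of(), which already folds both ratios into one 1024-based capacity value. The stand-alone user-space sketch below illustrates only the arithmetic; it is not kernel code, the helper names scale_old()/scale_new() are invented for the comparison, and all numeric values are made-up examples.

/*
 * User-space sketch of the arithmetic only (not kernel code).  scale_old()
 * mirrors the removed WALT chain (frequency ratio, then IPC ratio);
 * scale_new() mirrors the single capacity_curr_of()-based step.  All numbers
 * in main() are made-up example values.
 */
#include <stdio.h>
#include <stdint.h>

#define SCHED_CAPACITY_SHIFT 10        /* 1024 == most capable CPU at FMAX */

/* Old scheme: scale delta by cur_freq/max_possible_freq, then by IPC ratio. */
static uint64_t scale_old(uint64_t delta, unsigned int cur_freq,
                          unsigned int max_possible_freq,
                          unsigned int efficiency,
                          unsigned int max_possible_efficiency)
{
        uint64_t sf;

        /* round-up divisions, as the removed code did */
        delta = (delta * cur_freq + max_possible_freq - 1) / max_possible_freq;
        sf = (1024ULL * efficiency + max_possible_efficiency - 1) /
                                                max_possible_efficiency;
        return (delta * sf) >> SCHED_CAPACITY_SHIFT;
}

/* New scheme: one multiply by the CPU's current capacity. */
static uint64_t scale_new(uint64_t delta, unsigned long capacity_curr)
{
        return (delta * capacity_curr) >> SCHED_CAPACITY_SHIFT;
}

int main(void)
{
        uint64_t delta = 2000000;               /* 2 ms of execution, in ns */
        unsigned int cur_freq = 1000000;        /* kHz, example only */
        unsigned int max_possible_freq = 2000000;
        unsigned int efficiency = 512;
        unsigned int max_possible_efficiency = 1024;
        /* capacity_curr_of() folds both ratios into one 1024-based value. */
        unsigned long capacity_curr = (1024UL * cur_freq / max_possible_freq) *
                                        efficiency / max_possible_efficiency;

        printf("old scaling: %llu ns\n",
               (unsigned long long)scale_old(delta, cur_freq, max_possible_freq,
                                             efficiency, max_possible_efficiency));
        printf("new scaling: %llu ns\n",
               (unsigned long long)scale_new(delta, capacity_curr));
        return 0;
}

With these example values (cur_freq at half of max_possible_freq, efficiency at half of max_possible_efficiency) both paths scale 2 ms of wall time down to 0.5 ms of invariant demand, which is what lets the patch drop the WALT-private bookkeeping below.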
include/trace/events/sched.h

@@ -1045,7 +1045,6 @@ TRACE_EVENT(walt_update_task_ravg,
 		__array(	char,	comm,	TASK_COMM_LEN	)
 		__field(	pid_t,	pid	)
 		__field(	pid_t,	cur_pid	)
-		__field(unsigned int,	cur_freq	)
 		__field(	u64,	wallclock	)
 		__field(	u64,	mark_start	)
 		__field(	u64,	delta_m	)
@@ -1073,7 +1072,6 @@ TRACE_EVENT(walt_update_task_ravg,
 		__entry->evt = evt;
 		__entry->cpu = rq->cpu;
 		__entry->cur_pid = rq->curr->pid;
-		__entry->cur_freq = rq->cur_freq;
 		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
 		__entry->pid = p->pid;
 		__entry->mark_start = p->ravg.mark_start;
@@ -1092,11 +1090,10 @@ TRACE_EVENT(walt_update_task_ravg,
 		__entry->active_windows = p->ravg.active_windows;
 	),
 
-	TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
+	TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
 		" cs %llu ps %llu util %lu cur_window %u prev_window %u active_wins %u"
 		, __entry->wallclock, __entry->win_start, __entry->delta,
-		__entry->evt, __entry->cpu,
-		__entry->cur_freq, __entry->cur_pid,
+		__entry->evt, __entry->cpu, __entry->cur_pid,
 		__entry->pid, __entry->comm, __entry->mark_start,
 		__entry->delta_m, __entry->demand,
 		__entry->sum, __entry->irqtime,
kernel/sched/core.c

@@ -7702,7 +7702,6 @@ void __init sched_init_smp(void)
 {
 	cpumask_var_t non_isolated_cpus;
 
-	walt_init_cpu_efficiency();
 	alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
 	alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
 
kernel/sched/sched.h

@@ -673,18 +673,7 @@ struct rq {
 #endif
 
 #ifdef CONFIG_SCHED_WALT
-	/*
-	 * max_freq = user or thermal defined maximum
-	 * max_possible_freq = maximum supported by hardware
-	 */
-	unsigned int cur_freq, max_freq, min_freq, max_possible_freq;
-	struct cpumask freq_domain_cpumask;
-
 	u64 cumulative_runnable_avg;
-	int efficiency; /* Differentiate cpus with different IPC capability */
-	int load_scale_factor;
-	int capacity;
-	int max_possible_capacity;
 	u64 window_start;
 	u64 curr_runnable_sum;
 	u64 prev_runnable_sum;
kernel/sched/walt.c

@@ -20,7 +20,6 @@
  */
 
 #include <linux/syscore_ops.h>
-#include <linux/cpufreq.h>
 #include <trace/events/sched.h>
 #include "sched.h"
 #include "walt.h"
@@ -45,29 +44,6 @@ unsigned int sysctl_sched_walt_init_task_load_pct = 15;
 /* 1 -> use PELT based load stats, 0 -> use window-based load stats */
 unsigned int __read_mostly walt_disabled = 0;
 
-static unsigned int max_possible_efficiency = 1024;
-static unsigned int min_possible_efficiency = 1024;
-
-/*
- * Maximum possible frequency across all cpus. Task demand and cpu
- * capacity (cpu_power) metrics are scaled in reference to it.
- */
-static unsigned int max_possible_freq = 1;
-
-/*
- * Minimum possible max_freq across all cpus. This will be same as
- * max_possible_freq on homogeneous systems and could be different from
- * max_possible_freq on heterogenous systems. min_max_freq is used to derive
- * capacity (cpu_power) of cpus.
- */
-static unsigned int min_max_freq = 1;
-
-static unsigned int max_load_scale_factor = 1024;
-static unsigned int max_possible_capacity = 1024;
-
-/* Mask of all CPUs that have max_possible_capacity */
-static cpumask_t mpc_mask = CPU_MASK_ALL;
-
 /* Window size (in ns) */
 __read_mostly unsigned int walt_ravg_window = 20000000;
 
@@ -206,24 +182,16 @@ update_window_start(struct rq *rq, u64 wallclock)
 	rq->window_start += (u64)nr_windows * (u64)walt_ravg_window;
 }
 
+/*
+ * Translate absolute delta time accounted on a CPU
+ * to a scale where 1024 is the capacity of the most
+ * capable CPU running at FMAX
+ */
 static u64 scale_exec_time(u64 delta, struct rq *rq)
 {
-	unsigned int cur_freq = rq->cur_freq;
-	int sf;
+	unsigned long capcurr = capacity_curr_of(cpu_of(rq));
 
-	if (unlikely(cur_freq > max_possible_freq))
-		cur_freq = rq->max_possible_freq;
-
-	/* round up div64 */
-	delta = div64_u64(delta * cur_freq + max_possible_freq - 1,
-			  max_possible_freq);
-
-	sf = DIV_ROUND_UP(rq->efficiency * 1024, max_possible_efficiency);
-
-	delta *= sf;
-	delta >>= 10;
-
-	return delta;
+	return (delta * capcurr) >> SCHED_CAPACITY_SHIFT;
 }
 
 static int cpu_is_waiting_on_io(struct rq *rq)
@@ -746,33 +714,6 @@ done:
 	p->ravg.mark_start = wallclock;
 }
 
-unsigned long __weak arch_get_cpu_efficiency(int cpu)
-{
-	return SCHED_LOAD_SCALE;
-}
-
-void walt_init_cpu_efficiency(void)
-{
-	int i, efficiency;
-	unsigned int max = 0, min = UINT_MAX;
-
-	for_each_possible_cpu(i) {
-		efficiency = arch_get_cpu_efficiency(i);
-		cpu_rq(i)->efficiency = efficiency;
-
-		if (efficiency > max)
-			max = efficiency;
-		if (efficiency < min)
-			min = efficiency;
-	}
-
-	if (max)
-		max_possible_efficiency = max;
-
-	if (min)
-		min_possible_efficiency = min;
-}
-
 static void reset_task_stats(struct task_struct *p)
 {
 	u32 sum = 0;
@@ -877,242 +818,6 @@ void walt_fixup_busy_time(struct task_struct *p, int new_cpu)
 	double_rq_unlock(src_rq, dest_rq);
 }
 
-/*
- * Return 'capacity' of a cpu in reference to "least" efficient cpu, such that
- * least efficient cpu gets capacity of 1024
- */
-static unsigned long capacity_scale_cpu_efficiency(int cpu)
-{
-	return (1024 * cpu_rq(cpu)->efficiency) / min_possible_efficiency;
-}
-
-/*
- * Return 'capacity' of a cpu in reference to cpu with lowest max_freq
- * (min_max_freq), such that one with lowest max_freq gets capacity of 1024.
- */
-static unsigned long capacity_scale_cpu_freq(int cpu)
-{
-	return (1024 * cpu_rq(cpu)->max_freq) / min_max_freq;
-}
-
-/*
- * Return load_scale_factor of a cpu in reference to "most" efficient cpu, so
- * that "most" efficient cpu gets a load_scale_factor of 1
- */
-static unsigned long load_scale_cpu_efficiency(int cpu)
-{
-	return DIV_ROUND_UP(1024 * max_possible_efficiency,
-			    cpu_rq(cpu)->efficiency);
-}
-
-/*
- * Return load_scale_factor of a cpu in reference to cpu with best max_freq
- * (max_possible_freq), so that one with best max_freq gets a load_scale_factor
- * of 1.
- */
-static unsigned long load_scale_cpu_freq(int cpu)
-{
-	return DIV_ROUND_UP(1024 * max_possible_freq, cpu_rq(cpu)->max_freq);
-}
-
-static int compute_capacity(int cpu)
-{
-	int capacity = 1024;
-
-	capacity *= capacity_scale_cpu_efficiency(cpu);
-	capacity >>= 10;
-
-	capacity *= capacity_scale_cpu_freq(cpu);
-	capacity >>= 10;
-
-	return capacity;
-}
-
-static int compute_load_scale_factor(int cpu)
-{
-	int load_scale = 1024;
-
-	/*
-	 * load_scale_factor accounts for the fact that task load
-	 * is in reference to "best" performing cpu. Task's load will need to be
-	 * scaled (up) by a factor to determine suitability to be placed on a
-	 * (little) cpu.
-	 */
-	load_scale *= load_scale_cpu_efficiency(cpu);
-	load_scale >>= 10;
-
-	load_scale *= load_scale_cpu_freq(cpu);
-	load_scale >>= 10;
-
-	return load_scale;
-}
-
-static int cpufreq_notifier_policy(struct notifier_block *nb,
-				   unsigned long val, void *data)
-{
-	struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
-	int i, update_max = 0;
-	u64 highest_mpc = 0, highest_mplsf = 0;
-	const struct cpumask *cpus = policy->related_cpus;
-	unsigned int orig_min_max_freq = min_max_freq;
-	unsigned int orig_max_possible_freq = max_possible_freq;
-	/* Initialized to policy->max in case policy->related_cpus is empty! */
-	unsigned int orig_max_freq = policy->max;
-
-	if (val != CPUFREQ_NOTIFY)
-		return 0;
-
-	for_each_cpu(i, policy->related_cpus) {
-		cpumask_copy(&cpu_rq(i)->freq_domain_cpumask,
-			     policy->related_cpus);
-		orig_max_freq = cpu_rq(i)->max_freq;
-		cpu_rq(i)->min_freq = policy->min;
-		cpu_rq(i)->max_freq = policy->max;
-		cpu_rq(i)->cur_freq = policy->cur;
-		cpu_rq(i)->max_possible_freq = policy->cpuinfo.max_freq;
-	}
-
-	max_possible_freq = max(max_possible_freq, policy->cpuinfo.max_freq);
-	if (min_max_freq == 1)
-		min_max_freq = UINT_MAX;
-	min_max_freq = min(min_max_freq, policy->cpuinfo.max_freq);
-	BUG_ON(!min_max_freq);
-	BUG_ON(!policy->max);
-
-	/* Changes to policy other than max_freq don't require any updates */
-	if (orig_max_freq == policy->max)
-		return 0;
-
-	/*
-	 * A changed min_max_freq or max_possible_freq (possible during bootup)
-	 * needs to trigger re-computation of load_scale_factor and capacity for
-	 * all possible cpus (even those offline). It also needs to trigger
-	 * re-computation of nr_big_task count on all online cpus.
-	 *
-	 * A changed rq->max_freq otoh needs to trigger re-computation of
-	 * load_scale_factor and capacity for just the cluster of cpus involved.
-	 * Since small task definition depends on max_load_scale_factor, a
-	 * changed load_scale_factor of one cluster could influence
-	 * classification of tasks in another cluster. Hence a changed
-	 * rq->max_freq will need to trigger re-computation of nr_big_task
-	 * count on all online cpus.
-	 *
-	 * While it should be sufficient for nr_big_tasks to be
-	 * re-computed for only online cpus, we have inadequate context
-	 * information here (in policy notifier) with regard to hotplug-safety
-	 * context in which notification is issued. As a result, we can't use
-	 * get_online_cpus() here, as it can lead to deadlock. Until cpufreq is
-	 * fixed up to issue notification always in hotplug-safe context,
-	 * re-compute nr_big_task for all possible cpus.
-	 */
-
-	if (orig_min_max_freq != min_max_freq ||
-	    orig_max_possible_freq != max_possible_freq) {
-		cpus = cpu_possible_mask;
-		update_max = 1;
-	}
-
-	/*
-	 * Changed load_scale_factor can trigger reclassification of tasks as
-	 * big or small. Make this change "atomic" so that tasks are accounted
-	 * properly due to changed load_scale_factor
-	 */
-	for_each_cpu(i, cpus) {
-		struct rq *rq = cpu_rq(i);
-
-		rq->capacity = compute_capacity(i);
-		rq->load_scale_factor = compute_load_scale_factor(i);
-
-		if (update_max) {
-			u64 mpc, mplsf;
-
-			mpc = div_u64(((u64) rq->capacity) *
-				rq->max_possible_freq, rq->max_freq);
-			rq->max_possible_capacity = (int) mpc;
-
-			mplsf = div_u64(((u64) rq->load_scale_factor) *
-				rq->max_possible_freq, rq->max_freq);
-
-			if (mpc > highest_mpc) {
-				highest_mpc = mpc;
-				cpumask_clear(&mpc_mask);
-				cpumask_set_cpu(i, &mpc_mask);
-			} else if (mpc == highest_mpc) {
-				cpumask_set_cpu(i, &mpc_mask);
-			}
-
-			if (mplsf > highest_mplsf)
-				highest_mplsf = mplsf;
-		}
-	}
-
-	if (update_max) {
-		max_possible_capacity = highest_mpc;
-		max_load_scale_factor = highest_mplsf;
-	}
-
-	return 0;
-}
-
-static int cpufreq_notifier_trans(struct notifier_block *nb,
-				  unsigned long val, void *data)
-{
-	struct cpufreq_freqs *freq = (struct cpufreq_freqs *)data;
-	unsigned int cpu = freq->cpu, new_freq = freq->new;
-	unsigned long flags;
-	int i;
-
-	if (val != CPUFREQ_POSTCHANGE)
-		return 0;
-
-	BUG_ON(!new_freq);
-
-	if (cpu_rq(cpu)->cur_freq == new_freq)
-		return 0;
-
-	for_each_cpu(i, &cpu_rq(cpu)->freq_domain_cpumask) {
-		struct rq *rq = cpu_rq(i);
-
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
-				      walt_ktime_clock(), 0);
-		rq->cur_freq = new_freq;
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-	}
-
-	return 0;
-}
-
-static struct notifier_block notifier_policy_block = {
-	.notifier_call = cpufreq_notifier_policy
-};
-
-static struct notifier_block notifier_trans_block = {
-	.notifier_call = cpufreq_notifier_trans
-};
-
-static int register_sched_callback(void)
-{
-	int ret;
-
-	ret = cpufreq_register_notifier(&notifier_policy_block,
-					CPUFREQ_POLICY_NOTIFIER);
-
-	if (!ret)
-		ret = cpufreq_register_notifier(&notifier_trans_block,
-						CPUFREQ_TRANSITION_NOTIFIER);
-
-	return 0;
-}
-
-/*
- * cpufreq callbacks can be registered at core_initcall or later time.
- * Any registration done prior to that is "forgotten" by cpufreq. See
- * initialization of variable init_cpufreq_transition_notifier_list_called
- * for further information.
- */
-core_initcall(register_sched_callback);
-
 void walt_init_new_task_load(struct task_struct *p)
 {
 	int i;