sched: walt: Leverage existing helper APIs to apply invariance

There's no need for a separate hierarchy of notifiers, APIs, and
variables in walt.c just for the purpose of applying frequency and IPC
invariance. Let's just use capacity_curr_of() and get rid of a lot of
the infrastructure relating to capacity, load_scale_factor, etc.
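
For reference, capacity_curr_of() already folds both effects into a single
number: the CPU's microarchitectural capacity relative to the most capable
CPU, scaled by the frequency it is currently running at relative to its own
maximum. A rough sketch of the intent (illustrative only, not the exact
helper implementation):

	/*
	 * capacity_curr_of(cpu) ~= capacity_orig_of(cpu) * cur_freq / max_freq
	 *
	 * capacity_orig_of() is SCHED_CAPACITY_SCALE (1024) for the most
	 * capable CPU at fmax, so scaling execution time by it applies
	 * frequency and IPC invariance in a single step:
	 *
	 *	scaled = (delta * capacity_curr_of(cpu)) >> SCHED_CAPACITY_SHIFT;
	 */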

Change-Id: Ia220e2c896373fa535db05bff60f9aa33aefc978
Signed-off-by: Vikram Mulukutla <markivx@codeaurora.org>
Authored by Vikram Mulukutla on 2017-08-24 11:38:00 -07:00; committed by Joel Fernandes
parent d6fbbe5e66
commit be832f69a9
4 changed files with 9 additions and 319 deletions

@@ -1045,7 +1045,6 @@ TRACE_EVENT(walt_update_task_ravg,
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( pid_t, cur_pid )
__field(unsigned int, cur_freq )
__field( u64, wallclock )
__field( u64, mark_start )
__field( u64, delta_m )
@@ -1073,7 +1072,6 @@ TRACE_EVENT(walt_update_task_ravg,
__entry->evt = evt;
__entry->cpu = rq->cpu;
__entry->cur_pid = rq->curr->pid;
__entry->cur_freq = rq->cur_freq;
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->mark_start = p->ravg.mark_start;
@@ -1092,11 +1090,10 @@ TRACE_EVENT(walt_update_task_ravg,
__entry->active_windows = p->ravg.active_windows;
),
TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
" cs %llu ps %llu util %lu cur_window %u prev_window %u active_wins %u"
, __entry->wallclock, __entry->win_start, __entry->delta,
__entry->evt, __entry->cpu,
__entry->cur_freq, __entry->cur_pid,
__entry->evt, __entry->cpu, __entry->cur_pid,
__entry->pid, __entry->comm, __entry->mark_start,
__entry->delta_m, __entry->demand,
__entry->sum, __entry->irqtime,

@@ -7702,7 +7702,6 @@ void __init sched_init_smp(void)
{
cpumask_var_t non_isolated_cpus;
walt_init_cpu_efficiency();
alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
alloc_cpumask_var(&fallback_doms, GFP_KERNEL);

@@ -673,18 +673,7 @@ struct rq {
#endif
#ifdef CONFIG_SCHED_WALT
/*
* max_freq = user or thermal defined maximum
* max_possible_freq = maximum supported by hardware
*/
unsigned int cur_freq, max_freq, min_freq, max_possible_freq;
struct cpumask freq_domain_cpumask;
u64 cumulative_runnable_avg;
int efficiency; /* Differentiate cpus with different IPC capability */
int load_scale_factor;
int capacity;
int max_possible_capacity;
u64 window_start;
u64 curr_runnable_sum;
u64 prev_runnable_sum;

@@ -20,7 +20,6 @@
*/
#include <linux/syscore_ops.h>
#include <linux/cpufreq.h>
#include <trace/events/sched.h>
#include "sched.h"
#include "walt.h"
@@ -45,29 +44,6 @@ unsigned int sysctl_sched_walt_init_task_load_pct = 15;
/* 1 -> use PELT based load stats, 0 -> use window-based load stats */
unsigned int __read_mostly walt_disabled = 0;
static unsigned int max_possible_efficiency = 1024;
static unsigned int min_possible_efficiency = 1024;
/*
* Maximum possible frequency across all cpus. Task demand and cpu
* capacity (cpu_power) metrics are scaled in reference to it.
*/
static unsigned int max_possible_freq = 1;
/*
* Minimum possible max_freq across all cpus. This will be same as
* max_possible_freq on homogeneous systems and could be different from
* max_possible_freq on heterogenous systems. min_max_freq is used to derive
* capacity (cpu_power) of cpus.
*/
static unsigned int min_max_freq = 1;
static unsigned int max_load_scale_factor = 1024;
static unsigned int max_possible_capacity = 1024;
/* Mask of all CPUs that have max_possible_capacity */
static cpumask_t mpc_mask = CPU_MASK_ALL;
/* Window size (in ns) */
__read_mostly unsigned int walt_ravg_window = 20000000;
@@ -206,24 +182,16 @@ update_window_start(struct rq *rq, u64 wallclock)
rq->window_start += (u64)nr_windows * (u64)walt_ravg_window;
}
/*
* Translate absolute delta time accounted on a CPU
* to a scale where 1024 is the capacity of the most
* capable CPU running at FMAX
*/
static u64 scale_exec_time(u64 delta, struct rq *rq)
{
unsigned int cur_freq = rq->cur_freq;
int sf;
unsigned long capcurr = capacity_curr_of(cpu_of(rq));
if (unlikely(cur_freq > max_possible_freq))
cur_freq = rq->max_possible_freq;
/* round up div64 */
delta = div64_u64(delta * cur_freq + max_possible_freq - 1,
max_possible_freq);
sf = DIV_ROUND_UP(rq->efficiency * 1024, max_possible_efficiency);
delta *= sf;
delta >>= 10;
return delta;
return (delta * capcurr) >> SCHED_CAPACITY_SHIFT;
}
static int cpu_is_waiting_on_io(struct rq *rq)
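
To make the new scaling concrete, a worked example with hypothetical numbers
(assuming capacity_curr_of() returns capacity_orig scaled by the CPU's current
frequency relative to its own maximum):

	/*
	 * Hypothetical little CPU: capacity_orig = 512 (half the IPC of the
	 * biggest CPU), currently running at half of its own fmax, so
	 * capacity_curr_of() returns about 512 / 2 = 256.
	 *
	 * 10ms of raw execution time on that CPU is then scaled to
	 *
	 *	(10000000 * 256) >> SCHED_CAPACITY_SHIFT = 2500000 ns
	 *
	 * i.e. roughly 2.5ms of equivalent work on the most capable CPU at
	 * fmax, which is roughly what the removed cur_freq/efficiency
	 * pipeline arrived at in two separate steps.
	 */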
@@ -746,33 +714,6 @@ done:
p->ravg.mark_start = wallclock;
}
unsigned long __weak arch_get_cpu_efficiency(int cpu)
{
return SCHED_LOAD_SCALE;
}
void walt_init_cpu_efficiency(void)
{
int i, efficiency;
unsigned int max = 0, min = UINT_MAX;
for_each_possible_cpu(i) {
efficiency = arch_get_cpu_efficiency(i);
cpu_rq(i)->efficiency = efficiency;
if (efficiency > max)
max = efficiency;
if (efficiency < min)
min = efficiency;
}
if (max)
max_possible_efficiency = max;
if (min)
min_possible_efficiency = min;
}
static void reset_task_stats(struct task_struct *p)
{
u32 sum = 0;
@@ -877,242 +818,6 @@ void walt_fixup_busy_time(struct task_struct *p, int new_cpu)
double_rq_unlock(src_rq, dest_rq);
}
/*
* Return 'capacity' of a cpu in reference to "least" efficient cpu, such that
* least efficient cpu gets capacity of 1024
*/
static unsigned long capacity_scale_cpu_efficiency(int cpu)
{
return (1024 * cpu_rq(cpu)->efficiency) / min_possible_efficiency;
}
/*
* Return 'capacity' of a cpu in reference to cpu with lowest max_freq
* (min_max_freq), such that one with lowest max_freq gets capacity of 1024.
*/
static unsigned long capacity_scale_cpu_freq(int cpu)
{
return (1024 * cpu_rq(cpu)->max_freq) / min_max_freq;
}
/*
* Return load_scale_factor of a cpu in reference to "most" efficient cpu, so
* that "most" efficient cpu gets a load_scale_factor of 1
*/
static unsigned long load_scale_cpu_efficiency(int cpu)
{
return DIV_ROUND_UP(1024 * max_possible_efficiency,
cpu_rq(cpu)->efficiency);
}
/*
* Return load_scale_factor of a cpu in reference to cpu with best max_freq
* (max_possible_freq), so that one with best max_freq gets a load_scale_factor
* of 1.
*/
static unsigned long load_scale_cpu_freq(int cpu)
{
return DIV_ROUND_UP(1024 * max_possible_freq, cpu_rq(cpu)->max_freq);
}
static int compute_capacity(int cpu)
{
int capacity = 1024;
capacity *= capacity_scale_cpu_efficiency(cpu);
capacity >>= 10;
capacity *= capacity_scale_cpu_freq(cpu);
capacity >>= 10;
return capacity;
}
static int compute_load_scale_factor(int cpu)
{
int load_scale = 1024;
/*
* load_scale_factor accounts for the fact that task load
* is in reference to "best" performing cpu. Task's load will need to be
* scaled (up) by a factor to determine suitability to be placed on a
* (little) cpu.
*/
load_scale *= load_scale_cpu_efficiency(cpu);
load_scale >>= 10;
load_scale *= load_scale_cpu_freq(cpu);
load_scale >>= 10;
return load_scale;
}
static int cpufreq_notifier_policy(struct notifier_block *nb,
unsigned long val, void *data)
{
struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
int i, update_max = 0;
u64 highest_mpc = 0, highest_mplsf = 0;
const struct cpumask *cpus = policy->related_cpus;
unsigned int orig_min_max_freq = min_max_freq;
unsigned int orig_max_possible_freq = max_possible_freq;
/* Initialized to policy->max in case policy->related_cpus is empty! */
unsigned int orig_max_freq = policy->max;
if (val != CPUFREQ_NOTIFY)
return 0;
for_each_cpu(i, policy->related_cpus) {
cpumask_copy(&cpu_rq(i)->freq_domain_cpumask,
policy->related_cpus);
orig_max_freq = cpu_rq(i)->max_freq;
cpu_rq(i)->min_freq = policy->min;
cpu_rq(i)->max_freq = policy->max;
cpu_rq(i)->cur_freq = policy->cur;
cpu_rq(i)->max_possible_freq = policy->cpuinfo.max_freq;
}
max_possible_freq = max(max_possible_freq, policy->cpuinfo.max_freq);
if (min_max_freq == 1)
min_max_freq = UINT_MAX;
min_max_freq = min(min_max_freq, policy->cpuinfo.max_freq);
BUG_ON(!min_max_freq);
BUG_ON(!policy->max);
/* Changes to policy other than max_freq don't require any updates */
if (orig_max_freq == policy->max)
return 0;
/*
* A changed min_max_freq or max_possible_freq (possible during bootup)
* needs to trigger re-computation of load_scale_factor and capacity for
* all possible cpus (even those offline). It also needs to trigger
* re-computation of nr_big_task count on all online cpus.
*
* A changed rq->max_freq otoh needs to trigger re-computation of
* load_scale_factor and capacity for just the cluster of cpus involved.
* Since small task definition depends on max_load_scale_factor, a
* changed load_scale_factor of one cluster could influence
* classification of tasks in another cluster. Hence a changed
* rq->max_freq will need to trigger re-computation of nr_big_task
* count on all online cpus.
*
* While it should be sufficient for nr_big_tasks to be
* re-computed for only online cpus, we have inadequate context
* information here (in policy notifier) with regard to hotplug-safety
* context in which notification is issued. As a result, we can't use
* get_online_cpus() here, as it can lead to deadlock. Until cpufreq is
* fixed up to issue notification always in hotplug-safe context,
* re-compute nr_big_task for all possible cpus.
*/
if (orig_min_max_freq != min_max_freq ||
orig_max_possible_freq != max_possible_freq) {
cpus = cpu_possible_mask;
update_max = 1;
}
/*
* Changed load_scale_factor can trigger reclassification of tasks as
* big or small. Make this change "atomic" so that tasks are accounted
* properly due to changed load_scale_factor
*/
for_each_cpu(i, cpus) {
struct rq *rq = cpu_rq(i);
rq->capacity = compute_capacity(i);
rq->load_scale_factor = compute_load_scale_factor(i);
if (update_max) {
u64 mpc, mplsf;
mpc = div_u64(((u64) rq->capacity) *
rq->max_possible_freq, rq->max_freq);
rq->max_possible_capacity = (int) mpc;
mplsf = div_u64(((u64) rq->load_scale_factor) *
rq->max_possible_freq, rq->max_freq);
if (mpc > highest_mpc) {
highest_mpc = mpc;
cpumask_clear(&mpc_mask);
cpumask_set_cpu(i, &mpc_mask);
} else if (mpc == highest_mpc) {
cpumask_set_cpu(i, &mpc_mask);
}
if (mplsf > highest_mplsf)
highest_mplsf = mplsf;
}
}
if (update_max) {
max_possible_capacity = highest_mpc;
max_load_scale_factor = highest_mplsf;
}
return 0;
}
static int cpufreq_notifier_trans(struct notifier_block *nb,
unsigned long val, void *data)
{
struct cpufreq_freqs *freq = (struct cpufreq_freqs *)data;
unsigned int cpu = freq->cpu, new_freq = freq->new;
unsigned long flags;
int i;
if (val != CPUFREQ_POSTCHANGE)
return 0;
BUG_ON(!new_freq);
if (cpu_rq(cpu)->cur_freq == new_freq)
return 0;
for_each_cpu(i, &cpu_rq(cpu)->freq_domain_cpumask) {
struct rq *rq = cpu_rq(i);
raw_spin_lock_irqsave(&rq->lock, flags);
walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
walt_ktime_clock(), 0);
rq->cur_freq = new_freq;
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
return 0;
}
static struct notifier_block notifier_policy_block = {
.notifier_call = cpufreq_notifier_policy
};
static struct notifier_block notifier_trans_block = {
.notifier_call = cpufreq_notifier_trans
};
static int register_sched_callback(void)
{
int ret;
ret = cpufreq_register_notifier(&notifier_policy_block,
CPUFREQ_POLICY_NOTIFIER);
if (!ret)
ret = cpufreq_register_notifier(&notifier_trans_block,
CPUFREQ_TRANSITION_NOTIFIER);
return 0;
}
/*
* cpufreq callbacks can be registered at core_initcall or later time.
* Any registration done prior to that is "forgotten" by cpufreq. See
* initialization of variable init_cpufreq_transition_notifier_list_called
* for further information.
*/
core_initcall(register_sched_callback);
void walt_init_new_task_load(struct task_struct *p)
{
int i;