cpufreq: interactive: Use load prediction provided by scheduler

With the modification in the scheduler, the governor now gets the
predicted instantaneous demand waiting to run, in addition to the demand
from the previous window, for each CPU. Make use of this information
since the prediction from the scheduler could be more accurate than just
looking at the past few windows.

The governor calculates two frequencies during each sampling period: one
based on demand in the previous sampling period (f_prev), and the other
based on the prediction provided by the scheduler (f_pred). The max of
both is selected as the final frequency. Hispeed-related logic, including
both frequency selection and delay, is ignored when prediction is
enabled. If only f_pred, but not f_prev, picked policy->max, the
max_freq_hysteresis period is not started/extended. This reduces the
power cost of a misprediction if one happens.

One use case where prediction could help dramatically is when a heavy
task wakes up after sleeping for a long time. With prediction, the
governor could ramp up to the frequency the task needs much faster than
before.

To enable prediction, echo 1 to the enable_prediction file in the
cpufreq interactive sysfs directory.

Change-Id: I27396785886e43ea01c9000c651c8bd142172273
Suggested-by: Saravana Kannan <skannan@codeaurora.org>
Signed-off-by: Junjie Wu <junjiew@codeaurora.org>
This commit is contained in:
Junjie Wu 2015-06-09 17:36:11 -07:00 committed by David Keitel
parent 1a76fe2389
commit 3eb23fd73c
3 changed files with 115 additions and 59 deletions

View file

@ -337,6 +337,16 @@ evaluation triggered by timer, min_sample_time is still always
enforced. fast_ramp_down has no effect if use_migration_notif is
set to zero. Default is zero.
enable_prediction: If non-zero, two frequencies will be calculated
during each sampling period: one based on busy time in the previous
sampling period (f_prev), and the other based on the prediction provided
by the scheduler (f_pred). The max of both will be selected as the final
frequency. Hispeed-related logic, including both frequency selection and
delay, is ignored if enable_prediction is set. If only f_pred, but not
f_prev, picked policy->max, the max_freq_hysteresis period is not
started/extended. use_sched_load must be turned on before enabling this
feature. Default is zero.
3. The Governor Interface in the CPUfreq Core
=============================================

View file

@ -157,6 +157,9 @@ struct cpufreq_interactive_tunables {
/* Ignore min_sample_time for notification */
bool fast_ramp_down;
/* Whether to enable prediction or not */
bool enable_prediction;
};
/*
@ -458,28 +461,43 @@ static u64 update_load(int cpu)
return now;
}
/*
 * Convert a scheduler-reported busy time into a load-adjusted frequency:
 * busy * cpuinfo.max_freq / timer_rate * 100.
 *
 * @ppol: policy info supplying cpuinfo.max_freq and, through
 *        policy->governor_data, the tunables that hold timer_rate.
 * @busy: busy time reported by the scheduler for one CPU
 *        (presumably in the same time unit as timer_rate -- confirm).
 *
 * Returns the quotient scaled by 100, truncated to unsigned int.
 */
static unsigned int sl_busy_to_laf(struct cpufreq_interactive_policyinfo *ppol,
				   unsigned long busy)
{
	struct cpufreq_interactive_tunables *tunables =
		ppol->policy->governor_data;
	/*
	 * Widen before multiplying: unsigned long is only 32 bits wide on
	 * 32-bit targets, where busy * max_freq can wrap before the 64-bit
	 * division ever sees it.
	 */
	unsigned long long scaled =
		(unsigned long long)busy * ppol->policy->cpuinfo.max_freq;

	return div64_s64(scaled, tunables->timer_rate) * 100;
}
#define NEW_TASK_RATIO 75
#define PRED_TOLERANCE_PCT 10
static void cpufreq_interactive_timer(unsigned long data)
{
u64 now;
s64 now;
unsigned int delta_time;
u64 cputime_speedadj;
int cpu_load;
int pol_load = 0;
struct cpufreq_interactive_policyinfo *ppol = per_cpu(polinfo, data);
struct cpufreq_interactive_tunables *tunables =
ppol->policy->governor_data;
struct sched_load *sl = ppol->sl;
struct cpufreq_interactive_cpuinfo *pcpu;
unsigned int new_freq;
unsigned int loadadjfreq = 0, tmploadadjfreq;
unsigned int prev_laf = 0, t_prevlaf;
unsigned int pred_laf = 0, t_predlaf = 0;
unsigned int prev_chfreq, pred_chfreq, chosen_freq;
unsigned int index;
unsigned long flags;
unsigned long max_cpu;
int i, fcpu;
struct sched_load *sl;
int cpu, i;
int new_load_pct = 0;
int prev_l, pred_l = 0;
struct cpufreq_govinfo govinfo;
bool skip_hispeed_logic, skip_min_sample_time;
bool policy_max_fast_restore = false;
bool jump_to_max_no_ts = false;
bool jump_to_max = false;
if (!down_read_trylock(&ppol->enable_sem))
@ -487,89 +505,106 @@ static void cpufreq_interactive_timer(unsigned long data)
if (!ppol->governor_enabled)
goto exit;
fcpu = cpumask_first(ppol->policy->related_cpus);
now = ktime_to_us(ktime_get());
spin_lock_irqsave(&ppol->target_freq_lock, flags);
spin_lock(&ppol->load_lock);
skip_hispeed_logic = tunables->ignore_hispeed_on_notif &&
ppol->notif_pending;
skip_hispeed_logic = tunables->enable_prediction ? true :
tunables->ignore_hispeed_on_notif && ppol->notif_pending;
skip_min_sample_time = tunables->fast_ramp_down && ppol->notif_pending;
ppol->notif_pending = false;
now = ktime_to_us(ktime_get());
ppol->last_evaluated_jiffy = get_jiffies_64();
if (tunables->use_sched_load)
sched_get_cpus_busy(ppol->sl, ppol->policy->related_cpus);
sched_get_cpus_busy(sl, ppol->policy->cpus);
max_cpu = cpumask_first(ppol->policy->cpus);
for_each_cpu(i, ppol->policy->cpus) {
pcpu = &per_cpu(cpuinfo, i);
sl = &ppol->sl[i - fcpu];
i = 0;
for_each_cpu(cpu, ppol->policy->cpus) {
pcpu = &per_cpu(cpuinfo, cpu);
if (tunables->use_sched_load) {
cputime_speedadj = (u64)sl->prev_load *
ppol->policy->cpuinfo.max_freq;
do_div(cputime_speedadj, tunables->timer_rate);
new_load_pct = 0;
if (sl->prev_load)
new_load_pct = sl->new_task_load * 100 /
sl->prev_load;
t_prevlaf = sl_busy_to_laf(ppol, sl[i].prev_load);
prev_l = t_prevlaf / ppol->target_freq;
if (tunables->enable_prediction) {
t_predlaf = sl_busy_to_laf(ppol,
sl[i].predicted_load);
pred_l = t_predlaf / ppol->target_freq;
}
if (sl[i].prev_load)
new_load_pct = sl[i].new_task_load * 100 /
sl[i].prev_load;
else
new_load_pct = 0;
} else {
now = update_load(i);
now = update_load(cpu);
delta_time = (unsigned int)
(now - pcpu->cputime_speedadj_timestamp);
if (WARN_ON_ONCE(!delta_time))
continue;
cputime_speedadj = pcpu->cputime_speedadj;
do_div(cputime_speedadj, delta_time);
t_prevlaf = (unsigned int)cputime_speedadj * 100;
prev_l = t_prevlaf / ppol->target_freq;
}
tmploadadjfreq = (unsigned int)cputime_speedadj * 100;
pcpu->loadadjfreq = tmploadadjfreq;
if (tmploadadjfreq > loadadjfreq) {
loadadjfreq = tmploadadjfreq;
max_cpu = i;
/* find max of loadadjfreq inside policy */
if (t_prevlaf > prev_laf) {
prev_laf = t_prevlaf;
max_cpu = cpu;
}
cpu_load = tmploadadjfreq / ppol->target_freq;
trace_cpufreq_interactive_cpuload(i, cpu_load, new_load_pct);
pred_laf = max(t_predlaf, pred_laf);
if (cpu_load >= tunables->go_hispeed_load &&
cpu_load = max(prev_l, pred_l);
pol_load = max(pol_load, cpu_load);
trace_cpufreq_interactive_cpuload(cpu, cpu_load, new_load_pct,
prev_l, pred_l);
/* save loadadjfreq for notification */
pcpu->loadadjfreq = max(t_prevlaf, t_predlaf);
/* detect heavy new task and jump to policy->max */
if (prev_l >= tunables->go_hispeed_load &&
new_load_pct >= NEW_TASK_RATIO) {
skip_hispeed_logic = true;
jump_to_max = true;
}
i++;
}
spin_unlock(&ppol->load_lock);
cpu_load = loadadjfreq / ppol->target_freq;
tunables->boosted = tunables->boost_val || now < tunables->boostpulse_endtime;
prev_chfreq = choose_freq(ppol, prev_laf);
pred_chfreq = choose_freq(ppol, pred_laf);
chosen_freq = max(prev_chfreq, pred_chfreq);
if (prev_chfreq < ppol->policy->max && pred_chfreq >= ppol->policy->max)
if (!jump_to_max)
jump_to_max_no_ts = true;
if (now - ppol->max_freq_hyst_start_time <
tunables->max_freq_hysteresis &&
cpu_load >= tunables->go_hispeed_load &&
pol_load >= tunables->go_hispeed_load &&
ppol->target_freq < ppol->policy->max) {
skip_hispeed_logic = true;
skip_min_sample_time = true;
policy_max_fast_restore = true;
if (!jump_to_max)
jump_to_max_no_ts = true;
}
if (policy_max_fast_restore || jump_to_max) {
new_freq = chosen_freq;
if (jump_to_max_no_ts || jump_to_max) {
new_freq = ppol->policy->cpuinfo.max_freq;
} else if (skip_hispeed_logic) {
new_freq = choose_freq(ppol, loadadjfreq);
} else if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) {
if (ppol->target_freq < tunables->hispeed_freq) {
new_freq = tunables->hispeed_freq;
} else {
new_freq = choose_freq(ppol, loadadjfreq);
if (new_freq < tunables->hispeed_freq)
} else if (!skip_hispeed_logic) {
if (pol_load >= tunables->go_hispeed_load ||
tunables->boosted) {
if (ppol->target_freq < tunables->hispeed_freq)
new_freq = tunables->hispeed_freq;
else
new_freq = max(new_freq,
tunables->hispeed_freq);
}
} else {
new_freq = choose_freq(ppol, loadadjfreq);
if (new_freq > tunables->hispeed_freq &&
ppol->policy->cur < tunables->hispeed_freq)
new_freq = tunables->hispeed_freq;
}
if (now - ppol->max_freq_hyst_start_time <
@ -582,7 +617,7 @@ static void cpufreq_interactive_timer(unsigned long data)
now - ppol->hispeed_validate_time <
freq_to_above_hispeed_delay(tunables, ppol->target_freq)) {
trace_cpufreq_interactive_notyet(
max_cpu, cpu_load, ppol->target_freq,
max_cpu, pol_load, ppol->target_freq,
ppol->policy->cur, new_freq);
spin_unlock_irqrestore(&ppol->target_freq_lock, flags);
goto rearm;
@ -607,7 +642,7 @@ static void cpufreq_interactive_timer(unsigned long data)
if (now - ppol->floor_validate_time <
tunables->min_sample_time) {
trace_cpufreq_interactive_notyet(
max_cpu, cpu_load, ppol->target_freq,
max_cpu, pol_load, ppol->target_freq,
ppol->policy->cur, new_freq);
spin_unlock_irqrestore(&ppol->target_freq_lock, flags);
goto rearm;
@ -626,24 +661,24 @@ static void cpufreq_interactive_timer(unsigned long data)
*/
if ((!tunables->boosted || new_freq > tunables->hispeed_freq)
&& !policy_max_fast_restore) {
&& !jump_to_max_no_ts) {
ppol->floor_freq = new_freq;
ppol->floor_validate_time = now;
}
if (new_freq >= ppol->policy->max && !policy_max_fast_restore)
if (new_freq >= ppol->policy->max && !jump_to_max_no_ts)
ppol->max_freq_hyst_start_time = now;
if (ppol->target_freq == new_freq &&
ppol->target_freq <= ppol->policy->cur) {
trace_cpufreq_interactive_already(
max_cpu, cpu_load, ppol->target_freq,
max_cpu, pol_load, ppol->target_freq,
ppol->policy->cur, new_freq);
spin_unlock_irqrestore(&ppol->target_freq_lock, flags);
goto rearm;
}
trace_cpufreq_interactive_target(max_cpu, cpu_load, ppol->target_freq,
trace_cpufreq_interactive_target(max_cpu, pol_load, ppol->target_freq,
ppol->policy->cur, new_freq);
ppol->target_freq = new_freq;
@ -1037,6 +1072,7 @@ show_store_one(max_freq_hysteresis);
show_store_one(align_windows);
show_store_one(ignore_hispeed_on_notif);
show_store_one(fast_ramp_down);
show_store_one(enable_prediction);
static ssize_t show_go_hispeed_load(struct cpufreq_interactive_tunables
*tunables, char *buf)
@ -1431,6 +1467,7 @@ show_store_gov_pol_sys(max_freq_hysteresis);
show_store_gov_pol_sys(align_windows);
show_store_gov_pol_sys(ignore_hispeed_on_notif);
show_store_gov_pol_sys(fast_ramp_down);
show_store_gov_pol_sys(enable_prediction);
#define gov_sys_attr_rw(_name) \
static struct global_attr _name##_gov_sys = \
@ -1460,6 +1497,7 @@ gov_sys_pol_attr_rw(max_freq_hysteresis);
gov_sys_pol_attr_rw(align_windows);
gov_sys_pol_attr_rw(ignore_hispeed_on_notif);
gov_sys_pol_attr_rw(fast_ramp_down);
gov_sys_pol_attr_rw(enable_prediction);
static struct global_attr boostpulse_gov_sys =
__ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_sys);
@ -1486,6 +1524,7 @@ static struct attribute *interactive_attributes_gov_sys[] = {
&align_windows_gov_sys.attr,
&ignore_hispeed_on_notif_gov_sys.attr,
&fast_ramp_down_gov_sys.attr,
&enable_prediction_gov_sys.attr,
NULL,
};
@ -1513,6 +1552,7 @@ static struct attribute *interactive_attributes_gov_pol[] = {
&align_windows_gov_pol.attr,
&ignore_hispeed_on_notif_gov_pol.attr,
&fast_ramp_down_gov_pol.attr,
&enable_prediction_gov_pol.attr,
NULL,
};

View file

@ -119,21 +119,27 @@ TRACE_EVENT(cpufreq_interactive_load_change,
);
TRACE_EVENT(cpufreq_interactive_cpuload,
TP_PROTO(unsigned long cpu_id, unsigned long load,
unsigned int new_task_pct),
TP_ARGS(cpu_id, load, new_task_pct),
TP_PROTO(unsigned long cpu_id, unsigned int load,
unsigned int new_task_pct, unsigned int prev,
unsigned int predicted),
TP_ARGS(cpu_id, load, new_task_pct, prev, predicted),
TP_STRUCT__entry(
__field(unsigned long, cpu_id)
__field(unsigned long, load)
__field(unsigned long, new_task_pct)
__field(unsigned int, load)
__field(unsigned int, new_task_pct)
__field(unsigned int, prev)
__field(unsigned int, predicted)
),
TP_fast_assign(
__entry->cpu_id = cpu_id;
__entry->load = load;
__entry->new_task_pct = new_task_pct;
__entry->prev = prev;
__entry->predicted = predicted;
),
TP_printk("cpu=%lu load=%lu new_task_pct=%lu", __entry->cpu_id,
__entry->load, __entry->new_task_pct)
TP_printk("cpu=%lu load=%u new_task_pct=%u prev=%u predicted=%u",
__entry->cpu_id, __entry->load, __entry->new_task_pct,
__entry->prev, __entry->predicted)
);
#endif /* _TRACE_CPUFREQ_INTERACTIVE_H */