sched: window-stats: Handle policy change properly

sched_window_stats_policy influences task demand and hence various
per-cpu statistics such as curr_runnable_sum. Changing the policy
non-atomically can lead to improper accounting. For example, when a
task is enqueued on a cpu's runqueue, the demand added to
rq->cumulative_runnable_avg could be based on the AVG policy, while
the demand removed when it is dequeued could be based on MAX, leading
to erroneous accounting.
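
As a rough sketch of the race (illustrative only; task_demand_under()
is a hypothetical helper standing in for the demand computation done
by update_history()):

	/* enqueue: policy is WINDOW_STATS_USE_AVG at this point */
	rq->cumulative_runnable_avg +=
		task_demand_under(p, WINDOW_STATS_USE_AVG);

	/* ... a sysctl write flips the policy to WINDOW_STATS_USE_MAX ... */

	/* dequeue: demand is recomputed under the MAX policy */
	rq->cumulative_runnable_avg -=
		task_demand_under(p, WINDOW_STATS_USE_MAX);

	/* the counter is now skewed by the (MAX - AVG) demand delta */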

This change makes the policy switch "atomic": all cpus' rq->lock are
held and all tasks' window-stats are reset before the policy is
changed.
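
Schematically, a policy update now runs the sequence below (a
simplified sketch of the handler and reset_all_window_stats() in this
patch; "new_policy" stands for the value written through the sysctl):

	mutex_lock(&policy_mutex);	/* serialize concurrent writers */
	local_irq_save(flags);
	for_each_online_cpu(cpu)	/* quiesce every runqueue */
		raw_spin_lock(&cpu_rq(cpu)->lock);
	/* ... reset every task's and cpu's window statistics ... */
	sched_window_stats_policy = new_policy;	/* under all rq locks */
	for_each_online_cpu(cpu)
		raw_spin_unlock(&cpu_rq(cpu)->lock);
	local_irq_restore(flags);
	mutex_unlock(&policy_mutex);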

Change-Id: I6a3e4fb7bc299dfc5c367693b5717a1ef518c32d
CRs-Fixed: 687409
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
[joonwoop@codeaurora.org: fixed minor conflict in
 include/linux/sched/sysctl.h]
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>


@@ -103,6 +103,9 @@ extern int sched_hmp_proc_update_handler(struct ctl_table *table,
 extern int sched_boost_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos);
+extern int sched_window_stats_policy_update_handler(struct ctl_table *table,
+		int write, void __user *buffer, size_t *lenp, loff_t *ppos);
+
 /*
  * control realtime throttling:
  *


@@ -1123,6 +1123,15 @@ __read_mostly unsigned int sched_ravg_window = 10000000;
 __read_mostly unsigned int sysctl_sched_window_stats_policy =
 		WINDOW_STATS_USE_AVG;

+/*
+ * copy of sysctl_sched_window_stats_policy. Required for atomically
+ * changing policy (see sched_window_stats_policy_update_handler() for details).
+ *
+ * Initialize both to same value!!
+ */
+static __read_mostly unsigned int sched_window_stats_policy =
+		WINDOW_STATS_USE_AVG;
+
 /* 1 -> use PELT based load stats, 0 -> use window-based load stats */
 unsigned int __read_mostly sched_use_pelt;
@@ -1243,9 +1252,9 @@ update_history(struct rq *rq, struct task_struct *p, u32 runtime, int samples,
 compute_demand:
 	avg = div64_u64(sum, RAVG_HIST_SIZE);
-	if (sysctl_sched_window_stats_policy == WINDOW_STATS_USE_RECENT)
+	if (sched_window_stats_policy == WINDOW_STATS_USE_RECENT)
 		demand = runtime;
-	else if (sysctl_sched_window_stats_policy == WINDOW_STATS_USE_MAX)
+	else if (sched_window_stats_policy == WINDOW_STATS_USE_MAX)
 		demand = max;
 	else
 		demand = max(avg, runtime);
@@ -1515,7 +1524,8 @@ unsigned long sched_get_busy(int cpu)
 }

 /* Called with IRQs disabled */
-void reset_all_window_stats(u64 window_start, unsigned int window_size)
+void reset_all_window_stats(u64 window_start, unsigned int window_size,
+				int policy)
 {
 	int cpu;
 	u64 wallclock;
@@ -1557,6 +1567,9 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
 		fixup_nr_big_small_task(cpu);
 	}

+	if (policy >= 0)
+		sched_window_stats_policy = policy;
+
 	for_each_online_cpu(cpu) {
 		struct rq *rq = cpu_rq(cpu);
 		raw_spin_unlock(&rq->lock);
@@ -1589,7 +1602,7 @@ int sched_set_window(u64 window_start, unsigned int window_size)
 	BUG_ON(sched_clock() < ws);

-	reset_all_window_stats(ws, window_size);
+	reset_all_window_stats(ws, window_size, -1);

 	local_irq_restore(flags);


@@ -3144,6 +3144,40 @@ void post_big_small_task_count_change(void)
 	local_irq_enable();
 }

+static DEFINE_MUTEX(policy_mutex);
+
+int sched_window_stats_policy_update_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+	unsigned int *data = (unsigned int *)table->data;
+	unsigned int old_val;
+	unsigned long flags;
+
+	if (!sched_enable_hmp)
+		return -EINVAL;
+
+	mutex_lock(&policy_mutex);
+
+	old_val = *data;
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (ret || !write || (write && old_val == *data))
+		goto done;
+
+	local_irq_save(flags);
+	reset_all_window_stats(0, 0, sysctl_sched_window_stats_policy);
+	local_irq_restore(flags);
+
+done:
+	mutex_unlock(&policy_mutex);
+
+	return ret;
+}
+
 /*
  * Convert percentage value into absolute form. This will avoid div() operation
  * in fast path, to convert task load in percentage scale.


@@ -1027,6 +1027,8 @@ extern void inc_nr_big_small_task(struct rq *rq, struct task_struct *p);
 extern void dec_nr_big_small_task(struct rq *rq, struct task_struct *p);
 extern void set_hmp_defaults(void);
 extern unsigned int power_cost_at_freq(int cpu, unsigned int freq);
+extern void reset_all_window_stats(u64 window_start, unsigned int window_size,
+					int policy);

 #else /* CONFIG_SCHED_HMP */


@@ -314,7 +314,7 @@ static struct ctl_table kern_table[] = {
 		.data = &sysctl_sched_window_stats_policy,
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
-		.proc_handler = proc_dointvec,
+		.proc_handler = sched_window_stats_policy_update_handler,
 	},
 	{
 		.procname = "sched_wakeup_load_threshold",