sched: account new task load so that governor can apply different policy

Account the amount of load contributed by new tasks within the CPU load
so that the governor can apply a different policy when the CPU is
loaded by new tasks.
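
For illustration, a minimal sketch of the bookkeeping idea; the names
window_counters, account_busy and rollover_window are invented for this
example (the patch itself does this inside update_cpu_busy_time()):

typedef unsigned long long u64;

struct window_counters {
	u64 curr_runnable_sum;		/* all tasks, current window */
	u64 prev_runnable_sum;		/* all tasks, previous window */
	u64 nt_curr_runnable_sum;	/* new-task share, current window */
	u64 nt_prev_runnable_sum;	/* new-task share, previous window */
};

/* Charge delta ns of busy time to the current window; a new task's
 * time lands in both the total and the new-task counter. */
static void account_busy(struct window_counters *wc, u64 delta, int new_task)
{
	wc->curr_runnable_sum += delta;
	if (new_task)
		wc->nt_curr_runnable_sum += delta;
}

/* At a window boundary both counter pairs roll over together. */
static void rollover_window(struct window_counters *wc)
{
	wc->prev_runnable_sum = wc->curr_runnable_sum;
	wc->nt_prev_runnable_sum = wc->nt_curr_runnable_sum;
	wc->curr_runnable_sum = 0;
	wc->nt_curr_runnable_sum = 0;
}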

To be able to distinguish new-task load, a new tunable
sched_new_task_windows is also introduced.  The tunable defines a task
as new while it has been active for fewer than the configured number
of windows.
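
Since the governor-side (cpufreq_interactive) changes are omitted from
this commit, consumption of the new signal can only be sketched; the
function effective_load() and the policy of discounting new-task load
below are illustrative assumptions, not part of the patch:

#include <linux/cpumask.h>
#include <linux/sched.h>

static unsigned long effective_load(int cpu)
{
	struct sched_load busy;
	struct cpumask query_cpu = CPU_MASK_NONE;

	cpumask_set_cpu(cpu, &query_cpu);
	sched_get_cpus_busy(&busy, &query_cpu);

	/* Hypothetical policy: trust only load from tasks that have
	 * been active long enough to have a stable demand signal. */
	return busy.prev_load - busy.new_task_load;
}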

Change-Id: I2e2e62e4103882f7362154b792ab978b181b9f59
Suggested-by: Saravana Kannan <skannan@codeaurora.org>
[joonwoop@codeaurora.org: omitted changes for
 drivers/cpufreq/cpufreq_interactive.c.  The cpufreq changes need to be
 applied separately later.  Fixed conflict in include/linux/sched.h and
 include/linux/sched/sysctl.h.  Omitted changes for qhmp_core.c]
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
Joonwoo Park 2015-09-15 09:35:53 -07:00 committed by David Keitel
parent 809ea3fd1e
commit 446beddcd4
6 changed files with 130 additions and 20 deletions


@@ -1289,6 +1289,7 @@ struct ravg {
u32 sum_history[RAVG_HIST_SIZE_MAX];
#ifdef CONFIG_SCHED_FREQ_INPUT
u32 curr_window, prev_window;
u16 active_windows;
#endif
};
@@ -2125,10 +2126,15 @@ static inline cputime_t task_gtime(struct task_struct *t)
extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
struct sched_load {
unsigned long prev_load;
unsigned long new_task_load;
};
#if defined(CONFIG_SCHED_FREQ_INPUT)
extern int sched_set_window(u64 window_start, unsigned int window_size);
extern unsigned long sched_get_busy(int cpu);
extern void sched_get_cpus_busy(unsigned long *busy,
extern void sched_get_cpus_busy(struct sched_load *busy,
const struct cpumask *query_cpus);
extern void sched_set_io_is_busy(int val);
int sched_update_freq_max_load(const cpumask_t *cpumask);
@@ -2141,6 +2147,8 @@ static inline unsigned long sched_get_busy(int cpu)
{
return 0;
}
static inline void sched_get_cpus_busy(struct sched_load *busy,
const struct cpumask *query_cpus) {};
static inline void sched_set_io_is_busy(int val) {};
static inline int sched_update_freq_max_load(const cpumask_t *cpumask)


@@ -69,6 +69,9 @@ extern unsigned int sysctl_sched_powerband_limit_pct;
extern unsigned int sysctl_sched_lowspill_freq;
extern unsigned int sysctl_sched_pack_freq;
extern unsigned int sysctl_sched_boost;
#if defined(CONFIG_SCHED_FREQ_INPUT)
extern unsigned int sysctl_sched_new_task_windows;
#endif
#else /* CONFIG_SCHED_HMP */


@@ -244,6 +244,9 @@ TRACE_EVENT(sched_update_task_ravg,
__field( u64, ps )
__field( u32, curr_window )
__field( u32, prev_window )
__field( u64, nt_cs )
__field( u64, nt_ps )
__field( u32, active_windows )
#endif
),
@@ -267,12 +270,15 @@ TRACE_EVENT(sched_update_task_ravg,
__entry->ps = rq->prev_runnable_sum;
__entry->curr_window = p->ravg.curr_window;
__entry->prev_window = p->ravg.prev_window;
__entry->nt_cs = rq->nt_curr_runnable_sum;
__entry->nt_ps = rq->nt_prev_runnable_sum;
__entry->active_windows = p->ravg.active_windows;
#endif
),
TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
#ifdef CONFIG_SCHED_FREQ_INPUT
" cs %llu ps %llu cur_window %u prev_window %u"
" cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u"
#endif
, __entry->wallclock, __entry->win_start, __entry->delta,
task_event_names[__entry->evt], __entry->cpu,
@@ -282,7 +288,9 @@ TRACE_EVENT(sched_update_task_ravg,
__entry->sum, __entry->irqtime
#ifdef CONFIG_SCHED_FREQ_INPUT
, __entry->cs, __entry->ps, __entry->curr_window,
__entry->prev_window
__entry->prev_window,
__entry->nt_cs, __entry->nt_ps,
__entry->active_windows
#endif
)
);
@@ -374,37 +382,44 @@ TRACE_EVENT(sched_migration_update_sum,
__field(int, pid )
__field( u64, cs )
__field( u64, ps )
__field( s64, nt_cs )
__field( s64, nt_ps )
),
TP_fast_assign(
__entry->cpu = cpu_of(rq);
__entry->cs = rq->curr_runnable_sum;
__entry->ps = rq->prev_runnable_sum;
__entry->nt_cs = (s64)rq->nt_curr_runnable_sum;
__entry->nt_ps = (s64)rq->nt_prev_runnable_sum;
__entry->pid = p->pid;
),
TP_printk("cpu %d: cs %llu ps %llu pid %d", __entry->cpu,
__entry->cs, __entry->ps, __entry->pid)
TP_printk("cpu %d: cs %llu ps %llu nt_cs %lld nt_ps %lld pid %d",
__entry->cpu, __entry->cs, __entry->ps,
__entry->nt_cs, __entry->nt_ps, __entry->pid)
);
TRACE_EVENT(sched_get_busy,
TP_PROTO(int cpu, u64 load),
TP_PROTO(int cpu, u64 load, u64 nload),
TP_ARGS(cpu, load),
TP_ARGS(cpu, load, nload),
TP_STRUCT__entry(
__field( int, cpu )
__field( u64, load )
__field( u64, nload )
),
TP_fast_assign(
__entry->cpu = cpu;
__entry->load = load;
__entry->nload = nload;
),
TP_printk("cpu %d load %lld",
__entry->cpu, __entry->load)
TP_printk("cpu %d load %lld new_task_load %lld",
__entry->cpu, __entry->load, __entry->nload)
);
TRACE_EVENT(sched_freq_alert,


@@ -1226,6 +1226,8 @@ static __read_mostly unsigned int sched_window_stats_policy =
__read_mostly unsigned int sysctl_sched_window_stats_policy =
WINDOW_STATS_MAX_RECENT_AVG;
__read_mostly unsigned int sysctl_sched_new_task_windows = 5;
static __read_mostly unsigned int sched_account_wait_time = 1;
__read_mostly unsigned int sysctl_sched_account_wait_time = 1;
@@ -1469,6 +1471,11 @@ heavy_task_wakeup(struct task_struct *p, struct rq *rq, int event)
return (rq->window_start - p->ravg.mark_start > sched_ravg_window);
}
static inline bool is_new_task(struct task_struct *p)
{
return p->ravg.active_windows < sysctl_sched_new_task_windows;
}
/*
* Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
*/
@@ -1481,11 +1488,17 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
u64 window_start = rq->window_start;
u32 window_size = sched_ravg_window;
u64 delta;
bool new_task;
new_window = mark_start < window_start;
if (new_window)
if (new_window) {
nr_full_windows = div64_u64((window_start - mark_start),
window_size);
if (p->ravg.active_windows < USHRT_MAX)
p->ravg.active_windows++;
}
new_task = is_new_task(p);
/* Handle per-task window rollover. We don't care about the idle
* task or exiting tasks. */
@@ -1516,14 +1529,18 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
/* A new window has started. The RQ demand must be rolled
* over if p is the current task. */
if (p_is_curr_task) {
u64 prev_sum = 0;
u64 prev_sum = 0, nt_prev_sum = 0;
/* p is either idle task or an exiting task */
if (!nr_full_windows)
if (!nr_full_windows) {
prev_sum = rq->curr_runnable_sum;
nt_prev_sum = rq->nt_curr_runnable_sum;
}
rq->prev_runnable_sum = prev_sum;
rq->curr_runnable_sum = 0;
rq->nt_prev_runnable_sum = nt_prev_sum;
rq->nt_curr_runnable_sum = 0;
} else if (heavy_task_wakeup(p, rq, event)) {
/* A new window has started. If p is a waking
@@ -1535,6 +1552,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
* tunable. */
p->ravg.prev_window = p->ravg.demand;
rq->prev_runnable_sum += p->ravg.demand;
if (new_task)
rq->nt_prev_runnable_sum += p->ravg.demand;
}
return;
@@ -1553,6 +1572,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
delta = irqtime;
delta = scale_exec_time(delta, rq);
rq->curr_runnable_sum += delta;
if (new_task)
rq->nt_curr_runnable_sum += delta;
if (!is_idle_task(p) && !exiting_task(p))
p->ravg.curr_window += delta;
@@ -1586,10 +1607,14 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
p->ravg.prev_window = delta;
}
rq->prev_runnable_sum += delta;
if (new_task)
rq->nt_prev_runnable_sum += delta;
/* Account piece of busy time in the current window. */
delta = scale_exec_time(wallclock - window_start, rq);
rq->curr_runnable_sum += delta;
if (new_task)
rq->nt_curr_runnable_sum += delta;
if (!exiting_task(p))
p->ravg.curr_window = delta;
@@ -1615,6 +1640,11 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
delta = scale_exec_time(window_start - mark_start, rq);
if (!is_idle_task(p) && !exiting_task(p))
p->ravg.prev_window += delta;
rq->nt_prev_runnable_sum = rq->nt_curr_runnable_sum;
if (new_task)
rq->nt_prev_runnable_sum += delta;
delta += rq->curr_runnable_sum;
} else {
/* Since at least one full window has elapsed,
@@ -1623,14 +1653,27 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
delta = scale_exec_time(window_size, rq);
if (!is_idle_task(p) && !exiting_task(p))
p->ravg.prev_window = delta;
if (new_task)
rq->nt_prev_runnable_sum = delta;
else
rq->nt_prev_runnable_sum = 0;
}
/* Rollover is done here by overwriting the values in
* prev_runnable_sum and curr_runnable_sum. */
/*
* Rollover for normal runnable sum is done here by overwriting
* the values in prev_runnable_sum and curr_runnable_sum.
* Rollover for new task runnable sum has completed by previous
* if-else statement.
*/
rq->prev_runnable_sum = delta;
/* Account piece of busy time in the current window. */
delta = scale_exec_time(wallclock - window_start, rq);
rq->curr_runnable_sum = delta;
if (new_task)
rq->nt_curr_runnable_sum = delta;
else
rq->nt_curr_runnable_sum = 0;
if (!is_idle_task(p) && !exiting_task(p))
p->ravg.curr_window = delta;
@@ -1654,6 +1697,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
/* Roll window over. If IRQ busy time was just in the current
* window then that is all that need be accounted. */
rq->prev_runnable_sum = rq->curr_runnable_sum;
rq->nt_prev_runnable_sum = rq->nt_curr_runnable_sum;
rq->nt_curr_runnable_sum = 0;
if (mark_start > window_start) {
rq->curr_runnable_sum = scale_exec_time(irqtime, rq);
return;
@@ -2080,6 +2125,7 @@ static inline void set_window_start(struct rq *rq)
rq->window_start = cpu_rq(sync_cpu)->window_start;
#ifdef CONFIG_SCHED_FREQ_INPUT
rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
#endif
raw_spin_unlock(&sync_rq->lock);
}
@@ -2212,6 +2258,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
rq->window_start = window_start;
#ifdef CONFIG_SCHED_FREQ_INPUT
rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
#endif
reset_cpu_hmp_stats(cpu, 1);
@@ -2269,12 +2316,13 @@ scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq)
return div64_u64(load * (u64)src_freq, (u64)dst_freq);
}
void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
void sched_get_cpus_busy(struct sched_load *busy,
const struct cpumask *query_cpus)
{
unsigned long flags;
struct rq *rq;
const int cpus = cpumask_weight(query_cpus);
u64 load[cpus];
u64 load[cpus], nload[cpus];
unsigned int cur_freq[cpus], max_freq[cpus];
int notifier_sent[cpus];
int cpu, i = 0;
@@ -2299,6 +2347,7 @@ void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_clock(), 0);
load[i] = rq->old_busy_time = rq->prev_runnable_sum;
nload[i] = rq->nt_prev_runnable_sum;
/*
* Scale load in reference to rq->max_possible_freq.
*
@@ -2306,6 +2355,7 @@ void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
* rq->max_freq.
*/
load[i] = scale_load_to_cpu(load[i], cpu);
nload[i] = scale_load_to_cpu(nload[i], cpu);
notifier_sent[i] = rq->notifier_sent;
rq->notifier_sent = 0;
@@ -2325,18 +2375,29 @@ void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
if (!notifier_sent[i]) {
load[i] = scale_load_to_freq(load[i], max_freq[i],
cur_freq[i]);
nload[i] = scale_load_to_freq(nload[i], max_freq[i],
cur_freq[i]);
if (load[i] > window_size)
load[i] = window_size;
if (nload[i] > window_size)
nload[i] = window_size;
load[i] = scale_load_to_freq(load[i], cur_freq[i],
rq->max_possible_freq);
nload[i] = scale_load_to_freq(nload[i], cur_freq[i],
rq->max_possible_freq);
} else {
load[i] = scale_load_to_freq(load[i], max_freq[i],
rq->max_possible_freq);
nload[i] = scale_load_to_freq(nload[i], max_freq[i],
rq->max_possible_freq);
}
busy[i] = div64_u64(load[i], NSEC_PER_USEC);
busy[i].prev_load = div64_u64(load[i], NSEC_PER_USEC);
busy[i].new_task_load = div64_u64(nload[i], NSEC_PER_USEC);
trace_sched_get_busy(cpu, busy[i]);
trace_sched_get_busy(cpu, busy[i].prev_load,
busy[i].new_task_load);
i++;
}
}
@@ -2344,12 +2405,12 @@ void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
unsigned long sched_get_busy(int cpu)
{
struct cpumask query_cpu = CPU_MASK_NONE;
unsigned long busy;
struct sched_load busy;
cpumask_set_cpu(cpu, &query_cpu);
sched_get_cpus_busy(&busy, &query_cpu);
return busy;
return busy.prev_load;
}
void sched_set_io_is_busy(int val)
@@ -2399,6 +2460,7 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu)
struct rq *src_rq = task_rq(p);
struct rq *dest_rq = cpu_rq(new_cpu);
u64 wallclock;
bool new_task;
if (!sched_enable_hmp || !sched_migration_fixup ||
exiting_task(p) || (!p->on_rq && p->state != TASK_WAKING))
@@ -2421,18 +2483,30 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu)
update_task_ravg(p, task_rq(p), TASK_MIGRATE,
wallclock, 0);
new_task = is_new_task(p);
if (p->ravg.curr_window) {
src_rq->curr_runnable_sum -= p->ravg.curr_window;
dest_rq->curr_runnable_sum += p->ravg.curr_window;
if (new_task) {
src_rq->nt_curr_runnable_sum -= p->ravg.curr_window;
dest_rq->nt_curr_runnable_sum += p->ravg.curr_window;
}
}
if (p->ravg.prev_window) {
src_rq->prev_runnable_sum -= p->ravg.prev_window;
dest_rq->prev_runnable_sum += p->ravg.prev_window;
if (new_task) {
src_rq->nt_prev_runnable_sum -= p->ravg.prev_window;
dest_rq->nt_prev_runnable_sum += p->ravg.prev_window;
}
}
BUG_ON((s64)src_rq->prev_runnable_sum < 0);
BUG_ON((s64)src_rq->curr_runnable_sum < 0);
BUG_ON((s64)src_rq->nt_prev_runnable_sum < 0);
BUG_ON((s64)src_rq->nt_curr_runnable_sum < 0);
trace_sched_migration_update_sum(src_rq, p);
trace_sched_migration_update_sum(dest_rq, p);
@@ -9429,6 +9503,7 @@ void __init sched_init(void)
#ifdef CONFIG_SCHED_FREQ_INPUT
rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
rq->old_busy_time = 0;
rq->notifier_sent = 0;
#endif


@@ -702,6 +702,8 @@ struct rq {
#ifdef CONFIG_SCHED_FREQ_INPUT
u64 curr_runnable_sum;
u64 prev_runnable_sum;
u64 nt_curr_runnable_sum;
u64 nt_prev_runnable_sum;
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING


@@ -431,6 +431,13 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sched_new_task_windows",
.data = &sysctl_sched_new_task_windows,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_window_update_handler,
},
{
.procname = "sched_boost",
.data = &sysctl_sched_boost,
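
Assuming a kernel built with this series (CONFIG_SCHED_HMP and
CONFIG_SCHED_FREQ_INPUT), the entry above exposes the tunable as
/proc/sys/kernel/sched_new_task_windows.  A minimal userspace sketch
for setting it, where the value 5 mirrors the default in this patch:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/sched_new_task_windows", "w");

	if (!f) {
		perror("sched_new_task_windows");
		return 1;
	}
	/* tasks count as "new" for their first 5 windows */
	fprintf(f, "%u\n", 5U);
	fclose(f);
	return 0;
}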