sched: Introduce Window Assisted Load Tracking (WALT)

Use a window based view of time in order to track task demand
and CPU utilization in the scheduler.

Window Assisted Load Tracking (WALT) implementation credits:
Srivatsa Vaddagiri, Steve Muckle, Syed Rameez Mustafa, Joonwoo Park,
Pavan Kumar Kondeti, Olav Haugan

2016-03-06: Integration with EAS/refactoring by Vikram Mulukutla and Todd Kjos

Change-Id: I21408236836625d4e7d7de1843d20ed5ff36c708

Includes fixes for issues:

eas/walt: Use walt_ktime_clock() instead of ktime_get_ns() to avoid a
race resulting in watchdog resets
BUG: 29353986
Change-Id: Ic1820e22a136f7c7ebd6f42e15f14d470f6bbbdb

Handle walt accounting anomaly during resume

During resume, there is a corner case where on wakeup, a task's
prev_runnable_sum can go negative. This is a workaround that fixes the
condition and warns (instead of crashing).

BUG: 29464099
Change-Id: I173e7874324b31a3584435530281708145773508

Signed-off-by: Todd Kjos <tkjos@google.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
Signed-off-by: Juri Lelli <juri.lelli@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
parent 3a7e623182
commit b41fa2aec5

13 changed files with 1498 additions and 1 deletion
@@ -317,6 +317,15 @@ extern char ___assert_task_state[1 - 2*!!(
/* Task command name length */
#define TASK_COMM_LEN 16

enum task_event {
	PUT_PREV_TASK	= 0,
	PICK_NEXT_TASK	= 1,
	TASK_WAKE	= 2,
	TASK_MIGRATE	= 3,
	TASK_UPDATE	= 4,
	IRQ_UPDATE	= 5,
};

#include <linux/spinlock.h>

/*
@@ -1276,6 +1285,41 @@ struct sched_statistics {
};
#endif

#ifdef CONFIG_SCHED_WALT
#define RAVG_HIST_SIZE_MAX  5

/* ravg represents frequency scaled cpu-demand of tasks */
struct ravg {
	/*
	 * 'mark_start' marks the beginning of an event (task waking up, task
	 * starting to execute, task being preempted) within a window
	 *
	 * 'sum' represents how runnable a task has been within current
	 * window. It incorporates both running time and wait time and is
	 * frequency scaled.
	 *
	 * 'sum_history' keeps track of history of 'sum' seen over previous
	 * RAVG_HIST_SIZE windows. Windows where task was entirely sleeping are
	 * ignored.
	 *
	 * 'demand' represents maximum sum seen over previous
	 * sysctl_sched_ravg_hist_size windows. 'demand' could drive frequency
	 * demand for tasks.
	 *
	 * 'curr_window' represents task's contribution to cpu busy time
	 * statistics (rq->curr_runnable_sum) in current window
	 *
	 * 'prev_window' represents task's contribution to cpu busy time
	 * statistics (rq->prev_runnable_sum) in previous window
	 */
	u64 mark_start;
	u32 sum, demand;
	u32 sum_history[RAVG_HIST_SIZE_MAX];
	u32 curr_window, prev_window;
	u16 active_windows;
};
#endif

struct sched_entity {
	struct load_weight	load;	/* for load-balancing */
	struct rb_node		run_node;
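The bookkeeping that feeds these fields lives in kernel/sched/walt.c, whose diff is suppressed further down this page. As an illustrative aside (not the patch's code; the names below are made up for the example), the history/demand update described in the comment above can be sketched as:

#include <linux/types.h>

/*
 * Illustrative sketch only: refresh a task's 'demand' from the last
 * RAVG_HIST_SIZE_MAX window sums, as described in the struct ravg
 * comment. The real walt.c code also supports average-based and
 * recency-weighted policies selected via sysctl.
 */
struct ravg_sketch {
	u32 sum;				/* busy time in the window being closed */
	u32 demand;				/* value consumed by placement/cpufreq */
	u32 sum_history[RAVG_HIST_SIZE_MAX];	/* most recent window first */
};

static void update_history_sketch(struct ravg_sketch *r, u32 completed_sum)
{
	u32 max = 0;
	int i;

	/* Shift the history and record the window that just completed. */
	for (i = RAVG_HIST_SIZE_MAX - 1; i > 0; i--)
		r->sum_history[i] = r->sum_history[i - 1];
	r->sum_history[0] = completed_sum;

	/* 'demand' is the maximum sum seen over the tracked windows. */
	for (i = 0; i < RAVG_HIST_SIZE_MAX; i++)
		if (r->sum_history[i] > max)
			max = r->sum_history[i];

	r->demand = max;
	r->sum = 0;	/* start accumulating the next window from zero */
}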
@@ -1433,6 +1477,15 @@ struct task_struct {
	const struct sched_class *sched_class;
	struct sched_entity se;
	struct sched_rt_entity rt;
#ifdef CONFIG_SCHED_WALT
	struct ravg ravg;
	/*
	 * 'init_load_pct' represents the initial task load assigned to children
	 * of this task
	 */
	u32 init_load_pct;
#endif

#ifdef CONFIG_CGROUP_SCHED
	struct task_group *sched_task_group;
#endif
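The sched_walt_init_task_load_pct sysctl added at the end of this diff supplies the default when a task has not set its own init_load_pct. A rough sketch of how such a percentage could seed a new task's demand; the real logic is walt_init_new_task_load() in the suppressed walt.c, and the name and exact scaling here are illustrative only:

/*
 * Illustrative sketch: seed a new task's initial demand as a percentage
 * of the WALT window. Not the patch's walt_init_new_task_load(); the
 * real code also pre-fills sum_history and applies its own scaling.
 */
static u32 seed_initial_demand_sketch(u32 init_load_pct, u32 default_pct,
				      u32 ravg_window_ns)
{
	u32 pct = init_load_pct ? init_load_pct : default_pct;

	/* e.g. pct = 15, window = 20ms  ->  demand = 3ms of busy time */
	return ravg_window_ns / 100 * pct;
}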
@@ -43,6 +43,11 @@ extern unsigned int sysctl_sched_is_big_little;
extern unsigned int sysctl_sched_sync_hint_enable;
extern unsigned int sysctl_sched_initial_task_util;
extern unsigned int sysctl_sched_cstate_aware;
#ifdef CONFIG_SCHED_WALT
extern unsigned int sysctl_sched_use_walt_cpu_util;
extern unsigned int sysctl_sched_use_walt_task_util;
extern unsigned int sysctl_sched_walt_init_task_load_pct;
#endif

enum sched_tunable_scaling {
	SCHED_TUNABLESCALING_NONE,
@@ -937,6 +937,155 @@ TRACE_EVENT(sched_tune_filter,
		__entry->payoff, __entry->region)
);

#ifdef CONFIG_SCHED_WALT
struct rq;

TRACE_EVENT(walt_update_task_ravg,

	TP_PROTO(struct task_struct *p, struct rq *rq, int evt,
		 u64 wallclock, u64 irqtime),

	TP_ARGS(p, rq, evt, wallclock, irqtime),

	TP_STRUCT__entry(
		__array(char, comm, TASK_COMM_LEN)
		__field(pid_t, pid)
		__field(pid_t, cur_pid)
		__field(unsigned int, cur_freq)
		__field(u64, wallclock)
		__field(u64, mark_start)
		__field(u64, delta_m)
		__field(u64, win_start)
		__field(u64, delta)
		__field(u64, irqtime)
		__field(int, evt)
		__field(unsigned int, demand)
		__field(unsigned int, sum)
		__field(int, cpu)
		__field(u64, cs)
		__field(u64, ps)
		__field(u32, curr_window)
		__field(u32, prev_window)
		__field(u64, nt_cs)
		__field(u64, nt_ps)
		__field(u32, active_windows)
	),

	TP_fast_assign(
		__entry->wallclock = wallclock;
		__entry->win_start = rq->window_start;
		__entry->delta = (wallclock - rq->window_start);
		__entry->evt = evt;
		__entry->cpu = rq->cpu;
		__entry->cur_pid = rq->curr->pid;
		__entry->cur_freq = rq->cur_freq;
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid = p->pid;
		__entry->mark_start = p->ravg.mark_start;
		__entry->delta_m = (wallclock - p->ravg.mark_start);
		__entry->demand = p->ravg.demand;
		__entry->sum = p->ravg.sum;
		__entry->irqtime = irqtime;
		__entry->cs = rq->curr_runnable_sum;
		__entry->ps = rq->prev_runnable_sum;
		__entry->curr_window = p->ravg.curr_window;
		__entry->prev_window = p->ravg.prev_window;
		__entry->nt_cs = rq->nt_curr_runnable_sum;
		__entry->nt_ps = rq->nt_prev_runnable_sum;
		__entry->active_windows = p->ravg.active_windows;
	),

	TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
		" cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u"
		, __entry->wallclock, __entry->win_start, __entry->delta,
		__entry->evt, __entry->cpu,
		__entry->cur_freq, __entry->cur_pid,
		__entry->pid, __entry->comm, __entry->mark_start,
		__entry->delta_m, __entry->demand,
		__entry->sum, __entry->irqtime,
		__entry->cs, __entry->ps,
		__entry->curr_window, __entry->prev_window,
		__entry->nt_cs, __entry->nt_ps,
		__entry->active_windows
		)
);

TRACE_EVENT(walt_update_history,

	TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int samples,
			int evt),

	TP_ARGS(rq, p, runtime, samples, evt),

	TP_STRUCT__entry(
		__array(char, comm, TASK_COMM_LEN)
		__field(pid_t, pid)
		__field(unsigned int, runtime)
		__field(int, samples)
		__field(int, evt)
		__field(u64, demand)
		__field(unsigned int, walt_avg)
		__field(unsigned int, pelt_avg)
		__array(u32, hist, RAVG_HIST_SIZE_MAX)
		__field(int, cpu)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid = p->pid;
		__entry->runtime = runtime;
		__entry->samples = samples;
		__entry->evt = evt;
		__entry->demand = p->ravg.demand;
		__entry->walt_avg = (__entry->demand << 10) / walt_ravg_window,
		__entry->pelt_avg = p->se.avg.util_avg;
		memcpy(__entry->hist, p->ravg.sum_history,
					RAVG_HIST_SIZE_MAX * sizeof(u32));
		__entry->cpu = rq->cpu;
	),

	TP_printk("%d (%s): runtime %u samples %d event %d demand %llu"
		" walt %u pelt %u (hist: %u %u %u %u %u) cpu %d",
		__entry->pid, __entry->comm,
		__entry->runtime, __entry->samples, __entry->evt,
		__entry->demand,
		__entry->walt_avg,
		__entry->pelt_avg,
		__entry->hist[0], __entry->hist[1],
		__entry->hist[2], __entry->hist[3],
		__entry->hist[4], __entry->cpu)
);

TRACE_EVENT(walt_migration_update_sum,

	TP_PROTO(struct rq *rq, struct task_struct *p),

	TP_ARGS(rq, p),

	TP_STRUCT__entry(
		__field(int, cpu)
		__field(int, pid)
		__field(u64, cs)
		__field(u64, ps)
		__field(s64, nt_cs)
		__field(s64, nt_ps)
	),

	TP_fast_assign(
		__entry->cpu = cpu_of(rq);
		__entry->cs = rq->curr_runnable_sum;
		__entry->ps = rq->prev_runnable_sum;
		__entry->nt_cs = (s64)rq->nt_curr_runnable_sum;
		__entry->nt_ps = (s64)rq->nt_prev_runnable_sum;
		__entry->pid = p->pid;
	),

	TP_printk("cpu %d: cs %llu ps %llu nt_cs %lld nt_ps %lld pid %d",
		  __entry->cpu, __entry->cs, __entry->ps,
		  __entry->nt_cs, __entry->nt_ps, __entry->pid)
);
#endif /* CONFIG_SCHED_WALT */

#endif /* CONFIG_SMP */

#endif /* _TRACE_SCHED_H */
@@ -392,6 +392,15 @@ config IRQ_TIME_ACCOUNTING

endchoice

config SCHED_WALT
	bool "Support window based load tracking"
	depends on SMP
	help
	  This feature will allow the scheduler to maintain a tunable window
	  based set of metrics for tasks and runqueues. These metrics can be
	  used to guide task placement as well as task frequency requirements
	  for cpufreq governors.

config BSD_PROCESS_ACCT
	bool "BSD Process Accounting"
	depends on MULTIUSER
@@ -15,6 +15,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
obj-y += wait.o completion.o idle.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o energy.o
obj-$(CONFIG_SCHED_WALT) += walt.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
@@ -89,6 +89,7 @@

#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
#include "walt.h"

DEFINE_MUTEX(sched_domains_mutex);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -1085,7 +1086,9 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new

	dequeue_task(rq, p, 0);
	p->on_rq = TASK_ON_RQ_MIGRATING;
	double_lock_balance(rq, cpu_rq(new_cpu));
	set_task_cpu(p, new_cpu);
	double_unlock_balance(rq, cpu_rq(new_cpu));
	raw_spin_unlock(&rq->lock);

	rq = cpu_rq(new_cpu);
@@ -1309,6 +1312,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
			p->sched_class->migrate_task_rq(p);
		p->se.nr_migrations++;
		perf_event_task_migrate(p);

		walt_fixup_busy_time(p, new_cpu);
	}

	__set_task_cpu(p, new_cpu);
@@ -1937,6 +1942,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
{
	unsigned long flags;
	int cpu, success = 0;
#ifdef CONFIG_SMP
	struct rq *rq;
	u64 wallclock;
#endif

	/*
	 * If we are going to wake up a thread waiting for CONDITION we
@@ -1994,6 +2003,14 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
	 */
	smp_rmb();

	rq = cpu_rq(task_cpu(p));

	raw_spin_lock(&rq->lock);
	wallclock = walt_ktime_clock();
	walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
	walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
	raw_spin_unlock(&rq->lock);

	p->sched_contributes_to_load = !!task_contributes_to_load(p);
	p->state = TASK_WAKING;
@@ -2001,10 +2018,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
		p->sched_class->task_waking(p);

	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);

	if (task_cpu(p) != cpu) {
		wake_flags |= WF_MIGRATED;
		set_task_cpu(p, cpu);
	}

#endif /* CONFIG_SMP */

	ttwu_queue(p, cpu);
@@ -2053,8 +2072,13 @@ static void try_to_wake_up_local(struct task_struct *p)

	trace_sched_waking(p);

	if (!task_on_rq_queued(p))
	if (!task_on_rq_queued(p)) {
		u64 wallclock = walt_ktime_clock();

		walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
		walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
	}

	ttwu_do_wakeup(rq, p, 0);
	ttwu_stat(p, smp_processor_id(), 0);
@@ -2120,6 +2144,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
	p->se.nr_migrations = 0;
	p->se.vruntime = 0;
	INIT_LIST_HEAD(&p->se.group_node);
	walt_init_new_task_load(p);

#ifdef CONFIG_SCHEDSTATS
	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -2387,6 +2412,9 @@ void wake_up_new_task(struct task_struct *p)
	struct rq *rq;

	raw_spin_lock_irqsave(&p->pi_lock, flags);

	walt_init_new_task_load(p);

	/* Initialize new task's runnable average */
	init_entity_runnable_average(&p->se);
#ifdef CONFIG_SMP
@@ -2399,6 +2427,7 @@ void wake_up_new_task(struct task_struct *p)
#endif

	rq = __task_rq_lock(p);
	walt_mark_task_starting(p);
	activate_task(rq, p, ENQUEUE_WAKEUP_NEW);
	p->on_rq = TASK_ON_RQ_QUEUED;
	trace_sched_wakeup_new(p);
@@ -2948,9 +2977,12 @@ void scheduler_tick(void)
	sched_clock_tick();

	raw_spin_lock(&rq->lock);
	walt_set_window_start(rq);
	update_rq_clock(rq);
	curr->sched_class->task_tick(rq, curr, 0);
	update_cpu_load_active(rq);
	walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
			walt_ktime_clock(), 0);
	calc_global_load_tick(rq);
	sched_freq_tick(cpu);
	raw_spin_unlock(&rq->lock);
@@ -3189,6 +3221,7 @@ static void __sched notrace __schedule(bool preempt)
	unsigned long *switch_count;
	struct rq *rq;
	int cpu;
	u64 wallclock;

	cpu = smp_processor_id();
	rq = cpu_rq(cpu);
@@ -3250,6 +3283,9 @@ static void __sched notrace __schedule(bool preempt)
		update_rq_clock(rq);

	next = pick_next_task(rq, prev);
	wallclock = walt_ktime_clock();
	walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
	walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
	clear_tsk_need_resched(prev);
	clear_preempt_need_resched();
	rq->clock_skip_update = 0;
@@ -5672,6 +5708,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
	switch (action & ~CPU_TASKS_FROZEN) {

	case CPU_UP_PREPARE:
		raw_spin_lock_irqsave(&rq->lock, flags);
		walt_set_window_start(rq);
		raw_spin_unlock_irqrestore(&rq->lock, flags);
		rq->calc_load_update = calc_load_update;
		account_reset_rq(rq);
		break;
@@ -5692,6 +5731,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
		sched_ttwu_pending();
		/* Update our root-domain */
		raw_spin_lock_irqsave(&rq->lock, flags);
		walt_migrate_sync_cpu(cpu);
		if (rq->rd) {
			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
			set_rq_offline(rq);
@@ -7536,6 +7576,7 @@ void __init sched_init_smp(void)
{
	cpumask_var_t non_isolated_cpus;

	walt_init_cpu_efficiency();
	alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
	alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
@@ -30,11 +30,13 @@
#include <linux/mempolicy.h>
#include <linux/migrate.h>
#include <linux/task_work.h>
#include <linux/module.h>

#include <trace/events/sched.h>

#include "sched.h"
#include "tune.h"
#include "walt.h"

/*
 * Targeted preemption latency for CPU-bound tasks:
@@ -56,6 +58,10 @@ unsigned int sysctl_sched_sync_hint_enable = 1;
unsigned int sysctl_sched_initial_task_util = 0;
unsigned int sysctl_sched_cstate_aware = 1;

#ifdef CONFIG_SCHED_WALT
unsigned int sysctl_sched_use_walt_cpu_util = 1;
unsigned int sysctl_sched_use_walt_task_util = 1;
#endif
/*
 * The initial- and re-scaling of tunables is configurable
 * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
@@ -4225,6 +4231,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
		if (cfs_rq_throttled(cfs_rq))
			break;
		cfs_rq->h_nr_running++;
		walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);

		flags = ENQUEUE_WAKEUP;
	}
@@ -4232,6 +4239,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		cfs_rq->h_nr_running++;
		walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);

		if (cfs_rq_throttled(cfs_rq))
			break;
@@ -4246,6 +4254,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_SMP

	if (!se) {
		walt_inc_cumulative_runnable_avg(rq, p);
		if (!task_new && !rq->rd->overutilized &&
		    cpu_overutilized(rq->cpu))
			rq->rd->overutilized = true;
@@ -4295,6 +4304,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
		if (cfs_rq_throttled(cfs_rq))
			break;
		cfs_rq->h_nr_running--;
		walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);

		/* Don't dequeue parent if it has other entities besides us */
		if (cfs_rq->load.weight) {
@@ -4315,6 +4325,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		cfs_rq->h_nr_running--;
		walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);

		if (cfs_rq_throttled(cfs_rq))
			break;
@@ -4329,6 +4340,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_SMP

	if (!se) {
		walt_dec_cumulative_runnable_avg(rq, p);

		/*
		 * We want to potentially trigger a freq switch
@@ -5228,6 +5240,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)

static inline unsigned long task_util(struct task_struct *p)
{
#ifdef CONFIG_SCHED_WALT
	if (!walt_disabled && sysctl_sched_use_walt_task_util) {
		unsigned long demand = p->ravg.demand;
		return (demand << 10) / walt_ravg_window;
	}
#endif
	return p->se.avg.util_avg;
}
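With WALT enabled, task utilization is reported on the same 0..1024 scale as the PELT util_avg it replaces: demand is a busy-time figure bounded by the window length, so shifting by 10 and dividing by walt_ravg_window maps it onto 0..1024. The __cpu_util() change later in this diff applies the same scaling to rq->prev_runnable_sum. A small worked example as an aside, assuming a 20 ms window (the default window length is a walt.c detail not visible in this diff):

/*
 * Worked example (illustrative, standalone): the WALT path of
 * task_util() above, with an assumed 20 ms window.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long walt_ravg_window = 20000000ULL;	/* 20 ms in ns (assumed) */
	unsigned long long demand = 5000000ULL;			/* ~5 ms busy per window */

	/* (demand << 10) / window = 1024 * 5/20 = 256, i.e. ~25% of one CPU */
	unsigned long long util = (demand << 10) / walt_ravg_window;

	printf("util = %llu\n", util);	/* prints 256 */
	return 0;
}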
@@ -6620,7 +6638,9 @@ static void detach_task(struct task_struct *p, struct lb_env *env)

	deactivate_task(env->src_rq, p, 0);
	p->on_rq = TASK_ON_RQ_MIGRATING;
	double_lock_balance(env->src_rq, env->dst_rq);
	set_task_cpu(p, env->dst_cpu);
	double_unlock_balance(env->src_rq, env->dst_rq);
}

/*
@@ -8,6 +8,8 @@
#include <linux/slab.h>
#include <linux/irq_work.h>

#include "walt.h"

int sched_rr_timeslice = RR_TIMESLICE;

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
@@ -1261,6 +1263,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
	rt_se->timeout = 0;

	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
	walt_inc_cumulative_runnable_avg(rq, p);

	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
@@ -1272,6 +1275,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)

	update_curr_rt(rq);
	dequeue_rt_entity(rt_se);
	walt_dec_cumulative_runnable_avg(rq, p);

	dequeue_pushable_task(rq, p);
}
@@ -410,6 +410,10 @@ struct cfs_rq {
	struct list_head leaf_cfs_rq_list;
	struct task_group *tg;	/* group that "owns" this runqueue */

#ifdef CONFIG_SCHED_WALT
	u64 cumulative_runnable_avg;
#endif

#ifdef CONFIG_CFS_BANDWIDTH
	int runtime_enabled;
	u64 runtime_expires;
@@ -663,6 +667,27 @@ struct rq {
	u64 max_idle_balance_cost;
#endif

#ifdef CONFIG_SCHED_WALT
	/*
	 * max_freq = user or thermal defined maximum
	 * max_possible_freq = maximum supported by hardware
	 */
	unsigned int cur_freq, max_freq, min_freq, max_possible_freq;
	struct cpumask freq_domain_cpumask;

	u64 cumulative_runnable_avg;
	int efficiency; /* Differentiate cpus with different IPC capability */
	int load_scale_factor;
	int capacity;
	int max_possible_capacity;
	u64 window_start;
	u64 curr_runnable_sum;
	u64 prev_runnable_sum;
	u64 nt_curr_runnable_sum;
	u64 nt_prev_runnable_sum;
#endif /* CONFIG_SCHED_WALT */


#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	u64 prev_irq_time;
#endif
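Each CPU accumulates the busy time of its tasks in curr_runnable_sum for the window currently open (which begins at window_start); when that window closes, the total moves to prev_runnable_sum, which is what __cpu_util() later in this diff reads. A simplified sketch of that rollover, as an aside: the real update is done by walt_update_task_ravg() in the suppressed walt.c, which also handles several elapsed windows, partial credit at the boundary, and the nt_* new-task sums; the names below are illustrative only.

#include <linux/types.h>

/*
 * Illustrative sketch of per-CPU window rollover: once wall time passes
 * the end of the open window, the accumulated busy time becomes the
 * "previous window" figure and accounting restarts.
 */
struct walt_cpu_sketch {
	u64 window_start;	/* ns, start of the open window */
	u64 curr_runnable_sum;	/* busy ns accumulated so far */
	u64 prev_runnable_sum;	/* busy ns of the last full window */
};

static void roll_window_sketch(struct walt_cpu_sketch *w, u64 wallclock,
			       u64 window_size)
{
	if (wallclock < w->window_start + window_size)
		return;	/* still inside the current window */

	/* A window boundary has passed; publish the completed window. */
	w->prev_runnable_sum = w->curr_runnable_sum;
	w->curr_runnable_sum = 0;

	/* Advance window_start to the most recent window boundary. */
	while (w->window_start + window_size <= wallclock)
		w->window_start += window_size;
}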
@@ -1513,6 +1538,10 @@ static inline unsigned long capacity_orig_of(int cpu)
	return cpu_rq(cpu)->cpu_capacity_orig;
}

extern unsigned int sysctl_sched_use_walt_cpu_util;
extern unsigned int walt_ravg_window;
extern unsigned int walt_disabled;

/*
 * cpu_util returns the amount of capacity of a CPU that is used by CFS
 * tasks. The unit of the return value must be the one of capacity so we can
@@ -1544,6 +1573,11 @@ static inline unsigned long __cpu_util(int cpu, int delta)
	unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
	unsigned long capacity = capacity_orig_of(cpu);

#ifdef CONFIG_SCHED_WALT
	if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
		util = (cpu_rq(cpu)->prev_runnable_sum << SCHED_LOAD_SHIFT) /
		       walt_ravg_window;
#endif
	delta += util;
	if (delta < 0)
		return 0;
@@ -1,4 +1,5 @@
#include "sched.h"
#include "walt.h"

/*
 * stop-task scheduling class.
@@ -42,12 +43,14 @@ static void
enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
	add_nr_running(rq, 1);
	walt_inc_cumulative_runnable_avg(rq, p);
}

static void
dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
	sub_nr_running(rq, 1);
	walt_dec_cumulative_runnable_avg(rq, p);
}

static void yield_task_stop(struct rq *rq)
kernel/sched/walt.c (new file, 1098 lines)
File diff suppressed because it is too large.

kernel/sched/walt.h (new file, 57 lines)
@@ -0,0 +1,57 @@
/*
 * Copyright (c) 2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#ifndef __WALT_H
#define __WALT_H

#ifdef CONFIG_SCHED_WALT

void walt_update_task_ravg(struct task_struct *p, struct rq *rq, int event,
		u64 wallclock, u64 irqtime);
void walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p);
void walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p);
void walt_inc_cfs_cumulative_runnable_avg(struct cfs_rq *rq,
		struct task_struct *p);
void walt_dec_cfs_cumulative_runnable_avg(struct cfs_rq *rq,
		struct task_struct *p);
void walt_fixup_busy_time(struct task_struct *p, int new_cpu);
void walt_init_new_task_load(struct task_struct *p);
void walt_mark_task_starting(struct task_struct *p);
void walt_set_window_start(struct rq *rq);
void walt_migrate_sync_cpu(int cpu);
void walt_init_cpu_efficiency(void);
u64 walt_ktime_clock(void);

#else /* CONFIG_SCHED_WALT */

static inline void walt_update_task_ravg(struct task_struct *p, struct rq *rq,
		int event, u64 wallclock, u64 irqtime) { }
static inline void walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) { }
static inline void walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) { }
static inline void walt_inc_cfs_cumulative_runnable_avg(struct cfs_rq *rq,
		struct task_struct *p) { }
static inline void walt_dec_cfs_cumulative_runnable_avg(struct cfs_rq *rq,
		struct task_struct *p) { }
static inline void walt_fixup_busy_time(struct task_struct *p, int new_cpu) { }
static inline void walt_init_new_task_load(struct task_struct *p) { }
static inline void walt_mark_task_starting(struct task_struct *p) { }
static inline void walt_set_window_start(struct rq *rq) { }
static inline void walt_migrate_sync_cpu(int cpu) { }
static inline void walt_init_cpu_efficiency(void) { }
static inline u64 walt_ktime_clock(void) { return 0; }

#endif /* CONFIG_SCHED_WALT */

extern unsigned int walt_disabled;

#endif
@@ -311,6 +311,29 @@ static struct ctl_table kern_table[] = {
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_SCHED_WALT
	{
		.procname	= "sched_use_walt_cpu_util",
		.data		= &sysctl_sched_use_walt_cpu_util,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sched_use_walt_task_util",
		.data		= &sysctl_sched_use_walt_task_util,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sched_walt_init_task_load_pct",
		.data		= &sysctl_sched_walt_init_task_load_pct,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	{
		.procname	= "sched_sync_hint_enable",
		.data		= &sysctl_sched_sync_hint_enable,