sched: Introduce Window Assisted Load Tracking (WALT)

Use a window-based view of time to track task demand and CPU
utilization in the scheduler.
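
A worked example of the conversion the rest of the patch relies on (see
task_util() in kernel/sched/fair.c and __cpu_util() in kernel/sched/sched.h
below). The 20 ms window length is only an assumed illustration here; the real
walt_ravg_window default is defined in kernel/sched/walt.c, whose diff is
suppressed in this view.

/*
 * Sketch only, mirroring the expression added to task_util() below:
 *
 *   demand           =  5,000,000 ns   (task runnable ~5 ms per window)
 *   walt_ravg_window = 20,000,000 ns   (assumed 20 ms window)
 *   util             = (demand << 10) / walt_ravg_window
 *                    = (5,000,000 * 1024) / 20,000,000 = 256
 *
 * i.e. about 25% of one CPU at maximum capacity, on the same 0..1024
 * scale that PELT and the cpufreq/EAS consumers already use.
 */
static inline unsigned long walt_demand_to_util(u64 demand)
{
	return (demand << 10) / walt_ravg_window;
}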

Window Assisted Load Tracking (WALT) implementation credits:
 Srivatsa Vaddagiri, Steve Muckle, Syed Rameez Mustafa, Joonwoo Park,
 Pavan Kumar Kondeti, Olav Haugan

2016-03-06: Integration with EAS/refactoring by Vikram Mulukutla
            and Todd Kjos

Change-Id: I21408236836625d4e7d7de1843d20ed5ff36c708

Includes fixes for issues:

eas/walt: Use walt_ktime_clock() instead of ktime_get_ns() to avoid a
race resulting in watchdog resets
BUG: 29353986
Change-Id: Ic1820e22a136f7c7ebd6f42e15f14d470f6bbbdb

Handle WALT accounting anomaly during resume

During resume, there is a corner case where, on wakeup, a task's
prev_runnable_sum can go negative. This is a workaround that fixes up
the condition and warns instead of crashing.
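
A hedged sketch of the workaround described above; the actual change is in
kernel/sched/walt.c (diff suppressed below), and the helper name here is made
up for illustration. rq->prev_runnable_sum is the u64 field added to struct rq
in kernel/sched/sched.h.

/*
 * Illustration only: if prev_runnable_sum went negative across
 * suspend/resume, warn once and clamp it to zero rather than letting
 * later accounting arithmetic take the machine down.
 */
static void walt_fixup_negative_prev_sum(struct rq *rq)
{
	if (likely((s64)rq->prev_runnable_sum >= 0))
		return;

	WARN_ONCE(1, "WALT: prev_runnable_sum went negative on resume\n");
	rq->prev_runnable_sum = 0;
}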

BUG: 29464099
Change-Id: I173e7874324b31a3584435530281708145773508

Signed-off-by: Todd Kjos <tkjos@google.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
Signed-off-by: Juri Lelli <juri.lelli@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
Author:     Srivatsa Vaddagiri
AuthorDate: 2016-05-31 09:08:38 -07:00
Committer:  Amit Pundir

parent 3a7e623182
commit b41fa2aec5

13 files changed, 1498 insertions(+), 1 deletion(-)


@@ -317,6 +317,15 @@ extern char ___assert_task_state[1 - 2*!!(
/* Task command name length */
#define TASK_COMM_LEN 16
enum task_event {
PUT_PREV_TASK = 0,
PICK_NEXT_TASK = 1,
TASK_WAKE = 2,
TASK_MIGRATE = 3,
TASK_UPDATE = 4,
IRQ_UPDATE = 5,
};
#include <linux/spinlock.h>
/*
@@ -1276,6 +1285,41 @@ struct sched_statistics {
};
#endif
#ifdef CONFIG_SCHED_WALT
#define RAVG_HIST_SIZE_MAX 5
/* ravg represents frequency scaled cpu-demand of tasks */
struct ravg {
/*
* 'mark_start' marks the beginning of an event (task waking up, task
* starting to execute, task being preempted) within a window
*
* 'sum' represents how runnable a task has been within current
* window. It incorporates both running time and wait time and is
* frequency scaled.
*
* 'sum_history' keeps track of history of 'sum' seen over previous
* RAVG_HIST_SIZE windows. Windows where task was entirely sleeping are
* ignored.
*
* 'demand' represents maximum sum seen over previous
* sysctl_sched_ravg_hist_size windows. 'demand' could drive frequency
* demand for tasks.
*
* 'curr_window' represents task's contribution to cpu busy time
* statistics (rq->curr_runnable_sum) in current window
*
* 'prev_window' represents task's contribution to cpu busy time
* statistics (rq->prev_runnable_sum) in previous window
*/
u64 mark_start;
u32 sum, demand;
u32 sum_history[RAVG_HIST_SIZE_MAX];
u32 curr_window, prev_window;
u16 active_windows;
};
#endif
struct sched_entity {
struct load_weight load; /* for load-balancing */
struct rb_node run_node;
@@ -1433,6 +1477,15 @@ struct task_struct {
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
#ifdef CONFIG_SCHED_WALT
struct ravg ravg;
/*
* 'init_load_pct' represents the initial task load assigned to children
* of this task
*/
u32 init_load_pct;
#endif
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;
#endif
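
The ravg and init_load_pct comments above describe the bookkeeping but not the
update itself, which lives in kernel/sched/walt.c (diff suppressed below). A
minimal sketch of the two ideas, with made-up function names; the real code
runs under rq->lock and its demand policy is tunable, so the "maximum of the
history" choice here simply follows the 'demand' comment above rather than
claiming to be the exact implementation.

/*
 * Sketch: fold a completed window's sum into sum_history, recompute
 * demand as the maximum of the kept samples, then start the next window.
 */
static void sketch_update_history(struct ravg *ravg, u32 window_sum)
{
	u32 max = window_sum;
	int i;

	for (i = RAVG_HIST_SIZE_MAX - 1; i > 0; i--) {
		ravg->sum_history[i] = ravg->sum_history[i - 1];
		if (ravg->sum_history[i] > max)
			max = ravg->sum_history[i];
	}
	ravg->sum_history[0] = window_sum;

	ravg->demand = max;	/* drives frequency/placement decisions */
	ravg->sum = 0;		/* accumulate the new window from zero */
}

/*
 * Sketch: seed a freshly forked task's demand from init_load_pct
 * (a percentage of one window) instead of starting it at zero.
 */
static void sketch_init_new_task_load(struct task_struct *p)
{
	u32 init_load = div64_u64((u64)walt_ravg_window * p->init_load_pct, 100);
	int i;

	p->ravg.demand = init_load;
	for (i = 0; i < RAVG_HIST_SIZE_MAX; i++)
		p->ravg.sum_history[i] = init_load;
}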


@@ -43,6 +43,11 @@ extern unsigned int sysctl_sched_is_big_little;
extern unsigned int sysctl_sched_sync_hint_enable;
extern unsigned int sysctl_sched_initial_task_util;
extern unsigned int sysctl_sched_cstate_aware;
#ifdef CONFIG_SCHED_WALT
extern unsigned int sysctl_sched_use_walt_cpu_util;
extern unsigned int sysctl_sched_use_walt_task_util;
extern unsigned int sysctl_sched_walt_init_task_load_pct;
#endif
enum sched_tunable_scaling {
SCHED_TUNABLESCALING_NONE,


@@ -937,6 +937,155 @@ TRACE_EVENT(sched_tune_filter,
__entry->payoff, __entry->region)
);
#ifdef CONFIG_SCHED_WALT
struct rq;
TRACE_EVENT(walt_update_task_ravg,
TP_PROTO(struct task_struct *p, struct rq *rq, int evt,
u64 wallclock, u64 irqtime),
TP_ARGS(p, rq, evt, wallclock, irqtime),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( pid_t, cur_pid )
__field(unsigned int, cur_freq )
__field( u64, wallclock )
__field( u64, mark_start )
__field( u64, delta_m )
__field( u64, win_start )
__field( u64, delta )
__field( u64, irqtime )
__field( int, evt )
__field(unsigned int, demand )
__field(unsigned int, sum )
__field( int, cpu )
__field( u64, cs )
__field( u64, ps )
__field( u32, curr_window )
__field( u32, prev_window )
__field( u64, nt_cs )
__field( u64, nt_ps )
__field( u32, active_windows )
),
TP_fast_assign(
__entry->wallclock = wallclock;
__entry->win_start = rq->window_start;
__entry->delta = (wallclock - rq->window_start);
__entry->evt = evt;
__entry->cpu = rq->cpu;
__entry->cur_pid = rq->curr->pid;
__entry->cur_freq = rq->cur_freq;
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->mark_start = p->ravg.mark_start;
__entry->delta_m = (wallclock - p->ravg.mark_start);
__entry->demand = p->ravg.demand;
__entry->sum = p->ravg.sum;
__entry->irqtime = irqtime;
__entry->cs = rq->curr_runnable_sum;
__entry->ps = rq->prev_runnable_sum;
__entry->curr_window = p->ravg.curr_window;
__entry->prev_window = p->ravg.prev_window;
__entry->nt_cs = rq->nt_curr_runnable_sum;
__entry->nt_ps = rq->nt_prev_runnable_sum;
__entry->active_windows = p->ravg.active_windows;
),
TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
" cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u"
, __entry->wallclock, __entry->win_start, __entry->delta,
__entry->evt, __entry->cpu,
__entry->cur_freq, __entry->cur_pid,
__entry->pid, __entry->comm, __entry->mark_start,
__entry->delta_m, __entry->demand,
__entry->sum, __entry->irqtime,
__entry->cs, __entry->ps,
__entry->curr_window, __entry->prev_window,
__entry->nt_cs, __entry->nt_ps,
__entry->active_windows
)
);
TRACE_EVENT(walt_update_history,
TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int samples,
int evt),
TP_ARGS(rq, p, runtime, samples, evt),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field(unsigned int, runtime )
__field( int, samples )
__field( int, evt )
__field( u64, demand )
__field(unsigned int, walt_avg )
__field(unsigned int, pelt_avg )
__array( u32, hist, RAVG_HIST_SIZE_MAX)
__field( int, cpu )
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->runtime = runtime;
__entry->samples = samples;
__entry->evt = evt;
__entry->demand = p->ravg.demand;
__entry->walt_avg = (__entry->demand << 10) / walt_ravg_window,
__entry->pelt_avg = p->se.avg.util_avg;
memcpy(__entry->hist, p->ravg.sum_history,
RAVG_HIST_SIZE_MAX * sizeof(u32));
__entry->cpu = rq->cpu;
),
TP_printk("%d (%s): runtime %u samples %d event %d demand %llu"
" walt %u pelt %u (hist: %u %u %u %u %u) cpu %d",
__entry->pid, __entry->comm,
__entry->runtime, __entry->samples, __entry->evt,
__entry->demand,
__entry->walt_avg,
__entry->pelt_avg,
__entry->hist[0], __entry->hist[1],
__entry->hist[2], __entry->hist[3],
__entry->hist[4], __entry->cpu)
);
TRACE_EVENT(walt_migration_update_sum,
TP_PROTO(struct rq *rq, struct task_struct *p),
TP_ARGS(rq, p),
TP_STRUCT__entry(
__field(int, cpu )
__field(int, pid )
__field( u64, cs )
__field( u64, ps )
__field( s64, nt_cs )
__field( s64, nt_ps )
),
TP_fast_assign(
__entry->cpu = cpu_of(rq);
__entry->cs = rq->curr_runnable_sum;
__entry->ps = rq->prev_runnable_sum;
__entry->nt_cs = (s64)rq->nt_curr_runnable_sum;
__entry->nt_ps = (s64)rq->nt_prev_runnable_sum;
__entry->pid = p->pid;
),
TP_printk("cpu %d: cs %llu ps %llu nt_cs %lld nt_ps %lld pid %d",
__entry->cpu, __entry->cs, __entry->ps,
__entry->nt_cs, __entry->nt_ps, __entry->pid)
);
#endif /* CONFIG_SCHED_WALT */
#endif /* CONFIG_SMP */
#endif /* _TRACE_SCHED_H */


@@ -392,6 +392,15 @@ config IRQ_TIME_ACCOUNTING
endchoice
config SCHED_WALT
bool "Support window based load tracking"
depends on SMP
help
This feature will allow the scheduler to maintain a tunable window
based set of metrics for tasks and runqueues. These metrics can be
used to guide task placement as well as task frequency requirements
for cpufreq governors.
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
depends on MULTIUSER


@@ -15,6 +15,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
obj-y += wait.o completion.o idle.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o energy.o
obj-$(CONFIG_SCHED_WALT) += walt.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o


@@ -89,6 +89,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
#include "walt.h"
DEFINE_MUTEX(sched_domains_mutex);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -1085,7 +1086,9 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
dequeue_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_MIGRATING;
double_lock_balance(rq, cpu_rq(new_cpu));
set_task_cpu(p, new_cpu);
double_unlock_balance(rq, cpu_rq(new_cpu));
raw_spin_unlock(&rq->lock);
rq = cpu_rq(new_cpu);
@@ -1309,6 +1312,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
p->sched_class->migrate_task_rq(p);
p->se.nr_migrations++;
perf_event_task_migrate(p);
walt_fixup_busy_time(p, new_cpu);
}
__set_task_cpu(p, new_cpu);
@@ -1937,6 +1942,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
{
unsigned long flags;
int cpu, success = 0;
#ifdef CONFIG_SMP
struct rq *rq;
u64 wallclock;
#endif
/*
* If we are going to wake up a thread waiting for CONDITION we
@@ -1994,6 +2003,14 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*/
smp_rmb();
rq = cpu_rq(task_cpu(p));
raw_spin_lock(&rq->lock);
wallclock = walt_ktime_clock();
walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
raw_spin_unlock(&rq->lock);
p->sched_contributes_to_load = !!task_contributes_to_load(p);
p->state = TASK_WAKING;
@@ -2001,10 +2018,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
p->sched_class->task_waking(p);
cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
if (task_cpu(p) != cpu) {
wake_flags |= WF_MIGRATED;
set_task_cpu(p, cpu);
}
#endif /* CONFIG_SMP */
ttwu_queue(p, cpu);
@@ -2053,8 +2072,13 @@ static void try_to_wake_up_local(struct task_struct *p)
trace_sched_waking(p);
if (!task_on_rq_queued(p))
if (!task_on_rq_queued(p)) {
u64 wallclock = walt_ktime_clock();
walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
ttwu_activate(rq, p, ENQUEUE_WAKEUP);
}
ttwu_do_wakeup(rq, p, 0);
ttwu_stat(p, smp_processor_id(), 0);
@@ -2120,6 +2144,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.nr_migrations = 0;
p->se.vruntime = 0;
INIT_LIST_HEAD(&p->se.group_node);
walt_init_new_task_load(p);
#ifdef CONFIG_SCHEDSTATS
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -2387,6 +2412,9 @@ void wake_up_new_task(struct task_struct *p)
struct rq *rq;
raw_spin_lock_irqsave(&p->pi_lock, flags);
walt_init_new_task_load(p);
/* Initialize new task's runnable average */
init_entity_runnable_average(&p->se);
#ifdef CONFIG_SMP
@@ -2399,6 +2427,7 @@ void wake_up_new_task(struct task_struct *p)
#endif
rq = __task_rq_lock(p);
walt_mark_task_starting(p);
activate_task(rq, p, ENQUEUE_WAKEUP_NEW);
p->on_rq = TASK_ON_RQ_QUEUED;
trace_sched_wakeup_new(p);
@@ -2948,9 +2977,12 @@ void scheduler_tick(void)
sched_clock_tick();
raw_spin_lock(&rq->lock);
walt_set_window_start(rq);
update_rq_clock(rq);
curr->sched_class->task_tick(rq, curr, 0);
update_cpu_load_active(rq);
walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
walt_ktime_clock(), 0);
calc_global_load_tick(rq);
sched_freq_tick(cpu);
raw_spin_unlock(&rq->lock);
@@ -3189,6 +3221,7 @@ static void __sched notrace __schedule(bool preempt)
unsigned long *switch_count;
struct rq *rq;
int cpu;
u64 wallclock;
cpu = smp_processor_id();
rq = cpu_rq(cpu);
@@ -3250,6 +3283,9 @@ static void __sched notrace __schedule(bool preempt)
update_rq_clock(rq);
next = pick_next_task(rq, prev);
wallclock = walt_ktime_clock();
walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
clear_tsk_need_resched(prev);
clear_preempt_need_resched();
rq->clock_skip_update = 0;
@@ -5672,6 +5708,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_UP_PREPARE:
raw_spin_lock_irqsave(&rq->lock, flags);
walt_set_window_start(rq);
raw_spin_unlock_irqrestore(&rq->lock, flags);
rq->calc_load_update = calc_load_update;
account_reset_rq(rq);
break;
@@ -5692,6 +5731,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
sched_ttwu_pending();
/* Update our root-domain */
raw_spin_lock_irqsave(&rq->lock, flags);
walt_migrate_sync_cpu(cpu);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
@@ -7536,6 +7576,7 @@ void __init sched_init_smp(void)
{
cpumask_var_t non_isolated_cpus;
walt_init_cpu_efficiency();
alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
alloc_cpumask_var(&fallback_doms, GFP_KERNEL);


@@ -30,11 +30,13 @@
#include <linux/mempolicy.h>
#include <linux/migrate.h>
#include <linux/task_work.h>
#include <linux/module.h>
#include <trace/events/sched.h>
#include "sched.h"
#include "tune.h"
#include "walt.h"
/*
* Targeted preemption latency for CPU-bound tasks:
@@ -56,6 +58,10 @@ unsigned int sysctl_sched_sync_hint_enable = 1;
unsigned int sysctl_sched_initial_task_util = 0;
unsigned int sysctl_sched_cstate_aware = 1;
#ifdef CONFIG_SCHED_WALT
unsigned int sysctl_sched_use_walt_cpu_util = 1;
unsigned int sysctl_sched_use_walt_task_util = 1;
#endif
/*
* The initial- and re-scaling of tunables is configurable
* (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
@@ -4225,6 +4231,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (cfs_rq_throttled(cfs_rq))
break;
cfs_rq->h_nr_running++;
walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
flags = ENQUEUE_WAKEUP;
}
@@ -4232,6 +4239,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running++;
walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
if (cfs_rq_throttled(cfs_rq))
break;
@@ -4246,6 +4254,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_SMP
if (!se) {
walt_inc_cumulative_runnable_avg(rq, p);
if (!task_new && !rq->rd->overutilized &&
cpu_overutilized(rq->cpu))
rq->rd->overutilized = true;
@@ -4295,6 +4304,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (cfs_rq_throttled(cfs_rq))
break;
cfs_rq->h_nr_running--;
walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
/* Don't dequeue parent if it has other entities besides us */
if (cfs_rq->load.weight) {
@@ -4315,6 +4325,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running--;
walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
if (cfs_rq_throttled(cfs_rq))
break;
@@ -4329,6 +4340,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_SMP
if (!se) {
walt_dec_cumulative_runnable_avg(rq, p);
/*
* We want to potentially trigger a freq switch
@@ -5228,6 +5240,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
static inline unsigned long task_util(struct task_struct *p)
{
#ifdef CONFIG_SCHED_WALT
if (!walt_disabled && sysctl_sched_use_walt_task_util) {
unsigned long demand = p->ravg.demand;
return (demand << 10) / walt_ravg_window;
}
#endif
return p->se.avg.util_avg;
}
@@ -6620,7 +6638,9 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
deactivate_task(env->src_rq, p, 0);
p->on_rq = TASK_ON_RQ_MIGRATING;
double_lock_balance(env->src_rq, env->dst_rq);
set_task_cpu(p, env->dst_cpu);
double_unlock_balance(env->src_rq, env->dst_rq);
}
/*


@@ -8,6 +8,8 @@
#include <linux/slab.h>
#include <linux/irq_work.h>
#include "walt.h"
int sched_rr_timeslice = RR_TIMESLICE;
static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
@@ -1261,6 +1263,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
rt_se->timeout = 0;
enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
walt_inc_cumulative_runnable_avg(rq, p);
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
@@ -1272,6 +1275,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
update_curr_rt(rq);
dequeue_rt_entity(rt_se);
walt_dec_cumulative_runnable_avg(rq, p);
dequeue_pushable_task(rq, p);
}


@@ -410,6 +410,10 @@ struct cfs_rq {
struct list_head leaf_cfs_rq_list;
struct task_group *tg; /* group that "owns" this runqueue */
#ifdef CONFIG_SCHED_WALT
u64 cumulative_runnable_avg;
#endif
#ifdef CONFIG_CFS_BANDWIDTH
int runtime_enabled;
u64 runtime_expires;
@@ -663,6 +667,27 @@ struct rq {
u64 max_idle_balance_cost;
#endif
#ifdef CONFIG_SCHED_WALT
/*
* max_freq = user or thermal defined maximum
* max_possible_freq = maximum supported by hardware
*/
unsigned int cur_freq, max_freq, min_freq, max_possible_freq;
struct cpumask freq_domain_cpumask;
u64 cumulative_runnable_avg;
int efficiency; /* Differentiate cpus with different IPC capability */
int load_scale_factor;
int capacity;
int max_possible_capacity;
u64 window_start;
u64 curr_runnable_sum;
u64 prev_runnable_sum;
u64 nt_curr_runnable_sum;
u64 nt_prev_runnable_sum;
#endif /* CONFIG_SCHED_WALT */
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time;
#endif
@@ -1513,6 +1538,10 @@ static inline unsigned long capacity_orig_of(int cpu)
return cpu_rq(cpu)->cpu_capacity_orig;
}
extern unsigned int sysctl_sched_use_walt_cpu_util;
extern unsigned int walt_ravg_window;
extern unsigned int walt_disabled;
/*
* cpu_util returns the amount of capacity of a CPU that is used by CFS
* tasks. The unit of the return value must be the one of capacity so we can
@@ -1544,6 +1573,11 @@ static inline unsigned long __cpu_util(int cpu, int delta)
unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
unsigned long capacity = capacity_orig_of(cpu);
#ifdef CONFIG_SCHED_WALT
if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
util = (cpu_rq(cpu)->prev_runnable_sum << SCHED_LOAD_SHIFT) /
walt_ravg_window;
#endif
delta += util;
if (delta < 0)
return 0;
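
The rq fields added above (cur_freq, max_possible_freq, efficiency,
load_scale_factor) exist so that window sums are comparable across CPUs that
run at different frequencies and have different per-cycle performance. A
hedged sketch of that normalization; the real helper is in
kernel/sched/walt.c (diff suppressed below), and the names, rounding, and the
max_possible_efficiency parameter here are assumptions.

/*
 * Sketch: scale an execution delta so that time spent at a lower
 * frequency, or on a lower-IPC CPU, contributes proportionally less
 * than time at max_possible_freq on the most efficient CPU.
 */
static u64 sketch_scale_exec_time(u64 delta, struct rq *rq,
				  unsigned int max_possible_freq,
				  unsigned int max_possible_efficiency)
{
	/* frequency part: cur_freq / max_possible_freq */
	delta = div64_u64(delta * rq->cur_freq, max_possible_freq);

	/* IPC part: efficiency / max_possible_efficiency */
	return div64_u64(delta * rq->efficiency, max_possible_efficiency);
}

With the sums normalized this way, the prev_runnable_sum consumed by
__cpu_util() above is already expressed as time at maximum capacity, which is
presumably why a single shift by SCHED_LOAD_SHIFT and a divide by
walt_ravg_window are enough to land in the usual 0..1024 range.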


@@ -1,4 +1,5 @@
#include "sched.h"
#include "walt.h"
/*
* stop-task scheduling class.
@@ -42,12 +43,14 @@ static void
enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
add_nr_running(rq, 1);
walt_inc_cumulative_runnable_avg(rq, p);
}
static void
dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
sub_nr_running(rq, 1);
walt_dec_cumulative_runnable_avg(rq, p);
}
static void yield_task_stop(struct rq *rq)

kernel/sched/walt.c (new file, 1098 lines; diff suppressed because it is too large)

kernel/sched/walt.h (new file, 57 lines)

@@ -0,0 +1,57 @@
/*
* Copyright (c) 2016, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef __WALT_H
#define __WALT_H
#ifdef CONFIG_SCHED_WALT
void walt_update_task_ravg(struct task_struct *p, struct rq *rq, int event,
u64 wallclock, u64 irqtime);
void walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p);
void walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p);
void walt_inc_cfs_cumulative_runnable_avg(struct cfs_rq *rq,
struct task_struct *p);
void walt_dec_cfs_cumulative_runnable_avg(struct cfs_rq *rq,
struct task_struct *p);
void walt_fixup_busy_time(struct task_struct *p, int new_cpu);
void walt_init_new_task_load(struct task_struct *p);
void walt_mark_task_starting(struct task_struct *p);
void walt_set_window_start(struct rq *rq);
void walt_migrate_sync_cpu(int cpu);
void walt_init_cpu_efficiency(void);
u64 walt_ktime_clock(void);
#else /* CONFIG_SCHED_WALT */
static inline void walt_update_task_ravg(struct task_struct *p, struct rq *rq,
int event, u64 wallclock, u64 irqtime) { }
static inline void walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) { }
static inline void walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) { }
static inline void walt_inc_cfs_cumulative_runnable_avg(struct cfs_rq *rq,
struct task_struct *p) { }
static inline void walt_dec_cfs_cumulative_runnable_avg(struct cfs_rq *rq,
struct task_struct *p) { }
static inline void walt_fixup_busy_time(struct task_struct *p, int new_cpu) { }
static inline void walt_init_new_task_load(struct task_struct *p) { }
static inline void walt_mark_task_starting(struct task_struct *p) { }
static inline void walt_set_window_start(struct rq *rq) { }
static inline void walt_migrate_sync_cpu(int cpu) { }
static inline void walt_init_cpu_efficiency(void) { }
static inline u64 walt_ktime_clock(void) { return 0; }
#endif /* CONFIG_SCHED_WALT */
extern unsigned int walt_disabled;
#endif


@@ -311,6 +311,29 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
#ifdef CONFIG_SCHED_WALT
{
.procname = "sched_use_walt_cpu_util",
.data = &sysctl_sched_use_walt_cpu_util,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sched_use_walt_task_util",
.data = &sysctl_sched_use_walt_task_util,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sched_walt_init_task_load_pct",
.data = &sysctl_sched_walt_init_task_load_pct,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#endif
{
.procname = "sched_sync_hint_enable",
.data = &sysctl_sched_sync_hint_enable,