sched: Aggregate for frequency
Related threads in a group could execute on different CPUs and hence
present a split-demand picture to the cpufreq governor. In other words,
the governor fails to see the net CPU demand of all related threads in
a given window if the threads' execution is split across CPUs. That can
result in a sub-optimal frequency being chosen compared to the ideal
frequency for the aggregate work taken up by the related threads.

This patch aggregates CPU execution stats in a window for all related
threads in a group. This presents CPU busy time to the governor as if
all related threads were one thread, and thus helps select the right
frequency required by the related threads. The aggregation is done
per-cluster.

Change-Id: I71e6047620066323721c6d542034ddd4b2950e7f
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
[joonwoop@codeaurora.org: Fixed notify_migration() to hold the rcu read
 lock, as this version of Linux doesn't hold p->pi_lock when the
 function gets called, while keeping the use of rcu_access_pointer()
 since we never dereference the return value.]
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
parent 450ba6dd70
commit e6aae1c3e0
8 changed files with 762 additions and 156 deletions
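The problem in miniature: if three related threads each run on a different CPU, any single CPU's busy time understates the group's work in the window. The standalone sketch below is illustrative only (all names and numbers are made up, nothing is taken from the patch); it contrasts the per-CPU view a governor would otherwise see with the aggregated view this patch provides.

/* Standalone illustration of the split-demand problem; compiles with
 * any C compiler.  Not kernel code. */
#include <stdio.h>
#include <stdint.h>

#define NR_CPUS   4
#define NR_TASKS  3

/* Busy time (ns) each related thread contributed in the last window,
 * indexed by the CPU it happened to run on. */
static const uint64_t thread_busy[NR_TASKS][NR_CPUS] = {
	{ 2000000, 0, 0, 0 },		/* thread A ran on cpu0 */
	{ 0, 2500000, 0, 0 },		/* thread B ran on cpu1 */
	{ 0, 0, 1500000, 0 },		/* thread C ran on cpu2 */
};

int main(void)
{
	uint64_t per_cpu_max = 0, aggregate = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		uint64_t cpu_sum = 0;

		for (int t = 0; t < NR_TASKS; t++)
			cpu_sum += thread_busy[t][cpu];
		if (cpu_sum > per_cpu_max)
			per_cpu_max = cpu_sum;
		aggregate += cpu_sum;
	}

	/* A governor sizing frequency from any one CPU sees at most
	 * 2.5 ms of work; the group as a whole did 6 ms in the window. */
	printf("per-cpu max: %llu ns, group aggregate: %llu ns\n",
	       (unsigned long long)per_cpu_max,
	       (unsigned long long)aggregate);
	return 0;
}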
@@ -328,6 +328,16 @@ enum task_event {
 	IRQ_UPDATE = 5,
 };
 
+/* Note: this need to be in sync with migrate_type_names array */
+enum migrate_types {
+	GROUP_TO_RQ,
+	RQ_TO_GROUP,
+	RQ_TO_RQ,
+	GROUP_TO_GROUP,
+};
+
+extern const char *migrate_type_names[];
+
 #include <linux/spinlock.h>
 
 /*
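A hedged reading of the four new migrate_types (the dispatch logic itself lives in the suppressed file, so this mapping is an assumption from the names and the tracepoint below): ungrouped tasks keep plain per-rq accounting, grouped tasks move their window sums between per-cluster group counters, and joining or leaving a group moves sums between rq and group counters.

/* Assumed mapping only -- the real dispatch is in the suppressed diff.
 * Kernel-style fragment building on the enum above; "grouped" means
 * the task belongs to a related_thread_group. */
static enum migrate_types classify_move(bool was_grouped, bool now_grouped)
{
	if (was_grouped && now_grouped)
		return GROUP_TO_GROUP;	/* e.g. cross-cluster migration */
	if (was_grouped)
		return GROUP_TO_RQ;	/* task left its group */
	if (now_grouped)
		return RQ_TO_GROUP;	/* task joined a group */
	return RQ_TO_RQ;		/* ordinary task migration */
}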
@@ -75,6 +75,7 @@ extern unsigned int sysctl_sched_restrict_cluster_spill;
 #if defined(CONFIG_SCHED_FREQ_INPUT)
 extern unsigned int sysctl_sched_new_task_windows;
 extern unsigned int sysctl_sched_pred_alert_freq;
+extern unsigned int sysctl_sched_freq_aggregate;
 #endif
 
 #else /* CONFIG_SCHED_HMP */
@@ -9,6 +9,8 @@
 #include <linux/binfmts.h>
 
 struct rq;
+struct group_cpu_time;
+struct migration_sum_data;
 extern const char *task_event_names[];
 
 /*
@@ -269,9 +271,10 @@ TRACE_EVENT(sched_set_boost,
 TRACE_EVENT(sched_update_task_ravg,
 
 	TP_PROTO(struct task_struct *p, struct rq *rq, enum task_event evt,
-		 u64 wallclock, u64 irqtime, u32 cycles, u32 exec_time),
+		 u64 wallclock, u64 irqtime, u32 cycles, u32 exec_time,
+		 struct group_cpu_time *cpu_time),
 
-	TP_ARGS(p, rq, evt, wallclock, irqtime, cycles, exec_time),
+	TP_ARGS(p, rq, evt, wallclock, irqtime, cycles, exec_time, cpu_time),
 
 	TP_STRUCT__entry(
 		__field(char, comm, TASK_COMM_LEN)
@@ -290,8 +293,12 @@ TRACE_EVENT(sched_update_task_ravg,
 		__field(int, cpu)
 #ifdef CONFIG_SCHED_FREQ_INPUT
 		__field(unsigned int, pred_demand)
-		__field(u64, cs)
-		__field(u64, ps)
+		__field(u64, rq_cs)
+		__field(u64, rq_ps)
+		__field(u64, grp_cs)
+		__field(u64, grp_ps)
+		__field(u64, grp_nt_cs)
+		__field(u64, grp_nt_ps)
 		__field(u32, curr_window)
 		__field(u32, prev_window)
 		__field(u64, nt_cs)
@@ -318,8 +325,12 @@ TRACE_EVENT(sched_update_task_ravg,
 		__entry->irqtime = irqtime;
 #ifdef CONFIG_SCHED_FREQ_INPUT
 		__entry->pred_demand = p->ravg.pred_demand;
-		__entry->cs = rq->curr_runnable_sum;
-		__entry->ps = rq->prev_runnable_sum;
+		__entry->rq_cs = rq->curr_runnable_sum;
+		__entry->rq_ps = rq->prev_runnable_sum;
+		__entry->grp_cs = cpu_time ? cpu_time->curr_runnable_sum : 0;
+		__entry->grp_ps = cpu_time ? cpu_time->prev_runnable_sum : 0;
+		__entry->grp_nt_cs = cpu_time ? cpu_time->nt_curr_runnable_sum : 0;
+		__entry->grp_nt_ps = cpu_time ? cpu_time->nt_prev_runnable_sum : 0;
 		__entry->curr_window = p->ravg.curr_window;
 		__entry->prev_window = p->ravg.prev_window;
 		__entry->nt_cs = rq->nt_curr_runnable_sum;
@@ -330,7 +341,7 @@ TRACE_EVENT(sched_update_task_ravg,
 
 	TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
 #ifdef CONFIG_SCHED_FREQ_INPUT
-		" pred_demand %u cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u"
+		" pred_demand %u rq_cs %llu rq_ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u grp_cs %lld grp_ps %lld, grp_nt_cs %llu, grp_nt_ps: %llu"
 #endif
 		, __entry->wallclock, __entry->win_start, __entry->delta,
 		task_event_names[__entry->evt], __entry->cpu,
@@ -339,10 +350,12 @@ TRACE_EVENT(sched_update_task_ravg,
 		__entry->delta_m, __entry->demand,
 		__entry->sum, __entry->irqtime
 #ifdef CONFIG_SCHED_FREQ_INPUT
-		, __entry->pred_demand, __entry->cs, __entry->ps,
+		, __entry->pred_demand, __entry->rq_cs, __entry->rq_ps,
 		__entry->curr_window, __entry->prev_window,
 		__entry->nt_cs, __entry->nt_ps,
-		__entry->active_windows
+		__entry->active_windows,
+		__entry->grp_cs, __entry->grp_ps,
+		__entry->grp_nt_cs, __entry->grp_nt_ps
 #endif
 	)
 );
@@ -506,31 +519,62 @@ TRACE_EVENT(sched_update_pred_demand,
 
 TRACE_EVENT(sched_migration_update_sum,
 
-	TP_PROTO(struct rq *rq, struct task_struct *p),
+	TP_PROTO(struct task_struct *p, enum migrate_types migrate_type,
+		 struct migration_sum_data *d),
 
-	TP_ARGS(rq, p),
+	TP_ARGS(p, migrate_type, d),
 
 	TP_STRUCT__entry(
-		__field(int, cpu)
+		__field(int, tcpu)
 		__field(int, pid)
-		__field(u64, cs)
-		__field(u64, ps)
-		__field(s64, nt_cs)
-		__field(s64, nt_ps)
+		__field(enum migrate_types, migrate_type)
+		__field(s64, src_cs)
+		__field(s64, src_ps)
+		__field(s64, dst_cs)
+		__field(s64, dst_ps)
+		__field(s64, src_nt_cs)
+		__field(s64, src_nt_ps)
+		__field(s64, dst_nt_cs)
+		__field(s64, dst_nt_ps)
 	),
 
 	TP_fast_assign(
-		__entry->cpu = cpu_of(rq);
-		__entry->cs = rq->curr_runnable_sum;
-		__entry->ps = rq->prev_runnable_sum;
-		__entry->nt_cs = (s64)rq->nt_curr_runnable_sum;
-		__entry->nt_ps = (s64)rq->nt_prev_runnable_sum;
+		__entry->tcpu = task_cpu(p);
 		__entry->pid = p->pid;
+		__entry->migrate_type = migrate_type;
+		__entry->src_cs = d->src_rq ?
+					d->src_rq->curr_runnable_sum :
+					d->src_cpu_time->curr_runnable_sum;
+		__entry->src_ps = d->src_rq ?
+					d->src_rq->prev_runnable_sum :
+					d->src_cpu_time->prev_runnable_sum;
+		__entry->dst_cs = d->dst_rq ?
+					d->dst_rq->curr_runnable_sum :
+					d->dst_cpu_time->curr_runnable_sum;
+		__entry->dst_ps = d->dst_rq ?
+					d->dst_rq->prev_runnable_sum :
+					d->dst_cpu_time->prev_runnable_sum;
+		__entry->src_nt_cs = d->src_rq ?
+					d->src_rq->nt_curr_runnable_sum :
+					d->src_cpu_time->nt_curr_runnable_sum;
+		__entry->src_nt_ps = d->src_rq ?
+					d->src_rq->nt_prev_runnable_sum :
+					d->src_cpu_time->nt_prev_runnable_sum;
+		__entry->dst_nt_cs = d->dst_rq ?
+					d->dst_rq->nt_curr_runnable_sum :
+					d->dst_cpu_time->nt_curr_runnable_sum;
+		__entry->dst_nt_ps = d->dst_rq ?
+					d->dst_rq->nt_prev_runnable_sum :
+					d->dst_cpu_time->nt_prev_runnable_sum;
 	),
 
-	TP_printk("cpu %d: cs %llu ps %llu nt_cs %lld nt_ps %lld pid %d",
-		__entry->cpu, __entry->cs, __entry->ps,
-		__entry->nt_cs, __entry->nt_ps, __entry->pid)
+	TP_printk("pid %d task_cpu %d migrate_type %s src_cs %llu src_ps %llu dst_cs %lld dst_ps %lld src_nt_cs %llu src_nt_ps %llu dst_nt_cs %lld dst_nt_ps %lld",
+		__entry->pid, __entry->tcpu, migrate_type_names[__entry->migrate_type],
+		__entry->src_cs, __entry->src_ps, __entry->dst_cs, __entry->dst_ps,
+		__entry->src_nt_cs, __entry->src_nt_ps, __entry->dst_nt_cs, __entry->dst_nt_ps)
 );
 
 TRACE_EVENT(sched_get_busy,
@@ -562,15 +606,17 @@ TRACE_EVENT(sched_get_busy,
 
 TRACE_EVENT(sched_freq_alert,
 
-	TP_PROTO(int cpu, int pd_notif, u64 old_load, u64 new_load,
-		 u64 old_pred, u64 new_pred),
+	TP_PROTO(int cpu, int pd_notif, int check_groups, struct rq *rq,
+		 u64 new_load),
 
-	TP_ARGS(cpu, pd_notif, old_load, new_load, old_pred, new_pred),
+	TP_ARGS(cpu, pd_notif, check_groups, rq, new_load),
 
 	TP_STRUCT__entry(
 		__field(int, cpu)
 		__field(int, pd_notif)
-		__field(u64, old_load)
+		__field(int, check_groups)
+		__field(u64, old_busy_time)
+		__field(u64, ps)
 		__field(u64, new_load)
 		__field(u64, old_pred)
 		__field(u64, new_pred)
@@ -579,17 +625,18 @@ TRACE_EVENT(sched_freq_alert,
 	TP_fast_assign(
 		__entry->cpu = cpu;
 		__entry->pd_notif = pd_notif;
-		__entry->old_load = old_load;
+		__entry->check_groups = check_groups;
+		__entry->old_busy_time = rq->old_busy_time;
+		__entry->ps = rq->prev_runnable_sum;
 		__entry->new_load = new_load;
-		__entry->old_pred = old_pred;
-		__entry->new_pred = new_pred;
+		__entry->old_pred = rq->old_estimated_time;
+		__entry->new_pred = rq->hmp_stats.pred_demands_sum;
 	),
 
-	TP_printk("cpu %d pd_notif=%d old_load=%llu new_load=%llu "
-		"old_pred=%llu new_pred=%llu",
-		__entry->cpu, __entry->pd_notif, __entry->old_load,
-		__entry->new_load, __entry->old_pred,
-		__entry->new_pred)
+	TP_printk("cpu %d pd_notif=%d check_groups %d old_busy_time=%llu prev_sum=%lld new_load=%llu old_pred=%llu new_pred=%llu",
+		__entry->cpu, __entry->pd_notif, __entry->check_groups,
+		__entry->old_busy_time, __entry->ps, __entry->new_load,
+		__entry->old_pred, __entry->new_pred)
 );
 
 #endif /* CONFIG_SCHED_FREQ_INPUT */
File diff suppressed because it is too large
@@ -32,9 +32,8 @@
 #include <linux/task_work.h>
 #include <linux/ratelimit.h>
 
-#include <trace/events/sched.h>
-
 #include "sched.h"
+#include <trace/events/sched.h>
 
 /*
  * Targeted preemption latency for CPU-bound tasks:

@@ -4059,6 +4058,9 @@ static inline int invalid_value_freq_input(unsigned int *data)
 	if (data == &sysctl_sched_freq_account_wait_time)
 		return !(*data == 0 || *data == 1);
 
+	if (data == &sysctl_sched_freq_aggregate)
+		return !(*data == 0 || *data == 1);
+
 	return 0;
 }
 #else
@@ -7674,6 +7676,7 @@ enum fbq_type { regular, remote, all };
 				 LBF_BIG_TASK_ACTIVE_BALANCE)
 #define LBF_IGNORE_BIG_TASKS 0x100
 #define LBF_IGNORE_PREFERRED_CLUSTER_TASKS 0x200
+#define LBF_MOVED_RELATED_THREAD_GROUP_TASK 0x400
 
 struct lb_env {
 	struct sched_domain *sd;
@@ -7916,6 +7919,8 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 	deactivate_task(env->src_rq, p, 0);
 	double_lock_balance(env->src_rq, env->dst_rq);
 	set_task_cpu(p, env->dst_cpu);
+	if (rcu_access_pointer(p->grp))
+		env->flags |= LBF_MOVED_RELATED_THREAD_GROUP_TASK;
 	double_unlock_balance(env->src_rq, env->dst_rq);
 }
 
@@ -9575,10 +9580,13 @@ no_move:
 
 		/* Assumes one 'busiest' cpu that we pulled tasks from */
 		if (!same_freq_domain(this_cpu, cpu_of(busiest))) {
-			check_for_freq_change(this_rq, false);
-			check_for_freq_change(busiest, false);
+			int check_groups = !!(env.flags &
+					LBF_MOVED_RELATED_THREAD_GROUP_TASK);
+
+			check_for_freq_change(this_rq, false, check_groups);
+			check_for_freq_change(busiest, false, check_groups);
 		} else {
-			check_for_freq_change(this_rq, true);
+			check_for_freq_change(this_rq, true, false);
 		}
 	}
 	if (likely(!active_balance)) {
@@ -9876,10 +9884,12 @@ out_unlock:
 	local_irq_enable();
 
 	if (moved && !same_freq_domain(busiest_cpu, target_cpu)) {
-		check_for_freq_change(busiest_rq, false);
-		check_for_freq_change(target_rq, false);
+		int check_groups = !!(env.flags &
+				LBF_MOVED_RELATED_THREAD_GROUP_TASK);
+		check_for_freq_change(busiest_rq, false, check_groups);
+		check_for_freq_change(target_rq, false, check_groups);
 	} else if (moved) {
-		check_for_freq_change(target_rq, true);
+		check_for_freq_change(target_rq, true, false);
 	}
 
 	if (per_cpu(dbs_boost_needed, target_cpu)) {
@@ -409,6 +409,16 @@ struct related_thread_group {
 	struct sched_cluster *preferred_cluster;
 	struct rcu_head rcu;
 	u64 last_update;
+#ifdef CONFIG_SCHED_FREQ_INPUT
+	struct group_cpu_time __percpu *cpu_time;	/* one per cluster */
+#endif
 };
 
+struct migration_sum_data {
+	struct rq *src_rq, *dst_rq;
+#ifdef CONFIG_SCHED_FREQ_INPUT
+	struct group_cpu_time *src_cpu_time, *dst_cpu_time;
+#endif
+};
+
 extern struct list_head cluster_head;
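migration_sum_data makes each side of a move either a bare rq (ungrouped accounting) or a group's per-cluster group_cpu_time, which is why the sched_migration_update_sum tracepoint earlier picks src_rq/dst_rq when set and falls back to src_cpu_time/dst_cpu_time otherwise. A hypothetical helper (not in the patch, kernel-style fragment) shows the selection pattern the tracepoint repeats eight times:

/* Hypothetical convenience helper, not part of the patch: read one
 * counter from whichever side is populated.  Under the migrate_types
 * convention, exactly one of rq/cpu_time is expected per side. */
static u64 side_curr_runnable_sum(struct rq *rq,
				  struct group_cpu_time *cpu_time)
{
	return rq ? rq->curr_runnable_sum : cpu_time->curr_runnable_sum;
}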
@@ -741,7 +751,7 @@ struct rq {
 	struct task_struct *ed_task;
 
 #ifdef CONFIG_SCHED_FREQ_INPUT
-	unsigned int old_busy_time;
+	u64 old_busy_time, old_busy_time_group;
 	int notifier_sent;
 	u64 old_estimated_time;
 #endif
@@ -1337,7 +1347,16 @@ static inline int update_preferred_cluster(struct related_thread_group *grp,
 #ifdef CONFIG_SCHED_FREQ_INPUT
 #define PRED_DEMAND_DELTA ((s64)new_pred_demand - p->ravg.pred_demand)
 
-extern void check_for_freq_change(struct rq *rq, bool check_cra);
+extern void
+check_for_freq_change(struct rq *rq, bool check_pred, bool check_groups);
+
+struct group_cpu_time {
+	u64 curr_runnable_sum;
+	u64 prev_runnable_sum;
+	u64 nt_curr_runnable_sum;
+	u64 nt_prev_runnable_sum;
+	u64 window_start;
+};
 
 /* Is frequency of two cpus synchronized with each other? */
 static inline int same_freq_domain(int src_cpu, int dst_cpu)
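group_cpu_time mirrors the per-rq window counters: a current and a previous window sum, new-task ("nt") variants of each, and the start of the current window. A minimal sketch of the rollover these fields imply, assuming it follows the usual windowed-stats scheme (the patch's real update path is in the suppressed file):

/* Assumed rollover, modeled on the usual windowed-accounting scheme;
 * illustrative kernel-style fragment only. */
static void group_roll_window(struct group_cpu_time *t, u64 now,
			      u64 window_size)
{
	while (now - t->window_start >= window_size) {
		/* the finished window becomes the "previous" window */
		t->prev_runnable_sum = t->curr_runnable_sum;
		t->nt_prev_runnable_sum = t->nt_curr_runnable_sum;
		t->curr_runnable_sum = 0;
		t->nt_curr_runnable_sum = 0;
		t->window_start += window_size;
	}
}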
@@ -1355,7 +1374,8 @@ static inline int same_freq_domain(int src_cpu, int dst_cpu)
 #define sched_migration_fixup 0
 #define PRED_DEMAND_DELTA (0)
 
-static inline void check_for_freq_change(struct rq *rq, bool check_cra) { }
+static inline void
+check_for_freq_change(struct rq *rq, bool check_pred, bool check_groups) { }
 
 static inline int same_freq_domain(int src_cpu, int dst_cpu)
 {
@@ -18,9 +18,9 @@
 #include <linux/hrtimer.h>
 #include <linux/sched.h>
 #include <linux/math64.h>
-#include <trace/events/sched.h>
-
 #include "sched.h"
+#include <trace/events/sched.h>
 
 static DEFINE_PER_CPU(u64, nr_prod_sum);
 static DEFINE_PER_CPU(u64, last_time);
@@ -472,6 +472,13 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,
 	},
+	{
+		.procname	= "sched_freq_aggregate",
+		.data		= &sysctl_sched_freq_aggregate,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_window_update_handler,
+	},
 #endif
 	{
 		.procname	= "sched_boost",
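With the sysctl wired up, aggregation can be toggled at runtime through /proc/sys/kernel/sched_freq_aggregate; the invalid_value_freq_input() change above suggests sched_window_update_handler rejects anything other than 0 or 1.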