Merge "sched: Optimize the next top task search logic upon task migration"
commit 268d4e5d68

9 changed files with 692 additions and 133 deletions
@@ -356,7 +356,7 @@ extern int lockdep_tasklist_lock_is_held(void);
extern void sched_init(void);
extern void sched_init_smp(void);
extern asmlinkage void schedule_tail(struct task_struct *prev);
extern void init_idle(struct task_struct *idle, int cpu);
extern void init_idle(struct task_struct *idle, int cpu, bool hotplug);
extern void init_idle_bootup_task(struct task_struct *idle);

extern cpumask_var_t cpu_isolated_map;
@@ -1332,11 +1332,15 @@ struct ravg {
 * sysctl_sched_ravg_hist_size windows. 'demand' could drive frequency
 * demand for tasks.
 *
 * 'curr_window' represents task's contribution to cpu busy time
 * statistics (rq->curr_runnable_sum) in current window
 * 'curr_window_cpu' represents task's contribution to cpu busy time on
 * various CPUs in the current window
 *
 * 'prev_window' represents task's contribution to cpu busy time
 * statistics (rq->prev_runnable_sum) in previous window
 * 'prev_window_cpu' represents task's contribution to cpu busy time on
 * various CPUs in the previous window
 *
 * 'curr_window' represents the sum of all entries in curr_window_cpu
 *
 * 'prev_window' represents the sum of all entries in prev_window_cpu
 *
 * 'pred_demand' represents task's current predicted cpu busy time
 *

@@ -1346,6 +1350,7 @@ struct ravg {
    u64 mark_start;
    u32 sum, demand;
    u32 sum_history[RAVG_HIST_SIZE_MAX];
    u32 *curr_window_cpu, *prev_window_cpu;
    u32 curr_window, prev_window;
    u16 active_windows;
    u32 pred_demand;
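The comment above documents an invariant introduced by this series: a task's curr_window (and prev_window) is the sum of its per-CPU contributions. Below is a minimal, illustrative sanity-check sketch built on that invariant; it is not part of the patch, and the helper name is an assumption.

```c
/*
 * Illustrative only: verify that the per-task window totals match the
 * per-CPU breakdown documented in struct ravg. Assumes the caller holds
 * the relevant rq lock so the fields cannot change underneath us.
 */
static bool ravg_windows_consistent(struct task_struct *p)
{
	u32 curr_sum = 0, prev_sum = 0;
	int i;

	for (i = 0; i < nr_cpu_ids; i++) {
		curr_sum += p->ravg.curr_window_cpu[i];
		prev_sum += p->ravg.prev_window_cpu[i];
	}

	return curr_sum == p->ravg.curr_window &&
	       prev_sum == p->ravg.prev_window;
}
```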
@@ -9,6 +9,9 @@
#define DECLARE_BITMAP(name,bits) \
    unsigned long name[BITS_TO_LONGS(bits)]

#define DECLARE_BITMAP_ARRAY(name,nr,bits) \
    unsigned long name[nr][BITS_TO_LONGS(bits)]

typedef __u32 __kernel_dev_t;

typedef __kernel_fd_set fd_set;
@@ -260,6 +260,30 @@ TRACE_EVENT(sched_set_boost,
    TP_printk("ref_count=%d", __entry->ref_count)
);

#if defined(CREATE_TRACE_POINTS) && defined(CONFIG_SCHED_HMP)
static inline void __window_data(u32 *dst, u32 *src)
{
    if (src)
        memcpy(dst, src, nr_cpu_ids * sizeof(u32));
    else
        memset(dst, 0, nr_cpu_ids * sizeof(u32));
}

struct trace_seq;
const char *__window_print(struct trace_seq *p, const u32 *buf, int buf_len)
{
    int i;
    const char *ret = p->buffer + seq_buf_used(&p->seq);

    for (i = 0; i < buf_len; i++)
        trace_seq_printf(p, "%u ", buf[i]);

    trace_seq_putc(p, 0);

    return ret;
}
#endif

TRACE_EVENT(sched_update_task_ravg,

    TP_PROTO(struct task_struct *p, struct rq *rq, enum task_event evt,

@@ -288,13 +312,17 @@ TRACE_EVENT(sched_update_task_ravg,
        __field( u64, rq_ps )
        __field( u64, grp_cs )
        __field( u64, grp_ps )
        __field( u64, grp_nt_cs )
        __field( u64, grp_nt_ps )
        __field( u64, grp_nt_cs )
        __field( u64, grp_nt_ps )
        __field( u32, curr_window )
        __field( u32, prev_window )
        __dynamic_array(u32, curr_sum, nr_cpu_ids )
        __dynamic_array(u32, prev_sum, nr_cpu_ids )
        __field( u64, nt_cs )
        __field( u64, nt_ps )
        __field( u32, active_windows )
        __field( u8, curr_top )
        __field( u8, prev_top )
    ),

    TP_fast_assign(

@@ -321,22 +349,30 @@ TRACE_EVENT(sched_update_task_ravg,
        __entry->grp_nt_ps = cpu_time ? cpu_time->nt_prev_runnable_sum : 0;
        __entry->curr_window = p->ravg.curr_window;
        __entry->prev_window = p->ravg.prev_window;
        __window_data(__get_dynamic_array(curr_sum), p->ravg.curr_window_cpu);
        __window_data(__get_dynamic_array(prev_sum), p->ravg.prev_window_cpu);
        __entry->nt_cs = rq->nt_curr_runnable_sum;
        __entry->nt_ps = rq->nt_prev_runnable_sum;
        __entry->active_windows = p->ravg.active_windows;
        __entry->curr_top = rq->curr_top;
        __entry->prev_top = rq->prev_top;
    ),

    TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu pred_demand %u rq_cs %llu rq_ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u grp_cs %lld grp_ps %lld, grp_nt_cs %llu, grp_nt_ps: %llu"
        , __entry->wallclock, __entry->win_start, __entry->delta,
    TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu pred_demand %u rq_cs %llu rq_ps %llu cur_window %u (%s) prev_window %u (%s) nt_cs %llu nt_ps %llu active_wins %u grp_cs %lld grp_ps %lld, grp_nt_cs %llu, grp_nt_ps: %llu curr_top %u prev_top %u",
        __entry->wallclock, __entry->win_start, __entry->delta,
        task_event_names[__entry->evt], __entry->cpu,
        __entry->cur_freq, __entry->cur_pid,
        __entry->pid, __entry->comm, __entry->mark_start,
        __entry->delta_m, __entry->demand,
        __entry->sum, __entry->irqtime, __entry->pred_demand,
        __entry->rq_cs, __entry->rq_ps, __entry->curr_window,
        __entry->prev_window, __entry->nt_cs, __entry->nt_ps,
        __window_print(p, __get_dynamic_array(curr_sum), nr_cpu_ids),
        __entry->prev_window,
        __window_print(p, __get_dynamic_array(prev_sum), nr_cpu_ids),
        __entry->nt_cs, __entry->nt_ps,
        __entry->active_windows, __entry->grp_cs,
        __entry->grp_ps, __entry->grp_nt_cs, __entry->grp_nt_ps)
        __entry->grp_ps, __entry->grp_nt_cs, __entry->grp_nt_ps,
        __entry->curr_top, __entry->prev_top)
);

TRACE_EVENT(sched_get_task_cpu_cycles,
@@ -1684,7 +1684,7 @@ struct task_struct *fork_idle(int cpu)
    task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0);
    if (!IS_ERR(task)) {
        init_idle_pids(task->pids);
        init_idle(task, cpu);
        init_idle(task, cpu, false);
    }

    return task;
@@ -2255,13 +2255,13 @@ void __dl_clear_params(struct task_struct *p)
void sched_exit(struct task_struct *p)
{
    unsigned long flags;
    int cpu = get_cpu();
    struct rq *rq = cpu_rq(cpu);
    struct rq *rq;
    u64 wallclock;

    sched_set_group_id(p, 0);

    raw_spin_lock_irqsave(&rq->lock, flags);
    rq = task_rq_lock(p, &flags);

    /* rq->curr == p */
    wallclock = sched_ktime_clock();
    update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);

@@ -2269,11 +2269,13 @@ void sched_exit(struct task_struct *p)
    reset_task_stats(p);
    p->ravg.mark_start = wallclock;
    p->ravg.sum_history[0] = EXITING_TASK_MARKER;

    kfree(p->ravg.curr_window_cpu);
    kfree(p->ravg.prev_window_cpu);

    enqueue_task(rq, p, 0);
    clear_ed_task(p, rq);
    raw_spin_unlock_irqrestore(&rq->lock, flags);

    put_cpu();
    task_rq_unlock(rq, p, &flags);
}
#endif /* CONFIG_SCHED_HMP */

@@ -2377,6 +2379,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
    int cpu = get_cpu();

    __sched_fork(clone_flags, p);
    init_new_task_load(p, false);
    /*
     * We mark the process as running here. This guarantees that
     * nobody will actually run it, and a signal or other external

@@ -2562,7 +2565,6 @@ void wake_up_new_task(struct task_struct *p)
    struct rq *rq;

    raw_spin_lock_irqsave(&p->pi_lock, flags);
    init_new_task_load(p);
    add_new_task_to_grp(p);
    /* Initialize new task's runnable average */
    init_entity_runnable_average(&p->se);

@@ -5210,17 +5212,21 @@ void init_idle_bootup_task(struct task_struct *idle)
 * init_idle - set up an idle thread for a given CPU
 * @idle: task in question
 * @cpu: cpu the idle task belongs to
 * @cpu_up: differentiate between initial boot vs hotplug
 *
 * NOTE: this function does not set the idle thread's NEED_RESCHED
 * flag, to make booting more robust.
 */
void init_idle(struct task_struct *idle, int cpu)
void init_idle(struct task_struct *idle, int cpu, bool cpu_up)
{
    struct rq *rq = cpu_rq(cpu);
    unsigned long flags;

    __sched_fork(0, idle);

    if (!cpu_up)
        init_new_task_load(idle, true);

    raw_spin_lock_irqsave(&idle->pi_lock, flags);
    raw_spin_lock(&rq->lock);

@@ -8009,6 +8015,22 @@ void __init sched_init(void)
        rq->old_estimated_time = 0;
        rq->old_busy_time_group = 0;
        rq->hmp_stats.pred_demands_sum = 0;
        rq->curr_table = 0;
        rq->prev_top = 0;
        rq->curr_top = 0;

        for (j = 0; j < NUM_TRACKED_WINDOWS; j++) {
            memset(&rq->load_subs[j], 0,
                    sizeof(struct load_subtractions));

            rq->top_tasks[j] = kcalloc(NUM_LOAD_INDICES,
                    sizeof(u8), GFP_NOWAIT);

            /* No other choice */
            BUG_ON(!rq->top_tasks[j]);

            clear_top_tasks_bitmap(rq->top_tasks_bitmap[j]);
        }
#endif
        rq->max_idle_balance_cost = sysctl_sched_migration_cost;

@@ -8051,7 +8073,7 @@ void __init sched_init(void)
     * but because we are the idle thread, we just pick up running again
     * when this runqueue becomes "idle".
     */
    init_idle(current, smp_processor_id());
    init_idle(current, smp_processor_id(), false);

    calc_load_update = jiffies + LOAD_FREQ;
@@ -418,6 +418,7 @@ static void sched_debug_header(struct seq_file *m)
    P(min_capacity);
    P(max_capacity);
    P(sched_ravg_window);
    P(sched_load_granule);
#endif
#undef PN
#undef P
@@ -590,6 +590,7 @@ static struct sched_cluster *alloc_new_cluster(const struct cpumask *cpus)
    cluster->dstate_wakeup_latency = 0;
    cluster->freq_init_done = false;

    raw_spin_lock_init(&cluster->load_lock);
    cluster->cpus = *cpus;
    cluster->efficiency = arch_get_cpu_efficiency(cpumask_first(cpus));

@@ -647,6 +648,7 @@ void init_clusters(void)
{
    bitmap_clear(all_cluster_ids, 0, NR_CPUS);
    init_cluster.cpus = *cpu_possible_mask;
    raw_spin_lock_init(&init_cluster.load_lock);
    INIT_LIST_HEAD(&cluster_head);
}

@@ -823,15 +825,15 @@ unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */
unsigned int
min_max_possible_capacity = 1024; /* min(rq->max_possible_capacity) */

/* Window size (in ns) */
__read_mostly unsigned int sched_ravg_window = 10000000;

/* Min window size (in ns) = 10ms */
#define MIN_SCHED_RAVG_WINDOW 10000000

/* Max window size (in ns) = 1s */
#define MAX_SCHED_RAVG_WINDOW 1000000000

/* Window size (in ns) */
__read_mostly unsigned int sched_ravg_window = MIN_SCHED_RAVG_WINDOW;

/* Temporarily disable window-stats activity on all cpus */
unsigned int __read_mostly sched_disable_window_stats;

@@ -850,6 +852,21 @@ static DEFINE_RWLOCK(related_thread_group_lock);
#define for_each_related_thread_group(grp) \
    list_for_each_entry(grp, &related_thread_groups, list)

/*
 * Task load is categorized into buckets for the purpose of top task tracking.
 * The entire range of load from 0 to sched_ravg_window needs to be covered
 * in NUM_LOAD_INDICES number of buckets. Therefore the size of each bucket
 * is given by sched_ravg_window / NUM_LOAD_INDICES. Since the default value
 * of sched_ravg_window is MIN_SCHED_RAVG_WINDOW, use that to compute
 * sched_load_granule.
 */
__read_mostly unsigned int sched_load_granule =
        MIN_SCHED_RAVG_WINDOW / NUM_LOAD_INDICES;

/* Size of bitmaps maintained to track top tasks */
static const unsigned int top_tasks_bitmap_size =
        BITS_TO_LONGS(NUM_LOAD_INDICES + 1) * sizeof(unsigned long);

/*
 * Demand aggregation for frequency purpose:
 *
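As a concrete illustration of the bucket sizing described in the comment above: with the default 10 ms window and 1000 indices, each bucket spans 10,000 ns of load. The standalone sketch below (with the defaults hard-coded as assumptions, outside the kernel) mirrors that arithmetic and the bucket lookup the patch adds later as load_to_index().

```c
#include <stdio.h>

/* Assumed defaults from the patch: 10 ms window, 1000 load buckets. */
#define MIN_SCHED_RAVG_WINDOW	10000000u	/* ns */
#define NUM_LOAD_INDICES	1000u

static unsigned int load_to_index(unsigned int load, unsigned int granule)
{
	if (load < granule)
		return 0;
	if (load >= MIN_SCHED_RAVG_WINDOW)
		return NUM_LOAD_INDICES - 1;
	return load / granule;
}

int main(void)
{
	unsigned int granule = MIN_SCHED_RAVG_WINDOW / NUM_LOAD_INDICES;

	printf("granule = %u ns\n", granule);		/* 10000 ns */
	printf("2.5 ms load -> bucket %u\n",
	       load_to_index(2500000u, granule));	/* bucket 250 */
	return 0;
}
```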
@@ -1505,7 +1522,7 @@ static inline int invalid_value(unsigned int *data)

/*
 * Handle "atomic" update of sysctl_sched_window_stats_policy,
 * sysctl_sched_ravg_hist_size and sched_freq_legacy_mode variables.
 * sysctl_sched_ravg_hist_size variables.
 */
int sched_window_update_handler(struct ctl_table *table, int write,
        void __user *buffer, size_t *lenp,

@@ -1611,7 +1628,7 @@ unsigned int cpu_temp(int cpu)
        return 0;
}

void init_new_task_load(struct task_struct *p)
void init_new_task_load(struct task_struct *p, bool idle_task)
{
    int i;
    u32 init_load_windows = sched_init_task_load_windows;

@@ -1623,6 +1640,15 @@ void init_new_task_load(struct task_struct *p)
    memset(&p->ravg, 0, sizeof(struct ravg));
    p->cpu_cycles = 0;

    p->ravg.curr_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_ATOMIC);
    p->ravg.prev_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_ATOMIC);

    /* Don't have much choice. CPU frequency would be bogus */
    BUG_ON(!p->ravg.curr_window_cpu || !p->ravg.prev_window_cpu);

    if (idle_task)
        return;

    if (init_load_pct)
        init_load_windows = div64_u64((u64)init_load_pct *
                (u64)sched_ravg_window, 100);
@@ -2161,6 +2187,174 @@ void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event)
    p->ravg.pred_demand = new;
}

void clear_top_tasks_bitmap(unsigned long *bitmap)
{
    memset(bitmap, 0, top_tasks_bitmap_size);
    __set_bit(NUM_LOAD_INDICES, bitmap);
}

/*
 * Special case the last index and provide a fast path for index = 0.
 * Note that sched_load_granule can change underneath us if we are not
 * holding any runqueue locks while calling the two functions below.
 */
static u32 __maybe_unused top_task_load(struct rq *rq)
{
    int index = rq->prev_top;
    u8 prev = 1 - rq->curr_table;

    if (!index) {
        int msb = NUM_LOAD_INDICES - 1;

        if (!test_bit(msb, rq->top_tasks_bitmap[prev]))
            return 0;
        else
            return sched_load_granule;
    } else if (index == NUM_LOAD_INDICES - 1) {
        return sched_ravg_window;
    } else {
        return (index + 1) * sched_load_granule;
    }
}

static int load_to_index(u32 load)
{
    if (load < sched_load_granule)
        return 0;
    else if (load >= sched_ravg_window)
        return NUM_LOAD_INDICES - 1;
    else
        return load / sched_load_granule;
}

static void update_top_tasks(struct task_struct *p, struct rq *rq,
        u32 old_curr_window, int new_window, bool full_window)
{
    u8 curr = rq->curr_table;
    u8 prev = 1 - curr;
    u8 *curr_table = rq->top_tasks[curr];
    u8 *prev_table = rq->top_tasks[prev];
    int old_index, new_index, update_index;
    u32 curr_window = p->ravg.curr_window;
    u32 prev_window = p->ravg.prev_window;
    bool zero_index_update;

    if (old_curr_window == curr_window && !new_window)
        return;

    old_index = load_to_index(old_curr_window);
    new_index = load_to_index(curr_window);

    if (!new_window) {
        zero_index_update = !old_curr_window && curr_window;
        if (old_index != new_index || zero_index_update) {
            if (old_curr_window)
                curr_table[old_index] -= 1;
            if (curr_window)
                curr_table[new_index] += 1;
            if (new_index > rq->curr_top)
                rq->curr_top = new_index;
        }

        if (!curr_table[old_index])
            __clear_bit(NUM_LOAD_INDICES - old_index - 1,
                rq->top_tasks_bitmap[curr]);

        if (curr_table[new_index] == 1)
            __set_bit(NUM_LOAD_INDICES - new_index - 1,
                rq->top_tasks_bitmap[curr]);

        return;
    }

    /*
     * The window has rolled over for this task. By the time we get
     * here, the curr/prev swaps would have already occurred. So we need
     * to use prev_window for the new index.
     */
    update_index = load_to_index(prev_window);

    if (full_window) {
        /*
         * Two cases here. Either 'p' ran for the entire window or
         * it didn't run at all. In either case there is no entry
         * in the prev table. If 'p' ran the entire window, we just
         * need to create a new entry in the prev table. In this case
         * update_index will correspond to sched_ravg_window
         * so we can unconditionally update the top index.
         */
        if (prev_window) {
            prev_table[update_index] += 1;
            rq->prev_top = update_index;
        }

        if (prev_table[update_index] == 1)
            __set_bit(NUM_LOAD_INDICES - update_index - 1,
                rq->top_tasks_bitmap[prev]);
    } else {
        zero_index_update = !old_curr_window && prev_window;
        if (old_index != update_index || zero_index_update) {
            if (old_curr_window)
                prev_table[old_index] -= 1;

            prev_table[update_index] += 1;

            if (update_index > rq->prev_top)
                rq->prev_top = update_index;

            if (!prev_table[old_index])
                __clear_bit(NUM_LOAD_INDICES - old_index - 1,
                        rq->top_tasks_bitmap[prev]);

            if (prev_table[update_index] == 1)
                __set_bit(NUM_LOAD_INDICES - update_index - 1,
                        rq->top_tasks_bitmap[prev]);
        }
    }

    if (curr_window) {
        curr_table[new_index] += 1;

        if (new_index > rq->curr_top)
            rq->curr_top = new_index;

        if (curr_table[new_index] == 1)
            __set_bit(NUM_LOAD_INDICES - new_index - 1,
                rq->top_tasks_bitmap[curr]);
    }
}

static inline void clear_top_tasks_table(u8 *table)
{
    memset(table, 0, NUM_LOAD_INDICES * sizeof(u8));
}

static u32 empty_windows[NR_CPUS];

static void rollover_task_window(struct task_struct *p, bool full_window)
{
    u32 *curr_cpu_windows = empty_windows;
    u32 curr_window;
    int i;

    /* Rollover the sum */
    curr_window = 0;

    if (!full_window) {
        curr_window = p->ravg.curr_window;
        curr_cpu_windows = p->ravg.curr_window_cpu;
    }

    p->ravg.prev_window = curr_window;
    p->ravg.curr_window = 0;

    /* Roll over individual CPU contributions */
    for (i = 0; i < nr_cpu_ids; i++) {
        p->ravg.prev_window_cpu[i] = curr_cpu_windows[i];
        p->ravg.curr_window_cpu[i] = 0;
    }
}

/*
 * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
 */
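The top-task bitmaps above set bit NUM_LOAD_INDICES - index - 1, so higher-load buckets sit at lower bit positions and a single forward scan finds the heaviest occupied bucket first. The standalone sketch below is a hedged, conceptual model of that reverse-indexed search (plain arrays and hypothetical helper names, not the patch's exact bitmap API); the point is why the scan can start at the old top's position rather than bit 0.

```c
#include <stdbool.h>

#define NUM_LOAD_INDICES 1000

/*
 * Reverse-indexed "bitmap": bucket 'index' lives at slot
 * NUM_LOAD_INDICES - index - 1, so the first set slot found by a
 * forward scan corresponds to the highest-load occupied bucket.
 */
static bool bucket_bits[NUM_LOAD_INDICES];

static void mark_bucket(int index)
{
	bucket_bits[NUM_LOAD_INDICES - index - 1] = true;
}

/*
 * Conceptual counterpart of the next-top search: no bucket above
 * prev_top_index can be occupied, so the scan can safely begin at its
 * position instead of rescanning from bit 0. Returns 0 if nothing is set.
 */
static int find_top_bucket(int prev_top_index)
{
	int bit;

	for (bit = NUM_LOAD_INDICES - 1 - prev_top_index;
	     bit < NUM_LOAD_INDICES; bit++)
		if (bucket_bits[bit])
			return NUM_LOAD_INDICES - 1 - bit;

	return 0;
}
```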
@@ -2181,6 +2375,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
    int prev_sum_reset = 0;
    bool new_task;
    struct related_thread_group *grp;
    int cpu = rq->cpu;
    u32 old_curr_window;

    new_window = mark_start < window_start;
    if (new_window) {
@@ -2240,57 +2436,43 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
     * Handle per-task window rollover. We don't care about the idle
     * task or exiting tasks.
     */
    if (new_window && !is_idle_task(p) && !exiting_task(p)) {
        u32 curr_window = 0;
    if (!is_idle_task(p) && !exiting_task(p)) {
        old_curr_window = p->ravg.curr_window;

        if (!full_window)
            curr_window = p->ravg.curr_window;

        p->ravg.prev_window = curr_window;
        p->ravg.curr_window = 0;
        if (new_window)
            rollover_task_window(p, full_window);
    }

    if (flip_counters) {
        u64 curr_sum = *curr_runnable_sum;
        u64 nt_curr_sum = *nt_curr_runnable_sum;
        u8 curr_table = rq->curr_table;
        u8 prev_table = 1 - curr_table;
        int curr_top = rq->curr_top;

        if (prev_sum_reset)
            clear_top_tasks_table(rq->top_tasks[prev_table]);
        clear_top_tasks_bitmap(rq->top_tasks_bitmap[prev_table]);

        if (prev_sum_reset) {
            curr_sum = nt_curr_sum = 0;
            curr_top = 0;
            clear_top_tasks_table(rq->top_tasks[curr_table]);
            clear_top_tasks_bitmap(
                    rq->top_tasks_bitmap[curr_table]);
        }

        *prev_runnable_sum = curr_sum;
        *nt_prev_runnable_sum = nt_curr_sum;

        *curr_runnable_sum = 0;
        *nt_curr_runnable_sum = 0;
        rq->curr_table = prev_table;
        rq->prev_top = curr_top;
        rq->curr_top = 0;
    }

    if (!account_busy_for_cpu_time(rq, p, irqtime, event)) {
        /*
         * account_busy_for_cpu_time() = 0, so no update to the
         * task's current window needs to be made. This could be
         * for example
         *
         *   - a wakeup event on a task within the current
         *     window (!new_window below, no action required),
         *   - switching to a new task from idle (PICK_NEXT_TASK)
         *     in a new window where irqtime is 0 and we aren't
         *     waiting on IO
         */

        if (!new_window)
            return;

        /*
         * A new window has started. The RQ demand must be rolled
         * over if p is the current task.
         */
        if (p_is_curr_task) {
            /* p is idle task */
            BUG_ON(p != rq->idle);
        }

        return;
    }
    if (!account_busy_for_cpu_time(rq, p, irqtime, event))
        goto done;

    if (!new_window) {
        /*
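When the window rolls over ('flip_counters' above), the runqueue does not copy its top-task table; it flips rq->curr_table so the old current table becomes the previous one, carries curr_top over to prev_top, and recycles the stale buffer for the new window. The following standalone sketch models that double-buffer flip with a hypothetical struct; it is an illustration of the idea, not the patch's code.

```c
#include <string.h>

#define NUM_LOAD_INDICES 1000

struct toy_top_tasks {
	unsigned char table[2][NUM_LOAD_INDICES];
	unsigned char curr;		/* index of the "current" buffer */
	int curr_top, prev_top;
};

/*
 * Window rollover: clear the stale previous buffer, make it the new
 * current one, and let the old current buffer serve as "previous".
 */
static void flip_top_tasks(struct toy_top_tasks *tt)
{
	unsigned char stale = 1 - tt->curr;

	memset(tt->table[stale], 0, sizeof(tt->table[stale]));
	tt->curr = stale;
	tt->prev_top = tt->curr_top;
	tt->curr_top = 0;
}
```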
@@ -2310,10 +2492,12 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
        if (new_task)
            *nt_curr_runnable_sum += delta;

        if (!is_idle_task(p) && !exiting_task(p))
        if (!is_idle_task(p) && !exiting_task(p)) {
            p->ravg.curr_window += delta;
            p->ravg.curr_window_cpu[cpu] += delta;
        }

        return;
        goto done;
    }

    if (!p_is_curr_task) {

@@ -2336,8 +2520,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
             * contribution to previous completed window.
             */
            delta = scale_exec_time(window_start - mark_start, rq);
            if (!exiting_task(p))
            if (!exiting_task(p)) {
                p->ravg.prev_window += delta;
                p->ravg.prev_window_cpu[cpu] += delta;
            }
        } else {
            /*
             * Since at least one full window has elapsed,

@@ -2345,8 +2531,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
             * full window (window_size).
             */
            delta = scale_exec_time(window_size, rq);
            if (!exiting_task(p))
            if (!exiting_task(p)) {
                p->ravg.prev_window = delta;
                p->ravg.prev_window_cpu[cpu] = delta;
            }
        }

        *prev_runnable_sum += delta;

@@ -2359,10 +2547,12 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
        if (new_task)
            *nt_curr_runnable_sum += delta;

        if (!exiting_task(p))
        if (!exiting_task(p)) {
            p->ravg.curr_window = delta;
            p->ravg.curr_window_cpu[cpu] = delta;
        }

        return;
        goto done;
    }

    if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq)) {

@@ -2386,8 +2576,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
             * contribution to previous completed window.
             */
            delta = scale_exec_time(window_start - mark_start, rq);
            if (!is_idle_task(p) && !exiting_task(p))
            if (!is_idle_task(p) && !exiting_task(p)) {
                p->ravg.prev_window += delta;
                p->ravg.prev_window_cpu[cpu] += delta;
            }
        } else {
            /*
             * Since at least one full window has elapsed,

@@ -2395,8 +2587,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
             * full window (window_size).
             */
            delta = scale_exec_time(window_size, rq);
            if (!is_idle_task(p) && !exiting_task(p))
            if (!is_idle_task(p) && !exiting_task(p)) {
                p->ravg.prev_window = delta;
                p->ravg.prev_window_cpu[cpu] = delta;
            }
        }

        /*

@@ -2413,10 +2607,12 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
        if (new_task)
            *nt_curr_runnable_sum += delta;

        if (!is_idle_task(p) && !exiting_task(p))
        if (!is_idle_task(p) && !exiting_task(p)) {
            p->ravg.curr_window = delta;
            p->ravg.curr_window_cpu[cpu] = delta;
        }

        return;
        goto done;
    }

    if (irqtime) {

@@ -2461,7 +2657,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
        return;
    }

    BUG();
done:
    if (!is_idle_task(p) && !exiting_task(p))
        update_top_tasks(p, rq, old_curr_window,
                    new_window, full_window);
}

static inline u32 predict_and_update_buckets(struct rq *rq,
@@ -2829,11 +3028,23 @@ void sched_account_irqstart(int cpu, struct task_struct *curr, u64 wallclock)
void reset_task_stats(struct task_struct *p)
{
    u32 sum = 0;
    u32 *curr_window_ptr = NULL;
    u32 *prev_window_ptr = NULL;

    if (exiting_task(p))
    if (exiting_task(p)) {
        sum = EXITING_TASK_MARKER;
    } else {
        curr_window_ptr = p->ravg.curr_window_cpu;
        prev_window_ptr = p->ravg.prev_window_cpu;
        memset(curr_window_ptr, 0, sizeof(u32) * nr_cpu_ids);
        memset(prev_window_ptr, 0, sizeof(u32) * nr_cpu_ids);
    }

    memset(&p->ravg, 0, sizeof(struct ravg));

    p->ravg.curr_window_cpu = curr_window_ptr;
    p->ravg.prev_window_cpu = prev_window_ptr;

    /* Retain EXITING_TASK marker */
    p->ravg.sum_history[0] = sum;
}

@@ -2889,7 +3100,9 @@ static void reset_all_task_stats(void)

    read_lock(&tasklist_lock);
    do_each_thread(g, p) {
        raw_spin_lock(&p->pi_lock);
        reset_task_stats(p);
        raw_spin_unlock(&p->pi_lock);
    } while_each_thread(g, p);
    read_unlock(&tasklist_lock);
}

@@ -2934,7 +3147,7 @@ const char *sched_window_reset_reasons[] = {
/* Called with IRQs enabled */
void reset_all_window_stats(u64 window_start, unsigned int window_size)
{
    int cpu;
    int cpu, i;
    unsigned long flags;
    u64 start_ts = sched_ktime_clock();
    int reason = WINDOW_CHANGE;

@@ -2968,6 +3181,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
    if (window_size) {
        sched_ravg_window = window_size * TICK_NSEC;
        set_hmp_defaults();
        sched_load_granule = sched_ravg_window / NUM_LOAD_INDICES;
    }

    enable_window_stats();

@@ -2979,6 +3193,16 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
        rq->window_start = window_start;
        rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
        rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
        for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
            memset(&rq->load_subs[i], 0,
                    sizeof(struct load_subtractions));
            clear_top_tasks_table(rq->top_tasks[i]);
            clear_top_tasks_bitmap(rq->top_tasks_bitmap[i]);
        }

        rq->curr_table = 0;
        rq->curr_top = 0;
        rq->prev_top = 0;
        reset_cpu_hmp_stats(cpu, 1);
    }
@@ -3011,6 +3235,39 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
            sched_ktime_clock() - start_ts, reason, old, new);
}

/*
 * In this function we match the accumulated subtractions with the current
 * and previous windows we are operating with. Ignore any entries where
 * the window start in the load_subtraction struct does not match either
 * the current or the previous window. This could happen whenever CPUs
 * become idle or busy with interrupts disabled for an extended period.
 */
static inline void account_load_subtractions(struct rq *rq)
{
    u64 ws = rq->window_start;
    u64 prev_ws = ws - sched_ravg_window;
    struct load_subtractions *ls = rq->load_subs;
    int i;

    for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
        if (ls[i].window_start == ws) {
            rq->curr_runnable_sum -= ls[i].subs;
            rq->nt_curr_runnable_sum -= ls[i].new_subs;
        } else if (ls[i].window_start == prev_ws) {
            rq->prev_runnable_sum -= ls[i].subs;
            rq->nt_prev_runnable_sum -= ls[i].new_subs;
        }

        ls[i].subs = 0;
        ls[i].new_subs = 0;
    }

    BUG_ON((s64)rq->prev_runnable_sum < 0);
    BUG_ON((s64)rq->curr_runnable_sum < 0);
    BUG_ON((s64)rq->nt_prev_runnable_sum < 0);
    BUG_ON((s64)rq->nt_curr_runnable_sum < 0);
}

static inline void
sync_window_start(struct rq *rq, struct group_cpu_time *cpu_time);
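account_load_subtractions() applies subtractions that were queued against a remote runqueue instead of being applied immediately, keyed by the window they belong to; entries whose window_start matches neither the current nor the previous window are simply dropped. The sketch below is a minimal, standalone model of that deferred, window-keyed accumulator (two tracked slots, illustrative names), not the kernel implementation.

```c
#include <stdint.h>

#define NUM_TRACKED_WINDOWS 2

struct deferred_sub {
	uint64_t window_start;
	uint64_t subs;
};

/*
 * Queue a subtraction against whichever slot tracks 'ws', evicting the
 * oldest slot if neither matches (mirrors get_subtraction_index()).
 */
static void queue_sub(struct deferred_sub *ls, uint64_t ws, uint64_t load)
{
	int i, oldest = 0;

	for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
		if (ls[i].window_start == ws)
			break;
		if (ls[i].window_start < ls[oldest].window_start)
			oldest = i;
	}
	if (i == NUM_TRACKED_WINDOWS) {
		i = oldest;
		ls[i].window_start = ws;
		ls[i].subs = 0;
	}
	ls[i].subs += load;
}

/*
 * Apply queued subtractions that match the current or previous window;
 * stale entries are discarded, as the patch comment describes.
 */
static void apply_subs(struct deferred_sub *ls, uint64_t ws,
		       uint64_t window_size, uint64_t *curr, uint64_t *prev)
{
	int i;

	for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
		if (ls[i].window_start == ws)
			*curr -= ls[i].subs;
		else if (ls[i].window_start == ws - window_size)
			*prev -= ls[i].subs;
		ls[i].subs = 0;
	}
}
```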
@@ -3033,6 +3290,7 @@ void sched_get_cpus_busy(struct sched_load *busy,
    struct related_thread_group *grp;
    u64 total_group_load = 0, total_ngload = 0;
    bool aggregate_load = false;
    struct sched_cluster *cluster = cpu_cluster(cpumask_first(query_cpus));

    if (unlikely(cpus == 0))
        return;

@@ -3050,6 +3308,13 @@ void sched_get_cpus_busy(struct sched_load *busy,

    window_size = sched_ravg_window;

    /*
     * We don't really need the cluster lock for this entire for loop
     * block. However, there is no advantage in optimizing this as rq
     * locks are held regardless and would prevent migration anyways
     */
    raw_spin_lock(&cluster->load_lock);

    for_each_cpu(cpu, query_cpus) {
        rq = cpu_rq(cpu);

@@ -3057,6 +3322,7 @@ void sched_get_cpus_busy(struct sched_load *busy,
                0);
        cur_freq[i] = cpu_cycles_to_freq(rq->cc.cycles, rq->cc.time);

        account_load_subtractions(rq);
        load[i] = rq->old_busy_time = rq->prev_runnable_sum;
        nload[i] = rq->nt_prev_runnable_sum;
        pload[i] = rq->hmp_stats.pred_demands_sum;

@@ -3083,6 +3349,8 @@ void sched_get_cpus_busy(struct sched_load *busy,
        i++;
    }

    raw_spin_unlock(&cluster->load_lock);

    for_each_related_thread_group(grp) {
        for_each_cpu(cpu, query_cpus) {
            /* Protected by rq_lock */
@@ -3237,6 +3505,189 @@ int sched_set_window(u64 window_start, unsigned int window_size)
    return 0;
}

static inline void create_subtraction_entry(struct rq *rq, u64 ws, int index)
{
    rq->load_subs[index].window_start = ws;
    rq->load_subs[index].subs = 0;
    rq->load_subs[index].new_subs = 0;
}

static bool get_subtraction_index(struct rq *rq, u64 ws)
{
    int i;
    u64 oldest = ULLONG_MAX;
    int oldest_index = 0;

    for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
        u64 entry_ws = rq->load_subs[i].window_start;

        if (ws == entry_ws)
            return i;

        if (entry_ws < oldest) {
            oldest = entry_ws;
            oldest_index = i;
        }
    }

    create_subtraction_entry(rq, ws, oldest_index);
    return oldest_index;
}

static void update_rq_load_subtractions(int index, struct rq *rq,
                u32 sub_load, bool new_task)
{
    rq->load_subs[index].subs += sub_load;
    if (new_task)
        rq->load_subs[index].new_subs += sub_load;
}

static void update_cluster_load_subtractions(struct task_struct *p,
                    int cpu, u64 ws, bool new_task)
{
    struct sched_cluster *cluster = cpu_cluster(cpu);
    struct cpumask cluster_cpus = cluster->cpus;
    u64 prev_ws = ws - sched_ravg_window;
    int i;

    cpumask_clear_cpu(cpu, &cluster_cpus);
    raw_spin_lock(&cluster->load_lock);

    for_each_cpu(i, &cluster_cpus) {
        struct rq *rq = cpu_rq(i);
        int index;

        if (p->ravg.curr_window_cpu[i]) {
            index = get_subtraction_index(rq, ws);
            update_rq_load_subtractions(index, rq,
                p->ravg.curr_window_cpu[i], new_task);
            p->ravg.curr_window_cpu[i] = 0;
        }

        if (p->ravg.prev_window_cpu[i]) {
            index = get_subtraction_index(rq, prev_ws);
            update_rq_load_subtractions(index, rq,
                p->ravg.prev_window_cpu[i], new_task);
            p->ravg.prev_window_cpu[i] = 0;
        }
    }

    raw_spin_unlock(&cluster->load_lock);
}

static inline void inter_cluster_migration_fixup
    (struct task_struct *p, int new_cpu, int task_cpu, bool new_task)
{
    struct rq *dest_rq = cpu_rq(new_cpu);
    struct rq *src_rq = cpu_rq(task_cpu);

    if (same_freq_domain(new_cpu, task_cpu))
        return;

    p->ravg.curr_window_cpu[new_cpu] = p->ravg.curr_window;
    p->ravg.prev_window_cpu[new_cpu] = p->ravg.prev_window;

    dest_rq->curr_runnable_sum += p->ravg.curr_window;
    dest_rq->prev_runnable_sum += p->ravg.prev_window;

    src_rq->curr_runnable_sum -= p->ravg.curr_window_cpu[task_cpu];
    src_rq->prev_runnable_sum -= p->ravg.prev_window_cpu[task_cpu];

    if (new_task) {
        dest_rq->nt_curr_runnable_sum += p->ravg.curr_window;
        dest_rq->nt_prev_runnable_sum += p->ravg.prev_window;

        src_rq->nt_curr_runnable_sum -=
                p->ravg.curr_window_cpu[task_cpu];
        src_rq->nt_prev_runnable_sum -=
                p->ravg.prev_window_cpu[task_cpu];
    }

    p->ravg.curr_window_cpu[task_cpu] = 0;
    p->ravg.prev_window_cpu[task_cpu] = 0;

    update_cluster_load_subtractions(p, task_cpu,
            src_rq->window_start, new_task);

    BUG_ON((s64)src_rq->prev_runnable_sum < 0);
    BUG_ON((s64)src_rq->curr_runnable_sum < 0);
    BUG_ON((s64)src_rq->nt_prev_runnable_sum < 0);
    BUG_ON((s64)src_rq->nt_curr_runnable_sum < 0);
}

static int get_top_index(unsigned long *bitmap, unsigned long old_top)
{
    int index = find_next_bit(bitmap, NUM_LOAD_INDICES, old_top);

    if (index == NUM_LOAD_INDICES)
        return 0;

    return NUM_LOAD_INDICES - 1 - index;
}

static void
migrate_top_tasks(struct task_struct *p, struct rq *src_rq, struct rq *dst_rq)
{
    int index;
    int top_index;
    u32 curr_window = p->ravg.curr_window;
    u32 prev_window = p->ravg.prev_window;
    u8 src = src_rq->curr_table;
    u8 dst = dst_rq->curr_table;
    u8 *src_table;
    u8 *dst_table;

    if (curr_window) {
        src_table = src_rq->top_tasks[src];
        dst_table = dst_rq->top_tasks[dst];
        index = load_to_index(curr_window);
        src_table[index] -= 1;
        dst_table[index] += 1;

        if (!src_table[index])
            __clear_bit(NUM_LOAD_INDICES - index - 1,
                src_rq->top_tasks_bitmap[src]);

        if (dst_table[index] == 1)
            __set_bit(NUM_LOAD_INDICES - index - 1,
                dst_rq->top_tasks_bitmap[dst]);

        if (index > dst_rq->curr_top)
            dst_rq->curr_top = index;

        top_index = src_rq->curr_top;
        if (index == top_index && !src_table[index])
            src_rq->curr_top = get_top_index(
                src_rq->top_tasks_bitmap[src], top_index);
    }

    if (prev_window) {
        src = 1 - src;
        dst = 1 - dst;
        src_table = src_rq->top_tasks[src];
        dst_table = dst_rq->top_tasks[dst];
        index = load_to_index(prev_window);
        src_table[index] -= 1;
        dst_table[index] += 1;

        if (!src_table[index])
            __clear_bit(NUM_LOAD_INDICES - index - 1,
                src_rq->top_tasks_bitmap[src]);

        if (dst_table[index] == 1)
            __set_bit(NUM_LOAD_INDICES - index - 1,
                dst_rq->top_tasks_bitmap[dst]);

        if (index > dst_rq->prev_top)
            dst_rq->prev_top = index;

        top_index = src_rq->prev_top;
        if (index == top_index && !src_table[index])
            src_rq->prev_top = get_top_index(
                src_rq->top_tasks_bitmap[src], top_index);
    }
}

void fixup_busy_time(struct task_struct *p, int new_cpu)
{
    struct rq *src_rq = task_rq(p);
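inter_cluster_migration_fixup() moves the task's window contributions from the source runqueue's counters to the destination's, and records the per-CPU removals it cannot apply locally via the cluster subtraction entries. The toy standalone model below covers only the direct part of that bookkeeping (plain structs with hypothetical names) and demonstrates that the total busy time is conserved across the move.

```c
#include <assert.h>
#include <stdint.h>

struct toy_rq   { int64_t curr_sum, prev_sum; };
struct toy_task { uint32_t curr_window, prev_window; };

/*
 * Move the task's current/previous window contribution from src to dst,
 * mirroring the paired += / -= updates in inter_cluster_migration_fixup().
 */
static void toy_migration_fixup(struct toy_task *p,
				struct toy_rq *src, struct toy_rq *dst)
{
	dst->curr_sum += p->curr_window;
	dst->prev_sum += p->prev_window;
	src->curr_sum -= p->curr_window;
	src->prev_sum -= p->prev_window;

	assert(src->curr_sum >= 0 && src->prev_sum >= 0);
}

int main(void)
{
	struct toy_rq src = { 5000000, 7000000 }, dst = { 1000000, 2000000 };
	struct toy_task p = { 3000000, 4000000 };
	int64_t before = src.curr_sum + dst.curr_sum;

	toy_migration_fixup(&p, &src, &dst);
	assert(src.curr_sum + dst.curr_sum == before);	/* load conserved */
	return 0;
}
```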
@@ -3246,8 +3697,6 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
    u64 *src_prev_runnable_sum, *dst_prev_runnable_sum;
    u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum;
    u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum;
    int migrate_type;
    struct migration_sum_data d;
    bool new_task;
    struct related_thread_group *grp;
@@ -3281,62 +3730,55 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
    new_task = is_new_task(p);
    /* Protected by rq_lock */
    grp = p->grp;

    /*
     * For frequency aggregation, we continue to do migration fixups
     * even for intra cluster migrations. This is because the aggregated
     * load has to be reported on a single CPU regardless.
     */
    if (grp && sched_freq_aggregate) {
        struct group_cpu_time *cpu_time;

        migrate_type = GROUP_TO_GROUP;
        /* Protected by rq_lock */
        cpu_time = _group_cpu_time(grp, cpu_of(src_rq));
        d.src_rq = NULL;
        d.src_cpu_time = cpu_time;
        src_curr_runnable_sum = &cpu_time->curr_runnable_sum;
        src_prev_runnable_sum = &cpu_time->prev_runnable_sum;
        src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
        src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;

        /* Protected by rq_lock */
        cpu_time = _group_cpu_time(grp, cpu_of(dest_rq));
        d.dst_rq = NULL;
        d.dst_cpu_time = cpu_time;
        dst_curr_runnable_sum = &cpu_time->curr_runnable_sum;
        dst_prev_runnable_sum = &cpu_time->prev_runnable_sum;
        dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
        dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
        sync_window_start(dest_rq, cpu_time);

        if (p->ravg.curr_window) {
            *src_curr_runnable_sum -= p->ravg.curr_window;
            *dst_curr_runnable_sum += p->ravg.curr_window;
            if (new_task) {
                *src_nt_curr_runnable_sum -=
                            p->ravg.curr_window;
                *dst_nt_curr_runnable_sum +=
                            p->ravg.curr_window;
            }
        }

        if (p->ravg.prev_window) {
            *src_prev_runnable_sum -= p->ravg.prev_window;
            *dst_prev_runnable_sum += p->ravg.prev_window;
            if (new_task) {
                *src_nt_prev_runnable_sum -=
                            p->ravg.prev_window;
                *dst_nt_prev_runnable_sum +=
                            p->ravg.prev_window;
            }
        }
    } else {
        migrate_type = RQ_TO_RQ;
        d.src_rq = src_rq;
        d.src_cpu_time = NULL;
        d.dst_rq = dest_rq;
        d.dst_cpu_time = NULL;
        src_curr_runnable_sum = &src_rq->curr_runnable_sum;
        src_prev_runnable_sum = &src_rq->prev_runnable_sum;
        src_nt_curr_runnable_sum = &src_rq->nt_curr_runnable_sum;
        src_nt_prev_runnable_sum = &src_rq->nt_prev_runnable_sum;

        dst_curr_runnable_sum = &dest_rq->curr_runnable_sum;
        dst_prev_runnable_sum = &dest_rq->prev_runnable_sum;
        dst_nt_curr_runnable_sum = &dest_rq->nt_curr_runnable_sum;
        dst_nt_prev_runnable_sum = &dest_rq->nt_prev_runnable_sum;
        inter_cluster_migration_fixup(p, new_cpu,
                    task_cpu(p), new_task);
    }

    if (p->ravg.curr_window) {
        *src_curr_runnable_sum -= p->ravg.curr_window;
        *dst_curr_runnable_sum += p->ravg.curr_window;
        if (new_task) {
            *src_nt_curr_runnable_sum -= p->ravg.curr_window;
            *dst_nt_curr_runnable_sum += p->ravg.curr_window;
        }
    }

    if (p->ravg.prev_window) {
        *src_prev_runnable_sum -= p->ravg.prev_window;
        *dst_prev_runnable_sum += p->ravg.prev_window;
        if (new_task) {
            *src_nt_prev_runnable_sum -= p->ravg.prev_window;
            *dst_nt_prev_runnable_sum += p->ravg.prev_window;
        }
    }
    migrate_top_tasks(p, src_rq, dest_rq);

    if (p == src_rq->ed_task) {
        src_rq->ed_task = NULL;

@@ -3344,12 +3786,6 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
        dest_rq->ed_task = p;
    }

    trace_sched_migration_update_sum(p, migrate_type, &d);
    BUG_ON((s64)*src_prev_runnable_sum < 0);
    BUG_ON((s64)*src_curr_runnable_sum < 0);
    BUG_ON((s64)*src_nt_prev_runnable_sum < 0);
    BUG_ON((s64)*src_nt_curr_runnable_sum < 0);

done:
    if (p->state == TASK_WAKING)
        double_rq_unlock(src_rq, dest_rq);
@@ -3501,6 +3937,9 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
    u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum;
    struct migration_sum_data d;
    int migrate_type;
    int cpu = cpu_of(rq);
    bool new_task = is_new_task(p);
    int i;

    if (!sched_freq_aggregate)
        return;

@@ -3511,7 +3950,7 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
    update_task_ravg(p, rq, TASK_UPDATE, wallclock, 0);

    /* cpu_time protected by related_thread_group_lock, grp->lock rq_lock */
    cpu_time = _group_cpu_time(grp, cpu_of(rq));
    cpu_time = _group_cpu_time(grp, cpu);
    if (event == ADD_TASK) {
        sync_window_start(rq, cpu_time);
        migrate_type = RQ_TO_GROUP;

@@ -3528,6 +3967,19 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
        dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
        src_nt_prev_runnable_sum = &rq->nt_prev_runnable_sum;
        dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;

        *src_curr_runnable_sum -= p->ravg.curr_window_cpu[cpu];
        *src_prev_runnable_sum -= p->ravg.prev_window_cpu[cpu];
        if (new_task) {
            *src_nt_curr_runnable_sum -=
                    p->ravg.curr_window_cpu[cpu];
            *src_nt_prev_runnable_sum -=
                    p->ravg.prev_window_cpu[cpu];
        }

        update_cluster_load_subtractions(p, cpu,
                rq->window_start, new_task);

    } else {
        migrate_type = GROUP_TO_RQ;
        d.src_rq = NULL;

@@ -3550,21 +4002,42 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
        dst_nt_curr_runnable_sum = &rq->nt_curr_runnable_sum;
        src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
        dst_nt_prev_runnable_sum = &rq->nt_prev_runnable_sum;

        *src_curr_runnable_sum -= p->ravg.curr_window;
        *src_prev_runnable_sum -= p->ravg.prev_window;
        if (new_task) {
            *src_nt_curr_runnable_sum -= p->ravg.curr_window;
            *src_nt_prev_runnable_sum -= p->ravg.prev_window;
        }

        /*
         * Need to reset curr/prev windows for all CPUs, not just the
         * ones in the same cluster. Since inter cluster migrations
         * did not result in the appropriate book keeping, the values
         * per CPU would be inaccurate.
         */
        for_each_possible_cpu(i) {
            p->ravg.curr_window_cpu[i] = 0;
            p->ravg.prev_window_cpu[i] = 0;
        }
    }

    *src_curr_runnable_sum -= p->ravg.curr_window;
    *dst_curr_runnable_sum += p->ravg.curr_window;

    *src_prev_runnable_sum -= p->ravg.prev_window;
    *dst_prev_runnable_sum += p->ravg.prev_window;

    if (is_new_task(p)) {
        *src_nt_curr_runnable_sum -= p->ravg.curr_window;
    if (new_task) {
        *dst_nt_curr_runnable_sum += p->ravg.curr_window;
        *src_nt_prev_runnable_sum -= p->ravg.prev_window;
        *dst_nt_prev_runnable_sum += p->ravg.prev_window;
    }

    /*
     * When a task enters or exits a group, its curr and prev windows are
     * moved to a single CPU. This behavior might be sub-optimal in the
     * exit case, however, it saves us the overhead of handling inter
     * cluster migration fixups while the task is part of a related group.
     */
    p->ravg.curr_window_cpu[cpu] = p->ravg.curr_window;
    p->ravg.prev_window_cpu[cpu] = p->ravg.prev_window;

    trace_sched_migration_update_sum(p, migrate_type, &d);

    BUG_ON((s64)*src_curr_runnable_sum < 0);
@@ -351,13 +351,23 @@ struct cfs_bandwidth { };

#ifdef CONFIG_SCHED_HMP

#define NUM_TRACKED_WINDOWS 2
#define NUM_LOAD_INDICES 1000

struct hmp_sched_stats {
    int nr_big_tasks;
    u64 cumulative_runnable_avg;
    u64 pred_demands_sum;
};

struct load_subtractions {
    u64 window_start;
    u64 subs;
    u64 new_subs;
};

struct sched_cluster {
    raw_spinlock_t load_lock;
    struct list_head list;
    struct cpumask cpus;
    int id;

@@ -742,6 +752,13 @@ struct rq {
    u64 prev_runnable_sum;
    u64 nt_curr_runnable_sum;
    u64 nt_prev_runnable_sum;
    struct load_subtractions load_subs[NUM_TRACKED_WINDOWS];
    DECLARE_BITMAP_ARRAY(top_tasks_bitmap,
            NUM_TRACKED_WINDOWS, NUM_LOAD_INDICES);
    u8 *top_tasks[NUM_TRACKED_WINDOWS];
    u8 curr_table;
    int prev_top;
    int curr_top;
#endif

#ifdef CONFIG_IRQ_TIME_ACCOUNTING

@@ -1056,8 +1073,9 @@ extern unsigned int __read_mostly sched_spill_load;
extern unsigned int __read_mostly sched_upmigrate;
extern unsigned int __read_mostly sched_downmigrate;
extern unsigned int __read_mostly sysctl_sched_spill_nr_run;
extern unsigned int __read_mostly sched_load_granule;

extern void init_new_task_load(struct task_struct *p);
extern void init_new_task_load(struct task_struct *p, bool idle_task);
extern u64 sched_ktime_clock(void);
extern int got_boost_kick(void);
extern int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb);

@@ -1401,6 +1419,7 @@ extern int cpu_upmigrate_discourage_write_u64(struct cgroup_subsys_state *css,
                struct cftype *cft, u64 upmigrate_discourage);
extern void sched_hmp_parse_dt(void);
extern void init_sched_hmp_boost_policy(void);
extern void clear_top_tasks_bitmap(unsigned long *bitmap);

#else /* CONFIG_SCHED_HMP */

@@ -1503,7 +1522,9 @@ static inline struct sched_cluster *rq_cluster(struct rq *rq)
    return NULL;
}

static inline void init_new_task_load(struct task_struct *p) { }
static inline void init_new_task_load(struct task_struct *p, bool idle_task)
{
}

static inline u64 scale_load_to_cpu(u64 load, int cpu)
{

@@ -1570,8 +1591,6 @@ static inline int update_preferred_cluster(struct related_thread_group *grp,
static inline void add_new_task_to_grp(struct task_struct *new) {}

#define sched_enable_hmp 0
#define sched_freq_legacy_mode 1
#define sched_migration_fixup 0
#define PRED_DEMAND_DELTA (0)

static inline void
@@ -32,7 +32,7 @@ struct task_struct *idle_thread_get(unsigned int cpu)

    if (!tsk)
        return ERR_PTR(-ENOMEM);
    init_idle(tsk, cpu);
    init_idle(tsk, cpu, true);
    return tsk;
}