2011-10-25 10:00:11 +02:00
|
|
|
#include "sched.h"
|
|
|
|
|
2007-07-09 18:51:58 +02:00
|
|
|
/*
|
|
|
|
* idle-task scheduling class.
|
|
|
|
*
|
|
|
|
* (NOTE: these are not related to SCHED_IDLE tasks which are
|
2012-04-02 17:00:44 +09:00
|
|
|
* handled in sched/fair.c)
|
2007-07-09 18:51:58 +02:00
|
|
|
*/
|
|
|
|
|
2008-01-25 21:08:09 +01:00
|
|
|
#ifdef CONFIG_SMP
|
2010-03-24 18:34:10 +01:00
|
|
|
static int
|
FROMLIST: sched/fair: Use wake_q length as a hint for wake_wide
(from https://patchwork.kernel.org/patch/9895261/)
This patch adds a parameter to select_task_rq, sibling_count_hint
allowing the caller, where it has this information, to inform the
sched_class the number of tasks that are being woken up as part of
the same event.
The wake_q mechanism is one case where this information is available.
select_task_rq_fair can then use the information to detect that it
needs to widen the search space for task placement in order to avoid
overloading the last-level cache domain's CPUs.
* * *
The reason I am investigating this change is the following use case
on ARM big.LITTLE (asymmetrical CPU capacity): 1 task per CPU, which
all repeatedly do X amount of work then
pthread_barrier_wait (i.e. sleep until the last task finishes its X
and hits the barrier). On big.LITTLE, the tasks which get a "big" CPU
finish faster, and then those CPUs pull over the tasks that are still
running:
v CPU v ->time->
-------------
0 (big) 11111 /333
-------------
1 (big) 22222 /444|
-------------
2 (LITTLE) 333333/
-------------
3 (LITTLE) 444444/
-------------
Now when task 4 hits the barrier (at |) and wakes the others up,
there are 4 tasks with prev_cpu=<big> and 0 tasks with
prev_cpu=<little>. want_affine therefore means that we'll only look
in CPUs 0 and 1 (sd_llc), so tasks will be unnecessarily coscheduled
on the bigs until the next load balance, something like this:
v CPU v ->time->
------------------------
0 (big) 11111 /333 31313\33333
------------------------
1 (big) 22222 /444|424\4444444
------------------------
2 (LITTLE) 333333/ \222222
------------------------
3 (LITTLE) 444444/ \1111
------------------------
^^^
underutilization
So, I'm trying to get want_affine = 0 for these tasks.
I don't _think_ any incarnation of the wakee_flips mechanism can help
us here because which task is waker and which tasks are wakees
generally changes with each iteration.
However pthread_barrier_wait (or more accurately FUTEX_WAKE) has the
nice property that we know exactly how many tasks are being woken, so
we can cheat.
It might be a disadvantage that we "widen" _every_ task that's woken in
an event, while select_idle_sibling would work fine for the first
sd_llc_size - 1 tasks.
IIUC, if wake_affine() behaves correctly this trick wouldn't be
necessary on SMP systems, so it might be best guarded by the presence
of SD_ASYM_CPUCAPACITY?
* * *
Final note..
In order to observe "perfect" behaviour for this use case, I also had
to disable the TTWU_QUEUE sched feature. Suppose during the wakeup
above we are working through the work queue and have placed tasks 3
and 2, and are about to place task 1:
v CPU v ->time->
--------------
0 (big) 11111 /333 3
--------------
1 (big) 22222 /444|4
--------------
2 (LITTLE) 333333/ 2
--------------
3 (LITTLE) 444444/ <- Task 1 should go here
--------------
If TTWU_QUEUE is enabled, we will not yet have enqueued task
2 (having instead sent a reschedule IPI) or attached its load to CPU
2. So we are likely to also place task 1 on cpu 2. Disabling
TTWU_QUEUE means that we enqueue task 2 before placing task 1,
solving this issue. TTWU_QUEUE is there to minimise rq lock
contention, and I guess that this contention is less of an issue on
big.LITTLE systems since they have relatively few CPUs, which
suggests the trade-off makes sense here.
Change-Id: I2080302839a263e0841a89efea8589ea53bbda9c
Signed-off-by: Brendan Jackman <brendan.jackman@arm.com>
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
2017-08-07 15:46:13 +01:00
|
|
|
select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags,
		    int sibling_count_hint)
{
	/*
	 * Idle tasks are never migrated: @cpu, @sd_flag, @flags and
	 * @sibling_count_hint are all ignored and the result is simply
	 * whatever task_cpu() reports for @p.
	 */
	int home_cpu = task_cpu(p);

	return home_cpu;
}
|
|
|
|
#endif /* CONFIG_SMP */
|
2014-01-23 20:32:21 +01:00
|
|
|
|
2007-07-09 18:51:58 +02:00
|
|
|
/*
 * Idle tasks are unconditionally rescheduled: neither @p nor @flags is
 * examined, resched_curr() is always invoked on @rq.
 */
static void
check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags)
{
	resched_curr(rq);
}
|
|
|
|
|
2012-02-11 06:05:00 +01:00
|
|
|
static struct task_struct *
|
|
|
|
pick_next_task_idle(struct rq *rq, struct task_struct *prev)
|
2007-07-09 18:51:58 +02:00
|
|
|
{
|
2014-02-12 10:49:30 +01:00
|
|
|
put_prev_task(rq, prev);
|
2012-02-11 06:05:00 +01:00
|
|
|
|
2007-07-09 18:51:58 +02:00
|
|
|
schedstat_inc(rq, sched_goidle);
|
|
|
|
return rq->idle;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* It is not legal to sleep in the idle task - print a warning
|
|
|
|
* message if some code attempts to do it:
|
|
|
|
*/
|
|
|
|
static void
|
2010-03-24 16:38:48 +01:00
|
|
|
dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
|
2007-07-09 18:51:58 +02:00
|
|
|
{
|
2009-11-17 14:28:38 +01:00
|
|
|
raw_spin_unlock_irq(&rq->lock);
|
2009-12-20 14:23:57 +01:00
|
|
|
printk(KERN_ERR "bad: scheduling from the idle thread!\n");
|
2007-07-09 18:51:58 +02:00
|
|
|
dump_stack();
|
2009-11-17 14:28:38 +01:00
|
|
|
raw_spin_lock_irq(&rq->lock);
|
2007-07-09 18:51:58 +02:00
|
|
|
}
|
|
|
|
|
2007-08-09 11:16:49 +02:00
|
|
|
/*
 * .put_prev_task hook of idle_sched_class.
 *
 * Calls idle_exit_fair() and then rq_last_tick_reset() on @rq, in that
 * order; @prev is not used.
 */
static void
put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
	idle_exit_fair(rq);

	rq_last_tick_reset(rq);
}
|
|
|
|
|
2008-01-25 21:08:29 +01:00
|
|
|
/*
 * .task_tick hook of idle_sched_class: a no-op, none of the arguments
 * are used.
 */
static void
task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
{
	/* nothing to do */
}
|
|
|
|
|
2007-10-15 17:00:08 +02:00
|
|
|
/*
 * .set_curr_task hook of idle_sched_class: a no-op, @rq is not used.
 */
static void
set_curr_task_idle(struct rq *rq)
{
	/* nothing to do */
}
|
|
|
|
|
2011-01-17 17:03:27 +01:00
|
|
|
/*
 * .switched_to hook of idle_sched_class.
 *
 * Reaching this function is treated as a fatal error: it does nothing
 * but BUG(). @rq and @p are not used.
 */
static void
switched_to_idle(struct rq *rq, struct task_struct *p)
{
	BUG();
}
|
|
|
|
|
2011-01-17 17:03:27 +01:00
|
|
|
/*
 * .prio_changed hook of idle_sched_class.
 *
 * Reaching this function is treated as a fatal error: it does nothing
 * but BUG(). @rq, @p and @oldprio are not used.
 */
static void prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio)
{
	BUG();
}
|
|
|
|
|
2010-01-13 20:21:52 -07:00
|
|
|
/*
 * .get_rr_interval hook of idle_sched_class: always reports 0,
 * regardless of @rq and @task.
 */
static unsigned int
get_rr_interval_idle(struct rq *rq, struct task_struct *task)
{
	const unsigned int interval = 0;

	return interval;
}
|
|
|
|
|
2014-11-23 23:04:52 +01:00
|
|
|
/*
 * .update_curr hook of idle_sched_class: a no-op, @rq is not used.
 */
static void
update_curr_idle(struct rq *rq)
{
	/* nothing to do */
}
|
|
|
|
|
2015-01-16 11:27:31 +05:30
|
|
|
#ifdef CONFIG_SCHED_HMP
|
|
|
|
|
|
|
|
/*
 * .inc_hmp_sched_stats hook of idle_sched_class: a no-op, @rq and @p
 * are not used.
 */
static void inc_hmp_sched_stats_idle(struct rq *rq, struct task_struct *p)
{
	/* nothing to do */
}
|
|
|
|
|
|
|
|
/*
 * .dec_hmp_sched_stats hook of idle_sched_class: a no-op, @rq and @p
 * are not used.
 */
static void dec_hmp_sched_stats_idle(struct rq *rq, struct task_struct *p)
{
	/* nothing to do */
}
|
|
|
|
|
2015-07-13 21:04:18 -07:00
|
|
|
static void
|
|
|
|
fixup_hmp_sched_stats_idle(struct rq *rq, struct task_struct *p,
|
2015-06-08 09:08:47 +05:30
|
|
|
u32 new_task_load, u32 new_pred_demand)
|
2015-07-13 21:04:18 -07:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2015-01-16 11:27:31 +05:30
|
|
|
#endif
|
|
|
|
|
2007-07-09 18:51:58 +02:00
|
|
|
/*
|
|
|
|
* Simple, special scheduling class for the per-CPU idle tasks:
|
|
|
|
*/
|
2011-10-25 10:00:11 +02:00
|
|
|
const struct sched_class idle_sched_class = {
|
2007-10-15 17:00:12 +02:00
|
|
|
/* .next is NULL */
|
2007-07-09 18:51:58 +02:00
|
|
|
/* no enqueue/yield_task for idle tasks */
|
|
|
|
|
|
|
|
/* dequeue is not valid, we print a debug message there: */
|
|
|
|
.dequeue_task = dequeue_task_idle,
|
|
|
|
|
|
|
|
.check_preempt_curr = check_preempt_curr_idle,
|
|
|
|
|
|
|
|
.pick_next_task = pick_next_task_idle,
|
|
|
|
.put_prev_task = put_prev_task_idle,
|
|
|
|
|
2007-10-24 18:23:51 +02:00
|
|
|
#ifdef CONFIG_SMP
|
2008-10-22 15:25:26 +08:00
|
|
|
.select_task_rq = select_task_rq_idle,
|
2015-05-15 17:43:35 +02:00
|
|
|
.set_cpus_allowed = set_cpus_allowed_common,
|
2007-10-24 18:23:51 +02:00
|
|
|
#endif
|
2007-07-09 18:51:58 +02:00
|
|
|
|
2007-10-15 17:00:08 +02:00
|
|
|
.set_curr_task = set_curr_task_idle,
|
2007-07-09 18:51:58 +02:00
|
|
|
.task_tick = task_tick_idle,
|
2008-01-25 21:08:22 +01:00
|
|
|
|
2009-09-21 01:31:53 +00:00
|
|
|
.get_rr_interval = get_rr_interval_idle,
|
|
|
|
|
2008-01-25 21:08:22 +01:00
|
|
|
.prio_changed = prio_changed_idle,
|
|
|
|
.switched_to = switched_to_idle,
|
2014-11-23 23:04:52 +01:00
|
|
|
.update_curr = update_curr_idle,
|
2015-01-16 11:27:31 +05:30
|
|
|
#ifdef CONFIG_SCHED_HMP
|
|
|
|
.inc_hmp_sched_stats = inc_hmp_sched_stats_idle,
|
|
|
|
.dec_hmp_sched_stats = dec_hmp_sched_stats_idle,
|
2015-07-13 21:04:18 -07:00
|
|
|
.fixup_hmp_sched_stats = fixup_hmp_sched_stats_idle,
|
2015-01-16 11:27:31 +05:30
|
|
|
#endif
|
2007-07-09 18:51:58 +02:00
|
|
|
};
|