Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "The main scheduler changes in this cycle were:

   - various sched/deadline fixes and enhancements

   - rescheduling latency fixes/cleanups

   - rework the rq->clock code to be more consistent and more robust

   - minor micro-optimizations

   - ->avg.decay_count fixes

   - add a stack overflow check to might_sleep()

   - idle-poll handler fix, possibly resulting in power savings

   - misc smaller updates and fixes"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/Documentation: Remove unneeded word
  sched/wait: Introduce wait_on_bit_timeout()
  sched: Pull resched loop to __schedule() callers
  sched/deadline: Remove cpu_active_mask from cpudl_find()
  sched: Fix hrtick_start() on UP
  sched/deadline: Avoid pointless __setscheduler()
  sched/deadline: Fix stale yield state
  sched/deadline: Fix hrtick for a non-leftmost task
  sched/deadline: Modify cpudl::free_cpus to reflect rd->online
  sched/idle: Add missing checks to the exit condition of cpu_idle_poll()
  sched: Fix missing preemption opportunity
  sched/rt: Reduce rq lock contention by eliminating locking of non-feasible target
  sched/debug: Print rq->clock_task
  sched/core: Rework rq->clock update skips
  sched/core: Validate rq_clock*() serialization
  sched/core: Remove check of p->sched_class
  sched/fair: Fix sched_entity::avg::decay_count initialization
  sched/debug: Fix potential call to __ffs(0) in sched_show_task()
  sched/debug: Check for stack overflow in ___might_sleep()
  sched/fair: Fix the dealing with decay_count in __synchronize_entity_decay()
commit 5b9b28a63f
11 changed files with 197 additions and 77 deletions
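One of the changes merged here introduces wait_on_bit_timeout() (see the first hunk below, next to wait_on_bit_io()). As a rough illustration of how a caller might use the new helper, here is a minimal driver-style sketch; the my_dev structure, the MY_FLAG_BUSY bit and the -ETIMEDOUT policy are assumptions made for this sketch, not code from the series.

/*
 * Illustrative only: wait up to one second for a hypothetical busy bit to
 * clear, sleeping interruptibly. Only wait_on_bit_timeout() itself comes
 * from this pull; everything named my_* is invented for the example.
 */
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

#define MY_FLAG_BUSY	0		/* hypothetical flag bit */

struct my_dev {
	unsigned long flags;		/* MY_FLAG_BUSY set while busy */
};

static int my_dev_wait_idle(struct my_dev *dev)
{
	/*
	 * Returns 0 if the bit cleared before the timeout, non-zero on
	 * timeout or on a signal (TASK_INTERRUPTIBLE permits wakeups).
	 */
	if (wait_on_bit_timeout(&dev->flags, MY_FLAG_BUSY,
				TASK_INTERRUPTIBLE, HZ))
		return -ETIMEDOUT;

	return 0;
}

/* The side that finishes the work wakes waiters the usual way: */
static void my_dev_done(struct my_dev *dev)
{
	clear_bit_unlock(MY_FLAG_BUSY, &dev->flags);
	smp_mb__after_atomic();
	wake_up_bit(&dev->flags, MY_FLAG_BUSY);
}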
@@ -989,6 +989,32 @@ wait_on_bit_io(void *word, int bit, unsigned mode)
 			       mode);
 }
 
+/**
+ * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ * @timeout: timeout, in jiffies
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared. This is similar to wait_on_bit(), except also takes a
+ * timeout parameter.
+ *
+ * Returned value will be zero if the bit was cleared before the
+ * @timeout elapsed, or non-zero if the @timeout elapsed or process
+ * received a signal and the mode permitted wakeup on that signal.
+ */
+static inline int
+wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_timeout(word, bit,
+					       bit_wait_timeout,
+					       mode, timeout);
+}
+
 /**
  * wait_on_bit_action - wait for a bit to be cleared
  * @word: the word being waited on, a kernel virtual address
@@ -81,7 +81,7 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
  * The mutex must later on be released by the same task that
  * acquired it. Recursive locking is not allowed. The task
  * may not exit without first unlocking the mutex. Also, kernel
- * memory where the mutex resides mutex must not be freed with
+ * memory where the mutex resides must not be freed with
  * the mutex still locked. The mutex must first be initialized
  * (or statically defined) before it can be locked. memset()-ing
  * the mutex to 0 is not allowed.
@@ -119,7 +119,9 @@ void update_rq_clock(struct rq *rq)
 {
 	s64 delta;
 
-	if (rq->skip_clock_update > 0)
+	lockdep_assert_held(&rq->lock);
+
+	if (rq->clock_skip_update & RQCF_ACT_SKIP)
 		return;
 
 	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
@@ -490,6 +492,11 @@ static __init void init_hrtick(void)
  */
 void hrtick_start(struct rq *rq, u64 delay)
 {
+	/*
+	 * Don't schedule slices shorter than 10000ns, that just
+	 * doesn't make sense. Rely on vruntime for fairness.
+	 */
+	delay = max_t(u64, delay, 10000LL);
 	__hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
 			HRTIMER_MODE_REL_PINNED, 0);
 }
@@ -1046,7 +1053,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	 * this case, we can save a useless back to back clock update.
 	 */
 	if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
-		rq->skip_clock_update = 1;
+		rq_clock_skip_update(rq, true);
 }
 
 #ifdef CONFIG_SMP
@@ -1836,6 +1843,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.prev_sum_exec_runtime = 0;
 	p->se.nr_migrations = 0;
 	p->se.vruntime = 0;
+#ifdef CONFIG_SMP
+	p->se.avg.decay_count = 0;
+#endif
 	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_SCHEDSTATS
@@ -2755,6 +2765,10 @@ again:
  * - explicit schedule() call
  * - return from syscall or exception to user-space
  * - return from interrupt-handler to user-space
+ *
+ * WARNING: all callers must re-check need_resched() afterward and reschedule
+ * accordingly in case an event triggered the need for rescheduling (such as
+ * an interrupt waking up a task) while preemption was disabled in __schedule().
  */
 static void __sched __schedule(void)
 {
@@ -2763,7 +2777,6 @@ static void __sched __schedule(void)
 	struct rq *rq;
 	int cpu;
 
-need_resched:
 	preempt_disable();
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
@@ -2783,6 +2796,8 @@ need_resched:
 	smp_mb__before_spinlock();
 	raw_spin_lock_irq(&rq->lock);
 
+	rq->clock_skip_update <<= 1; /* promote REQ to ACT */
+
 	switch_count = &prev->nivcsw;
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		if (unlikely(signal_pending_state(prev->state, prev))) {
@@ -2807,13 +2822,13 @@ need_resched:
 		switch_count = &prev->nvcsw;
 	}
 
-	if (task_on_rq_queued(prev) || rq->skip_clock_update < 0)
+	if (task_on_rq_queued(prev))
 		update_rq_clock(rq);
 
 	next = pick_next_task(rq, prev);
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
-	rq->skip_clock_update = 0;
+	rq->clock_skip_update = 0;
 
 	if (likely(prev != next)) {
 		rq->nr_switches++;
@@ -2828,8 +2843,6 @@ need_resched:
 	post_schedule(rq);
 
 	sched_preempt_enable_no_resched();
-	if (need_resched())
-		goto need_resched;
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
@@ -2849,7 +2862,9 @@ asmlinkage __visible void __sched schedule(void)
 	struct task_struct *tsk = current;
 
 	sched_submit_work(tsk);
-	__schedule();
+	do {
+		__schedule();
+	} while (need_resched());
 }
 EXPORT_SYMBOL(schedule);
 
@@ -2884,6 +2899,21 @@ void __sched schedule_preempt_disabled(void)
 	preempt_disable();
 }
 
+static void preempt_schedule_common(void)
+{
+	do {
+		__preempt_count_add(PREEMPT_ACTIVE);
+		__schedule();
+		__preempt_count_sub(PREEMPT_ACTIVE);
+
+		/*
+		 * Check again in case we missed a preemption opportunity
+		 * between schedule and now.
+		 */
+		barrier();
+	} while (need_resched());
+}
+
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
@@ -2899,17 +2929,7 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
 	if (likely(!preemptible()))
 		return;
 
-	do {
-		__preempt_count_add(PREEMPT_ACTIVE);
-		__schedule();
-		__preempt_count_sub(PREEMPT_ACTIVE);
-
-		/*
-		 * Check again in case we missed a preemption opportunity
-		 * between schedule and now.
-		 */
-		barrier();
-	} while (need_resched());
+	preempt_schedule_common();
 }
 NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
@@ -3405,6 +3425,20 @@ static bool check_same_owner(struct task_struct *p)
 	return match;
 }
 
+static bool dl_param_changed(struct task_struct *p,
+		const struct sched_attr *attr)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+
+	if (dl_se->dl_runtime != attr->sched_runtime ||
+	    dl_se->dl_deadline != attr->sched_deadline ||
+	    dl_se->dl_period != attr->sched_period ||
+	    dl_se->flags != attr->sched_flags)
+		return true;
+
+	return false;
+}
+
 static int __sched_setscheduler(struct task_struct *p,
 				const struct sched_attr *attr,
 				bool user)
@@ -3533,7 +3567,7 @@ recheck:
 			goto change;
 		if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
 			goto change;
-		if (dl_policy(policy))
+		if (dl_policy(policy) && dl_param_changed(p, attr))
 			goto change;
 
 		p->sched_reset_on_fork = reset_on_fork;
@@ -4225,17 +4259,10 @@ SYSCALL_DEFINE0(sched_yield)
 	return 0;
 }
 
-static void __cond_resched(void)
-{
-	__preempt_count_add(PREEMPT_ACTIVE);
-	__schedule();
-	__preempt_count_sub(PREEMPT_ACTIVE);
-}
-
 int __sched _cond_resched(void)
 {
 	if (should_resched()) {
-		__cond_resched();
+		preempt_schedule_common();
 		return 1;
 	}
 	return 0;
@@ -4260,7 +4287,7 @@ int __cond_resched_lock(spinlock_t *lock)
 	if (spin_needbreak(lock) || resched) {
 		spin_unlock(lock);
 		if (resched)
-			__cond_resched();
+			preempt_schedule_common();
 		else
 			cpu_relax();
 		ret = 1;
@@ -4276,7 +4303,7 @@ int __sched __cond_resched_softirq(void)
 
 	if (should_resched()) {
 		local_bh_enable();
-		__cond_resched();
+		preempt_schedule_common();
 		local_bh_disable();
 		return 1;
 	}
@@ -4531,9 +4558,10 @@ void sched_show_task(struct task_struct *p)
 {
 	unsigned long free = 0;
 	int ppid;
-	unsigned state;
+	unsigned long state = p->state;
 
-	state = p->state ? __ffs(p->state) + 1 : 0;
+	if (state)
+		state = __ffs(state) + 1;
 	printk(KERN_INFO "%-15.15s %c", p->comm,
 		state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
 #if BITS_PER_LONG == 32
@@ -4766,7 +4794,7 @@ static struct rq *move_queued_task(struct task_struct *p, int new_cpu)
 
 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
-	if (p->sched_class && p->sched_class->set_cpus_allowed)
+	if (p->sched_class->set_cpus_allowed)
 		p->sched_class->set_cpus_allowed(p, new_mask);
 
 	cpumask_copy(&p->cpus_allowed, new_mask);
@@ -7275,6 +7303,11 @@ void __init sched_init(void)
 	atomic_inc(&init_mm.mm_count);
 	enter_lazy_tlb(&init_mm, current);
 
+	/*
+	 * During early bootup we pretend to be a normal task:
+	 */
+	current->sched_class = &fair_sched_class;
+
 	/*
 	 * Make us the idle thread. Technically, schedule() should not be
 	 * called from this thread, however somewhere below it might be,
@@ -7285,11 +7318,6 @@ void __init sched_init(void)
 
 	calc_load_update = jiffies + LOAD_FREQ;
 
-	/*
-	 * During early bootup we pretend to be a normal task:
-	 */
-	current->sched_class = &fair_sched_class;
-
 #ifdef CONFIG_SMP
 	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
 	/* May be allocated at isolcpus cmdline parse time */
@@ -7350,6 +7378,9 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 			in_atomic(), irqs_disabled(),
 			current->pid, current->comm);
 
+	if (task_stack_end_corrupted(current))
+		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
+
 	debug_show_held_locks(current);
 	if (irqs_disabled())
 		print_irqtrace_events(current);
@@ -107,7 +107,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	int best_cpu = -1;
 	const struct sched_dl_entity *dl_se = &p->dl;
 
-	if (later_mask && cpumask_and(later_mask, later_mask, cp->free_cpus)) {
+	if (later_mask &&
+	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
 		best_cpu = cpumask_any(later_mask);
 		goto out;
 	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
@@ -185,6 +186,26 @@ out:
 	raw_spin_unlock_irqrestore(&cp->lock, flags);
 }
 
+/*
+ * cpudl_set_freecpu - Set the cpudl.free_cpus
+ * @cp: the cpudl max-heap context
+ * @cpu: rd attached cpu
+ */
+void cpudl_set_freecpu(struct cpudl *cp, int cpu)
+{
+	cpumask_set_cpu(cpu, cp->free_cpus);
+}
+
+/*
+ * cpudl_clear_freecpu - Clear the cpudl.free_cpus
+ * @cp: the cpudl max-heap context
+ * @cpu: rd attached cpu
+ */
+void cpudl_clear_freecpu(struct cpudl *cp, int cpu)
+{
+	cpumask_clear_cpu(cpu, cp->free_cpus);
+}
+
 /*
  * cpudl_init - initialize the cpudl structure
  * @cp: the cpudl max-heap context
@@ -203,7 +224,7 @@ int cpudl_init(struct cpudl *cp)
 	if (!cp->elements)
 		return -ENOMEM;
 
-	if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
 		kfree(cp->elements);
 		return -ENOMEM;
 	}
@@ -211,8 +232,6 @@ int cpudl_init(struct cpudl *cp)
 	for_each_possible_cpu(i)
 		cp->elements[i].idx = IDX_INVALID;
 
-	cpumask_setall(cp->free_cpus);
-
 	return 0;
 }
 
@@ -24,6 +24,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	       struct cpumask *later_mask);
 void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
 int cpudl_init(struct cpudl *cp);
+void cpudl_set_freecpu(struct cpudl *cp, int cpu);
+void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
 void cpudl_cleanup(struct cpudl *cp);
 #endif /* CONFIG_SMP */
 
@@ -350,6 +350,11 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 		dl_se->runtime = pi_se->dl_runtime;
 	}
+
+	if (dl_se->dl_yielded)
+		dl_se->dl_yielded = 0;
+	if (dl_se->dl_throttled)
+		dl_se->dl_throttled = 0;
 }
 
 /*
@@ -536,23 +541,19 @@ again:
 
 	sched_clock_tick();
 	update_rq_clock(rq);
-	dl_se->dl_throttled = 0;
-	dl_se->dl_yielded = 0;
-	if (task_on_rq_queued(p)) {
-		enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
-		if (dl_task(rq->curr))
-			check_preempt_curr_dl(rq, p, 0);
-		else
-			resched_curr(rq);
+	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
+	if (dl_task(rq->curr))
+		check_preempt_curr_dl(rq, p, 0);
+	else
+		resched_curr(rq);
 #ifdef CONFIG_SMP
 	/*
 	 * Queueing this task back might have overloaded rq,
 	 * check if we need to kick someone away.
 	 */
 	if (has_pushable_dl_tasks(rq))
 		push_dl_task(rq);
 #endif
-	}
 unlock:
 	raw_spin_unlock(&rq->lock);
 
@@ -613,10 +614,9 @@ static void update_curr_dl(struct rq *rq)
 
 	dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec;
 	if (dl_runtime_exceeded(rq, dl_se)) {
+		dl_se->dl_throttled = 1;
 		__dequeue_task_dl(rq, curr, 0);
-		if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted)))
-			dl_se->dl_throttled = 1;
-		else
+		if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted)))
 			enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
 
 		if (!is_leftmost(curr, &rq->dl))
@@ -853,7 +853,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	 * its rq, the bandwidth timer callback (which clearly has not
 	 * run yet) will take care of this.
 	 */
-	if (p->dl.dl_throttled)
+	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH))
 		return;
 
 	enqueue_dl_entity(&p->dl, pi_se, flags);
@@ -1073,7 +1073,13 @@ static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
 {
 	update_curr_dl(rq);
 
-	if (hrtick_enabled(rq) && queued && p->dl.runtime > 0)
+	/*
+	 * Even when we have runtime, update_curr_dl() might have resulted in us
+	 * not being the leftmost task anymore. In that case NEED_RESCHED will
+	 * be set and schedule() will start a new hrtick for the next task.
+	 */
+	if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 &&
+	    is_leftmost(p, &rq->dl))
 		start_hrtick_dl(rq, p);
 }
 
@@ -1166,9 +1172,6 @@ static int find_later_rq(struct task_struct *task)
 	 * We have to consider system topology and task affinity
 	 * first, then we can look for a suitable cpu.
 	 */
-	cpumask_copy(later_mask, task_rq(task)->rd->span);
-	cpumask_and(later_mask, later_mask, cpu_active_mask);
-	cpumask_and(later_mask, later_mask, &task->cpus_allowed);
 	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
 			task, later_mask);
 	if (best_cpu == -1)
@@ -1563,6 +1566,7 @@ static void rq_online_dl(struct rq *rq)
 	if (rq->dl.overloaded)
 		dl_set_overload(rq);
 
+	cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
 	if (rq->dl.dl_nr_running > 0)
 		cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
 }
@@ -1574,6 +1578,7 @@ static void rq_offline_dl(struct rq *rq)
 		dl_clear_overload(rq);
 
 	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
+	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
 }
 
 void init_sched_dl_class(void)
@@ -305,6 +305,7 @@ do { \
 	PN(next_balance);
 	SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
 	PN(clock);
+	PN(clock_task);
 	P(cpu_load[0]);
 	P(cpu_load[1]);
 	P(cpu_load[2]);
@@ -676,7 +676,6 @@ void init_task_runnable_average(struct task_struct *p)
 {
 	u32 slice;
 
-	p->se.avg.decay_count = 0;
 	slice = sched_slice(task_cfs_rq(p), &p->se) >> 10;
 	p->se.avg.runnable_avg_sum = slice;
 	p->se.avg.runnable_avg_period = slice;
@@ -2574,11 +2573,11 @@ static inline u64 __synchronize_entity_decay(struct sched_entity *se)
 	u64 decays = atomic64_read(&cfs_rq->decay_counter);
 
 	decays -= se->avg.decay_count;
+	se->avg.decay_count = 0;
 	if (!decays)
 		return 0;
 
 	se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
-	se->avg.decay_count = 0;
 
 	return decays;
 }
@@ -5157,7 +5156,7 @@ static void yield_task_fair(struct rq *rq)
 		 * so we don't do microscopic update in schedule()
 		 * and double the fastpath cost.
 		 */
-		rq->skip_clock_update = 1;
+		rq_clock_skip_update(rq, true);
 	}
 
 	set_skip_buddy(se);
@@ -5949,8 +5948,8 @@ static unsigned long scale_rt_capacity(int cpu)
 	 */
 	age_stamp = ACCESS_ONCE(rq->age_stamp);
 	avg = ACCESS_ONCE(rq->rt_avg);
+	delta = __rq_clock_broken(rq) - age_stamp;
 
-	delta = rq_clock(rq) - age_stamp;
 	if (unlikely(delta < 0))
 		delta = 0;
 
@@ -47,7 +47,8 @@ static inline int cpu_idle_poll(void)
 	rcu_idle_enter();
 	trace_cpu_idle_rcuidle(0, smp_processor_id());
 	local_irq_enable();
-	while (!tif_need_resched())
+	while (!tif_need_resched() &&
+		(cpu_idle_force_poll || tick_check_broadcast_expired()))
 		cpu_relax();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 	rcu_idle_exit();
@@ -831,11 +831,14 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 				enqueue = 1;
 
 				/*
-				 * Force a clock update if the CPU was idle,
-				 * lest wakeup -> unthrottle time accumulate.
+				 * When we're idle and a woken (rt) task is
+				 * throttled check_preempt_curr() will set
+				 * skip_update and the time between the wakeup
+				 * and this unthrottle will get accounted as
+				 * 'runtime'.
 				 */
 				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
-					rq->skip_clock_update = -1;
+					rq_clock_skip_update(rq, false);
 			}
 			if (rt_rq->rt_time || rt_rq->rt_nr_running)
 				idle = 0;
@@ -1337,7 +1340,12 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 		     curr->prio <= p->prio)) {
 		int target = find_lowest_rq(p);
 
-		if (target != -1)
+		/*
+		 * Don't bother moving it if the destination CPU is
+		 * not running a lower priority task.
+		 */
+		if (target != -1 &&
+		    p->prio < cpu_rq(target)->rt.highest_prio.curr)
 			cpu = target;
 	}
 	rcu_read_unlock();
@@ -1614,6 +1622,16 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 
 		lowest_rq = cpu_rq(cpu);
 
+		if (lowest_rq->rt.highest_prio.curr <= task->prio) {
+			/*
+			 * Target rq has tasks of equal or higher priority,
+			 * retrying does not release any lock and is unlikely
+			 * to yield a different result.
+			 */
+			lowest_rq = NULL;
+			break;
+		}
+
 		/* if the prio of this runqueue changed, try again */
 		if (double_lock_balance(rq, lowest_rq)) {
 			/*
@@ -558,8 +558,6 @@ struct rq {
 #ifdef CONFIG_NO_HZ_FULL
 	unsigned long last_sched_tick;
 #endif
-	int skip_clock_update;
-
 	/* capture load from *all* tasks on this cpu: */
 	struct load_weight load;
 	unsigned long nr_load_updates;
@@ -588,6 +586,7 @@ struct rq {
 	unsigned long next_balance;
 	struct mm_struct *prev_mm;
 
+	unsigned int clock_skip_update;
 	u64 clock;
 	u64 clock_task;
 
@@ -687,16 +686,35 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 #define cpu_curr(cpu) (cpu_rq(cpu)->curr)
 #define raw_rq() raw_cpu_ptr(&runqueues)
 
+static inline u64 __rq_clock_broken(struct rq *rq)
+{
+	return ACCESS_ONCE(rq->clock);
+}
+
 static inline u64 rq_clock(struct rq *rq)
 {
+	lockdep_assert_held(&rq->lock);
 	return rq->clock;
 }
 
 static inline u64 rq_clock_task(struct rq *rq)
 {
+	lockdep_assert_held(&rq->lock);
 	return rq->clock_task;
 }
 
+#define RQCF_REQ_SKIP 0x01
+#define RQCF_ACT_SKIP 0x02
+
+static inline void rq_clock_skip_update(struct rq *rq, bool skip)
+{
+	lockdep_assert_held(&rq->lock);
+	if (skip)
+		rq->clock_skip_update |= RQCF_REQ_SKIP;
+	else
+		rq->clock_skip_update &= ~RQCF_REQ_SKIP;
+}
+
 #ifdef CONFIG_NUMA
 enum numa_topology_type {
 	NUMA_DIRECT,
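For readers following the rq->clock rework across the hunks above: a caller records a request to skip the next clock update with RQCF_REQ_SKIP, __schedule() promotes that request to RQCF_ACT_SKIP with a left shift, and update_rq_clock() only honours an active skip. The standalone program below mocks just that flag arithmetic in plain C; it uses no struct rq and no rq->lock, so it is an illustration of the bit scheme, not kernel code.

/* Standalone sketch of the REQ -> ACT skip-flag promotion shown above. */
#include <stdio.h>

#define RQCF_REQ_SKIP 0x01
#define RQCF_ACT_SKIP 0x02

int main(void)
{
	unsigned int clock_skip_update = 0;

	/* e.g. check_preempt_curr() or yield_task_fair(): request a skip. */
	clock_skip_update |= RQCF_REQ_SKIP;

	/* __schedule(): promote the request to an active skip. */
	clock_skip_update <<= 1;

	/* update_rq_clock(): only an *active* skip suppresses the update. */
	printf("skip clock update: %s\n",
	       (clock_skip_update & RQCF_ACT_SKIP) ? "yes" : "no");

	/* end of __schedule(): clear the state for the next cycle. */
	clock_skip_update = 0;
	return 0;
}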