Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "The main scheduler changes in this cycle were:

   - various sched/deadline fixes and enhancements

   - rescheduling latency fixes/cleanups

   - rework the rq->clock code to be more consistent and more robust

   - minor micro-optimizations

   - ->avg.decay_count fixes

   - add a stack overflow check to might_sleep()

   - idle-poll handler fix, possibly resulting in power savings

   - misc smaller updates and fixes"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/Documentation: Remove unneeded word
  sched/wait: Introduce wait_on_bit_timeout()
  sched: Pull resched loop to __schedule() callers
  sched/deadline: Remove cpu_active_mask from cpudl_find()
  sched: Fix hrtick_start() on UP
  sched/deadline: Avoid pointless __setscheduler()
  sched/deadline: Fix stale yield state
  sched/deadline: Fix hrtick for a non-leftmost task
  sched/deadline: Modify cpudl::free_cpus to reflect rd->online
  sched/idle: Add missing checks to the exit condition of cpu_idle_poll()
  sched: Fix missing preemption opportunity
  sched/rt: Reduce rq lock contention by eliminating locking of non-feasible target
  sched/debug: Print rq->clock_task
  sched/core: Rework rq->clock update skips
  sched/core: Validate rq_clock*() serialization
  sched/core: Remove check of p->sched_class
  sched/fair: Fix sched_entity::avg::decay_count initialization
  sched/debug: Fix potential call to __ffs(0) in sched_show_task()
  sched/debug: Check for stack overflow in ___might_sleep()
  sched/fair: Fix the dealing with decay_count in __synchronize_entity_decay()
commit 5b9b28a63f
11 changed files with 197 additions and 77 deletions
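One of the changes merged here introduces wait_on_bit_timeout() (see the first hunk below, next to wait_on_bit_io()). As a rough illustration of how a caller might use the new helper, here is a minimal driver-style sketch; the my_dev structure, the MY_FLAG_BUSY bit and the -ETIMEDOUT policy are assumptions made for this sketch, not code from the series.

/*
 * Illustrative only: wait up to one second for a hypothetical busy bit to
 * clear, sleeping interruptibly. Only wait_on_bit_timeout() itself comes
 * from this pull; everything named my_* is invented for the example.
 */
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

#define MY_FLAG_BUSY	0		/* hypothetical flag bit */

struct my_dev {
	unsigned long flags;		/* MY_FLAG_BUSY set while busy */
};

static int my_dev_wait_idle(struct my_dev *dev)
{
	/*
	 * Returns 0 if the bit cleared before the timeout, non-zero on
	 * timeout or on a signal (TASK_INTERRUPTIBLE permits wakeups).
	 */
	if (wait_on_bit_timeout(&dev->flags, MY_FLAG_BUSY,
				TASK_INTERRUPTIBLE, HZ))
		return -ETIMEDOUT;

	return 0;
}

/* The side that finishes the work wakes waiters the usual way: */
static void my_dev_done(struct my_dev *dev)
{
	clear_bit_unlock(MY_FLAG_BUSY, &dev->flags);
	smp_mb__after_atomic();
	wake_up_bit(&dev->flags, MY_FLAG_BUSY);
}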
@@ -989,6 +989,32 @@ wait_on_bit_io(void *word, int bit, unsigned mode)
 			       mode);
 }
 
+/**
+ * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ * @timeout: timeout, in jiffies
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared. This is similar to wait_on_bit(), except also takes a
+ * timeout parameter.
+ *
+ * Returned value will be zero if the bit was cleared before the
+ * @timeout elapsed, or non-zero if the @timeout elapsed or process
+ * received a signal and the mode permitted wakeup on that signal.
+ */
+static inline int
+wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_timeout(word, bit,
+					       bit_wait_timeout,
+					       mode, timeout);
+}
+
 /**
  * wait_on_bit_action - wait for a bit to be cleared
  * @word: the word being waited on, a kernel virtual address
@@ -81,7 +81,7 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
  * The mutex must later on be released by the same task that
  * acquired it. Recursive locking is not allowed. The task
  * may not exit without first unlocking the mutex. Also, kernel
- * memory where the mutex resides mutex must not be freed with
+ * memory where the mutex resides must not be freed with
  * the mutex still locked. The mutex must first be initialized
  * (or statically defined) before it can be locked. memset()-ing
  * the mutex to 0 is not allowed.
@@ -119,7 +119,9 @@ void update_rq_clock(struct rq *rq)
 {
 	s64 delta;
 
-	if (rq->skip_clock_update > 0)
+	lockdep_assert_held(&rq->lock);
+
+	if (rq->clock_skip_update & RQCF_ACT_SKIP)
 		return;
 
 	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
@@ -490,6 +492,11 @@ static __init void init_hrtick(void)
  */
 void hrtick_start(struct rq *rq, u64 delay)
 {
+	/*
+	 * Don't schedule slices shorter than 10000ns, that just
+	 * doesn't make sense. Rely on vruntime for fairness.
+	 */
+	delay = max_t(u64, delay, 10000LL);
 	__hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
 			HRTIMER_MODE_REL_PINNED, 0);
 }
@@ -1046,7 +1053,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	 * this case, we can save a useless back to back clock update.
 	 */
 	if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
-		rq->skip_clock_update = 1;
+		rq_clock_skip_update(rq, true);
 }
 
 #ifdef CONFIG_SMP
@@ -1836,6 +1843,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.prev_sum_exec_runtime = 0;
 	p->se.nr_migrations = 0;
 	p->se.vruntime = 0;
+#ifdef CONFIG_SMP
+	p->se.avg.decay_count = 0;
+#endif
 	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_SCHEDSTATS
@@ -2755,6 +2765,10 @@ again:
  * - explicit schedule() call
  * - return from syscall or exception to user-space
  * - return from interrupt-handler to user-space
+ *
+ * WARNING: all callers must re-check need_resched() afterward and reschedule
+ * accordingly in case an event triggered the need for rescheduling (such as
+ * an interrupt waking up a task) while preemption was disabled in __schedule().
  */
 static void __sched __schedule(void)
 {
@@ -2763,7 +2777,6 @@ static void __sched __schedule(void)
 	struct rq *rq;
 	int cpu;
 
-need_resched:
 	preempt_disable();
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
@@ -2783,6 +2796,8 @@ need_resched:
 	smp_mb__before_spinlock();
 	raw_spin_lock_irq(&rq->lock);
 
+	rq->clock_skip_update <<= 1; /* promote REQ to ACT */
+
 	switch_count = &prev->nivcsw;
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		if (unlikely(signal_pending_state(prev->state, prev))) {
@@ -2807,13 +2822,13 @@ need_resched:
 		switch_count = &prev->nvcsw;
 	}
 
-	if (task_on_rq_queued(prev) || rq->skip_clock_update < 0)
+	if (task_on_rq_queued(prev))
 		update_rq_clock(rq);
 
 	next = pick_next_task(rq, prev);
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
-	rq->skip_clock_update = 0;
+	rq->clock_skip_update = 0;
 
 	if (likely(prev != next)) {
 		rq->nr_switches++;
@@ -2828,8 +2843,6 @@ need_resched:
 	post_schedule(rq);
 
 	sched_preempt_enable_no_resched();
-	if (need_resched())
-		goto need_resched;
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
@@ -2849,7 +2862,9 @@ asmlinkage __visible void __sched schedule(void)
 	struct task_struct *tsk = current;
 
 	sched_submit_work(tsk);
-	__schedule();
+	do {
+		__schedule();
+	} while (need_resched());
 }
 EXPORT_SYMBOL(schedule);
 
@@ -2884,6 +2899,21 @@ void __sched schedule_preempt_disabled(void)
 	preempt_disable();
 }
 
+static void preempt_schedule_common(void)
+{
+	do {
+		__preempt_count_add(PREEMPT_ACTIVE);
+		__schedule();
+		__preempt_count_sub(PREEMPT_ACTIVE);
+
+		/*
+		 * Check again in case we missed a preemption opportunity
+		 * between schedule and now.
+		 */
+		barrier();
+	} while (need_resched());
+}
+
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
@@ -2899,17 +2929,7 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
 	if (likely(!preemptible()))
 		return;
 
-	do {
-		__preempt_count_add(PREEMPT_ACTIVE);
-		__schedule();
-		__preempt_count_sub(PREEMPT_ACTIVE);
-
-		/*
-		 * Check again in case we missed a preemption opportunity
-		 * between schedule and now.
-		 */
-		barrier();
-	} while (need_resched());
+	preempt_schedule_common();
 }
 NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
@@ -3405,6 +3425,20 @@ static bool check_same_owner(struct task_struct *p)
 	return match;
 }
 
+static bool dl_param_changed(struct task_struct *p,
+		const struct sched_attr *attr)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+
+	if (dl_se->dl_runtime != attr->sched_runtime ||
+	    dl_se->dl_deadline != attr->sched_deadline ||
+	    dl_se->dl_period != attr->sched_period ||
+	    dl_se->flags != attr->sched_flags)
+		return true;
+
+	return false;
+}
+
 static int __sched_setscheduler(struct task_struct *p,
 				const struct sched_attr *attr,
 				bool user)
@@ -3533,7 +3567,7 @@ recheck:
 			goto change;
 		if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
 			goto change;
-		if (dl_policy(policy))
+		if (dl_policy(policy) && dl_param_changed(p, attr))
 			goto change;
 
 		p->sched_reset_on_fork = reset_on_fork;
@@ -4225,17 +4259,10 @@ SYSCALL_DEFINE0(sched_yield)
 	return 0;
 }
 
-static void __cond_resched(void)
-{
-	__preempt_count_add(PREEMPT_ACTIVE);
-	__schedule();
-	__preempt_count_sub(PREEMPT_ACTIVE);
-}
-
 int __sched _cond_resched(void)
 {
 	if (should_resched()) {
-		__cond_resched();
+		preempt_schedule_common();
 		return 1;
 	}
 	return 0;
@@ -4260,7 +4287,7 @@ int __cond_resched_lock(spinlock_t *lock)
 	if (spin_needbreak(lock) || resched) {
 		spin_unlock(lock);
 		if (resched)
-			__cond_resched();
+			preempt_schedule_common();
 		else
 			cpu_relax();
 		ret = 1;
@@ -4276,7 +4303,7 @@ int __sched __cond_resched_softirq(void)
 
 	if (should_resched()) {
 		local_bh_enable();
-		__cond_resched();
+		preempt_schedule_common();
 		local_bh_disable();
 		return 1;
 	}
@@ -4531,9 +4558,10 @@ void sched_show_task(struct task_struct *p)
 {
 	unsigned long free = 0;
 	int ppid;
-	unsigned state;
+	unsigned long state = p->state;
 
-	state = p->state ? __ffs(p->state) + 1 : 0;
+	if (state)
+		state = __ffs(state) + 1;
 	printk(KERN_INFO "%-15.15s %c", p->comm,
 		state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
 #if BITS_PER_LONG == 32
@@ -4766,7 +4794,7 @@ static struct rq *move_queued_task(struct task_struct *p, int new_cpu)
 
 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
-	if (p->sched_class && p->sched_class->set_cpus_allowed)
+	if (p->sched_class->set_cpus_allowed)
 		p->sched_class->set_cpus_allowed(p, new_mask);
 
 	cpumask_copy(&p->cpus_allowed, new_mask);
@@ -7275,6 +7303,11 @@ void __init sched_init(void)
 	atomic_inc(&init_mm.mm_count);
 	enter_lazy_tlb(&init_mm, current);
 
+	/*
+	 * During early bootup we pretend to be a normal task:
+	 */
+	current->sched_class = &fair_sched_class;
+
 	/*
 	 * Make us the idle thread. Technically, schedule() should not be
 	 * called from this thread, however somewhere below it might be,
@@ -7285,11 +7318,6 @@ void __init sched_init(void)
 
 	calc_load_update = jiffies + LOAD_FREQ;
 
-	/*
-	 * During early bootup we pretend to be a normal task:
-	 */
-	current->sched_class = &fair_sched_class;
-
 #ifdef CONFIG_SMP
 	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
 	/* May be allocated at isolcpus cmdline parse time */
@@ -7350,6 +7378,9 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 			in_atomic(), irqs_disabled(),
 			current->pid, current->comm);
 
+	if (task_stack_end_corrupted(current))
+		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
+
 	debug_show_held_locks(current);
 	if (irqs_disabled())
 		print_irqtrace_events(current);
@@ -107,7 +107,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	int best_cpu = -1;
 	const struct sched_dl_entity *dl_se = &p->dl;
 
-	if (later_mask && cpumask_and(later_mask, later_mask, cp->free_cpus)) {
+	if (later_mask &&
+	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
 		best_cpu = cpumask_any(later_mask);
 		goto out;
 	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
@@ -185,6 +186,26 @@ out:
 	raw_spin_unlock_irqrestore(&cp->lock, flags);
 }
 
+/*
+ * cpudl_set_freecpu - Set the cpudl.free_cpus
+ * @cp: the cpudl max-heap context
+ * @cpu: rd attached cpu
+ */
+void cpudl_set_freecpu(struct cpudl *cp, int cpu)
+{
+	cpumask_set_cpu(cpu, cp->free_cpus);
+}
+
+/*
+ * cpudl_clear_freecpu - Clear the cpudl.free_cpus
+ * @cp: the cpudl max-heap context
+ * @cpu: rd attached cpu
+ */
+void cpudl_clear_freecpu(struct cpudl *cp, int cpu)
+{
+	cpumask_clear_cpu(cpu, cp->free_cpus);
+}
+
 /*
  * cpudl_init - initialize the cpudl structure
  * @cp: the cpudl max-heap context
@@ -203,7 +224,7 @@ int cpudl_init(struct cpudl *cp)
 	if (!cp->elements)
 		return -ENOMEM;
 
-	if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
 		kfree(cp->elements);
 		return -ENOMEM;
 	}
@@ -211,8 +232,6 @@ int cpudl_init(struct cpudl *cp)
 	for_each_possible_cpu(i)
 		cp->elements[i].idx = IDX_INVALID;
 
-	cpumask_setall(cp->free_cpus);
-
 	return 0;
 }
 
@@ -24,6 +24,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	       struct cpumask *later_mask);
 void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
 int cpudl_init(struct cpudl *cp);
+void cpudl_set_freecpu(struct cpudl *cp, int cpu);
+void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
 void cpudl_cleanup(struct cpudl *cp);
 #endif /* CONFIG_SMP */
 
@@ -350,6 +350,11 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 		dl_se->runtime = pi_se->dl_runtime;
 	}
+
+	if (dl_se->dl_yielded)
+		dl_se->dl_yielded = 0;
+	if (dl_se->dl_throttled)
+		dl_se->dl_throttled = 0;
 }
 
 /*
@@ -536,23 +541,19 @@ again:
 
 	sched_clock_tick();
 	update_rq_clock(rq);
-	dl_se->dl_throttled = 0;
-	dl_se->dl_yielded = 0;
-	if (task_on_rq_queued(p)) {
-		enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
-		if (dl_task(rq->curr))
-			check_preempt_curr_dl(rq, p, 0);
-		else
-			resched_curr(rq);
+	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
+	if (dl_task(rq->curr))
+		check_preempt_curr_dl(rq, p, 0);
+	else
+		resched_curr(rq);
 #ifdef CONFIG_SMP
 	/*
 	 * Queueing this task back might have overloaded rq,
 	 * check if we need to kick someone away.
 	 */
 	if (has_pushable_dl_tasks(rq))
 		push_dl_task(rq);
 #endif
-	}
 unlock:
 	raw_spin_unlock(&rq->lock);
 
@@ -613,10 +614,9 @@ static void update_curr_dl(struct rq *rq)
 
 	dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec;
 	if (dl_runtime_exceeded(rq, dl_se)) {
+		dl_se->dl_throttled = 1;
 		__dequeue_task_dl(rq, curr, 0);
-		if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted)))
-			dl_se->dl_throttled = 1;
-		else
+		if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted)))
 			enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
 
 		if (!is_leftmost(curr, &rq->dl))
@@ -853,7 +853,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	 * its rq, the bandwidth timer callback (which clearly has not
 	 * run yet) will take care of this.
 	 */
-	if (p->dl.dl_throttled)
+	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH))
 		return;
 
 	enqueue_dl_entity(&p->dl, pi_se, flags);
@@ -1073,7 +1073,13 @@ static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
 {
 	update_curr_dl(rq);
 
-	if (hrtick_enabled(rq) && queued && p->dl.runtime > 0)
+	/*
+	 * Even when we have runtime, update_curr_dl() might have resulted in us
+	 * not being the leftmost task anymore. In that case NEED_RESCHED will
+	 * be set and schedule() will start a new hrtick for the next task.
+	 */
+	if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 &&
+	    is_leftmost(p, &rq->dl))
 		start_hrtick_dl(rq, p);
 }
 
@@ -1166,9 +1172,6 @@ static int find_later_rq(struct task_struct *task)
 	 * We have to consider system topology and task affinity
 	 * first, then we can look for a suitable cpu.
 	 */
-	cpumask_copy(later_mask, task_rq(task)->rd->span);
-	cpumask_and(later_mask, later_mask, cpu_active_mask);
-	cpumask_and(later_mask, later_mask, &task->cpus_allowed);
 	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
 			task, later_mask);
 	if (best_cpu == -1)
@@ -1563,6 +1566,7 @@ static void rq_online_dl(struct rq *rq)
 	if (rq->dl.overloaded)
 		dl_set_overload(rq);
 
+	cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
 	if (rq->dl.dl_nr_running > 0)
 		cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
 }
@@ -1574,6 +1578,7 @@ static void rq_offline_dl(struct rq *rq)
 		dl_clear_overload(rq);
 
 	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
+	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
 }
 
 void init_sched_dl_class(void)
@@ -305,6 +305,7 @@ do { \
 	PN(next_balance);
 	SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
 	PN(clock);
+	PN(clock_task);
 	P(cpu_load[0]);
 	P(cpu_load[1]);
 	P(cpu_load[2]);
@@ -676,7 +676,6 @@ void init_task_runnable_average(struct task_struct *p)
 {
 	u32 slice;
 
-	p->se.avg.decay_count = 0;
 	slice = sched_slice(task_cfs_rq(p), &p->se) >> 10;
 	p->se.avg.runnable_avg_sum = slice;
 	p->se.avg.runnable_avg_period = slice;
@@ -2574,11 +2573,11 @@ static inline u64 __synchronize_entity_decay(struct sched_entity *se)
 	u64 decays = atomic64_read(&cfs_rq->decay_counter);
 
 	decays -= se->avg.decay_count;
+	se->avg.decay_count = 0;
 	if (!decays)
 		return 0;
 
 	se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
-	se->avg.decay_count = 0;
 
 	return decays;
 }
@@ -5157,7 +5156,7 @@ static void yield_task_fair(struct rq *rq)
 		 * so we don't do microscopic update in schedule()
 		 * and double the fastpath cost.
 		 */
-		rq->skip_clock_update = 1;
+		rq_clock_skip_update(rq, true);
 	}
 
 	set_skip_buddy(se);
@@ -5949,8 +5948,8 @@ static unsigned long scale_rt_capacity(int cpu)
 	 */
 	age_stamp = ACCESS_ONCE(rq->age_stamp);
 	avg = ACCESS_ONCE(rq->rt_avg);
+	delta = __rq_clock_broken(rq) - age_stamp;
 
-	delta = rq_clock(rq) - age_stamp;
 	if (unlikely(delta < 0))
 		delta = 0;
 
@@ -47,7 +47,8 @@ static inline int cpu_idle_poll(void)
 	rcu_idle_enter();
 	trace_cpu_idle_rcuidle(0, smp_processor_id());
 	local_irq_enable();
-	while (!tif_need_resched())
+	while (!tif_need_resched() &&
+		(cpu_idle_force_poll || tick_check_broadcast_expired()))
 		cpu_relax();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 	rcu_idle_exit();
@@ -831,11 +831,14 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 				enqueue = 1;
 
 				/*
-				 * Force a clock update if the CPU was idle,
-				 * lest wakeup -> unthrottle time accumulate.
+				 * When we're idle and a woken (rt) task is
+				 * throttled check_preempt_curr() will set
+				 * skip_update and the time between the wakeup
+				 * and this unthrottle will get accounted as
+				 * 'runtime'.
 				 */
 				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
-					rq->skip_clock_update = -1;
+					rq_clock_skip_update(rq, false);
 			}
 			if (rt_rq->rt_time || rt_rq->rt_nr_running)
 				idle = 0;
@@ -1337,7 +1340,12 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 		     curr->prio <= p->prio)) {
 		int target = find_lowest_rq(p);
 
-		if (target != -1)
+		/*
+		 * Don't bother moving it if the destination CPU is
+		 * not running a lower priority task.
+		 */
+		if (target != -1 &&
+		    p->prio < cpu_rq(target)->rt.highest_prio.curr)
 			cpu = target;
 	}
 	rcu_read_unlock();
@@ -1614,6 +1622,16 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 
 		lowest_rq = cpu_rq(cpu);
 
+		if (lowest_rq->rt.highest_prio.curr <= task->prio) {
+			/*
+			 * Target rq has tasks of equal or higher priority,
+			 * retrying does not release any lock and is unlikely
+			 * to yield a different result.
+			 */
+			lowest_rq = NULL;
+			break;
+		}
+
 		/* if the prio of this runqueue changed, try again */
 		if (double_lock_balance(rq, lowest_rq)) {
 			/*
@@ -558,8 +558,6 @@ struct rq {
 #ifdef CONFIG_NO_HZ_FULL
 	unsigned long last_sched_tick;
 #endif
-	int skip_clock_update;
-
 	/* capture load from *all* tasks on this cpu: */
 	struct load_weight load;
 	unsigned long nr_load_updates;
@@ -588,6 +586,7 @@ struct rq {
 	unsigned long next_balance;
 	struct mm_struct *prev_mm;
 
+	unsigned int clock_skip_update;
 	u64 clock;
 	u64 clock_task;
 
@@ -687,16 +686,35 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 #define cpu_curr(cpu) (cpu_rq(cpu)->curr)
 #define raw_rq() raw_cpu_ptr(&runqueues)
 
+static inline u64 __rq_clock_broken(struct rq *rq)
+{
+	return ACCESS_ONCE(rq->clock);
+}
+
 static inline u64 rq_clock(struct rq *rq)
 {
+	lockdep_assert_held(&rq->lock);
 	return rq->clock;
 }
 
 static inline u64 rq_clock_task(struct rq *rq)
 {
+	lockdep_assert_held(&rq->lock);
 	return rq->clock_task;
 }
 
+#define RQCF_REQ_SKIP 0x01
+#define RQCF_ACT_SKIP 0x02
+
+static inline void rq_clock_skip_update(struct rq *rq, bool skip)
+{
+	lockdep_assert_held(&rq->lock);
+	if (skip)
+		rq->clock_skip_update |= RQCF_REQ_SKIP;
+	else
+		rq->clock_skip_update &= ~RQCF_REQ_SKIP;
+}
+
 #ifdef CONFIG_NUMA
 enum numa_topology_type {
 	NUMA_DIRECT,
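For readers following the rq->clock rework across the hunks above: a caller records a request to skip the next clock update with RQCF_REQ_SKIP, __schedule() promotes that request to RQCF_ACT_SKIP with a left shift, and update_rq_clock() only honours an active skip. The standalone program below mocks just that flag arithmetic in plain C; it uses no struct rq and no rq->lock, so it is an illustration of the bit scheme, not kernel code.

/* Standalone sketch of the REQ -> ACT skip-flag promotion shown above. */
#include <stdio.h>

#define RQCF_REQ_SKIP 0x01
#define RQCF_ACT_SKIP 0x02

int main(void)
{
	unsigned int clock_skip_update = 0;

	/* e.g. check_preempt_curr() or yield_task_fair(): request a skip. */
	clock_skip_update |= RQCF_REQ_SKIP;

	/* __schedule(): promote the request to an active skip. */
	clock_skip_update <<= 1;

	/* update_rq_clock(): only an *active* skip suppresses the update. */
	printf("skip clock update: %s\n",
	       (clock_skip_update & RQCF_ACT_SKIP) ? "yes" : "no");

	/* end of __schedule(): clear the state for the next cycle. */
	clock_skip_update = 0;
	return 0;
}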