From c3544e35ef382d2ae666464865c9ba779d2264d3 Mon Sep 17 00:00:00 2001
From: John Dias
Date: Thu, 15 Sep 2016 08:52:27 -0700
Subject: [PATCH 1/2] sched: avoid scheduling RT threads on cores currently handling softirqs

Bug: 31501544
Change-Id: I99dd7aaa12c11270b28dbabea484bcc8fb8ba0c1
Git-commit: 080ea011fd9f47315e1fc53185872ef813b59d00
Git-repo: https://android.googlesource.com/kernel/msm
[pkondeti@codeaurora.org: resolved minor merge conflicts and fixed checkpatch warnings]
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
---
 kernel/sched/cpupri.c | 37 +++++++++++++++++++++++++++++++++++--
 kernel/sched/rt.c     | 43 +++++++++++++++++++++++++++++++++++++------
 kernel/sched/sched.h  |  5 +++++
 3 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 981fcd7dc394..1d00cf8c00fa 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -27,6 +27,8 @@
  * of the License.
  */
 
+#include "sched.h"
+
 #include 
 #include 
 #include 
@@ -50,6 +52,27 @@ static int convert_prio(int prio)
 	return cpupri;
 }
 
+/**
+ * drop_nopreempt_cpus - remove a cpu from the mask if it is likely
+ * non-preemptible
+ * @lowest_mask: mask with selected CPUs (non-NULL)
+ */
+static void
+drop_nopreempt_cpus(struct cpumask *lowest_mask)
+{
+	unsigned int cpu = cpumask_first(lowest_mask);
+
+	while (cpu < nr_cpu_ids) {
+		/* unlocked access */
+		struct task_struct *task = READ_ONCE(cpu_rq(cpu)->curr);
+
+		if (task_may_not_preempt(task, cpu))
+			cpumask_clear_cpu(cpu, lowest_mask);
+
+		cpu = cpumask_next(cpu, lowest_mask);
+	}
+}
+
 /**
  * cpupri_find - find the best (lowest-pri) CPU in the system
  * @cp: The cpupri context
@@ -70,9 +93,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 {
 	int idx = 0;
 	int task_pri = convert_prio(p->prio);
+	bool drop_nopreempts = task_pri <= MAX_RT_PRIO;
 
 	BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES);
 
+retry:
 	for (idx = 0; idx < task_pri; idx++) {
 		struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
 		int skip = 0;
@@ -108,7 +133,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 
 		if (lowest_mask) {
 			cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
-
+			if (drop_nopreempts)
+				drop_nopreempt_cpus(lowest_mask);
 			/*
 			 * We have to ensure that we have at least one bit
 			 * still set in the array, since the map could have
@@ -123,7 +149,14 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 
 		return 1;
 	}
-
+	/*
+	 * If we can't find any preemptible CPUs, retry so we can
+	 * find the lowest-priority target and avoid priority inversion.
+	 */
+	if (drop_nopreempts) {
+		drop_nopreempts = false;
+		goto retry;
+	}
 	return 0;
 }
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 29345ed74069..338d019d0f25 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -5,6 +5,7 @@
 
 #include "sched.h"
 
+#include <linux/interrupt.h>
 #include 
 #include 
 #include 
@@ -1456,11 +1457,26 @@ select_task_rq_rt_hmp(struct task_struct *p, int cpu, int sd_flag, int flags)
 }
 #endif
 
+/*
+ * Return whether the task on the given cpu is currently non-preemptible
+ * while handling a softirq or is likely to block preemptions soon because
+ * it is a ksoftirq thread.
+ */
+bool
+task_may_not_preempt(struct task_struct *task, int cpu)
+{
+	struct task_struct *cpu_ksoftirqd = per_cpu(ksoftirqd, cpu);
+
+	return (task_thread_info(task)->preempt_count & SOFTIRQ_MASK) ||
+	       task == cpu_ksoftirqd;
+}
+
 static int
 select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	struct task_struct *curr;
 	struct rq *rq;
+	bool may_not_preempt;
 #ifdef CONFIG_SCHED_HMP
 	return select_task_rq_rt_hmp(p, cpu, sd_flag, flags);
 #endif
@@ -1476,7 +1492,12 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 	curr = READ_ONCE(rq->curr); /* unlocked access */
 
 	/*
-	 * If the current task on @p's runqueue is an RT task, then
+	 * If the current task on @p's runqueue is a softirq task,
+	 * it may run without preemption for a time that is
+	 * ill-suited for a waiting RT task. Therefore, try to
+	 * wake this RT task on another runqueue.
+	 *
+	 * Also, if the current task on @p's runqueue is an RT task, then
 	 * try to see if we can wake this RT task up on another
 	 * runqueue. Otherwise simply start this RT task
 	 * on its current runqueue.
@@ -1497,17 +1518,22 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 	 * This test is optimistic, if we get it wrong the load-balancer
 	 * will have to sort it out.
 	 */
-	if (curr && unlikely(rt_task(curr)) &&
+	may_not_preempt = task_may_not_preempt(curr, cpu);
+	if (may_not_preempt ||
+	    (unlikely(rt_task(curr)) &&
 	    (curr->nr_cpus_allowed < 2 ||
-	    curr->prio <= p->prio)) {
+	     curr->prio <= p->prio))) {
 		int target = find_lowest_rq(p);
 
 		/*
-		 * Don't bother moving it if the destination CPU is
-		 * not running a lower priority task.
+		 * If cpu is non-preemptible, prefer remote cpu
+		 * even if it's running a higher-prio task.
+		 * Otherwise: Don't bother moving it if the
+		 * destination CPU is not running a lower priority task.
 		 */
 		if (target != -1 &&
-		    p->prio < cpu_rq(target)->rt.highest_prio.curr)
+		    (may_not_preempt ||
+		     p->prio < cpu_rq(target)->rt.highest_prio.curr))
 			cpu = target;
 	}
 	rcu_read_unlock();
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a6733b57bcbc..1580af0f0a35 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2671,6 +2671,11 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 	__release(rq2->lock);
 }
 
+/*
+ * task_may_not_preempt - check whether a task may not be preemptible soon
+ */
+extern bool task_may_not_preempt(struct task_struct *task, int cpu);
+
 #else /* CONFIG_SMP */
 
 /*

From 25e8ecf9daca3078043039ab56d4cb9e1a08bbbb Mon Sep 17 00:00:00 2001
From: John Dias
Date: Wed, 5 Oct 2016 15:11:40 -0700
Subject: [PATCH 2/2] sched: avoid migrating when softint on tgt cpu should be short

The scheduling change (bug 31501544) to avoid putting RT threads on
cores that are handling softints was catching cases where there was no
reason to believe the softint would take a long time, resulting in
unnecessary migration overhead. This patch limits the migration to
cases where the core has a softint that is actually likely to take a
long time, as opposed to the RCU, SCHED, and TIMER softints that are
rather quick.

Bug: 31752786
Change-Id: Ib4e179f1e15c736b2fdba31070494e357e9fbbe2
Git-commit: ce05770bd37b8065b61ef650108ecef2b97b148b
Git-repo: https://android.googlesource.com/kernel/msm
[pkondeti@codeaurora.org: resolved minor merge conflicts]
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
---
 include/linux/interrupt.h |  7 +++++++
 kernel/sched/rt.c         | 12 ++++++++----
 kernel/softirq.c          |  9 +++++++++
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index ad16809c8596..b3b1af8a8f8c 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -423,6 +423,12 @@ enum
 };
 
 #define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ))
+/* Softirqs whose handling might be long: */
+#define LONG_SOFTIRQ_MASK ((1 << NET_TX_SOFTIRQ) | \
+			   (1 << NET_RX_SOFTIRQ) | \
+			   (1 << BLOCK_SOFTIRQ) | \
+			   (1 << BLOCK_IOPOLL_SOFTIRQ) | \
+			   (1 << TASKLET_SOFTIRQ))
 
 /* map softirq index to softirq name. update 'softirq_to_name' in
  * kernel/softirq.c when adding a new softirq.
@@ -458,6 +464,7 @@ extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
 
 DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
+DECLARE_PER_CPU(__u32, active_softirqs);
 
 static inline struct task_struct *this_cpu_ksoftirqd(void)
 {
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 338d019d0f25..4af75994f283 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1459,16 +1459,20 @@ select_task_rq_rt_hmp(struct task_struct *p, int cpu, int sd_flag, int flags)
 
 /*
  * Return whether the task on the given cpu is currently non-preemptible
- * while handling a softirq or is likely to block preemptions soon because
- * it is a ksoftirq thread.
+ * while handling a potentially long softint, or if the task is likely
+ * to block preemptions soon because it is a ksoftirq thread that is
+ * handling slow softints.
  */
 bool
 task_may_not_preempt(struct task_struct *task, int cpu)
 {
+	__u32 softirqs = per_cpu(active_softirqs, cpu) |
+			 __IRQ_STAT(cpu, __softirq_pending);
 	struct task_struct *cpu_ksoftirqd = per_cpu(ksoftirqd, cpu);
 
-	return (task_thread_info(task)->preempt_count & SOFTIRQ_MASK) ||
-	       task == cpu_ksoftirqd;
+	return ((softirqs & LONG_SOFTIRQ_MASK) &&
+		(task == cpu_ksoftirqd ||
+		 task_thread_info(task)->preempt_count & SOFTIRQ_MASK));
 }
 
 static int
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 479e4436f787..39ffd41594ce 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -57,6 +57,13 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp
 
 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
 
+/*
+ * active_softirqs -- per cpu, a mask of softirqs that are being handled,
+ * with the expectation that approximate answers are acceptable and
+ * therefore no synchronization is used.
+ */
+DEFINE_PER_CPU(__u32, active_softirqs);
+
 const char * const softirq_to_name[NR_SOFTIRQS] = {
 	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
 	"TASKLET", "SCHED", "HRTIMER", "RCU"
@@ -253,6 +260,7 @@ asmlinkage __visible void __do_softirq(void)
 restart:
 	/* Reset the pending bitmask before enabling irqs */
 	set_softirq_pending(0);
+	__this_cpu_write(active_softirqs, pending);
 
 	local_irq_enable();
 
@@ -282,6 +290,7 @@ restart:
 		pending >>= softirq_bit;
 	}
 
+	__this_cpu_write(active_softirqs, 0);
 	rcu_bh_qs();
 	local_irq_disable();
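
For readers skimming the series, the following standalone user-space sketch (not part of either patch) condenses what the two changes add up to: a CPU becomes a poor target for an RT wakeup only when its current task is inside softirq handling or is ksoftirqd, and one of the potentially long softirqs is active or pending on that CPU. The softirq numbering and LONG_SOFTIRQ_MASK mirror the diffs above; the mock_cpu structure and the small test harness are hypothetical stand-ins for the kernel's per-cpu state.

#include <stdbool.h>
#include <stdio.h>

/* Softirq numbering as in include/linux/interrupt.h (see softirq_to_name above). */
enum {
	HI_SOFTIRQ = 0, TIMER_SOFTIRQ, NET_TX_SOFTIRQ, NET_RX_SOFTIRQ,
	BLOCK_SOFTIRQ, BLOCK_IOPOLL_SOFTIRQ, TASKLET_SOFTIRQ,
	SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, RCU_SOFTIRQ,
};

/* Same "potentially long" set as the LONG_SOFTIRQ_MASK added by patch 2. */
#define LONG_SOFTIRQ_MASK ((1 << NET_TX_SOFTIRQ) | \
			   (1 << NET_RX_SOFTIRQ) | \
			   (1 << BLOCK_SOFTIRQ) | \
			   (1 << BLOCK_IOPOLL_SOFTIRQ) | \
			   (1 << TASKLET_SOFTIRQ))

/* Hypothetical stand-in for the per-cpu state the kernel consults. */
struct mock_cpu {
	unsigned int active_softirqs;  /* what __do_softirq() is handling now */
	unsigned int pending_softirqs; /* __IRQ_STAT(cpu, __softirq_pending) */
	bool curr_in_softirq;          /* SOFTIRQ_MASK bits of curr's preempt_count */
	bool curr_is_ksoftirqd;        /* rq->curr == per_cpu(ksoftirqd, cpu) */
};

/* Mirrors the final (patch 2) task_may_not_preempt() decision. */
static bool cpu_may_not_preempt(const struct mock_cpu *c)
{
	unsigned int softirqs = c->active_softirqs | c->pending_softirqs;

	return (softirqs & LONG_SOFTIRQ_MASK) &&
	       (c->curr_is_ksoftirqd || c->curr_in_softirq);
}

int main(void)
{
	/* CPU inside a NET_RX softirq: skipped by cpupri_find()/find_lowest_rq(). */
	struct mock_cpu busy  = { .active_softirqs = 1u << NET_RX_SOFTIRQ,
				  .curr_in_softirq = true };
	/* CPU inside only a TIMER softirq: still a fine RT wakeup target. */
	struct mock_cpu quick = { .active_softirqs = 1u << TIMER_SOFTIRQ,
				  .curr_in_softirq = true };

	printf("busy:  may_not_preempt=%d\n", cpu_may_not_preempt(&busy));
	printf("quick: may_not_preempt=%d\n", cpu_may_not_preempt(&quick));
	return 0;
}

With patch 1 alone, both CPUs would be avoided, since its check is only "in softirq or running ksoftirqd"; the LONG_SOFTIRQ_MASK test added in patch 2 is what lets the TIMER case keep the RT task local.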