From b40bf941f61756bcca03a818b4c8fa857612f8cd Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Mon, 2 Mar 2015 11:25:17 -0800
Subject: [PATCH] sched: add scheduling latency tracking procfs node

Add a new procfs node /proc/sys/kernel/sched_max_latency_us to track
the worst scheduling latency.  It provides an easier way to identify
the maximum scheduling latency seen across the CPUs.

Change-Id: I6e435bbf825c0a4dff2eded4a1256fb93f108d0e
[joonwoop@codeaurora.org: fixed conflict in update_stats_wait_end().]
Signed-off-by: Joonwoo Park
---
 Documentation/scheduler/sched-stats.txt |  7 +++
 include/linux/sched/sysctl.h            |  4 ++
 kernel/sched/fair.c                     | 63 +++++++++++++++++++++++++
 kernel/sysctl.c                         |  5 ++
 4 files changed, 79 insertions(+)

diff --git a/Documentation/scheduler/sched-stats.txt b/Documentation/scheduler/sched-stats.txt
index 30635c5ac2f5..158e01697459 100644
--- a/Documentation/scheduler/sched-stats.txt
+++ b/Documentation/scheduler/sched-stats.txt
@@ -156,6 +156,13 @@ Set sched_latency_warn_threshold_us or sched_latency_panic_threshold_us with
 non-zero threshold to warn or panic system when scheduling latency higher
 than configured threshold is detected. Default is 0 (disabled) for both.
 
+/proc/sys/kernel/sched_max_latency_us
+----------------
+/proc/sys/kernel/sched_max_latency_us shows the maximum scheduling latency seen
+across the CPUs. The file reports the maximum latency in microseconds along
+with the CPU number and the task that incurred the maximum latency on that
+CPU. The maximum latency can be reset by writing any value to the file.
+
 A program could be easily written to make use of these extra fields to
 report on how well a particular process or set of processes is faring
 under the scheduler's policies. A simple version of such a program is
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index fdb75cab5243..c614b17c6069 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -132,6 +132,10 @@ extern unsigned int sysctl_sched_autogroup_enabled;
 #ifdef CONFIG_SCHEDSTATS
 extern unsigned int sysctl_sched_latency_panic_threshold;
 extern unsigned int sysctl_sched_latency_warn_threshold;
+
+extern int sched_max_latency_sysctl(struct ctl_table *table, int write,
+				void __user *buffer, size_t *lenp,
+				loff_t *ppos);
 #endif
 
 extern int sched_rr_timeslice;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1da6091b54f3..ec3ee54800a8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -126,6 +126,14 @@ unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
 #ifdef CONFIG_SCHEDSTATS
 unsigned int sysctl_sched_latency_panic_threshold;
 unsigned int sysctl_sched_latency_warn_threshold;
+
+struct sched_max_latency {
+	unsigned int latency_us;
+	char comm[TASK_COMM_LEN];
+	pid_t pid;
+};
+
+static DEFINE_PER_CPU(struct sched_max_latency, sched_max_latency);
 #endif /* CONFIG_SCHEDSTATS */
 
 static inline void update_load_add(struct load_weight *lw, unsigned long inc)
@@ -756,6 +764,54 @@ static void update_curr_fair(struct rq *rq)
 }
 
 #ifdef CONFIG_SCHEDSTATS
+int sched_max_latency_sysctl(struct ctl_table *table, int write,
+			     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret = 0;
+	int i, cpu = nr_cpu_ids;
+	char msg[256];
+	unsigned long flags;
+	struct rq *rq;
+	struct sched_max_latency max, *lat;
+
+	if (!write) {
+		max.latency_us = 0;
+		for_each_possible_cpu(i) {
+			rq = cpu_rq(i);
+			raw_spin_lock_irqsave(&rq->lock, flags);
+
+			lat = &per_cpu(sched_max_latency, i);
+			if (max.latency_us < lat->latency_us) {
+				max = *lat;
+				cpu = i;
+			}
+
+			raw_spin_unlock_irqrestore(&rq->lock, flags);
+		}
+
+		if (cpu != nr_cpu_ids) {
+			table->maxlen =
+				snprintf(msg, sizeof(msg),
+					 "cpu%d comm=%s pid=%d latency=%u(us)",
+					 cpu, max.comm, max.pid, max.latency_us);
+			table->data = msg;
+			ret = proc_dostring(table, write, buffer, lenp, ppos);
+		}
+	} else {
+		for_each_possible_cpu(i) {
+			rq = cpu_rq(i);
+			raw_spin_lock_irqsave(&rq->lock, flags);
+
+			memset(&per_cpu(sched_max_latency, i), 0,
+			       sizeof(struct sched_max_latency));
+
+			raw_spin_unlock_irqrestore(&rq->lock, flags);
+		}
+	}
+
+	return ret;
+}
+
 static inline void check_for_high_latency(struct task_struct *p, u64 latency_us)
 {
 	int do_warn, do_panic;
@@ -792,6 +848,7 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct task_struct *p;
 	u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+	struct sched_max_latency *max;
 
 	if (entity_is_task(se)) {
 		p = task_of(se);
@@ -805,6 +862,12 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			return;
 		}
 		trace_sched_stat_wait(p, delta);
+		max = this_cpu_ptr(&sched_max_latency);
+		if (max->latency_us < (delta >> 10)) {
+			max->latency_us = delta >> 10;
+			max->pid = task_of(se)->pid;
+			memcpy(max->comm, task_of(se)->comm, TASK_COMM_LEN);
+		}
 		check_for_high_latency(p, delta >> 10);
 	}
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 188c7fabeec0..591cacea6a8d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -608,6 +608,11 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 	},
+	{
+		.procname	= "sched_max_latency_us",
+		.mode		= 0644,
+		.proc_handler	= sched_max_latency_sysctl,
+	},
 #endif
 #ifdef CONFIG_PROVE_LOCKING
 	{
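
Usage note (illustration only, not part of the patch): the sketch below shows how
a userspace tool might poll and then reset the new node, assuming a kernel built
with CONFIG_SCHEDSTATS and this patch applied.  The path and the record format
("cpu%d comm=%s pid=%d latency=%u(us)") are the ones produced by
sched_max_latency_sysctl(); writing any value clears the per-CPU maxima, and the
write normally requires root.  Note that the tracked value is derived from
delta >> 10 (nanoseconds divided by 1024), so the reported microseconds are a
slight underestimate.

	/* Sketch: read the worst-case scheduling latency, then reset it. */
	#include <stdio.h>
	#include <stdlib.h>

	#define NODE "/proc/sys/kernel/sched_max_latency_us"

	int main(void)
	{
		char buf[256];
		FILE *fp;

		/* Read one record, e.g. "cpu2 comm=foo pid=1234 latency=873(us)". */
		fp = fopen(NODE, "r");
		if (!fp) {
			perror("fopen " NODE);
			return EXIT_FAILURE;
		}
		if (fgets(buf, sizeof(buf), fp))
			printf("worst latency so far: %s", buf);
		fclose(fp);

		/* Writing any value resets the per-CPU tracking. */
		fp = fopen(NODE, "w");
		if (!fp) {
			perror("fopen " NODE);
			return EXIT_FAILURE;
		}
		fputs("0\n", fp);
		fclose(fp);

		return EXIT_SUCCESS;
	}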