From af04b3a2bad55cbe37b013a41469fabecbf5636f Mon Sep 17 00:00:00 2001 From: Olav Haugan Date: Thu, 3 Nov 2016 15:10:57 -0700 Subject: [PATCH] sched: Ensure watchdog is enabled before disabling There is a race between watchdog being enabled by hotplug and core isolation disabling the watchdog. When a CPU is hotplugged in and the hotplug lock has been released the watchdog thread might not have run yet to enable the watchdog. We have to wait for the watchdog to be enabled before proceeding. Change-Id: I88f73603b6d389a46f8e819d9b490091d5ba4fe9 Signed-off-by: Olav Haugan --- include/linux/sched.h | 9 +++++++++ kernel/sched/core.c | 16 ++++++++++++++++ kernel/watchdog.c | 28 ++++++++++++++++++++-------- 3 files changed, 45 insertions(+), 8 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b1351226b102..94635c8805c9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -446,6 +446,7 @@ extern unsigned int hardlockup_panic; void lockup_detector_init(void); extern void watchdog_enable(unsigned int cpu); extern void watchdog_disable(unsigned int cpu); +extern bool watchdog_configured(unsigned int cpu); #else static inline void touch_softlockup_watchdog_sched(void) { @@ -468,6 +469,14 @@ static inline void watchdog_enable(unsigned int cpu) static inline void watchdog_disable(unsigned int cpu) { } +static inline bool watchdog_configured(unsigned int cpu) +{ + /* + * Pretend the watchdog is always configured. + * We will be waiting for the watchdog to be enabled in core isolation + */ + return true; +} #endif #ifdef CONFIG_DETECT_HUNG_TASK diff --git a/kernel/sched/core.c b/kernel/sched/core.c index be0a110cf788..1fb9b69f9be7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5704,6 +5704,22 @@ int sched_isolate_cpu(int cpu) if (++cpu_isolation_vote[cpu] > 1) goto out; + /* + * There is a race between watchdog being enabled by hotplug and + * core isolation disabling the watchdog. 
When a CPU is hotplugged in + * and the hotplug lock has been released the watchdog thread might + * not have run yet to enable the watchdog. + * We have to wait for the watchdog to be enabled before proceeding. + */ + if (!watchdog_configured(cpu)) { + msleep(20); + if (!watchdog_configured(cpu)) { + --cpu_isolation_vote[cpu]; + ret_code = -EBUSY; + goto out; + } + } + set_cpu_isolated(cpu, true); cpumask_clear_cpu(cpu, &avail_cpus); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7f21591c8ec5..f2813e137b23 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -588,17 +588,13 @@ static void watchdog_set_prio(unsigned int policy, unsigned int prio) sched_setscheduler(current, policy, &param); } -/* Must be called with hotplug lock (lock_device_hotplug()) held. */ void watchdog_enable(unsigned int cpu) { struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); unsigned int *enabled = raw_cpu_ptr(&watchdog_en); - lock_device_hotplug_assert(); - if (*enabled) return; - *enabled = 1; /* kick off the timer for the hardlockup detector */ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -614,24 +610,40 @@ void watchdog_enable(unsigned int cpu) /* initialize timestamp */ watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); __touch_watchdog(); + + /* + * Need to ensure above operations are observed by other CPUs before + * indicating that timer is enabled. This is to synchronize core + * isolation and hotplug. Core isolation will wait for this flag to be + * set. + */ + mb(); + *enabled = 1; } -/* Must be called with hotplug lock (lock_device_hotplug()) held. 
*/ void watchdog_disable(unsigned int cpu) { struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); unsigned int *enabled = raw_cpu_ptr(&watchdog_en); - lock_device_hotplug_assert(); - if (!*enabled) return; - *enabled = 0; watchdog_set_prio(SCHED_NORMAL, 0); hrtimer_cancel(hrtimer); /* disable the perf event */ watchdog_nmi_disable(cpu); + + /* + * No need for barrier here since disabling the watchdog is + * synchronized with hotplug lock + */ + *enabled = 0; +} + +bool watchdog_configured(unsigned int cpu) +{ + return *per_cpu_ptr(&watchdog_en, cpu); } static void watchdog_cleanup(unsigned int cpu, bool online)