diff --git a/include/linux/sched.h b/include/linux/sched.h
index b1351226b102..94635c8805c9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -446,6 +446,7 @@ extern unsigned int hardlockup_panic;
 void lockup_detector_init(void);
 extern void watchdog_enable(unsigned int cpu);
 extern void watchdog_disable(unsigned int cpu);
+extern bool watchdog_configured(unsigned int cpu);
 #else
 static inline void touch_softlockup_watchdog_sched(void)
 {
@@ -468,6 +469,14 @@ static inline void watchdog_enable(unsigned int cpu)
 static inline void watchdog_disable(unsigned int cpu)
 {
 }
+static inline bool watchdog_configured(unsigned int cpu)
+{
+	/*
+	 * Pretend the watchdog is always configured.
+	 * We will be waiting for the watchdog to be enabled in core isolation
+	 */
+	return true;
+}
 #endif
 
 #ifdef CONFIG_DETECT_HUNG_TASK
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index be0a110cf788..1fb9b69f9be7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5704,6 +5704,22 @@ int sched_isolate_cpu(int cpu)
 	if (++cpu_isolation_vote[cpu] > 1)
 		goto out;
 
+	/*
+	 * There is a race between watchdog being enabled by hotplug and
+	 * core isolation disabling the watchdog. When a CPU is hotplugged in
+	 * and the hotplug lock has been released the watchdog thread might
+	 * not have run yet to enable the watchdog.
+	 * We have to wait for the watchdog to be enabled before proceeding.
+	 */
+	if (!watchdog_configured(cpu)) {
+		msleep(20);
+		if (!watchdog_configured(cpu)) {
+			--cpu_isolation_vote[cpu];
+			ret_code = -EBUSY;
+			goto out;
+		}
+	}
+
 	set_cpu_isolated(cpu, true);
 	cpumask_clear_cpu(cpu, &avail_cpus);
 
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7f21591c8ec5..f2813e137b23 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -588,17 +588,13 @@ static void watchdog_set_prio(unsigned int policy, unsigned int prio)
 	sched_setscheduler(current, policy, &param);
 }
 
-/* Must be called with hotplug lock (lock_device_hotplug()) held. */
 void watchdog_enable(unsigned int cpu)
 {
 	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
 	unsigned int *enabled = raw_cpu_ptr(&watchdog_en);
 
-	lock_device_hotplug_assert();
-
 	if (*enabled)
 		return;
-	*enabled = 1;
 
 	/* kick off the timer for the hardlockup detector */
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -614,24 +610,40 @@ void watchdog_enable(unsigned int cpu)
 	/* initialize timestamp */
 	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
 	__touch_watchdog();
+
+	/*
+	 * Need to ensure above operations are observed by other CPUs before
+	 * indicating that timer is enabled. This is to synchronize core
+	 * isolation and hotplug. Core isolation will wait for this flag to be
+	 * set.
+	 */
+	mb();
+	*enabled = 1;
 }
 
-/* Must be called with hotplug lock (lock_device_hotplug()) held. */
 void watchdog_disable(unsigned int cpu)
 {
 	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
 	unsigned int *enabled = raw_cpu_ptr(&watchdog_en);
 
-	lock_device_hotplug_assert();
-
 	if (!*enabled)
 		return;
-	*enabled = 0;
 
 	watchdog_set_prio(SCHED_NORMAL, 0);
 	hrtimer_cancel(hrtimer);
 	/* disable the perf event */
 	watchdog_nmi_disable(cpu);
+
+	/*
+	 * No need for barrier here since disabling the watchdog is
+	 * synchronized with hotplug lock
+	 */
+	*enabled = 0;
+}
+
+bool watchdog_configured(unsigned int cpu)
+{
+	return *per_cpu_ptr(&watchdog_en, cpu);
 }
 
 static void watchdog_cleanup(unsigned int cpu, bool online)