The following method of CPU hotplug callback registration is not safe due to the possibility of an ABBA deadlock involving the cpu_add_remove_lock and the cpu_hotplug.lock. get_online_cpus(); for_each_online_cpu(cpu) init_cpu(cpu); register_cpu_notifier(&foobar_cpu_notifier); put_online_cpus(); The deadlock is shown below: CPU 0 CPU 1 ----- ----- Acquire cpu_hotplug.lock [via get_online_cpus()] CPU online/offline operation takes cpu_add_remove_lock [via cpu_maps_update_begin()] Try to acquire cpu_add_remove_lock [via register_cpu_notifier()] CPU online/offline operation tries to acquire cpu_hotplug.lock [via cpu_hotplug_begin()] *** DEADLOCK! *** The problem here is that callback registration takes the locks in one order whereas the CPU hotplug operations take the same locks in the opposite order. To avoid this issue and to provide a race-free method to register CPU hotplug callbacks (along with initialization of already online CPUs), introduce new variants of the callback registration APIs that simply register the callbacks without holding the cpu_add_remove_lock during the registration. That way, we can avoid the ABBA scenario. However, we will need to hold the cpu_add_remove_lock throughout the entire critical section, to protect updates to the callback/notifier chain. This can be achieved by writing the callback registration code as follows: cpu_maps_update_begin(); [ or cpu_notifier_register_begin(); see below ] for_each_online_cpu(cpu) init_cpu(cpu); /* This doesn't take the cpu_add_remove_lock */ __register_cpu_notifier(&foobar_cpu_notifier); cpu_maps_update_done(); [ or cpu_notifier_register_done(); see below ] Note that we can't use get_online_cpus() here instead of cpu_maps_update_begin() because the cpu_hotplug.lock is dropped during the invocation of CPU_POST_DEAD notifiers, and hence get_online_cpus() cannot provide the necessary synchronization to protect the callback/notifier chains against concurrent reads and writes. On the other hand, since the cpu_add_remove_lock protects the entire hotplug operation (including CPU_POST_DEAD), we can use cpu_maps_update_begin/done() to guarantee proper synchronization. Also, since cpu_maps_update_begin/done() is like a super-set of get/put_online_cpus(), the former naturally protects the critical sections from concurrent hotplug operations. Since the names cpu_maps_update_begin/done() don't make much sense in CPU hotplug callback registration scenarios, we'll introduce new APIs named cpu_notifier_register_begin/done() and map them to cpu_maps_update_begin/done(). In summary, introduce the lockless variants of un/register_cpu_notifier() and also export the cpu_notifier_register_begin/done() APIs for use by modules. This way, we provide a race-free way to register hotplug callbacks as well as perform initialization for the CPUs that are already online. Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ingo Molnar <mingo@kernel.org> Acked-by: Oleg Nesterov <oleg@redhat.com> Acked-by: Toshi Kani <toshi.kani@hp.com> Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com> Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
276 lines
8.5 KiB
C
276 lines
8.5 KiB
C
/*
|
|
* include/linux/cpu.h - generic cpu definition
|
|
*
|
|
* This is mainly for topological representation. We define the
|
|
* basic 'struct cpu' here, which can be embedded in per-arch
|
|
* definitions of processors.
|
|
*
|
|
* Basic handling of the devices is done in drivers/base/cpu.c
|
|
*
|
|
* CPUs are exported via sysfs in the devices/system/cpu
|
|
* directory.
|
|
*/
|
|
#ifndef _LINUX_CPU_H_
|
|
#define _LINUX_CPU_H_
|
|
|
|
#include <linux/node.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/cpumask.h>
|
|
|
|
struct device;
|
|
struct device_node;
|
|
|
|
struct cpu {
|
|
int node_id; /* The node which contains the CPU */
|
|
int hotpluggable; /* creates sysfs control file if hotpluggable */
|
|
struct device dev;
|
|
};
|
|
|
|
extern int register_cpu(struct cpu *cpu, int num);
|
|
extern struct device *get_cpu_device(unsigned cpu);
|
|
extern bool cpu_is_hotpluggable(unsigned cpu);
|
|
extern bool arch_match_cpu_phys_id(int cpu, u64 phys_id);
|
|
extern bool arch_find_n_match_cpu_physical_id(struct device_node *cpun,
|
|
int cpu, unsigned int *thread);
|
|
|
|
extern int cpu_add_dev_attr(struct device_attribute *attr);
|
|
extern void cpu_remove_dev_attr(struct device_attribute *attr);
|
|
|
|
extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
|
|
extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
extern void unregister_cpu(struct cpu *cpu);
|
|
extern ssize_t arch_cpu_probe(const char *, size_t);
|
|
extern ssize_t arch_cpu_release(const char *, size_t);
|
|
#endif
|
|
struct notifier_block;
|
|
|
|
#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE
|
|
extern int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env);
|
|
extern ssize_t arch_print_cpu_modalias(struct device *dev,
|
|
struct device_attribute *attr,
|
|
char *bufptr);
|
|
#endif
|
|
|
|
/*
|
|
* CPU notifier priorities.
|
|
*/
|
|
enum {
|
|
/*
|
|
* SCHED_ACTIVE marks a cpu which is coming up active during
|
|
* CPU_ONLINE and CPU_DOWN_FAILED and must be the first
|
|
* notifier. CPUSET_ACTIVE adjusts cpuset according to
|
|
* cpu_active mask right after SCHED_ACTIVE. During
|
|
* CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
|
|
* ordered in the similar way.
|
|
*
|
|
* This ordering guarantees consistent cpu_active mask and
|
|
* migration behavior to all cpu notifiers.
|
|
*/
|
|
CPU_PRI_SCHED_ACTIVE = INT_MAX,
|
|
CPU_PRI_CPUSET_ACTIVE = INT_MAX - 1,
|
|
CPU_PRI_SCHED_INACTIVE = INT_MIN + 1,
|
|
CPU_PRI_CPUSET_INACTIVE = INT_MIN,
|
|
|
|
/* migration should happen before other stuff but after perf */
|
|
CPU_PRI_PERF = 20,
|
|
CPU_PRI_MIGRATION = 10,
|
|
/* bring up workqueues before normal notifiers and down after */
|
|
CPU_PRI_WORKQUEUE_UP = 5,
|
|
CPU_PRI_WORKQUEUE_DOWN = -5,
|
|
};
|
|
|
|
#define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */
|
|
#define CPU_UP_PREPARE 0x0003 /* CPU (unsigned)v coming up */
|
|
#define CPU_UP_CANCELED 0x0004 /* CPU (unsigned)v NOT coming up */
|
|
#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */
|
|
#define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */
|
|
#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
|
|
#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task,
|
|
* not handling interrupts, soon dead.
|
|
* Called on the dying cpu, interrupts
|
|
* are already disabled. Must not
|
|
* sleep, must not fail */
|
|
#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug
|
|
* lock is dropped */
|
|
#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running.
|
|
* Called on the new cpu, just before
|
|
* enabling interrupts. Must not sleep,
|
|
* must not fail */
|
|
|
|
/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
|
|
* operation in progress
|
|
*/
|
|
#define CPU_TASKS_FROZEN 0x0010
|
|
|
|
#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN)
|
|
#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN)
|
|
#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN)
|
|
#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
|
|
#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
|
|
#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
|
|
#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN)
|
|
#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN)
|
|
|
|
|
|
#ifdef CONFIG_SMP
|
|
/* Need to know about CPUs going up/down? */
|
|
#if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
|
|
#define cpu_notifier(fn, pri) { \
|
|
static struct notifier_block fn##_nb = \
|
|
{ .notifier_call = fn, .priority = pri }; \
|
|
register_cpu_notifier(&fn##_nb); \
|
|
}
|
|
|
|
#define __cpu_notifier(fn, pri) { \
|
|
static struct notifier_block fn##_nb = \
|
|
{ .notifier_call = fn, .priority = pri }; \
|
|
__register_cpu_notifier(&fn##_nb); \
|
|
}
|
|
#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */
|
|
#define cpu_notifier(fn, pri) do { (void)(fn); } while (0)
|
|
#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0)
|
|
#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
extern int register_cpu_notifier(struct notifier_block *nb);
|
|
extern int __register_cpu_notifier(struct notifier_block *nb);
|
|
extern void unregister_cpu_notifier(struct notifier_block *nb);
|
|
extern void __unregister_cpu_notifier(struct notifier_block *nb);
|
|
#else
|
|
|
|
#ifndef MODULE
|
|
extern int register_cpu_notifier(struct notifier_block *nb);
|
|
extern int __register_cpu_notifier(struct notifier_block *nb);
|
|
#else
|
|
static inline int register_cpu_notifier(struct notifier_block *nb)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline int __register_cpu_notifier(struct notifier_block *nb)
|
|
{
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
static inline void unregister_cpu_notifier(struct notifier_block *nb)
|
|
{
|
|
}
|
|
|
|
static inline void __unregister_cpu_notifier(struct notifier_block *nb)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
int cpu_up(unsigned int cpu);
|
|
void notify_cpu_starting(unsigned int cpu);
|
|
extern void cpu_maps_update_begin(void);
|
|
extern void cpu_maps_update_done(void);
|
|
|
|
#define cpu_notifier_register_begin cpu_maps_update_begin
|
|
#define cpu_notifier_register_done cpu_maps_update_done
|
|
|
|
#else /* CONFIG_SMP */
|
|
|
|
#define cpu_notifier(fn, pri) do { (void)(fn); } while (0)
|
|
#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0)
|
|
|
|
static inline int register_cpu_notifier(struct notifier_block *nb)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline int __register_cpu_notifier(struct notifier_block *nb)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline void unregister_cpu_notifier(struct notifier_block *nb)
|
|
{
|
|
}
|
|
|
|
static inline void __unregister_cpu_notifier(struct notifier_block *nb)
|
|
{
|
|
}
|
|
|
|
static inline void cpu_maps_update_begin(void)
|
|
{
|
|
}
|
|
|
|
static inline void cpu_maps_update_done(void)
|
|
{
|
|
}
|
|
|
|
static inline void cpu_notifier_register_begin(void)
|
|
{
|
|
}
|
|
|
|
static inline void cpu_notifier_register_done(void)
|
|
{
|
|
}
|
|
|
|
#endif /* CONFIG_SMP */
|
|
extern struct bus_type cpu_subsys;
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
/* Stop CPUs going up and down. */
|
|
|
|
extern void cpu_hotplug_begin(void);
|
|
extern void cpu_hotplug_done(void);
|
|
extern void get_online_cpus(void);
|
|
extern void put_online_cpus(void);
|
|
extern void cpu_hotplug_disable(void);
|
|
extern void cpu_hotplug_enable(void);
|
|
#define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
|
|
#define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri)
|
|
#define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
|
|
#define __register_hotcpu_notifier(nb) __register_cpu_notifier(nb)
|
|
#define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb)
|
|
#define __unregister_hotcpu_notifier(nb) __unregister_cpu_notifier(nb)
|
|
void clear_tasks_mm_cpumask(int cpu);
|
|
int cpu_down(unsigned int cpu);
|
|
|
|
#else /* CONFIG_HOTPLUG_CPU */
|
|
|
|
static inline void cpu_hotplug_begin(void) {}
|
|
static inline void cpu_hotplug_done(void) {}
|
|
#define get_online_cpus() do { } while (0)
|
|
#define put_online_cpus() do { } while (0)
|
|
#define cpu_hotplug_disable() do { } while (0)
|
|
#define cpu_hotplug_enable() do { } while (0)
|
|
#define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
|
|
#define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
|
|
/* These aren't inline functions due to a GCC bug. */
|
|
#define register_hotcpu_notifier(nb) ({ (void)(nb); 0; })
|
|
#define __register_hotcpu_notifier(nb) ({ (void)(nb); 0; })
|
|
#define unregister_hotcpu_notifier(nb) ({ (void)(nb); })
|
|
#define __unregister_hotcpu_notifier(nb) ({ (void)(nb); })
|
|
#endif /* CONFIG_HOTPLUG_CPU */
|
|
|
|
#ifdef CONFIG_PM_SLEEP_SMP
|
|
extern int disable_nonboot_cpus(void);
|
|
extern void enable_nonboot_cpus(void);
|
|
#else /* !CONFIG_PM_SLEEP_SMP */
|
|
static inline int disable_nonboot_cpus(void) { return 0; }
|
|
static inline void enable_nonboot_cpus(void) {}
|
|
#endif /* !CONFIG_PM_SLEEP_SMP */
|
|
|
|
enum cpuhp_state {
|
|
CPUHP_OFFLINE,
|
|
CPUHP_ONLINE,
|
|
};
|
|
|
|
void cpu_startup_entry(enum cpuhp_state state);
|
|
void cpu_idle(void);
|
|
|
|
void cpu_idle_poll_ctrl(bool enable);
|
|
|
|
void arch_cpu_idle(void);
|
|
void arch_cpu_idle_prepare(void);
|
|
void arch_cpu_idle_enter(void);
|
|
void arch_cpu_idle_exit(void);
|
|
void arch_cpu_idle_dead(void);
|
|
|
|
#endif /* _LINUX_CPU_H_ */
|