From da781e2b84b23ffade73c9998b709cf57a98dee4 Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Fri, 8 Sep 2017 13:10:55 +0530 Subject: [PATCH] cgroup/cpuset: remove circular dependency deadlock Remove circular dependency deadlock in a scenario where hotplug of CPU is being done while there is updation in cgroup and cpuset triggered from userspace. Process A => kthreadd => Process B => Process C => Process A Process A cpu_subsys_offline(); cpu_down(); _cpu_down(); mutex_lock(&cpuhotplug.lock); //held __cpu_notify(); workqueue_cpu_down_callback(); queue_work_on(system_highpri_wq); __queue_work(); insert_work(); wake_up_worker(); //pool->nr_running = 0 flush_work(); wait_for_completion(); worker_thread(); need_more_worker(); // returns true manage_workers(); maybe_create_worker(); create_worker(); kthread_create_on_node(); wake_up_process(kthreadd_task); kthreadd kthreadd(); kernel_thread(); do_fork(); copy_process(); percpu_down_read(&cgroup_threadgroup_rwsem); __rwsem_down_read_failed_common(); //waiting Process B kernfs_fop_write(); cgroup_file_write(); cgroup_tasks_write(); percpu_down_write(&cgroup_threadgroup_rwsem); //held cgroup_attach_task(); cgroup_migrate(); cgroup_taskset_migrate(); cpuset_can_attach(); mutex_lock(&cpuset_mutex); //waiting Process C kernfs_fop_write(); cgroup_file_write(); cpuset_write_resmask(); mutex_lock(&cpuset_mutex); //held update_cpumask(); update_cpumasks_hier(); rebuild_sched_domains_locked(); get_online_cpus(); mutex_lock(&cpuhotplug.lock); //waiting Eliminate this dependecy by reordering locking of cpuset_mutex and cpuhotplug.lock. Change-Id: Ifd76373d717c53b531623a3be76b7d32e0d959fd Signed-off-by: Prateek Sood --- include/linux/cpu.h | 2 ++ kernel/cpu.c | 5 +++++ kernel/cpuset.c | 33 +++++++++++++++++++-------------- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index a157a69097b5..a3bcdfbef9ca 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -234,6 +234,7 @@ extern struct bus_type cpu_subsys; extern void cpu_hotplug_begin(void); extern void cpu_hotplug_done(void); extern void get_online_cpus(void); +extern void cpu_hotplug_mutex_held(void); extern void put_online_cpus(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); @@ -256,6 +257,7 @@ static inline void cpu_hotplug_done(void) {} #define cpu_hotplug_enable() do { } while (0) #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define cpu_hotplug_mutex_held() do { } while (0) /* These aren't inline functions due to a GCC bug. */ #define register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) #define __register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) diff --git a/kernel/cpu.c b/kernel/cpu.c index 1d6b0a209bc0..5b4440d57f89 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -91,6 +91,11 @@ static struct { #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) +void cpu_hotplug_mutex_held(void) +{ + lockdep_assert_held(&cpu_hotplug.lock); +} +EXPORT_SYMBOL(cpu_hotplug_mutex_held); void get_online_cpus(void) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 6bdfe995e009..1656a48d5bee 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -807,16 +807,15 @@ done: * 'cpus' is removed, then call this routine to rebuild the * scheduler's dynamic sched domains. * - * Call with cpuset_mutex held. Takes get_online_cpus(). */ -static void rebuild_sched_domains_locked(void) +static void rebuild_sched_domains_unlocked(void) { struct sched_domain_attr *attr; cpumask_var_t *doms; int ndoms; + cpu_hotplug_mutex_held(); lockdep_assert_held(&cpuset_mutex); - get_online_cpus(); /* * We have raced with CPU hotplug. Don't do anything to avoid @@ -824,27 +823,27 @@ static void rebuild_sched_domains_locked(void) * Anyways, hotplug work item will rebuild sched domains. */ if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) - goto out; + return; /* Generate domain masks and attrs */ ndoms = generate_sched_domains(&doms, &attr); /* Have scheduler rebuild the domains */ partition_sched_domains(ndoms, doms, attr); -out: - put_online_cpus(); } #else /* !CONFIG_SMP */ -static void rebuild_sched_domains_locked(void) +static void rebuild_sched_domains_unlocked(void) { } #endif /* CONFIG_SMP */ void rebuild_sched_domains(void) { + get_online_cpus(); mutex_lock(&cpuset_mutex); - rebuild_sched_domains_locked(); + rebuild_sched_domains_unlocked(); mutex_unlock(&cpuset_mutex); + put_online_cpus(); } /** @@ -876,7 +875,6 @@ static void update_tasks_cpumask(struct cpuset *cs) * * On legacy hierachy, effective_cpus will be the same with cpu_allowed. * - * Called with cpuset_mutex held */ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) { @@ -931,7 +929,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) rcu_read_unlock(); if (need_rebuild_sched_domains) - rebuild_sched_domains_locked(); + rebuild_sched_domains_unlocked(); } /** @@ -1290,7 +1288,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) cs->relax_domain_level = val; if (!cpumask_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) - rebuild_sched_domains_locked(); + rebuild_sched_domains_unlocked(); } return 0; @@ -1321,7 +1319,6 @@ static void update_tasks_flags(struct cpuset *cs) * cs: the cpuset to update * turning_on: whether the flag is being set or cleared * - * Call with cpuset_mutex held. */ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, @@ -1356,7 +1353,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, spin_unlock_irq(&callback_lock); if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) - rebuild_sched_domains_locked(); + rebuild_sched_domains_unlocked(); if (spread_flag_changed) update_tasks_flags(cs); @@ -1621,6 +1618,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, cpuset_filetype_t type = cft->private; int retval = 0; + get_online_cpus(); mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) { retval = -ENODEV; @@ -1658,6 +1656,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, } out_unlock: mutex_unlock(&cpuset_mutex); + put_online_cpus(); return retval; } @@ -1668,6 +1667,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, cpuset_filetype_t type = cft->private; int retval = -ENODEV; + get_online_cpus(); mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; @@ -1682,6 +1682,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, } out_unlock: mutex_unlock(&cpuset_mutex); + put_online_cpus(); return retval; } @@ -1720,6 +1721,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, kernfs_break_active_protection(of->kn); flush_work(&cpuset_hotplug_work); + get_online_cpus(); mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; @@ -1745,6 +1747,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, free_trial_cpuset(trialcs); out_unlock: mutex_unlock(&cpuset_mutex); + put_online_cpus(); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); flush_workqueue(cpuset_migrate_mm_wq); @@ -2050,13 +2053,14 @@ out_unlock: /* * If the cpuset being removed has its flag 'sched_load_balance' * enabled, then simulate turning sched_load_balance off, which - * will call rebuild_sched_domains_locked(). + * will call rebuild_sched_domains_unlocked(). */ static void cpuset_css_offline(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); + get_online_cpus(); mutex_lock(&cpuset_mutex); if (is_sched_load_balance(cs)) @@ -2066,6 +2070,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) clear_bit(CS_ONLINE, &cs->flags); mutex_unlock(&cpuset_mutex); + put_online_cpus(); } static void cpuset_css_free(struct cgroup_subsys_state *css)