From 7437cd7c4bb7a463ec2cac7c37283f9eec5c01c2 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Mon, 28 Nov 2016 13:41:18 -0800
Subject: [PATCH] sched: pre-allocate colocation groups

At present, sched_set_group_id() dynamically allocates a colocation
group structure in order to assign the given task to a group.  However,
this can deadlock: the memory allocator can wake up a task, and that
wakeup in turn tries to acquire related_thread_group_lock.  Avoid the
deadlock by pre-allocating the colocation group structures at boot.
This caps the number of colocation groups at a static limit, which is
acceptable since the number of groups is never expected to be large.

Change-Id: Ifc32ab4ead63c382ae390358ed86f7cc5b6eb2dc
Signed-off-by: Joonwoo Park
---
 include/linux/sched.h |   2 +
 kernel/sched/core.c   |   3 +
 kernel/sched/hmp.c    | 211 +++++++++++++++++++++---------------------
 kernel/sched/sched.h  |   3 +
 4 files changed, 116 insertions(+), 103 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4f6711f31939..9c3be2d56ac5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2401,6 +2401,8 @@ struct cpu_cycle_counter_cb {
 	u64 (*get_cpu_cycle_counter)(int cpu);
 };
 
+#define MAX_NUM_CGROUP_COLOC_ID	20
+
 #ifdef CONFIG_SCHED_HMP
 extern void free_task_load_ptrs(struct task_struct *p);
 extern int sched_set_window(u64 window_start, unsigned int window_size);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f60117eb60fa..fd31af17836d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8071,6 +8071,9 @@ void __init sched_init(void)
 		atomic_set(&rq->nr_iowait, 0);
 	}
 
+	i = alloc_related_thread_groups();
+	BUG_ON(i);
+
 	set_hmp_defaults();
 
 	set_load_weight(&init_task);
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index 968a41e0e81e..fba4572a542f 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -784,11 +784,12 @@ __read_mostly unsigned int sched_major_task_runtime = 10000000;
 
 static unsigned int sync_cpu;
 
-static LIST_HEAD(related_thread_groups);
+struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID];
+static LIST_HEAD(active_related_thread_groups);
 static DEFINE_RWLOCK(related_thread_group_lock);
 
 #define for_each_related_thread_group(grp) \
-	list_for_each_entry(grp, &related_thread_groups, list)
+	list_for_each_entry(grp, &active_related_thread_groups, list)
 
 /*
  * Task load is categorized into buckets for the purpose of top task tracking.
@@ -3052,7 +3053,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
 
 	read_unlock(&tasklist_lock);
 
-	list_for_each_entry(grp, &related_thread_groups, list) {
+	list_for_each_entry(grp, &active_related_thread_groups, list) {
 		int j;
 
 		for_each_possible_cpu(j) {
@@ -3965,47 +3966,54 @@ _group_cpu_time(struct related_thread_group *grp, int cpu)
 	return grp ? per_cpu_ptr(grp->cpu_time, cpu) : NULL;
 }
 
-struct related_thread_group *alloc_related_thread_group(int group_id)
+static inline struct related_thread_group*
+lookup_related_thread_group(unsigned int group_id)
 {
-	struct related_thread_group *grp;
-
-	grp = kzalloc(sizeof(*grp), GFP_ATOMIC);
-	if (!grp)
-		return ERR_PTR(-ENOMEM);
-
-	if (alloc_group_cputime(grp)) {
-		kfree(grp);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	grp->id = group_id;
-	INIT_LIST_HEAD(&grp->tasks);
-	INIT_LIST_HEAD(&grp->list);
-	raw_spin_lock_init(&grp->lock);
-
-	return grp;
+	return related_thread_groups[group_id];
 }
 
-struct related_thread_group *lookup_related_thread_group(unsigned int group_id)
+int alloc_related_thread_groups(void)
 {
+	int i, ret;
 	struct related_thread_group *grp;
 
-	list_for_each_entry(grp, &related_thread_groups, list) {
-		if (grp->id == group_id)
-			return grp;
+	/* group_id = 0 is invalid as it's a special id to remove a group. */
+	for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) {
+		grp = kzalloc(sizeof(*grp), GFP_NOWAIT);
+		if (!grp) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		if (alloc_group_cputime(grp)) {
+			kfree(grp);
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		grp->id = i;
+		INIT_LIST_HEAD(&grp->tasks);
+		INIT_LIST_HEAD(&grp->list);
+		raw_spin_lock_init(&grp->lock);
+
+		related_thread_groups[i] = grp;
 	}
 
-	return NULL;
-}
+	return 0;
 
-/* See comments before preferred_cluster() */
-static void free_related_thread_group(struct rcu_head *rcu)
-{
-	struct related_thread_group *grp = container_of(rcu, struct
-			related_thread_group, rcu);
+err:
+	for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) {
+		grp = lookup_related_thread_group(i);
+		if (grp) {
+			free_group_cputime(grp);
+			kfree(grp);
+			related_thread_groups[i] = NULL;
+		} else {
+			break;
+		}
+	}
 
-	free_group_cputime(grp);
-	kfree(grp);
+	return ret;
 }
 
 static void remove_task_from_group(struct task_struct *p)
@@ -4030,10 +4038,12 @@ static void remove_task_from_group(struct task_struct *p)
 	raw_spin_unlock(&grp->lock);
 
 	/* Reserved groups cannot be destroyed */
-	if (empty_group && grp->id != DEFAULT_CGROUP_COLOC_ID) {
-		list_del(&grp->list);
-		call_rcu(&grp->rcu, free_related_thread_group);
-	}
+	if (empty_group && grp->id != DEFAULT_CGROUP_COLOC_ID)
+		/*
+		 * We test whether grp->list is attached with list_empty(),
+		 * hence re-init the list after deletion.
+		 */
+		list_del_init(&grp->list);
 }
 
 static int
@@ -4105,6 +4115,63 @@ void add_new_task_to_grp(struct task_struct *new)
 	write_unlock_irqrestore(&related_thread_group_lock, flags);
 }
 
+static int __sched_set_group_id(struct task_struct *p, unsigned int group_id)
+{
+	int rc = 0;
+	unsigned long flags;
+	struct related_thread_group *grp = NULL;
+
+	if (group_id >= MAX_NUM_CGROUP_COLOC_ID)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	write_lock(&related_thread_group_lock);
+
+	/* Switching from one group to another directly is not permitted */
+	if ((current != p && p->flags & PF_EXITING) ||
+	    (!p->grp && !group_id) ||
+	    (p->grp && group_id))
+		goto done;
+
+	if (!group_id) {
+		remove_task_from_group(p);
+		goto done;
+	}
+
+	grp = lookup_related_thread_group(group_id);
+	if (list_empty(&grp->list))
+		list_add(&grp->list, &active_related_thread_groups);
+
+	rc = add_task_to_group(p, grp);
+done:
+	write_unlock(&related_thread_group_lock);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+	return rc;
+}
+
+int sched_set_group_id(struct task_struct *p, unsigned int group_id)
+{
+	/* DEFAULT_CGROUP_COLOC_ID is a reserved id */
+	if (group_id == DEFAULT_CGROUP_COLOC_ID)
+		return -EINVAL;
+
+	return __sched_set_group_id(p, group_id);
+}
+
+unsigned int sched_get_group_id(struct task_struct *p)
+{
+	unsigned int group_id;
+	struct related_thread_group *grp;
+
+	rcu_read_lock();
+	grp = task_related_thread_group(p);
+	group_id = grp ? grp->id : 0;
+	rcu_read_unlock();
+
+	return group_id;
+}
+
 #if defined(CONFIG_SCHED_TUNE) && defined(CONFIG_CGROUP_SCHEDTUNE)
 /*
  * We create a default colocation group at boot. There is no need to
@@ -4123,14 +4190,9 @@ static int __init create_default_coloc_group(void)
 	struct related_thread_group *grp = NULL;
 	unsigned long flags;
 
-	grp = alloc_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
-	if (IS_ERR(grp)) {
-		WARN_ON(1);
-		return -ENOMEM;
-	}
-
+	grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
 	write_lock_irqsave(&related_thread_group_lock, flags);
-	list_add(&grp->list, &related_thread_groups);
+	list_add(&grp->list, &active_related_thread_groups);
 	write_unlock_irqrestore(&related_thread_group_lock, flags);
 
 	update_freq_aggregate_threshold(MAX_FREQ_AGGR_THRESH);
@@ -4142,67 +4204,10 @@ int sync_cgroup_colocation(struct task_struct *p, bool insert)
 {
 	unsigned int grp_id = insert ? DEFAULT_CGROUP_COLOC_ID : 0;
 
-	return sched_set_group_id(p, grp_id);
+	return __sched_set_group_id(p, grp_id);
 }
 #endif
 
-int sched_set_group_id(struct task_struct *p, unsigned int group_id)
-{
-	int rc = 0;
-	unsigned long flags;
-	struct related_thread_group *grp = NULL;
-
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	write_lock(&related_thread_group_lock);
-
-	/* Switching from one group to another directly is not permitted */
-	if ((current != p && p->flags & PF_EXITING) ||
-	    (!p->grp && !group_id) ||
-	    (p->grp && group_id))
-		goto done;
-
-	if (!group_id) {
-		remove_task_from_group(p);
-		goto done;
-	}
-
-	grp = lookup_related_thread_group(group_id);
-	if (!grp) {
-		/* This is a reserved id */
-		if (group_id == DEFAULT_CGROUP_COLOC_ID) {
-			rc = -EINVAL;
-			goto done;
-		}
-
-		grp = alloc_related_thread_group(group_id);
-		if (IS_ERR(grp)) {
-			rc = -ENOMEM;
-			goto done;
-		}
-
-		list_add(&grp->list, &related_thread_groups);
-	}
-
-	rc = add_task_to_group(p, grp);
-done:
-	write_unlock(&related_thread_group_lock);
-	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
-	return rc;
-}
-
-unsigned int sched_get_group_id(struct task_struct *p)
-{
-	unsigned int group_id;
-	struct related_thread_group *grp;
-
-	rcu_read_lock();
-	grp = task_related_thread_group(p);
-	group_id = grp ? grp->id : 0;
-	rcu_read_unlock();
-
-	return group_id;
-}
-
 static void update_cpu_cluster_capacity(const cpumask_t *cpus)
 {
 	int i;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 30838bb9b442..f569c6fe3cbb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1448,6 +1448,8 @@ static inline void update_cgroup_boost_settings(void) { }
 static inline void restore_cgroup_boost_settings(void) { }
 #endif
 
+extern int alloc_related_thread_groups(void);
+
 #else /* CONFIG_SCHED_HMP */
 
 struct hmp_sched_stats;
@@ -1638,6 +1640,7 @@ static inline void set_hmp_defaults(void) { }
 
 static inline void clear_reserved(int cpu) { }
 static inline void sched_boost_parse_dt(void) {}
+static inline int alloc_related_thread_groups(void) { return 0; }
 
 #define trace_sched_cpu_load(...)
 #define trace_sched_cpu_load_lb(...)
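
The locking rule the patch relies on is that nothing holding related_thread_group_lock may allocate memory, because the allocator can wake a task and that wakeup path takes the same lock. Below is a minimal userspace sketch of the resulting pre-allocation pattern, not part of the patch; the names (struct group, alloc_groups, join_group, MAX_GROUPS) are illustrative stand-ins rather than kernel symbols. Everything is allocated once at init time, so the hot path only indexes a fixed array while holding the lock.

/*
 * Sketch of "allocate at init, only look up under the lock".
 * Assumed names throughout; this is not kernel code.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_GROUPS 20	/* mirrors MAX_NUM_CGROUP_COLOC_ID */

struct group {
	int id;
	int nr_tasks;
};

static struct group *groups[MAX_GROUPS];
static pthread_rwlock_t groups_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Init time: allocation is fine here, the lock is not yet contended. */
static int alloc_groups(void)
{
	int i;

	/* id 0 is reserved to mean "no group", as in the patch. */
	for (i = 1; i < MAX_GROUPS; i++) {
		groups[i] = calloc(1, sizeof(*groups[i]));
		if (!groups[i])
			return -1;
		groups[i]->id = i;
	}
	return 0;
}

/* Hot path: lookup only, no allocation while groups_lock is held. */
static int join_group(unsigned int id)
{
	if (id == 0 || id >= MAX_GROUPS)
		return -1;

	pthread_rwlock_wrlock(&groups_lock);
	groups[id]->nr_tasks++;
	pthread_rwlock_unlock(&groups_lock);
	return 0;
}

int main(void)
{
	if (alloc_groups())
		return 1;
	join_group(3);
	printf("group 3 has %d task(s)\n", groups[3]->nr_tasks);
	return 0;
}

Built with cc -pthread, this should print "group 3 has 1 task(s)". The patch follows the same shape: alloc_related_thread_groups() runs from sched_init(), and __sched_set_group_id() only calls lookup_related_thread_group() while related_thread_group_lock is held.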