From 92cc35433ddd0cba7a03fc19e88f8c854ee02055 Mon Sep 17 00:00:00 2001
From: Gao Xiang
Date: Tue, 9 May 2017 12:30:56 +0800
Subject: [PATCH 001/176] ANDROID: sdcardfs: use mount_nodev and fix an issue
 in sdcardfs_kill_sb

Use the VFS mount_nodev() instead of the customized
mount_nodev_with_options(), and switch sdcardfs_kill_sb() from
generic_shutdown_super() to kill_anon_super(): the superblock is now
set up via set_anon_super(), so its anonymous device number must also
be released on unmount.

Signed-off-by: Gao Xiang
Change-Id: Ibe46647aa2ce49d79291aa9d0295e9625cfccd80
---
 fs/sdcardfs/main.c | 47 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 27 deletions(-)

diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
index 3c5b51d49d21..80825b287836 100644
--- a/fs/sdcardfs/main.c
+++ b/fs/sdcardfs/main.c
@@ -364,41 +364,34 @@ out:
 	return err;
 }
 
-/* A feature which supports mount_nodev() with options */
-static struct dentry *mount_nodev_with_options(struct vfsmount *mnt,
-	struct file_system_type *fs_type, int flags,
-	const char *dev_name, void *data,
-	int (*fill_super)(struct vfsmount *, struct super_block *,
-			  const char *, void *, int))
+struct sdcardfs_mount_private {
+	struct vfsmount *mnt;
+	const char *dev_name;
+	void *raw_data;
+};
+
+static int __sdcardfs_fill_super(
+	struct super_block *sb,
+	void *_priv, int silent)
 {
-	int error;
-	struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);
+	struct sdcardfs_mount_private *priv = _priv;
 
-	if (IS_ERR(s))
-		return ERR_CAST(s);
-
-	s->s_flags = flags;
-
-	error = fill_super(mnt, s, dev_name, data, flags & MS_SILENT ? 1 : 0);
-	if (error) {
-		deactivate_locked_super(s);
-		return ERR_PTR(error);
-	}
-	s->s_flags |= MS_ACTIVE;
-	return dget(s->s_root);
+	return sdcardfs_read_super(priv->mnt,
+		sb, priv->dev_name, priv->raw_data, silent);
 }
 
 static struct dentry *sdcardfs_mount(struct vfsmount *mnt,
 		struct file_system_type *fs_type, int flags,
			    const char *dev_name, void *raw_data)
 {
-	/*
-	 * dev_name is a lower_path_name,
-	 * raw_data is a option string.
-	 */
-	return mount_nodev_with_options(mnt, fs_type, flags, dev_name,
-					raw_data, sdcardfs_read_super);
+	struct sdcardfs_mount_private priv = {
+		.mnt = mnt,
+		.dev_name = dev_name,
+		.raw_data = raw_data
+	};
+
+	return mount_nodev(fs_type, flags,
+		&priv, __sdcardfs_fill_super);
 }
 
 static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type,
@@ -423,7 +416,7 @@ void sdcardfs_kill_sb(struct super_block *sb)
 		list_del(&sbi->list);
 		mutex_unlock(&sdcardfs_super_list_lock);
 	}
-	generic_shutdown_super(sb);
+	kill_anon_super(sb);
 }
 
 static struct file_system_type sdcardfs_fs_type = {

From 97841e574afb85dca93016347efee005d8150efc Mon Sep 17 00:00:00 2001
From: Steve Muckle
Date: Tue, 11 Jul 2017 10:06:37 -0700
Subject: [PATCH 002/176] ANDROID: android-base.cfg: remove CONFIG_CGROUP_DEBUG

This config option is not required by Android.
Bug: 63578267
Change-Id: I163fa19183734a1a343d525e885a000a495c242e
Signed-off-by: Steve Muckle
---
 android/configs/android-base.cfg | 1 -
 1 file changed, 1 deletion(-)

diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg
index d675e712fe8c..28dce6c0516d 100644
--- a/android/configs/android-base.cfg
+++ b/android/configs/android-base.cfg
@@ -17,7 +17,6 @@ CONFIG_AUDIT=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_CGROUPS=y
 CONFIG_CGROUP_CPUACCT=y
-CONFIG_CGROUP_DEBUG=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_DEFAULT_SECURITY_SELINUX=y

From d368c6faa19ba98d26e713d3bf258cc6466d8ac2 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Thu, 1 Jun 2017 10:59:12 -0700
Subject: [PATCH 003/176] sched: walt: fix window misalignment when HZ=300

Due to a rounding error, the hrtimer tick interval becomes 3333333 ns
when HZ=300 (NSEC_PER_SEC / 300 = 3333333.33 ns, which truncates to
3333333 ns). Consequently, the tick timestamp nearest to WALT's
default window size of 20 ms is 19999998 ns (3333333 * 6), so define
the minimum window size accordingly to keep windows aligned with tick
boundaries.

Change-Id: I08f9bd2dbecccbb683e4490d06d8b0da703d3ab2
Suggested-by: Joel Fernandes
Signed-off-by: Joonwoo Park
---
 kernel/sched/walt.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index 6e053bd9830c..92c3aae8e056 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -72,7 +72,15 @@ static cpumask_t mpc_mask = CPU_MASK_ALL;
 __read_mostly unsigned int walt_ravg_window = 20000000;
 
 /* Min window size (in ns) = 10ms */
+#ifdef CONFIG_HZ_300
+/*
+ * Tick interval becomes 3333333 ns due to a
+ * rounding error when HZ=300.
+ */
+#define MIN_SCHED_RAVG_WINDOW (3333333 * 6)
+#else
 #define MIN_SCHED_RAVG_WINDOW 10000000
+#endif
 
 /* Max window size (in ns) = 1s */
 #define MAX_SCHED_RAVG_WINDOW 1000000000

From ec49bb00cd72c190005a850a5832d2cab9aeb5ae Mon Sep 17 00:00:00 2001
From: Todd Kjos
Date: Fri, 24 Mar 2017 15:53:53 -0700
Subject: [PATCH 004/176] Revert "android: binder: move global binder state
 into context struct."

This reverts commit d6bbb3276728e4a13c832c55f948a40a27bc4a33.
Signed-off-by: Todd Kjos Change-Id: Ib507d62803f2beba7178c3f6f3f78bd1095b25b8 --- drivers/android/binder.c | 392 +++++++++++++-------------------------- 1 file changed, 133 insertions(+), 259 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9cf4f9bbc711..08cde76875d2 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -18,7 +18,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include #include @@ -47,11 +46,19 @@ #include #include "binder_trace.h" +static DEFINE_MUTEX(binder_main_lock); +static DEFINE_MUTEX(binder_deferred_lock); +static DEFINE_MUTEX(binder_mmap_lock); + static HLIST_HEAD(binder_devices); +static HLIST_HEAD(binder_procs); +static HLIST_HEAD(binder_deferred_list); +static HLIST_HEAD(binder_dead_nodes); static struct dentry *binder_debugfs_dir_entry_root; static struct dentry *binder_debugfs_dir_entry_proc; -atomic_t binder_last_id; +static int binder_last_id; +static struct workqueue_struct *binder_deferred_workqueue; #define BINDER_DEBUG_ENTRY(name) \ static int binder_##name##_open(struct inode *inode, struct file *file) \ @@ -166,24 +173,20 @@ enum binder_stat_types { struct binder_stats { int br[_IOC_NR(BR_FAILED_REPLY) + 1]; int bc[_IOC_NR(BC_REPLY_SG) + 1]; + int obj_created[BINDER_STAT_COUNT]; + int obj_deleted[BINDER_STAT_COUNT]; }; -/* These are still global, since it's not always easy to get the context */ -struct binder_obj_stats { - atomic_t obj_created[BINDER_STAT_COUNT]; - atomic_t obj_deleted[BINDER_STAT_COUNT]; -}; - -static struct binder_obj_stats binder_obj_stats; +static struct binder_stats binder_stats; static inline void binder_stats_deleted(enum binder_stat_types type) { - atomic_inc(&binder_obj_stats.obj_deleted[type]); + binder_stats.obj_deleted[type]++; } static inline void binder_stats_created(enum binder_stat_types type) { - atomic_inc(&binder_obj_stats.obj_created[type]); + binder_stats.obj_created[type]++; } struct binder_transaction_log_entry { @@ -204,6 +207,8 @@ struct binder_transaction_log { int full; struct binder_transaction_log_entry entry[32]; }; +static struct binder_transaction_log binder_transaction_log; +static struct binder_transaction_log binder_transaction_log_failed; static struct binder_transaction_log_entry *binder_transaction_log_add( struct binder_transaction_log *log) @@ -224,21 +229,6 @@ struct binder_context { struct binder_node *binder_context_mgr_node; kuid_t binder_context_mgr_uid; const char *name; - - struct mutex binder_main_lock; - struct mutex binder_deferred_lock; - struct mutex binder_mmap_lock; - - struct hlist_head binder_procs; - struct hlist_head binder_dead_nodes; - struct hlist_head binder_deferred_list; - - struct work_struct deferred_work; - struct workqueue_struct *binder_deferred_workqueue; - struct binder_transaction_log transaction_log; - struct binder_transaction_log transaction_log_failed; - - struct binder_stats binder_stats; }; struct binder_device { @@ -461,18 +451,17 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) return retval; } -static inline void binder_lock(struct binder_context *context, const char *tag) +static inline void binder_lock(const char *tag) { trace_binder_lock(tag); - mutex_lock(&context->binder_main_lock); + mutex_lock(&binder_main_lock); trace_binder_locked(tag); } -static inline void binder_unlock(struct binder_context *context, - const char *tag) +static inline void binder_unlock(const char *tag) { trace_binder_unlock(tag); - mutex_unlock(&context->binder_main_lock); 
+ mutex_unlock(&binder_main_lock); } static void binder_set_nice(long nice) @@ -957,7 +946,7 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, binder_stats_created(BINDER_STAT_NODE); rb_link_node(&node->rb_node, parent, p); rb_insert_color(&node->rb_node, &proc->nodes); - node->debug_id = atomic_inc_return(&binder_last_id); + node->debug_id = ++binder_last_id; node->proc = proc; node->ptr = ptr; node->cookie = cookie; @@ -1099,7 +1088,7 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, if (new_ref == NULL) return NULL; binder_stats_created(BINDER_STAT_REF); - new_ref->debug_id = atomic_inc_return(&binder_last_id); + new_ref->debug_id = ++binder_last_id; new_ref->proc = proc; new_ref->node = node; rb_link_node(&new_ref->rb_node_node, parent, p); @@ -1859,7 +1848,7 @@ static void binder_transaction(struct binder_proc *proc, binder_size_t last_fixup_min_off = 0; struct binder_context *context = proc->context; - e = binder_transaction_log_add(&context->transaction_log); + e = binder_transaction_log_add(&binder_transaction_log); e->call_type = reply ? 2 : !!(tr->flags & TF_ONE_WAY); e->from_proc = proc->pid; e->from_thread = thread->pid; @@ -1981,7 +1970,7 @@ static void binder_transaction(struct binder_proc *proc, } binder_stats_created(BINDER_STAT_TRANSACTION_COMPLETE); - t->debug_id = atomic_inc_return(&binder_last_id); + t->debug_id = ++binder_last_id; e->debug_id = t->debug_id; if (reply) @@ -2245,8 +2234,7 @@ err_no_context_mgr_node: { struct binder_transaction_log_entry *fe; - fe = binder_transaction_log_add( - &context->transaction_log_failed); + fe = binder_transaction_log_add(&binder_transaction_log_failed); *fe = *e; } @@ -2274,8 +2262,8 @@ static int binder_thread_write(struct binder_proc *proc, return -EFAULT; ptr += sizeof(uint32_t); trace_binder_command(cmd); - if (_IOC_NR(cmd) < ARRAY_SIZE(context->binder_stats.bc)) { - context->binder_stats.bc[_IOC_NR(cmd)]++; + if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.bc)) { + binder_stats.bc[_IOC_NR(cmd)]++; proc->stats.bc[_IOC_NR(cmd)]++; thread->stats.bc[_IOC_NR(cmd)]++; } @@ -2640,8 +2628,8 @@ static void binder_stat_br(struct binder_proc *proc, struct binder_thread *thread, uint32_t cmd) { trace_binder_return(cmd); - if (_IOC_NR(cmd) < ARRAY_SIZE(proc->stats.br)) { - proc->context->binder_stats.br[_IOC_NR(cmd)]++; + if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.br)) { + binder_stats.br[_IOC_NR(cmd)]++; proc->stats.br[_IOC_NR(cmd)]++; thread->stats.br[_IOC_NR(cmd)]++; } @@ -2705,7 +2693,7 @@ retry: if (wait_for_proc_work) proc->ready_threads++; - binder_unlock(proc->context, __func__); + binder_unlock(__func__); trace_binder_wait_for_work(wait_for_proc_work, !!thread->transaction_stack, @@ -2732,7 +2720,7 @@ retry: ret = wait_event_freezable(thread->wait, binder_has_thread_work(thread)); } - binder_lock(proc->context, __func__); + binder_lock(__func__); if (wait_for_proc_work) proc->ready_threads--; @@ -3119,14 +3107,14 @@ static unsigned int binder_poll(struct file *filp, struct binder_thread *thread = NULL; int wait_for_proc_work; - binder_lock(proc->context, __func__); + binder_lock(__func__); thread = binder_get_thread(proc); wait_for_proc_work = thread->transaction_stack == NULL && list_empty(&thread->todo) && thread->return_error == BR_OK; - binder_unlock(proc->context, __func__); + binder_unlock(__func__); if (wait_for_proc_work) { if (binder_has_proc_work(proc, thread)) @@ -3253,7 +3241,6 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int ret; 
struct binder_proc *proc = filp->private_data; - struct binder_context *context = proc->context; struct binder_thread *thread; unsigned int size = _IOC_SIZE(cmd); void __user *ubuf = (void __user *)arg; @@ -3267,7 +3254,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (ret) goto err_unlocked; - binder_lock(context, __func__); + binder_lock(__func__); thread = binder_get_thread(proc); if (thread == NULL) { ret = -ENOMEM; @@ -3319,7 +3306,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) err: if (thread) thread->looper &= ~BINDER_LOOPER_STATE_NEED_RETURN; - binder_unlock(context, __func__); + binder_unlock(__func__); wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret && ret != -ERESTARTSYS) pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret); @@ -3391,7 +3378,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) } vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE; - mutex_lock(&proc->context->binder_mmap_lock); + mutex_lock(&binder_mmap_lock); if (proc->buffer) { ret = -EBUSY; failure_string = "already mapped"; @@ -3406,7 +3393,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) } proc->buffer = area->addr; proc->user_buffer_offset = vma->vm_start - (uintptr_t)proc->buffer; - mutex_unlock(&proc->context->binder_mmap_lock); + mutex_unlock(&binder_mmap_lock); #ifdef CONFIG_CPU_CACHE_VIPT if (cache_is_vipt_aliasing()) { @@ -3451,12 +3438,12 @@ err_alloc_small_buf_failed: kfree(proc->pages); proc->pages = NULL; err_alloc_pages_failed: - mutex_lock(&proc->context->binder_mmap_lock); + mutex_lock(&binder_mmap_lock); vfree(proc->buffer); proc->buffer = NULL; err_get_vm_area_failed: err_already_mapped: - mutex_unlock(&proc->context->binder_mmap_lock); + mutex_unlock(&binder_mmap_lock); err_bad_arg: pr_err("binder_mmap: %d %lx-%lx %s failed %d\n", proc->pid, vma->vm_start, vma->vm_end, failure_string, ret); @@ -3483,15 +3470,15 @@ static int binder_open(struct inode *nodp, struct file *filp) miscdev); proc->context = &binder_dev->context; - binder_lock(proc->context, __func__); + binder_lock(__func__); binder_stats_created(BINDER_STAT_PROC); - hlist_add_head(&proc->proc_node, &proc->context->binder_procs); + hlist_add_head(&proc->proc_node, &binder_procs); proc->pid = current->group_leader->pid; INIT_LIST_HEAD(&proc->delivered_death); filp->private_data = proc; - binder_unlock(proc->context, __func__); + binder_unlock(__func__); if (binder_debugfs_dir_entry_proc) { char strbuf[11]; @@ -3556,7 +3543,6 @@ static int binder_release(struct inode *nodp, struct file *filp) static int binder_node_release(struct binder_node *node, int refs) { struct binder_ref *ref; - struct binder_context *context = node->proc->context; int death = 0; list_del_init(&node->work.entry); @@ -3572,7 +3558,7 @@ static int binder_node_release(struct binder_node *node, int refs) node->proc = NULL; node->local_strong_refs = 0; node->local_weak_refs = 0; - hlist_add_head(&node->dead_node, &context->binder_dead_nodes); + hlist_add_head(&node->dead_node, &binder_dead_nodes); hlist_for_each_entry(ref, &node->refs, node_entry) { refs++; @@ -3637,8 +3623,7 @@ static void binder_deferred_release(struct binder_proc *proc) node = rb_entry(n, struct binder_node, rb_node); nodes++; rb_erase(&node->rb_node, &proc->nodes); - incoming_refs = binder_node_release(node, - incoming_refs); + incoming_refs = binder_node_release(node, incoming_refs); } 
outgoing_refs = 0; @@ -3710,16 +3695,14 @@ static void binder_deferred_func(struct work_struct *work) { struct binder_proc *proc; struct files_struct *files; - struct binder_context *context = - container_of(work, struct binder_context, deferred_work); int defer; do { - binder_lock(context, __func__); - mutex_lock(&context->binder_deferred_lock); - if (!hlist_empty(&context->binder_deferred_list)) { - proc = hlist_entry(context->binder_deferred_list.first, + binder_lock(__func__); + mutex_lock(&binder_deferred_lock); + if (!hlist_empty(&binder_deferred_list)) { + proc = hlist_entry(binder_deferred_list.first, struct binder_proc, deferred_work_node); hlist_del_init(&proc->deferred_work_node); defer = proc->deferred_work; @@ -3728,7 +3711,7 @@ static void binder_deferred_func(struct work_struct *work) proc = NULL; defer = 0; } - mutex_unlock(&context->binder_deferred_lock); + mutex_unlock(&binder_deferred_lock); files = NULL; if (defer & BINDER_DEFERRED_PUT_FILES) { @@ -3743,24 +3726,24 @@ static void binder_deferred_func(struct work_struct *work) if (defer & BINDER_DEFERRED_RELEASE) binder_deferred_release(proc); /* frees proc */ - binder_unlock(context, __func__); + binder_unlock(__func__); if (files) put_files_struct(files); } while (proc); } +static DECLARE_WORK(binder_deferred_work, binder_deferred_func); static void binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer) { - mutex_lock(&proc->context->binder_deferred_lock); + mutex_lock(&binder_deferred_lock); proc->deferred_work |= defer; if (hlist_unhashed(&proc->deferred_work_node)) { hlist_add_head(&proc->deferred_work_node, - &proc->context->binder_deferred_list); - queue_work(proc->context->binder_deferred_workqueue, - &proc->context->deferred_work); + &binder_deferred_list); + queue_work(binder_deferred_workqueue, &binder_deferred_work); } - mutex_unlock(&proc->context->binder_deferred_lock); + mutex_unlock(&binder_deferred_lock); } static void print_binder_transaction(struct seq_file *m, const char *prefix, @@ -3991,20 +3974,8 @@ static const char * const binder_objstat_strings[] = { "transaction_complete" }; -static void add_binder_stats(struct binder_stats *from, struct binder_stats *to) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(to->bc); i++) - to->bc[i] += from->bc[i]; - - for (i = 0; i < ARRAY_SIZE(to->br); i++) - to->br[i] += from->br[i]; -} - static void print_binder_stats(struct seq_file *m, const char *prefix, - struct binder_stats *stats, - struct binder_obj_stats *obj_stats) + struct binder_stats *stats) { int i; @@ -4024,21 +3995,16 @@ static void print_binder_stats(struct seq_file *m, const char *prefix, binder_return_strings[i], stats->br[i]); } - if (!obj_stats) - return; - - BUILD_BUG_ON(ARRAY_SIZE(obj_stats->obj_created) != + BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) != ARRAY_SIZE(binder_objstat_strings)); - BUILD_BUG_ON(ARRAY_SIZE(obj_stats->obj_created) != - ARRAY_SIZE(obj_stats->obj_deleted)); - for (i = 0; i < ARRAY_SIZE(obj_stats->obj_created); i++) { - int obj_created = atomic_read(&obj_stats->obj_created[i]); - int obj_deleted = atomic_read(&obj_stats->obj_deleted[i]); - - if (obj_created || obj_deleted) + BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) != + ARRAY_SIZE(stats->obj_deleted)); + for (i = 0; i < ARRAY_SIZE(stats->obj_created); i++) { + if (stats->obj_created[i] || stats->obj_deleted[i]) seq_printf(m, "%s%s: active %d total %d\n", prefix, - binder_objstat_strings[i], - obj_created - obj_deleted, obj_created); + binder_objstat_strings[i], + stats->obj_created[i] - 
stats->obj_deleted[i], + stats->obj_created[i]); } } @@ -4093,131 +4059,85 @@ static void print_binder_proc_stats(struct seq_file *m, } seq_printf(m, " pending transactions: %d\n", count); - print_binder_stats(m, " ", &proc->stats, NULL); + print_binder_stats(m, " ", &proc->stats); } static int binder_state_show(struct seq_file *m, void *unused) { - struct binder_device *device; - struct binder_context *context; struct binder_proc *proc; struct binder_node *node; int do_lock = !binder_debug_no_lock; - bool wrote_dead_nodes_header = false; + + if (do_lock) + binder_lock(__func__); seq_puts(m, "binder state:\n"); - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - if (do_lock) - binder_lock(context, __func__); - if (!wrote_dead_nodes_header && - !hlist_empty(&context->binder_dead_nodes)) { - seq_puts(m, "dead nodes:\n"); - wrote_dead_nodes_header = true; - } - hlist_for_each_entry(node, &context->binder_dead_nodes, - dead_node) - print_binder_node(m, node); + if (!hlist_empty(&binder_dead_nodes)) + seq_puts(m, "dead nodes:\n"); + hlist_for_each_entry(node, &binder_dead_nodes, dead_node) + print_binder_node(m, node); - if (do_lock) - binder_unlock(context, __func__); - } - - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - if (do_lock) - binder_lock(context, __func__); - - hlist_for_each_entry(proc, &context->binder_procs, proc_node) - print_binder_proc(m, proc, 1); - if (do_lock) - binder_unlock(context, __func__); - } + hlist_for_each_entry(proc, &binder_procs, proc_node) + print_binder_proc(m, proc, 1); + if (do_lock) + binder_unlock(__func__); return 0; } static int binder_stats_show(struct seq_file *m, void *unused) { - struct binder_device *device; - struct binder_context *context; struct binder_proc *proc; - struct binder_stats total_binder_stats; int do_lock = !binder_debug_no_lock; - memset(&total_binder_stats, 0, sizeof(struct binder_stats)); - - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - if (do_lock) - binder_lock(context, __func__); - - add_binder_stats(&context->binder_stats, &total_binder_stats); - - if (do_lock) - binder_unlock(context, __func__); - } + if (do_lock) + binder_lock(__func__); seq_puts(m, "binder stats:\n"); - print_binder_stats(m, "", &total_binder_stats, &binder_obj_stats); - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - if (do_lock) - binder_lock(context, __func__); + print_binder_stats(m, "", &binder_stats); - hlist_for_each_entry(proc, &context->binder_procs, proc_node) - print_binder_proc_stats(m, proc); - if (do_lock) - binder_unlock(context, __func__); - } + hlist_for_each_entry(proc, &binder_procs, proc_node) + print_binder_proc_stats(m, proc); + if (do_lock) + binder_unlock(__func__); return 0; } static int binder_transactions_show(struct seq_file *m, void *unused) { - struct binder_device *device; - struct binder_context *context; struct binder_proc *proc; int do_lock = !binder_debug_no_lock; - seq_puts(m, "binder transactions:\n"); - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - if (do_lock) - binder_lock(context, __func__); + if (do_lock) + binder_lock(__func__); - hlist_for_each_entry(proc, &context->binder_procs, proc_node) - print_binder_proc(m, proc, 0); - if (do_lock) - binder_unlock(context, __func__); - } + seq_puts(m, "binder transactions:\n"); + hlist_for_each_entry(proc, &binder_procs, proc_node) + print_binder_proc(m, proc, 0); + if (do_lock) 
+ binder_unlock(__func__); return 0; } static int binder_proc_show(struct seq_file *m, void *unused) { - struct binder_device *device; - struct binder_context *context; struct binder_proc *itr; int pid = (unsigned long)m->private; int do_lock = !binder_debug_no_lock; - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - if (do_lock) - binder_lock(context, __func__); + if (do_lock) + binder_lock(__func__); - hlist_for_each_entry(itr, &context->binder_procs, proc_node) { - if (itr->pid == pid) { - seq_puts(m, "binder proc state:\n"); - print_binder_proc(m, itr, 1); - } + hlist_for_each_entry(itr, &binder_procs, proc_node) { + if (itr->pid == pid) { + seq_puts(m, "binder proc state:\n"); + print_binder_proc(m, itr, 1); } - if (do_lock) - binder_unlock(context, __func__); } + if (do_lock) + binder_unlock(__func__); return 0; } @@ -4232,10 +4152,11 @@ static void print_binder_transaction_log_entry(struct seq_file *m, e->to_node, e->target_handle, e->data_size, e->offsets_size); } -static int print_binder_transaction_log(struct seq_file *m, - struct binder_transaction_log *log) +static int binder_transaction_log_show(struct seq_file *m, void *unused) { + struct binder_transaction_log *log = m->private; int i; + if (log->full) { for (i = log->next; i < ARRAY_SIZE(log->entry); i++) print_binder_transaction_log_entry(m, &log->entry[i]); @@ -4245,31 +4166,6 @@ static int print_binder_transaction_log(struct seq_file *m, return 0; } -static int binder_transaction_log_show(struct seq_file *m, void *unused) -{ - struct binder_device *device; - struct binder_context *context; - - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - print_binder_transaction_log(m, &context->transaction_log); - } - return 0; -} - -static int binder_failed_transaction_log_show(struct seq_file *m, void *unused) -{ - struct binder_device *device; - struct binder_context *context; - - hlist_for_each_entry(device, &binder_devices, hlist) { - context = &device->context; - print_binder_transaction_log(m, - &context->transaction_log_failed); - } - return 0; -} - static const struct file_operations binder_fops = { .owner = THIS_MODULE, .poll = binder_poll, @@ -4285,20 +4181,11 @@ BINDER_DEBUG_ENTRY(state); BINDER_DEBUG_ENTRY(stats); BINDER_DEBUG_ENTRY(transactions); BINDER_DEBUG_ENTRY(transaction_log); -BINDER_DEBUG_ENTRY(failed_transaction_log); - -static void __init free_binder_device(struct binder_device *device) -{ - if (device->context.binder_deferred_workqueue) - destroy_workqueue(device->context.binder_deferred_workqueue); - kfree(device); -} static int __init init_binder_device(const char *name) { int ret; struct binder_device *binder_device; - struct binder_context *context; binder_device = kzalloc(sizeof(*binder_device), GFP_KERNEL); if (!binder_device) @@ -4308,65 +4195,31 @@ static int __init init_binder_device(const char *name) binder_device->miscdev.minor = MISC_DYNAMIC_MINOR; binder_device->miscdev.name = name; - context = &binder_device->context; - context->binder_context_mgr_uid = INVALID_UID; - context->name = name; - - mutex_init(&context->binder_main_lock); - mutex_init(&context->binder_deferred_lock); - mutex_init(&context->binder_mmap_lock); - - context->binder_deferred_workqueue = - create_singlethread_workqueue(name); - - if (!context->binder_deferred_workqueue) { - ret = -ENOMEM; - goto err_create_singlethread_workqueue_failed; - } - - INIT_HLIST_HEAD(&context->binder_procs); - INIT_HLIST_HEAD(&context->binder_dead_nodes); - 
INIT_HLIST_HEAD(&context->binder_deferred_list); - INIT_WORK(&context->deferred_work, binder_deferred_func); + binder_device->context.binder_context_mgr_uid = INVALID_UID; + binder_device->context.name = name; ret = misc_register(&binder_device->miscdev); if (ret < 0) { - goto err_misc_register_failed; + kfree(binder_device); + return ret; } hlist_add_head(&binder_device->hlist, &binder_devices); - return ret; - -err_create_singlethread_workqueue_failed: -err_misc_register_failed: - free_binder_device(binder_device); return ret; } static int __init binder_init(void) { - int ret = 0; + int ret; char *device_name, *device_names; struct binder_device *device; struct hlist_node *tmp; - /* - * Copy the module_parameter string, because we don't want to - * tokenize it in-place. - */ - device_names = kzalloc(strlen(binder_devices_param) + 1, GFP_KERNEL); - if (!device_names) + binder_deferred_workqueue = create_singlethread_workqueue("binder"); + if (!binder_deferred_workqueue) return -ENOMEM; - strcpy(device_names, binder_devices_param); - - while ((device_name = strsep(&device_names, ","))) { - ret = init_binder_device(device_name); - if (ret) - goto err_init_binder_device_failed; - } - binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL); if (binder_debugfs_dir_entry_root) binder_debugfs_dir_entry_proc = debugfs_create_dir("proc", @@ -4391,13 +4244,30 @@ static int __init binder_init(void) debugfs_create_file("transaction_log", S_IRUGO, binder_debugfs_dir_entry_root, - NULL, + &binder_transaction_log, &binder_transaction_log_fops); debugfs_create_file("failed_transaction_log", S_IRUGO, binder_debugfs_dir_entry_root, - NULL, - &binder_failed_transaction_log_fops); + &binder_transaction_log_failed, + &binder_transaction_log_fops); + } + + /* + * Copy the module_parameter string, because we don't want to + * tokenize it in-place. + */ + device_names = kzalloc(strlen(binder_devices_param) + 1, GFP_KERNEL); + if (!device_names) { + ret = -ENOMEM; + goto err_alloc_device_names_failed; + } + strcpy(device_names, binder_devices_param); + + while ((device_name = strsep(&device_names, ","))) { + ret = init_binder_device(device_name); + if (ret) + goto err_init_binder_device_failed; } return ret; @@ -4406,8 +4276,12 @@ err_init_binder_device_failed: hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) { misc_deregister(&device->miscdev); hlist_del(&device->hlist); - free_binder_device(device); + kfree(device); } +err_alloc_device_names_failed: + debugfs_remove_recursive(binder_debugfs_dir_entry_root); + + destroy_workqueue(binder_deferred_workqueue); return ret; } From 0cebb407b2f431d748d9cc85cf7e4232a9223342 Mon Sep 17 00:00:00 2001 From: Riley Andrews Date: Tue, 1 Sep 2015 12:42:07 -0700 Subject: [PATCH 005/176] FROMLIST: binder: Use wake up hint for synchronous transactions. (from https://patchwork.kernel.org/patch/9817747) Use wake_up_interruptible_sync() to hint to the scheduler binder transactions are synchronous wakeups. Disable preemption while waking to avoid ping-ponging on the binder lock. 
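As a minimal sketch of the wakeup choice described above (the helper
name binder_notify_target() is hypothetical and not part of the driver;
the real change appears in the diff below), wake_up_interruptible_sync()
passes the WF_SYNC hint, telling the scheduler the waker is about to
sleep so the wakee may run on the same CPU instead of ping-ponging:

	#include <linux/types.h>
	#include <linux/wait.h>

	/* Sketch only: pick the sync variant when the waker will block. */
	static void binder_notify_target(wait_queue_head_t *wq,
					 bool waker_will_block)
	{
		if (waker_will_block)
			/* WF_SYNC hint: waker sleeps next, prefer its CPU */
			wake_up_interruptible_sync(wq);
		else
			/* one-way work: ordinary wakeup */
			wake_up_interruptible(wq);
	}

In the patch itself, "waker_will_block" corresponds to replies and
non-TF_ONE_WAY transactions, where the sender waits for a response.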
Change-Id: Ic406a232d0873662f80148e37acefe5243d912a0
Signed-off-by: Todd Kjos
Git-commit: 443c026e90820170aa3db2c21d2933ae5922f900
Git-repo: https://android.googlesource.com/kernel/msm
Signed-off-by: Omprakash Dhyade
---
 drivers/android/binder.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 08cde76875d2..211262071198 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2201,8 +2201,12 @@ static void binder_transaction(struct binder_proc *proc,
 	list_add_tail(&t->work.entry, target_list);
 	tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
 	list_add_tail(&tcomplete->entry, &thread->todo);
-	if (target_wait)
-		wake_up_interruptible(target_wait);
+	if (target_wait) {
+		if (reply || !(t->flags & TF_ONE_WAY))
+			wake_up_interruptible_sync(target_wait);
+		else
+			wake_up_interruptible(target_wait);
+	}
 	return;
 
 err_translate_failed:

From 19a3948b3c02642a29df6339212a40424e24a570 Mon Sep 17 00:00:00 2001
From: Todd Kjos
Date: Mon, 10 Oct 2016 10:39:59 -0700
Subject: [PATCH 006/176] FROMLIST: binder: separate binder allocator
 structure from binder proc

(from https://patchwork.kernel.org/patch/9817745/)

The binder allocator is logically separate from the rest of the binder
driver. Separate the data structures to prepare for splitting the
allocator into its own file with its own locking.

Change-Id: I5ca4df567ac4b8c8d6ee2116ae67c0c1d75c9747
Signed-off-by: Todd Kjos
---
 drivers/android/binder.c       | 216 ++++++++++++++++++++-------------
 drivers/android/binder_trace.h |   2 +-
 2 files changed, 132 insertions(+), 86 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 211262071198..734edbd67410 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -319,6 +319,41 @@ enum binder_deferred_state {
 	BINDER_DEFERRED_RELEASE = 0x04,
 };
 
+/**
+ * struct binder_alloc - per-binder proc state for binder allocator
+ * @vma:                vm_area_struct passed to mmap_handler
+ *                      (invariant after mmap)
+ * @vma_vm_mm:          copy of vma->vm_mm (invariant after mmap)
+ * @buffer:             base of per-proc address space mapped via mmap
+ * @user_buffer_offset: offset between user and kernel VAs for buffer
+ * @buffers:            list of all buffers for this proc
+ * @free_buffers:       rb tree of buffers available for allocation
+ *                      sorted by size
+ * @allocated_buffers:  rb tree of allocated buffers sorted by address
+ * @free_async_space:   VA space available for async buffers. This is
+ *                      initialized at mmap time to 1/2 the full VA space
+ * @pages:              array of physical page addresses for each page of
+ *                      mmap'd space
+ * @buffer_size:        size of address space (could be less than requested)
+ *
+ * Bookkeeping structure for per-proc address space management for binder
+ * buffers. It is normally initialized during binder_init() and binder_mmap()
+ * calls. 
The address space is used for both user-visible buffers and for + * struct binder_buffer objects used to track the user buffers + */ +struct binder_alloc { + struct vm_area_struct *vma; + struct mm_struct *vma_vm_mm; + void *buffer; + ptrdiff_t user_buffer_offset; + struct list_head buffers; + struct rb_root free_buffers; + struct rb_root allocated_buffers; + size_t free_async_space; + struct page **pages; + size_t buffer_size; +}; + struct binder_proc { struct hlist_node proc_node; struct rb_root threads; @@ -326,23 +361,11 @@ struct binder_proc { struct rb_root refs_by_desc; struct rb_root refs_by_node; int pid; - struct vm_area_struct *vma; - struct mm_struct *vma_vm_mm; struct task_struct *tsk; struct files_struct *files; struct hlist_node deferred_work_node; int deferred_work; - void *buffer; - ptrdiff_t user_buffer_offset; - struct list_head buffers; - struct rb_root free_buffers; - struct rb_root allocated_buffers; - size_t free_async_space; - - struct page **pages; - size_t buffer_size; - uint32_t buffer_free; struct list_head todo; wait_queue_head_t wait; struct binder_stats stats; @@ -353,6 +376,7 @@ struct binder_proc { int ready_threads; long default_priority; struct dentry *debugfs_entry; + struct binder_alloc alloc; struct binder_context *context; }; @@ -485,8 +509,10 @@ static void binder_set_nice(long nice) static size_t binder_buffer_size(struct binder_proc *proc, struct binder_buffer *buffer) { - if (list_is_last(&buffer->entry, &proc->buffers)) - return proc->buffer + proc->buffer_size - (void *)buffer->data; + if (list_is_last(&buffer->entry, &proc->alloc.buffers)) + return proc->alloc.buffer + + proc->alloc.buffer_size - + (void *)buffer->data; return (size_t)list_entry(buffer->entry.next, struct binder_buffer, entry) - (size_t)buffer->data; } @@ -494,7 +520,7 @@ static size_t binder_buffer_size(struct binder_proc *proc, static void binder_insert_free_buffer(struct binder_proc *proc, struct binder_buffer *new_buffer) { - struct rb_node **p = &proc->free_buffers.rb_node; + struct rb_node **p = &proc->alloc.free_buffers.rb_node; struct rb_node *parent = NULL; struct binder_buffer *buffer; size_t buffer_size; @@ -521,13 +547,13 @@ static void binder_insert_free_buffer(struct binder_proc *proc, p = &parent->rb_right; } rb_link_node(&new_buffer->rb_node, parent, p); - rb_insert_color(&new_buffer->rb_node, &proc->free_buffers); + rb_insert_color(&new_buffer->rb_node, &proc->alloc.free_buffers); } static void binder_insert_allocated_buffer(struct binder_proc *proc, struct binder_buffer *new_buffer) { - struct rb_node **p = &proc->allocated_buffers.rb_node; + struct rb_node **p = &proc->alloc.allocated_buffers.rb_node; struct rb_node *parent = NULL; struct binder_buffer *buffer; @@ -546,18 +572,19 @@ static void binder_insert_allocated_buffer(struct binder_proc *proc, BUG(); } rb_link_node(&new_buffer->rb_node, parent, p); - rb_insert_color(&new_buffer->rb_node, &proc->allocated_buffers); + rb_insert_color(&new_buffer->rb_node, &proc->alloc.allocated_buffers); } static struct binder_buffer *binder_buffer_lookup(struct binder_proc *proc, uintptr_t user_ptr) { - struct rb_node *n = proc->allocated_buffers.rb_node; + struct rb_node *n = proc->alloc.allocated_buffers.rb_node; struct binder_buffer *buffer; struct binder_buffer *kern_ptr; - kern_ptr = (struct binder_buffer *)(user_ptr - proc->user_buffer_offset - - offsetof(struct binder_buffer, data)); + kern_ptr = (struct binder_buffer *) + (user_ptr - proc->alloc.user_buffer_offset - + offsetof(struct binder_buffer, data)); while 
(n) { buffer = rb_entry(n, struct binder_buffer, rb_node); @@ -598,8 +625,8 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, if (mm) { down_write(&mm->mmap_sem); - vma = proc->vma; - if (vma && mm != proc->vma_vm_mm) { + vma = proc->alloc.vma; + if (vma && mm != proc->alloc.vma_vm_mm) { pr_err("%d: vma mm and task mm mismatch\n", proc->pid); vma = NULL; @@ -618,7 +645,8 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { int ret; - page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; + page = &proc->alloc.pages[ + (page_addr - proc->alloc.buffer) / PAGE_SIZE]; BUG_ON(*page); *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); @@ -637,7 +665,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, goto err_map_kernel_failed; } user_page_addr = - (uintptr_t)page_addr + proc->user_buffer_offset; + (uintptr_t)page_addr + proc->alloc.user_buffer_offset; ret = vm_insert_page(vma, user_page_addr, page[0]); if (ret) { pr_err("%d: binder_alloc_buf failed to map page at %lx in userspace\n", @@ -655,10 +683,14 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, free_range: for (page_addr = end - PAGE_SIZE; page_addr >= start; page_addr -= PAGE_SIZE) { - page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; + + page = &proc->alloc.pages[ + (page_addr - proc->alloc.buffer) / PAGE_SIZE]; if (vma) - zap_page_range(vma, (uintptr_t)page_addr + - proc->user_buffer_offset, PAGE_SIZE, NULL); + zap_page_range(vma, + (uintptr_t)page_addr + + proc->alloc.user_buffer_offset, + PAGE_SIZE, NULL); err_vm_insert_page_failed: unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); err_map_kernel_failed: @@ -681,7 +713,7 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, size_t extra_buffers_size, int is_async) { - struct rb_node *n = proc->free_buffers.rb_node; + struct rb_node *n = proc->alloc.free_buffers.rb_node; struct binder_buffer *buffer; size_t buffer_size; struct rb_node *best_fit = NULL; @@ -689,7 +721,7 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, void *end_page_addr; size_t size, data_offsets_size; - if (proc->vma == NULL) { + if (proc->alloc.vma == NULL) { pr_err("%d: binder_alloc_buf, no vma\n", proc->pid); return NULL; @@ -710,7 +742,8 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, return NULL; } if (is_async && - proc->free_async_space < size + sizeof(struct binder_buffer)) { + proc->alloc.free_async_space < + size + sizeof(struct binder_buffer)) { binder_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: binder_alloc_buf size %zd failed, no async space left\n", proc->pid, size); @@ -762,7 +795,7 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr, NULL)) return NULL; - rb_erase(best_fit, &proc->free_buffers); + rb_erase(best_fit, &proc->alloc.free_buffers); buffer->free = 0; binder_insert_allocated_buffer(proc, buffer); if (buffer_size != size) { @@ -780,10 +813,11 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, buffer->extra_buffers_size = extra_buffers_size; buffer->async_transaction = is_async; if (is_async) { - proc->free_async_space -= size + sizeof(struct binder_buffer); + proc->alloc.free_async_space -= + size + sizeof(struct binder_buffer); binder_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, "%d: binder_alloc_buf size %zd async 
free %zd\n", - proc->pid, size, proc->free_async_space); + proc->pid, size, proc->alloc.free_async_space); } return buffer; @@ -806,7 +840,7 @@ static void binder_delete_free_buffer(struct binder_proc *proc, int free_page_end = 1; int free_page_start = 1; - BUG_ON(proc->buffers.next == &buffer->entry); + BUG_ON(proc->alloc.buffers.next == &buffer->entry); prev = list_entry(buffer->entry.prev, struct binder_buffer, entry); BUG_ON(!prev->free); if (buffer_end_page(prev) == buffer_start_page(buffer)) { @@ -818,7 +852,7 @@ static void binder_delete_free_buffer(struct binder_proc *proc, proc->pid, buffer, prev); } - if (!list_is_last(&buffer->entry, &proc->buffers)) { + if (!list_is_last(&buffer->entry, &proc->alloc.buffers)) { next = list_entry(buffer->entry.next, struct binder_buffer, entry); if (buffer_start_page(next) == buffer_end_page(buffer)) { @@ -862,39 +896,40 @@ static void binder_free_buf(struct binder_proc *proc, BUG_ON(buffer->free); BUG_ON(size > buffer_size); BUG_ON(buffer->transaction != NULL); - BUG_ON((void *)buffer < proc->buffer); - BUG_ON((void *)buffer > proc->buffer + proc->buffer_size); + BUG_ON((void *)buffer < proc->alloc.buffer); + BUG_ON((void *)buffer > proc->alloc.buffer + proc->alloc.buffer_size); if (buffer->async_transaction) { - proc->free_async_space += size + sizeof(struct binder_buffer); + proc->alloc.free_async_space += + size + sizeof(struct binder_buffer); binder_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, "%d: binder_free_buf size %zd async free %zd\n", - proc->pid, size, proc->free_async_space); + proc->pid, size, proc->alloc.free_async_space); } binder_update_page_range(proc, 0, (void *)PAGE_ALIGN((uintptr_t)buffer->data), (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK), NULL); - rb_erase(&buffer->rb_node, &proc->allocated_buffers); + rb_erase(&buffer->rb_node, &proc->alloc.allocated_buffers); buffer->free = 1; - if (!list_is_last(&buffer->entry, &proc->buffers)) { + if (!list_is_last(&buffer->entry, &proc->alloc.buffers)) { struct binder_buffer *next = list_entry(buffer->entry.next, struct binder_buffer, entry); if (next->free) { - rb_erase(&next->rb_node, &proc->free_buffers); + rb_erase(&next->rb_node, &proc->alloc.free_buffers); binder_delete_free_buffer(proc, next); } } - if (proc->buffers.next != &buffer->entry) { + if (proc->alloc.buffers.next != &buffer->entry) { struct binder_buffer *prev = list_entry(buffer->entry.prev, struct binder_buffer, entry); if (prev->free) { binder_delete_free_buffer(proc, buffer); - rb_erase(&prev->rb_node, &proc->free_buffers); + rb_erase(&prev->rb_node, &proc->alloc.free_buffers); buffer = prev; } } @@ -1533,7 +1568,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, * back to kernel address space to access it */ parent_buffer = parent->buffer - - proc->user_buffer_offset; + proc->alloc.user_buffer_offset; fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { @@ -1751,7 +1786,7 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, * Since the parent was already fixed up, convert it * back to the kernel address space to access it */ - parent_buffer = parent->buffer - target_proc->user_buffer_offset; + parent_buffer = parent->buffer - target_proc->alloc.user_buffer_offset; fd_array = (u32 *)(parent_buffer + fda->parent_offset); if (!IS_ALIGNED((unsigned long)fd_array, sizeof(u32))) { binder_user_error("%d:%d parent offset not aligned correctly.\n", @@ -1819,7 +1854,7 @@ static int binder_fixup_parent(struct binder_transaction 
*t, return -EINVAL; } parent_buffer = (u8 *)(parent->buffer - - target_proc->user_buffer_offset); + target_proc->alloc.user_buffer_offset); *(binder_uintptr_t *)(parent_buffer + bp->parent_offset) = bp->buffer; return 0; @@ -2159,7 +2194,7 @@ static void binder_transaction(struct binder_proc *proc, } /* Fixup buffer pointer to target proc address space */ bp->buffer = (uintptr_t)sg_bufp + - target_proc->user_buffer_offset; + target_proc->alloc.user_buffer_offset; sg_bufp += ALIGN(bp->length, sizeof(u64)); ret = binder_fixup_parent(t, thread, bp, off_start, @@ -2921,7 +2956,7 @@ retry: tr.offsets_size = t->buffer->offsets_size; tr.data.ptr.buffer = (binder_uintptr_t)( (uintptr_t)t->buffer->data + - proc->user_buffer_offset); + proc->alloc.user_buffer_offset); tr.data.ptr.offsets = tr.data.ptr.buffer + ALIGN(t->buffer->data_size, sizeof(void *)); @@ -3339,8 +3374,8 @@ static void binder_vma_close(struct vm_area_struct *vma) proc->pid, vma->vm_start, vma->vm_end, (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, (unsigned long)pgprot_val(vma->vm_page_prot)); - proc->vma = NULL; - proc->vma_vm_mm = NULL; + proc->alloc.vma = NULL; + proc->alloc.vma_vm_mm = NULL; binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES); } @@ -3383,7 +3418,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE; mutex_lock(&binder_mmap_lock); - if (proc->buffer) { + if (proc->alloc.buffer) { ret = -EBUSY; failure_string = "already mapped"; goto err_already_mapped; @@ -3395,56 +3430,65 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) failure_string = "get_vm_area"; goto err_get_vm_area_failed; } - proc->buffer = area->addr; - proc->user_buffer_offset = vma->vm_start - (uintptr_t)proc->buffer; + proc->alloc.buffer = area->addr; + proc->alloc.user_buffer_offset = + vma->vm_start - (uintptr_t)proc->alloc.buffer; mutex_unlock(&binder_mmap_lock); #ifdef CONFIG_CPU_CACHE_VIPT if (cache_is_vipt_aliasing()) { - while (CACHE_COLOUR((vma->vm_start ^ (uint32_t)proc->buffer))) { - pr_info("binder_mmap: %d %lx-%lx maps %p bad alignment\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer); + while (CACHE_COLOUR( + (vma->vm_start ^ (uint32_t)proc->alloc.buffer))) { + pr_info("binder_mmap: %d %lx-%lx maps %pK bad alignment\n", + proc->pid, vma->vm_start, + vma->vm_end, proc->alloc.buffer); vma->vm_start += PAGE_SIZE; } } #endif - proc->pages = kzalloc(sizeof(proc->pages[0]) * ((vma->vm_end - vma->vm_start) / PAGE_SIZE), GFP_KERNEL); - if (proc->pages == NULL) { + proc->alloc.pages = + kzalloc(sizeof(proc->alloc.pages[0]) * + ((vma->vm_end - vma->vm_start) / PAGE_SIZE), + GFP_KERNEL); + if (proc->alloc.pages == NULL) { ret = -ENOMEM; failure_string = "alloc page array"; goto err_alloc_pages_failed; } - proc->buffer_size = vma->vm_end - vma->vm_start; + proc->alloc.buffer_size = vma->vm_end - vma->vm_start; vma->vm_ops = &binder_vm_ops; vma->vm_private_data = proc; - if (binder_update_page_range(proc, 1, proc->buffer, proc->buffer + PAGE_SIZE, vma)) { + if (binder_update_page_range(proc, 1, proc->alloc.buffer, + proc->alloc.buffer + PAGE_SIZE, vma)) { ret = -ENOMEM; failure_string = "alloc small buf"; goto err_alloc_small_buf_failed; } - buffer = proc->buffer; - INIT_LIST_HEAD(&proc->buffers); - list_add(&buffer->entry, &proc->buffers); + buffer = proc->alloc.buffer; + INIT_LIST_HEAD(&proc->alloc.buffers); + list_add(&buffer->entry, &proc->alloc.buffers); buffer->free = 1; binder_insert_free_buffer(proc, buffer); - 
proc->free_async_space = proc->buffer_size / 2; + proc->alloc.free_async_space = proc->alloc.buffer_size / 2; barrier(); proc->files = get_files_struct(current); - proc->vma = vma; - proc->vma_vm_mm = vma->vm_mm; + proc->alloc.vma = vma; + proc->alloc.vma_vm_mm = vma->vm_mm; - /*pr_info("binder_mmap: %d %lx-%lx maps %p\n", - proc->pid, vma->vm_start, vma->vm_end, proc->buffer);*/ + /*pr_info("binder_mmap: %d %lx-%lx maps %pK\n", + * proc->pid, vma->vm_start, vma->vm_end, proc->alloc.buffer); + */ return 0; err_alloc_small_buf_failed: - kfree(proc->pages); - proc->pages = NULL; + kfree(proc->alloc.pages); + proc->alloc.pages = NULL; err_alloc_pages_failed: mutex_lock(&binder_mmap_lock); - vfree(proc->buffer); - proc->buffer = NULL; + vfree(proc->alloc.buffer); + proc->alloc.buffer = NULL; err_get_vm_area_failed: err_already_mapped: mutex_unlock(&binder_mmap_lock); @@ -3596,7 +3640,7 @@ static void binder_deferred_release(struct binder_proc *proc) int threads, nodes, incoming_refs, outgoing_refs, buffers, active_transactions, page_count; - BUG_ON(proc->vma); + BUG_ON(proc->alloc.vma); BUG_ON(proc->files); hlist_del(&proc->proc_node); @@ -3643,7 +3687,7 @@ static void binder_deferred_release(struct binder_proc *proc) binder_release_work(&proc->delivered_death); buffers = 0; - while ((n = rb_first(&proc->allocated_buffers))) { + while ((n = rb_first(&proc->alloc.allocated_buffers))) { struct binder_buffer *buffer; buffer = rb_entry(n, struct binder_buffer, rb_node); @@ -3664,25 +3708,25 @@ static void binder_deferred_release(struct binder_proc *proc) binder_stats_deleted(BINDER_STAT_PROC); page_count = 0; - if (proc->pages) { + if (proc->alloc.pages) { int i; - for (i = 0; i < proc->buffer_size / PAGE_SIZE; i++) { + for (i = 0; i < proc->alloc.buffer_size / PAGE_SIZE; i++) { void *page_addr; - if (!proc->pages[i]) + if (!proc->alloc.pages[i]) continue; - page_addr = proc->buffer + i * PAGE_SIZE; + page_addr = proc->alloc.buffer + i * PAGE_SIZE; binder_debug(BINDER_DEBUG_BUFFER_ALLOC, "%s: %d: page %d at %p not freed\n", __func__, proc->pid, i, page_addr); unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); - __free_page(proc->pages[i]); + __free_page(proc->alloc.pages[i]); page_count++; } - kfree(proc->pages); - vfree(proc->buffer); + kfree(proc->alloc.pages); + vfree(proc->alloc.buffer); } put_task_struct(proc->tsk); @@ -3912,7 +3956,8 @@ static void print_binder_proc(struct seq_file *m, print_binder_ref(m, rb_entry(n, struct binder_ref, rb_node_desc)); } - for (n = rb_first(&proc->allocated_buffers); n != NULL; n = rb_next(n)) + for (n = rb_first(&proc->alloc.allocated_buffers); + n != NULL; n = rb_next(n)) print_binder_buffer(m, " buffer", rb_entry(n, struct binder_buffer, rb_node)); list_for_each_entry(w, &proc->todo, entry) @@ -4029,7 +4074,7 @@ static void print_binder_proc_stats(struct seq_file *m, " ready threads %d\n" " free async space %zd\n", proc->requested_threads, proc->requested_threads_started, proc->max_threads, - proc->ready_threads, proc->free_async_space); + proc->ready_threads, proc->alloc.free_async_space); count = 0; for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) count++; @@ -4047,7 +4092,8 @@ static void print_binder_proc_stats(struct seq_file *m, seq_printf(m, " refs: %d s %d w %d\n", count, strong, weak); count = 0; - for (n = rb_first(&proc->allocated_buffers); n != NULL; n = rb_next(n)) + for (n = rb_first(&proc->alloc.allocated_buffers); + n != NULL; n = rb_next(n)) count++; seq_printf(m, " buffers: %d\n", count); diff --git 
a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h index 7f20f3dc8369..c835f09656c1 100644 --- a/drivers/android/binder_trace.h +++ b/drivers/android/binder_trace.h @@ -280,7 +280,7 @@ TRACE_EVENT(binder_update_page_range, TP_fast_assign( __entry->proc = proc->pid; __entry->allocate = allocate; - __entry->offset = start - proc->buffer; + __entry->offset = start - proc->alloc.buffer; __entry->size = end - start; ), TP_printk("proc=%d allocate=%d offset=%zu size=%zu", From b582e88aa5c657c588da128821f434991f3ba86b Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 21 Apr 2017 11:18:12 -0700 Subject: [PATCH 007/176] FROMLIST: binder: remove unneeded cleanup code (from https://patchwork.kernel.org/patch/9817817/) The buffer's transaction has already been freed before binder_deferred_release. No need to do it again. Change-Id: I412709c23879c5e45a6c7304a588bba0a5223fc3 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 734edbd67410..07be2833250d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3634,7 +3634,6 @@ static int binder_node_release(struct binder_node *node, int refs) static void binder_deferred_release(struct binder_proc *proc) { - struct binder_transaction *t; struct binder_context *context = proc->context; struct rb_node *n; int threads, nodes, incoming_refs, outgoing_refs, buffers, @@ -3692,14 +3691,8 @@ static void binder_deferred_release(struct binder_proc *proc) buffer = rb_entry(n, struct binder_buffer, rb_node); - t = buffer->transaction; - if (t) { - t->buffer = NULL; - buffer->transaction = NULL; - pr_err("release proc %d, transaction %d, not freed\n", - proc->pid, t->debug_id); - /*BUG();*/ - } + /* Transaction should already have been freed */ + BUG_ON(buffer->transaction); binder_free_buf(proc, buffer); buffers++; From 467545d8424090762c38d0bb2ea88da35b94fb09 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 10 Oct 2016 10:40:53 -0700 Subject: [PATCH 008/176] FROMLIST: binder: separate out binder_alloc functions (from https://patchwork.kernel.org/patch/9817753/) Continuation of splitting the binder allocator from the binder driver. Separate binder_alloc functions from normal binder functions. Protect the allocator with a separate mutex. 
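The locking convention this introduces, sketched below (illustrative
shape only; the concrete functions appear in the diff that follows):
each public binder_alloc_*() entry point takes alloc->mutex and
delegates to a *_locked variant that assumes the mutex is already held.

	struct binder_buffer *binder_alloc_buffer_lookup(
		struct binder_alloc *alloc, uintptr_t user_ptr)
	{
		struct binder_buffer *buffer;

		/* allocator state is only touched with alloc->mutex held */
		mutex_lock(&alloc->mutex);
		buffer = binder_alloc_buffer_lookup_locked(alloc, user_ptr);
		mutex_unlock(&alloc->mutex);
		return buffer;
	}

This keeps allocator synchronization independent of the coarse
binder_main_lock that serializes the rest of the driver.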
Bug: 33250092 32225111 Change-Id: I634637415aa03c74145159d84f1749bbe785a8f3 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 655 ++++++++++++++++++++------------- drivers/android/binder_trace.h | 9 +- 2 files changed, 412 insertions(+), 252 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 07be2833250d..e73481a8ea2c 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -48,7 +48,7 @@ static DEFINE_MUTEX(binder_main_lock); static DEFINE_MUTEX(binder_deferred_lock); -static DEFINE_MUTEX(binder_mmap_lock); +static DEFINE_MUTEX(binder_alloc_mmap_lock); static HLIST_HEAD(binder_devices); static HLIST_HEAD(binder_procs); @@ -104,9 +104,7 @@ enum { BINDER_DEBUG_TRANSACTION_COMPLETE = 1U << 10, BINDER_DEBUG_FREE_BUFFER = 1U << 11, BINDER_DEBUG_INTERNAL_REFS = 1U << 12, - BINDER_DEBUG_BUFFER_ALLOC = 1U << 13, - BINDER_DEBUG_PRIORITY_CAP = 1U << 14, - BINDER_DEBUG_BUFFER_ALLOC_ASYNC = 1U << 15, + BINDER_DEBUG_PRIORITY_CAP = 1U << 13, }; static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR | BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION; @@ -159,6 +157,27 @@ module_param_call(stop_on_user_error, binder_set_stop_on_user_error, #define to_binder_fd_array_object(hdr) \ container_of(hdr, struct binder_fd_array_object, hdr) +/* + * debug declarations for binder_alloc. To be + * moved to binder_alloc.c + */ +enum { + BINDER_ALLOC_DEBUG_OPEN_CLOSE = 1U << 1, + BINDER_ALLOC_DEBUG_BUFFER_ALLOC = 1U << 2, + BINDER_ALLOC_DEBUG_BUFFER_ALLOC_ASYNC = 1U << 3, +}; +static uint32_t binder_alloc_debug_mask; + +module_param_named(alloc_debug_mask, binder_alloc_debug_mask, + uint, S_IWUSR | S_IRUGO); + +#define binder_alloc_debug(mask, x...) \ + do { \ + if (binder_alloc_debug_mask & mask) \ + pr_info(x); \ + } while (0) +/* end of binder_alloc debug declarations */ + enum binder_stat_types { BINDER_STAT_PROC, BINDER_STAT_THREAD, @@ -342,6 +361,8 @@ enum binder_deferred_state { * struct binder_buffer objects used to track the user buffers */ struct binder_alloc { + struct mutex mutex; + struct task_struct *tsk; struct vm_area_struct *vma; struct mm_struct *vma_vm_mm; void *buffer; @@ -352,6 +373,7 @@ struct binder_alloc { size_t free_async_space; struct page **pages; size_t buffer_size; + int pid; }; struct binder_proc { @@ -423,6 +445,56 @@ struct binder_transaction { kuid_t sender_euid; }; +/* + * Forward declarations of binder_alloc functions. + * These will be moved to binder_alloc.h when + * binder_alloc is moved to its own files. 
+ */ +extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, + size_t data_size, + size_t offsets_size, + size_t extra_buffers_size, + int is_async); +extern void binder_alloc_init(struct binder_alloc *alloc); +extern void binder_alloc_vma_close(struct binder_alloc *alloc); +extern struct binder_buffer * +binder_alloc_buffer_lookup(struct binder_alloc *alloc, + uintptr_t user_ptr); +extern void binder_alloc_free_buf(struct binder_alloc *alloc, + struct binder_buffer *buffer); +extern int binder_alloc_mmap_handler(struct binder_alloc *alloc, + struct vm_area_struct *vma); +extern void binder_alloc_deferred_release(struct binder_alloc *alloc); +extern int binder_alloc_get_allocated_count(struct binder_alloc *alloc); +extern void binder_alloc_print_allocated(struct seq_file *m, + struct binder_alloc *alloc); + +static inline size_t +binder_alloc_get_free_async_space(struct binder_alloc *alloc) +{ + size_t free_async_space; + + mutex_lock(&alloc->mutex); + free_async_space = alloc->free_async_space; + mutex_unlock(&alloc->mutex); + return free_async_space; +} + +static inline ptrdiff_t +binder_alloc_get_user_buffer_offset(struct binder_alloc *alloc) +{ + /* + * user_buffer_offset is constant if vma is set and + * undefined if vma is not set. It is possible to + * get here with !alloc->vma if the target process + * is dying while a transaction is being initiated. + * Returning the old value is ok in this case and + * the transaction will fail. + */ + return alloc->user_buffer_offset; +} +/* end of binder_alloc declarations */ + static void binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); @@ -506,21 +578,20 @@ static void binder_set_nice(long nice) binder_user_error("%d RLIMIT_NICE not set\n", current->pid); } -static size_t binder_buffer_size(struct binder_proc *proc, - struct binder_buffer *buffer) +static size_t binder_alloc_buffer_size(struct binder_alloc *alloc, + struct binder_buffer *buffer) { - if (list_is_last(&buffer->entry, &proc->alloc.buffers)) - return proc->alloc.buffer + - proc->alloc.buffer_size - - (void *)buffer->data; + if (list_is_last(&buffer->entry, &alloc->buffers)) + return alloc->buffer + + alloc->buffer_size - (void *)buffer->data; return (size_t)list_entry(buffer->entry.next, struct binder_buffer, entry) - (size_t)buffer->data; } -static void binder_insert_free_buffer(struct binder_proc *proc, +static void binder_insert_free_buffer(struct binder_alloc *alloc, struct binder_buffer *new_buffer) { - struct rb_node **p = &proc->alloc.free_buffers.rb_node; + struct rb_node **p = &alloc->free_buffers.rb_node; struct rb_node *parent = NULL; struct binder_buffer *buffer; size_t buffer_size; @@ -528,18 +599,18 @@ static void binder_insert_free_buffer(struct binder_proc *proc, BUG_ON(!new_buffer->free); - new_buffer_size = binder_buffer_size(proc, new_buffer); + new_buffer_size = binder_alloc_buffer_size(alloc, new_buffer); - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: add free buffer, size %zd, at %p\n", - proc->pid, new_buffer_size, new_buffer); + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, + "%d: add free buffer, size %zd, at %pK\n", + alloc->pid, new_buffer_size, new_buffer); while (*p) { parent = *p; buffer = rb_entry(parent, struct binder_buffer, rb_node); BUG_ON(!buffer->free); - buffer_size = binder_buffer_size(proc, buffer); + buffer_size = binder_alloc_buffer_size(alloc, buffer); if (new_buffer_size < buffer_size) p = &parent->rb_left; @@ -547,13 +618,13 @@ static void binder_insert_free_buffer(struct 
binder_proc *proc, p = &parent->rb_right; } rb_link_node(&new_buffer->rb_node, parent, p); - rb_insert_color(&new_buffer->rb_node, &proc->alloc.free_buffers); + rb_insert_color(&new_buffer->rb_node, &alloc->free_buffers); } -static void binder_insert_allocated_buffer(struct binder_proc *proc, +static void binder_insert_allocated_buffer(struct binder_alloc *alloc, struct binder_buffer *new_buffer) { - struct rb_node **p = &proc->alloc.allocated_buffers.rb_node; + struct rb_node **p = &alloc->allocated_buffers.rb_node; struct rb_node *parent = NULL; struct binder_buffer *buffer; @@ -572,19 +643,19 @@ static void binder_insert_allocated_buffer(struct binder_proc *proc, BUG(); } rb_link_node(&new_buffer->rb_node, parent, p); - rb_insert_color(&new_buffer->rb_node, &proc->alloc.allocated_buffers); + rb_insert_color(&new_buffer->rb_node, &alloc->allocated_buffers); } -static struct binder_buffer *binder_buffer_lookup(struct binder_proc *proc, - uintptr_t user_ptr) +static struct binder_buffer *binder_alloc_buffer_lookup_locked( + struct binder_alloc *alloc, + uintptr_t user_ptr) { - struct rb_node *n = proc->alloc.allocated_buffers.rb_node; + struct rb_node *n = alloc->allocated_buffers.rb_node; struct binder_buffer *buffer; struct binder_buffer *kern_ptr; - kern_ptr = (struct binder_buffer *) - (user_ptr - proc->alloc.user_buffer_offset - - offsetof(struct binder_buffer, data)); + kern_ptr = (struct binder_buffer *)(user_ptr - alloc->user_buffer_offset + - offsetof(struct binder_buffer, data)); while (n) { buffer = rb_entry(n, struct binder_buffer, rb_node); @@ -600,7 +671,18 @@ static struct binder_buffer *binder_buffer_lookup(struct binder_proc *proc, return NULL; } -static int binder_update_page_range(struct binder_proc *proc, int allocate, +struct binder_buffer *binder_alloc_buffer_lookup(struct binder_alloc *alloc, + uintptr_t user_ptr) +{ + struct binder_buffer *buffer; + + mutex_lock(&alloc->mutex); + buffer = binder_alloc_buffer_lookup_locked(alloc, user_ptr); + mutex_unlock(&alloc->mutex); + return buffer; +} + +static int binder_update_page_range(struct binder_alloc *alloc, int allocate, void *start, void *end, struct vm_area_struct *vma) { @@ -609,26 +691,26 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, struct page **page; struct mm_struct *mm; - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: %s pages %p-%p\n", proc->pid, + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, + "%d: %s pages %pK-%pK\n", alloc->pid, allocate ? 
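/*
 * Editorial aside (sketch): binder_update_page_range() is the single
 * choke point for physical pages. With allocate == 1 it backs [start,
 * end) with fresh pages and maps each one twice — into the kernel via
 * map_kernel_range_noflush() and into the target process via
 * vm_insert_page(); with allocate == 0 it tears both mappings down. A
 * caller growing a buffer by one page does, roughly:
 *
 *	void *page_addr = (void *)PAGE_ALIGN((uintptr_t)buffer->data);
 *
 *	if (binder_update_page_range(alloc, 1, page_addr,
 *				     page_addr + PAGE_SIZE, NULL))
 *		return NULL;	// -ENOMEM; partial work already undone
 *
 * Passing vma == NULL makes the function fetch the mm from alloc->tsk,
 * which is why the alloc->vma_vm_mm mismatch check exists.
 */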
"allocate" : "free", start, end); if (end <= start) return 0; - trace_binder_update_page_range(proc, allocate, start, end); + trace_binder_update_page_range(alloc, allocate, start, end); if (vma) mm = NULL; else - mm = get_task_mm(proc->tsk); + mm = get_task_mm(alloc->tsk); if (mm) { down_write(&mm->mmap_sem); - vma = proc->alloc.vma; - if (vma && mm != proc->alloc.vma_vm_mm) { + vma = alloc->vma; + if (vma && mm != alloc->vma_vm_mm) { pr_err("%d: vma mm and task mm mismatch\n", - proc->pid); + alloc->pid); vma = NULL; } } @@ -638,21 +720,20 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, if (vma == NULL) { pr_err("%d: binder_alloc_buf failed to map pages in userspace, no vma\n", - proc->pid); + alloc->pid); goto err_no_vma; } for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { int ret; - page = &proc->alloc.pages[ - (page_addr - proc->alloc.buffer) / PAGE_SIZE]; + page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE]; BUG_ON(*page); *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); if (*page == NULL) { - pr_err("%d: binder_alloc_buf failed for page at %p\n", - proc->pid, page_addr); + pr_err("%d: binder_alloc_buf failed for page at %pK\n", + alloc->pid, page_addr); goto err_alloc_page_failed; } ret = map_kernel_range_noflush((unsigned long)page_addr, @@ -660,16 +741,16 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, flush_cache_vmap((unsigned long)page_addr, (unsigned long)page_addr + PAGE_SIZE); if (ret != 1) { - pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n", - proc->pid, page_addr); + pr_err("%d: binder_alloc_buf failed to map page at %pK in kernel\n", + alloc->pid, page_addr); goto err_map_kernel_failed; } user_page_addr = - (uintptr_t)page_addr + proc->alloc.user_buffer_offset; + (uintptr_t)page_addr + alloc->user_buffer_offset; ret = vm_insert_page(vma, user_page_addr, page[0]); if (ret) { pr_err("%d: binder_alloc_buf failed to map page at %lx in userspace\n", - proc->pid, user_page_addr); + alloc->pid, user_page_addr); goto err_vm_insert_page_failed; } /* vm_insert_page does not seem to increment the refcount */ @@ -683,14 +764,10 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, free_range: for (page_addr = end - PAGE_SIZE; page_addr >= start; page_addr -= PAGE_SIZE) { - - page = &proc->alloc.pages[ - (page_addr - proc->alloc.buffer) / PAGE_SIZE]; + page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE]; if (vma) - zap_page_range(vma, - (uintptr_t)page_addr + - proc->alloc.user_buffer_offset, - PAGE_SIZE, NULL); + zap_page_range(vma, (uintptr_t)page_addr + + alloc->user_buffer_offset, PAGE_SIZE, NULL); err_vm_insert_page_failed: unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); err_map_kernel_failed: @@ -707,13 +784,11 @@ err_no_vma: return -ENOMEM; } -static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, - size_t data_size, - size_t offsets_size, - size_t extra_buffers_size, - int is_async) +static struct binder_buffer *binder_alloc_new_buf_locked( + struct binder_alloc *alloc, size_t data_size, + size_t offsets_size, size_t extra_buffers_size, int is_async) { - struct rb_node *n = proc->alloc.free_buffers.rb_node; + struct rb_node *n = alloc->free_buffers.rb_node; struct binder_buffer *buffer; size_t buffer_size; struct rb_node *best_fit = NULL; @@ -721,9 +796,9 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, void *end_page_addr; size_t size, data_offsets_size; - if 
(proc->alloc.vma == NULL) { + if (alloc->vma == NULL) { pr_err("%d: binder_alloc_buf, no vma\n", - proc->pid); + alloc->pid); return NULL; } @@ -731,29 +806,30 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, ALIGN(offsets_size, sizeof(void *)); if (data_offsets_size < data_size || data_offsets_size < offsets_size) { - binder_user_error("%d: got transaction with invalid size %zd-%zd\n", - proc->pid, data_size, offsets_size); + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, + "%d: got transaction with invalid size %zd-%zd\n", + alloc->pid, data_size, offsets_size); return NULL; } size = data_offsets_size + ALIGN(extra_buffers_size, sizeof(void *)); if (size < data_offsets_size || size < extra_buffers_size) { - binder_user_error("%d: got transaction with invalid extra_buffers_size %zd\n", - proc->pid, extra_buffers_size); + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, + "%d: got transaction with invalid extra_buffers_size %zd\n", + alloc->pid, extra_buffers_size); return NULL; } if (is_async && - proc->alloc.free_async_space < - size + sizeof(struct binder_buffer)) { - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, + alloc->free_async_space < size + sizeof(struct binder_buffer)) { + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, "%d: binder_alloc_buf size %zd failed, no async space left\n", - proc->pid, size); + alloc->pid, size); return NULL; } while (n) { buffer = rb_entry(n, struct binder_buffer, rb_node); BUG_ON(!buffer->free); - buffer_size = binder_buffer_size(proc, buffer); + buffer_size = binder_alloc_buffer_size(alloc, buffer); if (size < buffer_size) { best_fit = n; @@ -767,17 +843,17 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, } if (best_fit == NULL) { pr_err("%d: binder_alloc_buf size %zd failed, no address space\n", - proc->pid, size); + alloc->pid, size); return NULL; } if (n == NULL) { buffer = rb_entry(best_fit, struct binder_buffer, rb_node); - buffer_size = binder_buffer_size(proc, buffer); + buffer_size = binder_alloc_buffer_size(alloc, buffer); } - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: binder_alloc_buf size %zd got buffer %p size %zd\n", - proc->pid, size, buffer, buffer_size); + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, + "%d: binder_alloc_buf size %zd got buffer %pK size %zd\n", + alloc->pid, size, buffer, buffer_size); has_page_addr = (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK); @@ -791,38 +867,52 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, (void *)PAGE_ALIGN((uintptr_t)buffer->data + buffer_size); if (end_page_addr > has_page_addr) end_page_addr = has_page_addr; - if (binder_update_page_range(proc, 1, + if (binder_update_page_range(alloc, 1, (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr, NULL)) return NULL; - rb_erase(best_fit, &proc->alloc.free_buffers); + rb_erase(best_fit, &alloc->free_buffers); buffer->free = 0; - binder_insert_allocated_buffer(proc, buffer); + binder_insert_allocated_buffer(alloc, buffer); if (buffer_size != size) { struct binder_buffer *new_buffer = (void *)buffer->data + size; list_add(&new_buffer->entry, &buffer->entry); new_buffer->free = 1; - binder_insert_free_buffer(proc, new_buffer); + binder_insert_free_buffer(alloc, new_buffer); } - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: binder_alloc_buf size %zd got %p\n", - proc->pid, size, buffer); + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, + "%d: binder_alloc_buf size %zd got %pK\n", + alloc->pid, size, buffer); buffer->data_size = 
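/*
 * Editorial aside: the split logic just above carves the unused tail of
 * an oversized free chunk into a fresh header placed inside the buffer
 * space itself:
 *
 *	struct binder_buffer *new_buffer = (void *)buffer->data + size;
 *
 *	list_add(&new_buffer->entry, &buffer->entry);
 *	new_buffer->free = 1;
 *	binder_insert_free_buffer(alloc, new_buffer);
 *
 * Headers and payloads share one address-ordered list, so for a 512-byte
 * allocation from a 4 KiB chunk (sizes illustrative) the layout becomes
 * [hdr | 512B payload | hdr' | remainder], and the remainder goes back
 * on the free_buffers rb-tree at its reduced size.
 */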
data_size; buffer->offsets_size = offsets_size; - buffer->extra_buffers_size = extra_buffers_size; buffer->async_transaction = is_async; + buffer->extra_buffers_size = extra_buffers_size; if (is_async) { - proc->alloc.free_async_space -= - size + sizeof(struct binder_buffer); - binder_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, + alloc->free_async_space -= size + sizeof(struct binder_buffer); + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC_ASYNC, "%d: binder_alloc_buf size %zd async free %zd\n", - proc->pid, size, proc->alloc.free_async_space); + alloc->pid, size, alloc->free_async_space); } return buffer; } +struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, + size_t data_size, + size_t offsets_size, + size_t extra_buffers_size, + int is_async) +{ + struct binder_buffer *buffer; + + mutex_lock(&alloc->mutex); + buffer = binder_alloc_new_buf_locked(alloc, data_size, offsets_size, + extra_buffers_size, is_async); + mutex_unlock(&alloc->mutex); + return buffer; +} + static void *buffer_start_page(struct binder_buffer *buffer) { return (void *)((uintptr_t)buffer & PAGE_MASK); @@ -833,26 +923,26 @@ static void *buffer_end_page(struct binder_buffer *buffer) return (void *)(((uintptr_t)(buffer + 1) - 1) & PAGE_MASK); } -static void binder_delete_free_buffer(struct binder_proc *proc, +static void binder_delete_free_buffer(struct binder_alloc *alloc, struct binder_buffer *buffer) { struct binder_buffer *prev, *next = NULL; int free_page_end = 1; int free_page_start = 1; - BUG_ON(proc->alloc.buffers.next == &buffer->entry); + BUG_ON(alloc->buffers.next == &buffer->entry); prev = list_entry(buffer->entry.prev, struct binder_buffer, entry); BUG_ON(!prev->free); if (buffer_end_page(prev) == buffer_start_page(buffer)) { free_page_start = 0; if (buffer_end_page(prev) == buffer_end_page(buffer)) free_page_end = 0; - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: merge free, buffer %p share page with %p\n", - proc->pid, buffer, prev); + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, + "%d: merge free, buffer %pK share page with %pK\n", + alloc->pid, buffer, prev); } - if (!list_is_last(&buffer->entry, &proc->alloc.buffers)) { + if (!list_is_last(&buffer->entry, &alloc->buffers)) { next = list_entry(buffer->entry.next, struct binder_buffer, entry); if (buffer_start_page(next) == buffer_end_page(buffer)) { @@ -860,80 +950,88 @@ static void binder_delete_free_buffer(struct binder_proc *proc, if (buffer_start_page(next) == buffer_start_page(buffer)) free_page_start = 0; - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: merge free, buffer %p share page with %p\n", - proc->pid, buffer, prev); + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, + "%d: merge free, buffer %pK share page with %pK\n", + alloc->pid, buffer, prev); } } list_del(&buffer->entry); if (free_page_start || free_page_end) { - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: merge free, buffer %p do not share page%s%s with %p or %p\n", - proc->pid, buffer, free_page_start ? "" : " end", + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, + "%d: merge free, buffer %pK do not share page%s%s with %pK or %pK\n", + alloc->pid, buffer, free_page_start ? "" : " end", free_page_end ? "" : " start", prev, next); - binder_update_page_range(proc, 0, free_page_start ? + binder_update_page_range(alloc, 0, free_page_start ? buffer_start_page(buffer) : buffer_end_page(buffer), (free_page_end ? 
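/*
 * Editorial aside (sketch): buffer_start_page()/buffer_end_page() round
 * the first and last byte of a struct binder_buffer header down to page
 * boundaries, so two adjacent headers "share" a page exactly when those
 * values collide. For example, with 4 KiB pages and a header at 0x1ff0:
 *
 *	buffer_start_page(buffer) == 0x1000;
 *	buffer_end_page(buffer)   == 0x2000;	// header straddles a page
 *
 * Only page ranges proven private to the deleted header are passed to
 * binder_update_page_range(alloc, 0, ...); edge pages shared with a
 * neighbor must stay mapped for that neighbor's sake.
 */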
buffer_end_page(buffer) : buffer_start_page(buffer)) + PAGE_SIZE, NULL); } } -static void binder_free_buf(struct binder_proc *proc, - struct binder_buffer *buffer) +static void binder_free_buf_locked(struct binder_alloc *alloc, + struct binder_buffer *buffer) { size_t size, buffer_size; - buffer_size = binder_buffer_size(proc, buffer); + buffer_size = binder_alloc_buffer_size(alloc, buffer); size = ALIGN(buffer->data_size, sizeof(void *)) + ALIGN(buffer->offsets_size, sizeof(void *)) + ALIGN(buffer->extra_buffers_size, sizeof(void *)); - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: binder_free_buf %p size %zd buffer_size %zd\n", - proc->pid, buffer, size, buffer_size); + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, + "%d: binder_free_buf %pK size %zd buffer_size %zd\n", + alloc->pid, buffer, size, buffer_size); BUG_ON(buffer->free); BUG_ON(size > buffer_size); BUG_ON(buffer->transaction != NULL); - BUG_ON((void *)buffer < proc->alloc.buffer); - BUG_ON((void *)buffer > proc->alloc.buffer + proc->alloc.buffer_size); + BUG_ON((void *)buffer < alloc->buffer); + BUG_ON((void *)buffer > alloc->buffer + alloc->buffer_size); if (buffer->async_transaction) { - proc->alloc.free_async_space += - size + sizeof(struct binder_buffer); + alloc->free_async_space += size + sizeof(struct binder_buffer); - binder_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC_ASYNC, "%d: binder_free_buf size %zd async free %zd\n", - proc->pid, size, proc->alloc.free_async_space); + alloc->pid, size, alloc->free_async_space); } - binder_update_page_range(proc, 0, + binder_update_page_range(alloc, 0, (void *)PAGE_ALIGN((uintptr_t)buffer->data), (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK), NULL); - rb_erase(&buffer->rb_node, &proc->alloc.allocated_buffers); + + rb_erase(&buffer->rb_node, &alloc->allocated_buffers); buffer->free = 1; - if (!list_is_last(&buffer->entry, &proc->alloc.buffers)) { + if (!list_is_last(&buffer->entry, &alloc->buffers)) { struct binder_buffer *next = list_entry(buffer->entry.next, struct binder_buffer, entry); if (next->free) { - rb_erase(&next->rb_node, &proc->alloc.free_buffers); - binder_delete_free_buffer(proc, next); + rb_erase(&next->rb_node, &alloc->free_buffers); + binder_delete_free_buffer(alloc, next); } } - if (proc->alloc.buffers.next != &buffer->entry) { + if (alloc->buffers.next != &buffer->entry) { struct binder_buffer *prev = list_entry(buffer->entry.prev, struct binder_buffer, entry); if (prev->free) { - binder_delete_free_buffer(proc, buffer); - rb_erase(&prev->rb_node, &proc->alloc.free_buffers); + binder_delete_free_buffer(alloc, buffer); + rb_erase(&prev->rb_node, &alloc->free_buffers); buffer = prev; } } - binder_insert_free_buffer(proc, buffer); + binder_insert_free_buffer(alloc, buffer); +} + +void binder_alloc_free_buf(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + mutex_lock(&alloc->mutex); + binder_free_buf_locked(alloc, buffer); + mutex_unlock(&alloc->mutex); } static struct binder_node *binder_get_node(struct binder_proc *proc, @@ -1568,7 +1666,8 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, * back to kernel address space to access it */ parent_buffer = parent->buffer - - proc->alloc.user_buffer_offset; + binder_alloc_get_user_buffer_offset( + &proc->alloc); fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { @@ -1786,7 +1885,8 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, * Since the parent 
was already fixed up, convert it * back to the kernel address space to access it */ - parent_buffer = parent->buffer - target_proc->alloc.user_buffer_offset; + parent_buffer = parent->buffer - + binder_alloc_get_user_buffer_offset(&target_proc->alloc); fd_array = (u32 *)(parent_buffer + fda->parent_offset); if (!IS_ALIGNED((unsigned long)fd_array, sizeof(u32))) { binder_user_error("%d:%d parent offset not aligned correctly.\n", @@ -1854,7 +1954,8 @@ static int binder_fixup_parent(struct binder_transaction *t, return -EINVAL; } parent_buffer = (u8 *)(parent->buffer - - target_proc->alloc.user_buffer_offset); + binder_alloc_get_user_buffer_offset( + &target_proc->alloc)); *(binder_uintptr_t *)(parent_buffer + bp->parent_offset) = bp->buffer; return 0; @@ -2040,7 +2141,7 @@ static void binder_transaction(struct binder_proc *proc, trace_binder_transaction(reply, t, target_node); - t->buffer = binder_alloc_buf(target_proc, tr->data_size, + t->buffer = binder_alloc_new_buf(&target_proc->alloc, tr->data_size, tr->offsets_size, extra_buffers_size, !reply && (t->flags & TF_ONE_WAY)); if (t->buffer == NULL) { @@ -2194,7 +2295,8 @@ static void binder_transaction(struct binder_proc *proc, } /* Fixup buffer pointer to target proc address space */ bp->buffer = (uintptr_t)sg_bufp + - target_proc->alloc.user_buffer_offset; + binder_alloc_get_user_buffer_offset( + &target_proc->alloc); sg_bufp += ALIGN(bp->length, sizeof(u64)); ret = binder_fixup_parent(t, thread, bp, off_start, @@ -2252,7 +2354,7 @@ err_copy_data_failed: trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, t->buffer, offp); t->buffer->transaction = NULL; - binder_free_buf(target_proc, t->buffer); + binder_alloc_free_buf(&target_proc->alloc, t->buffer); err_binder_alloc_buf_failed: kfree(tcomplete); binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); @@ -2432,7 +2534,8 @@ static int binder_thread_write(struct binder_proc *proc, return -EFAULT; ptr += sizeof(binder_uintptr_t); - buffer = binder_buffer_lookup(proc, data_ptr); + buffer = binder_alloc_buffer_lookup(&proc->alloc, + data_ptr); if (buffer == NULL) { binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n", proc->pid, thread->pid, (u64)data_ptr); @@ -2462,7 +2565,7 @@ static int binder_thread_write(struct binder_proc *proc, } trace_binder_transaction_buffer_release(buffer); binder_transaction_buffer_release(proc, buffer, NULL); - binder_free_buf(proc, buffer); + binder_alloc_free_buf(&proc->alloc, buffer); break; } @@ -2954,9 +3057,9 @@ retry: tr.data_size = t->buffer->data_size; tr.offsets_size = t->buffer->offsets_size; - tr.data.ptr.buffer = (binder_uintptr_t)( - (uintptr_t)t->buffer->data + - proc->alloc.user_buffer_offset); + tr.data.ptr.buffer = (binder_uintptr_t) + ((uintptr_t)t->buffer->data + + binder_alloc_get_user_buffer_offset(&proc->alloc)); tr.data.ptr.offsets = tr.data.ptr.buffer + ALIGN(t->buffer->data_size, sizeof(void *)); @@ -3365,6 +3468,12 @@ static void binder_vma_open(struct vm_area_struct *vma) (unsigned long)pgprot_val(vma->vm_page_prot)); } +void binder_alloc_vma_close(struct binder_alloc *alloc) +{ + WRITE_ONCE(alloc->vma, NULL); + WRITE_ONCE(alloc->vma_vm_mm, NULL); +} + static void binder_vma_close(struct vm_area_struct *vma) { struct binder_proc *proc = vma->vm_private_data; @@ -3374,8 +3483,7 @@ static void binder_vma_close(struct vm_area_struct *vma) proc->pid, vma->vm_start, vma->vm_end, (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, (unsigned long)pgprot_val(vma->vm_page_prot)); - 
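/*
 * Editorial aside (hedged): binder_alloc_vma_close() above publishes the
 * teardown with WRITE_ONCE() so the compiler cannot tear or elide the
 * store, since allocation paths check alloc->vma without holding
 * mmap_sem. A symmetric consumer would conventionally read it back with
 * READ_ONCE() — an assumption of this sketch; the patch itself still
 * uses a plain load:
 *
 *	if (!READ_ONCE(alloc->vma))
 *		return NULL;	// process is unmapping; fail allocation
 */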
proc->alloc.vma = NULL; - proc->alloc.vma_vm_mm = NULL; + binder_alloc_vma_close(&proc->alloc); binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES); } @@ -3390,35 +3498,16 @@ static const struct vm_operations_struct binder_vm_ops = { .fault = binder_vm_fault, }; -static int binder_mmap(struct file *filp, struct vm_area_struct *vma) +int binder_alloc_mmap_handler(struct binder_alloc *alloc, + struct vm_area_struct *vma) { int ret; struct vm_struct *area; - struct binder_proc *proc = filp->private_data; const char *failure_string; struct binder_buffer *buffer; - if (proc->tsk != current->group_leader) - return -EINVAL; - - if ((vma->vm_end - vma->vm_start) > SZ_4M) - vma->vm_end = vma->vm_start + SZ_4M; - - binder_debug(BINDER_DEBUG_OPEN_CLOSE, - "binder_mmap: %d %lx-%lx (%ld K) vma %lx pagep %lx\n", - proc->pid, vma->vm_start, vma->vm_end, - (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, - (unsigned long)pgprot_val(vma->vm_page_prot)); - - if (vma->vm_flags & FORBIDDEN_MMAP_FLAGS) { - ret = -EPERM; - failure_string = "bad vm_flags"; - goto err_bad_arg; - } - vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE; - - mutex_lock(&binder_mmap_lock); - if (proc->alloc.buffer) { + mutex_lock(&binder_alloc_mmap_lock); + if (alloc->buffer) { ret = -EBUSY; failure_string = "already mapped"; goto err_already_mapped; @@ -3430,74 +3519,111 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) failure_string = "get_vm_area"; goto err_get_vm_area_failed; } - proc->alloc.buffer = area->addr; - proc->alloc.user_buffer_offset = - vma->vm_start - (uintptr_t)proc->alloc.buffer; - mutex_unlock(&binder_mmap_lock); + alloc->buffer = area->addr; + alloc->user_buffer_offset = + vma->vm_start - (uintptr_t)alloc->buffer; + mutex_unlock(&binder_alloc_mmap_lock); #ifdef CONFIG_CPU_CACHE_VIPT if (cache_is_vipt_aliasing()) { while (CACHE_COLOUR( - (vma->vm_start ^ (uint32_t)proc->alloc.buffer))) { + (vma->vm_start ^ (uint32_t)alloc->buffer))) { pr_info("binder_mmap: %d %lx-%lx maps %pK bad alignment\n", - proc->pid, vma->vm_start, - vma->vm_end, proc->alloc.buffer); + alloc->pid, vma->vm_start, vma->vm_end, + alloc->buffer); vma->vm_start += PAGE_SIZE; } } #endif - proc->alloc.pages = - kzalloc(sizeof(proc->alloc.pages[0]) * - ((vma->vm_end - vma->vm_start) / PAGE_SIZE), - GFP_KERNEL); - if (proc->alloc.pages == NULL) { + alloc->pages = kzalloc(sizeof(alloc->pages[0]) * + ((vma->vm_end - vma->vm_start) / PAGE_SIZE), + GFP_KERNEL); + if (alloc->pages == NULL) { ret = -ENOMEM; failure_string = "alloc page array"; goto err_alloc_pages_failed; } - proc->alloc.buffer_size = vma->vm_end - vma->vm_start; + alloc->buffer_size = vma->vm_end - vma->vm_start; - vma->vm_ops = &binder_vm_ops; - vma->vm_private_data = proc; - - if (binder_update_page_range(proc, 1, proc->alloc.buffer, - proc->alloc.buffer + PAGE_SIZE, vma)) { + if (binder_update_page_range(alloc, 1, alloc->buffer, + alloc->buffer + PAGE_SIZE, vma)) { ret = -ENOMEM; failure_string = "alloc small buf"; goto err_alloc_small_buf_failed; } - buffer = proc->alloc.buffer; - INIT_LIST_HEAD(&proc->alloc.buffers); - list_add(&buffer->entry, &proc->alloc.buffers); + buffer = alloc->buffer; + INIT_LIST_HEAD(&alloc->buffers); + list_add(&buffer->entry, &alloc->buffers); buffer->free = 1; - binder_insert_free_buffer(proc, buffer); - proc->alloc.free_async_space = proc->alloc.buffer_size / 2; + binder_insert_free_buffer(alloc, buffer); + alloc->free_async_space = alloc->buffer_size / 2; barrier(); - proc->files = get_files_struct(current); - 
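/*
 * Editorial aside: after this split, binder_mmap() keeps only driver
 * policy — the 4 MB clamp, the FORBIDDEN_MMAP_FLAGS check and the vm_ops
 * wiring — and delegates address-space setup, mirroring the new body
 * below:
 *
 *	vma->vm_ops = &binder_vm_ops;
 *	vma->vm_private_data = proc;
 *	ret = binder_alloc_mmap_handler(&proc->alloc, vma);
 *	if (ret)
 *		return ret;
 *	proc->files = get_files_struct(current);
 *
 * Ordering matters: the handler stores alloc->vma only after the free
 * list is initialized (note the barrier() before the store), so a racing
 * allocation should not observe a usable vma with an empty allocator.
 */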
proc->alloc.vma = vma; - proc->alloc.vma_vm_mm = vma->vm_mm; + alloc->vma = vma; + alloc->vma_vm_mm = vma->vm_mm; - /*pr_info("binder_mmap: %d %lx-%lx maps %pK\n", - * proc->pid, vma->vm_start, vma->vm_end, proc->alloc.buffer); - */ return 0; err_alloc_small_buf_failed: - kfree(proc->alloc.pages); - proc->alloc.pages = NULL; + kfree(alloc->pages); + alloc->pages = NULL; err_alloc_pages_failed: - mutex_lock(&binder_mmap_lock); - vfree(proc->alloc.buffer); - proc->alloc.buffer = NULL; + mutex_lock(&binder_alloc_mmap_lock); + vfree(alloc->buffer); + alloc->buffer = NULL; err_get_vm_area_failed: err_already_mapped: - mutex_unlock(&binder_mmap_lock); + mutex_unlock(&binder_alloc_mmap_lock); + pr_err("%s: %d %lx-%lx %s failed %d\n", __func__, + alloc->pid, vma->vm_start, vma->vm_end, failure_string, ret); + return ret; +} + +static int binder_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int ret; + struct binder_proc *proc = filp->private_data; + const char *failure_string; + + if (proc->tsk != current->group_leader) + return -EINVAL; + + if ((vma->vm_end - vma->vm_start) > SZ_4M) + vma->vm_end = vma->vm_start + SZ_4M; + + binder_debug(BINDER_DEBUG_OPEN_CLOSE, + "%s: %d %lx-%lx (%ld K) vma %lx pagep %lx\n", + __func__, proc->pid, vma->vm_start, vma->vm_end, + (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, + (unsigned long)pgprot_val(vma->vm_page_prot)); + + if (vma->vm_flags & FORBIDDEN_MMAP_FLAGS) { + ret = -EPERM; + failure_string = "bad vm_flags"; + goto err_bad_arg; + } + vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE; + vma->vm_ops = &binder_vm_ops; + vma->vm_private_data = proc; + + ret = binder_alloc_mmap_handler(&proc->alloc, vma); + if (ret) + return ret; + proc->files = get_files_struct(current); + return 0; + err_bad_arg: pr_err("binder_mmap: %d %lx-%lx %s failed %d\n", proc->pid, vma->vm_start, vma->vm_end, failure_string, ret); return ret; } +void binder_alloc_init(struct binder_alloc *alloc) +{ + alloc->tsk = current->group_leader; + alloc->pid = current->group_leader->pid; + mutex_init(&alloc->mutex); +} + static int binder_open(struct inode *nodp, struct file *filp) { struct binder_proc *proc; @@ -3517,6 +3643,7 @@ static int binder_open(struct inode *nodp, struct file *filp) binder_dev = container_of(filp->private_data, struct binder_device, miscdev); proc->context = &binder_dev->context; + binder_alloc_init(&proc->alloc); binder_lock(__func__); @@ -3632,14 +3759,61 @@ static int binder_node_release(struct binder_node *node, int refs) return refs; } +void binder_alloc_deferred_release(struct binder_alloc *alloc) +{ + struct rb_node *n; + int buffers, page_count; + + BUG_ON(alloc->vma); + + buffers = 0; + mutex_lock(&alloc->mutex); + while ((n = rb_first(&alloc->allocated_buffers))) { + struct binder_buffer *buffer; + + buffer = rb_entry(n, struct binder_buffer, rb_node); + + /* Transaction should already have been freed */ + BUG_ON(buffer->transaction); + + binder_free_buf_locked(alloc, buffer); + buffers++; + } + + page_count = 0; + if (alloc->pages) { + int i; + + for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { + void *page_addr; + + if (!alloc->pages[i]) + continue; + + page_addr = alloc->buffer + i * PAGE_SIZE; + binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, + "%s: %d: page %d at %pK not freed\n", + __func__, alloc->pid, i, page_addr); + unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); + __free_page(alloc->pages[i]); + page_count++; + } + kfree(alloc->pages); + vfree(alloc->buffer); + } + mutex_unlock(&alloc->mutex); + + 
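/*
 * Editorial aside (interpretation): binder_alloc_deferred_release() runs
 * with two invariants already established — BUG_ON(alloc->vma) because
 * the vma was closed first, and BUG_ON(buffer->transaction) because all
 * transactions were released. Pages still present in alloc->pages[] are
 * typically the ones backing buffer headers, which the free path keeps
 * mapped; they are reclaimed with the reverse of the allocation steps:
 *
 *	unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
 *	__free_page(alloc->pages[i]);
 *
 * No zap_page_range() is needed here: with the vma gone, the user-side
 * mapping has already been torn down by the mm.
 */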
binder_alloc_debug(BINDER_ALLOC_DEBUG_OPEN_CLOSE, + "%s: %d buffers %d, pages %d\n", + __func__, alloc->pid, buffers, page_count); +} + static void binder_deferred_release(struct binder_proc *proc) { struct binder_context *context = proc->context; struct rb_node *n; - int threads, nodes, incoming_refs, outgoing_refs, buffers, - active_transactions, page_count; + int threads, nodes, incoming_refs, outgoing_refs, active_transactions; - BUG_ON(proc->alloc.vma); BUG_ON(proc->files); hlist_del(&proc->proc_node); @@ -3685,49 +3859,15 @@ static void binder_deferred_release(struct binder_proc *proc) binder_release_work(&proc->todo); binder_release_work(&proc->delivered_death); - buffers = 0; - while ((n = rb_first(&proc->alloc.allocated_buffers))) { - struct binder_buffer *buffer; - - buffer = rb_entry(n, struct binder_buffer, rb_node); - - /* Transaction should already have been freed */ - BUG_ON(buffer->transaction); - - binder_free_buf(proc, buffer); - buffers++; - } - + binder_alloc_deferred_release(&proc->alloc); binder_stats_deleted(BINDER_STAT_PROC); - page_count = 0; - if (proc->alloc.pages) { - int i; - - for (i = 0; i < proc->alloc.buffer_size / PAGE_SIZE; i++) { - void *page_addr; - - if (!proc->alloc.pages[i]) - continue; - - page_addr = proc->alloc.buffer + i * PAGE_SIZE; - binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%s: %d: page %d at %p not freed\n", - __func__, proc->pid, i, page_addr); - unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); - __free_page(proc->alloc.pages[i]); - page_count++; - } - kfree(proc->alloc.pages); - vfree(proc->alloc.buffer); - } - put_task_struct(proc->tsk); binder_debug(BINDER_DEBUG_OPEN_CLOSE, - "%s: %d threads %d, nodes %d (ref %d), refs %d, active transactions %d, buffers %d, pages %d\n", + "%s: %d threads %d, nodes %d (ref %d), refs %d, active transactions %d\n", __func__, proc->pid, threads, nodes, incoming_refs, - outgoing_refs, active_transactions, buffers, page_count); + outgoing_refs, active_transactions); kfree(proc); } @@ -3810,15 +3950,6 @@ static void print_binder_transaction(struct seq_file *m, const char *prefix, t->buffer->data); } -static void print_binder_buffer(struct seq_file *m, const char *prefix, - struct binder_buffer *buffer) -{ - seq_printf(m, "%s %d: %p size %zd:%zd %s\n", - prefix, buffer->debug_id, buffer->data, - buffer->data_size, buffer->offsets_size, - buffer->transaction ? "active" : "delivered"); -} - static void print_binder_work(struct seq_file *m, const char *prefix, const char *transaction_prefix, struct binder_work *w) @@ -3921,6 +4052,27 @@ static void print_binder_ref(struct seq_file *m, struct binder_ref *ref) ref->node->debug_id, ref->strong, ref->weak, ref->death); } +static void print_binder_buffer(struct seq_file *m, const char *prefix, + struct binder_buffer *buffer) +{ + seq_printf(m, "%s %d: %pK size %zd:%zd %s\n", + prefix, buffer->debug_id, buffer->data, + buffer->data_size, buffer->offsets_size, + buffer->transaction ? 
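/*
 * Editorial aside: the moved code consistently prints pointers with %pK
 * rather than %p, so kernel addresses are masked for unprivileged
 * readers per kptr_restrict. With kptr_restrict=1 and no CAP_SYSLOG,
 * the seq_printf() above would emit, for example:
 *
 *	buffer 5: 0000000000000000 size 128:8 delivered
 *
 * (sizes and debug_id are made up for the example; the zeroed pointer
 * is what %pK substitutes for the real address).
 */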
"active" : "delivered"); +} + +void binder_alloc_print_allocated(struct seq_file *m, + struct binder_alloc *alloc) +{ + struct rb_node *n; + + mutex_lock(&alloc->mutex); + for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n)) + print_binder_buffer(m, " buffer", + rb_entry(n, struct binder_buffer, rb_node)); + mutex_unlock(&alloc->mutex); +} + static void print_binder_proc(struct seq_file *m, struct binder_proc *proc, int print_all) { @@ -3949,10 +4101,7 @@ static void print_binder_proc(struct seq_file *m, print_binder_ref(m, rb_entry(n, struct binder_ref, rb_node_desc)); } - for (n = rb_first(&proc->alloc.allocated_buffers); - n != NULL; n = rb_next(n)) - print_binder_buffer(m, " buffer", - rb_entry(n, struct binder_buffer, rb_node)); + binder_alloc_print_allocated(m, &proc->alloc); list_for_each_entry(w, &proc->todo, entry) print_binder_work(m, " ", " pending transaction", w); list_for_each_entry(w, &proc->delivered_death, entry) { @@ -4050,6 +4199,18 @@ static void print_binder_stats(struct seq_file *m, const char *prefix, } } +int binder_alloc_get_allocated_count(struct binder_alloc *alloc) +{ + struct rb_node *n; + int count = 0; + + mutex_lock(&alloc->mutex); + for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n)) + count++; + mutex_unlock(&alloc->mutex); + return count; +} + static void print_binder_proc_stats(struct seq_file *m, struct binder_proc *proc) { @@ -4067,7 +4228,8 @@ static void print_binder_proc_stats(struct seq_file *m, " ready threads %d\n" " free async space %zd\n", proc->requested_threads, proc->requested_threads_started, proc->max_threads, - proc->ready_threads, proc->alloc.free_async_space); + proc->ready_threads, + binder_alloc_get_free_async_space(&proc->alloc)); count = 0; for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) count++; @@ -4084,10 +4246,7 @@ static void print_binder_proc_stats(struct seq_file *m, } seq_printf(m, " refs: %d s %d w %d\n", count, strong, weak); - count = 0; - for (n = rb_first(&proc->alloc.allocated_buffers); - n != NULL; n = rb_next(n)) - count++; + count = binder_alloc_get_allocated_count(&proc->alloc); seq_printf(m, " buffers: %d\n", count); count = 0; diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h index c835f09656c1..50b0d21f42cf 100644 --- a/drivers/android/binder_trace.h +++ b/drivers/android/binder_trace.h @@ -23,6 +23,7 @@ struct binder_buffer; struct binder_node; struct binder_proc; +struct binder_alloc; struct binder_ref; struct binder_thread; struct binder_transaction; @@ -268,9 +269,9 @@ DEFINE_EVENT(binder_buffer_class, binder_transaction_failed_buffer_release, TP_ARGS(buffer)); TRACE_EVENT(binder_update_page_range, - TP_PROTO(struct binder_proc *proc, bool allocate, + TP_PROTO(struct binder_alloc *alloc, bool allocate, void *start, void *end), - TP_ARGS(proc, allocate, start, end), + TP_ARGS(alloc, allocate, start, end), TP_STRUCT__entry( __field(int, proc) __field(bool, allocate) @@ -278,9 +279,9 @@ TRACE_EVENT(binder_update_page_range, __field(size_t, size) ), TP_fast_assign( - __entry->proc = proc->pid; + __entry->proc = alloc->pid; __entry->allocate = allocate; - __entry->offset = start - proc->alloc.buffer; + __entry->offset = start - alloc->buffer; __entry->size = end - start; ), TP_printk("proc=%d allocate=%d offset=%zu size=%zu", From 2324f70c5aab8b8e5f4c100a80d1dd4999991285 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 10 Oct 2016 10:40:53 -0700 Subject: [PATCH 009/176] FROMLIST: binder: move binder_alloc to separate file (from 
https://patchwork.kernel.org/patch/9817753/) Move the binder allocator functionality to its own file Continuation of splitting the binder allocator from the binder driver. Split binder_alloc functions from normal binder functions. Add kernel doc comments to functions declared extern in binder_alloc.h Change-Id: I8f1a967375359078b8e63c7b6b88a752c374a64a Signed-off-by: Todd Kjos --- drivers/android/Makefile | 2 +- drivers/android/binder.c | 763 +-------------------------------- drivers/android/binder_alloc.c | 759 ++++++++++++++++++++++++++++++++ drivers/android/binder_alloc.h | 162 +++++++ 4 files changed, 923 insertions(+), 763 deletions(-) create mode 100644 drivers/android/binder_alloc.c create mode 100644 drivers/android/binder_alloc.h diff --git a/drivers/android/Makefile b/drivers/android/Makefile index 3b7e4b072c58..4b7c726bb560 100644 --- a/drivers/android/Makefile +++ b/drivers/android/Makefile @@ -1,3 +1,3 @@ ccflags-y += -I$(src) # needed for trace events -obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o +obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o binder_alloc.o diff --git a/drivers/android/binder.c b/drivers/android/binder.c index e73481a8ea2c..3251b7cdd717 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -34,8 +33,6 @@ #include #include #include -#include -#include #include #include @@ -44,11 +41,11 @@ #endif #include +#include "binder_alloc.h" #include "binder_trace.h" static DEFINE_MUTEX(binder_main_lock); static DEFINE_MUTEX(binder_deferred_lock); -static DEFINE_MUTEX(binder_alloc_mmap_lock); static HLIST_HEAD(binder_devices); static HLIST_HEAD(binder_procs); @@ -157,27 +154,6 @@ module_param_call(stop_on_user_error, binder_set_stop_on_user_error, #define to_binder_fd_array_object(hdr) \ container_of(hdr, struct binder_fd_array_object, hdr) -/* - * debug declarations for binder_alloc. To be - * moved to binder_alloc.c - */ -enum { - BINDER_ALLOC_DEBUG_OPEN_CLOSE = 1U << 1, - BINDER_ALLOC_DEBUG_BUFFER_ALLOC = 1U << 2, - BINDER_ALLOC_DEBUG_BUFFER_ALLOC_ASYNC = 1U << 3, -}; -static uint32_t binder_alloc_debug_mask; - -module_param_named(alloc_debug_mask, binder_alloc_debug_mask, - uint, S_IWUSR | S_IRUGO); - -#define binder_alloc_debug(mask, x...) 
\ - do { \ - if (binder_alloc_debug_mask & mask) \ - pr_info(x); \ - } while (0) -/* end of binder_alloc debug declarations */ - enum binder_stat_types { BINDER_STAT_PROC, BINDER_STAT_THREAD, @@ -314,68 +290,12 @@ struct binder_ref { struct binder_ref_death *death; }; -struct binder_buffer { - struct list_head entry; /* free and allocated entries by address */ - struct rb_node rb_node; /* free entry by size or allocated entry */ - /* by address */ - unsigned free:1; - unsigned allow_user_free:1; - unsigned async_transaction:1; - unsigned debug_id:29; - - struct binder_transaction *transaction; - - struct binder_node *target_node; - size_t data_size; - size_t offsets_size; - size_t extra_buffers_size; - uint8_t data[0]; -}; - enum binder_deferred_state { BINDER_DEFERRED_PUT_FILES = 0x01, BINDER_DEFERRED_FLUSH = 0x02, BINDER_DEFERRED_RELEASE = 0x04, }; -/** - * struct binder_alloc - per-binder proc state for binder allocator - * @vma: vm_area_struct passed to mmap_handler - * (invarient after mmap) - * @vma_vm_mm: copy of vma->vm_mm (invarient after mmap) - * @buffer: base of per-proc address space mapped via mmap - * @user_buffer_offset: offset between user and kernel VAs for buffer - * @buffers: list of all buffers for this proc - * @free_buffers: rb tree of buffers available for allocation - * sorted by size - * @allocated_buffers: rb tree of allocated buffers sorted by address - * @free_async_space: VA space available for async buffers. This is - * initialized at mmap time to 1/2 the full VA space - * @pages: array of physical page addresses for each page of - * mmap'd space - * @buffer_size: size of address space (could be less than requested) - * - * Bookkeeping structure for per-proc address space management for binder - * buffers. It is normally initialized during binder_init() and binder_mmap() - * calls. The address space is used for both user-visible buffers and for - * struct binder_buffer objects used to track the user buffers - */ -struct binder_alloc { - struct mutex mutex; - struct task_struct *tsk; - struct vm_area_struct *vma; - struct mm_struct *vma_vm_mm; - void *buffer; - ptrdiff_t user_buffer_offset; - struct list_head buffers; - struct rb_root free_buffers; - struct rb_root allocated_buffers; - size_t free_async_space; - struct page **pages; - size_t buffer_size; - int pid; -}; - struct binder_proc { struct hlist_node proc_node; struct rb_root threads; @@ -445,56 +365,6 @@ struct binder_transaction { kuid_t sender_euid; }; -/* - * Forward declarations of binder_alloc functions. - * These will be moved to binder_alloc.h when - * binder_alloc is moved to its own files. 
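 *
 * (Editorial note: the removal below is the payoff of the earlier
 * staging — these externs and inline helpers now live in binder_alloc.h
 * with kernel-doc. The debug module parameter moves too, renamed from
 * binder.alloc_debug_mask to binder_alloc.debug_mask, so enabling the
 * allocator's open/close tracing becomes, assuming the conventional
 * sysfs layout for module parameters:
 *
 *	echo 2 > /sys/module/binder_alloc/parameters/debug_mask
 *
 * where 2 is BINDER_DEBUG_OPEN_CLOSE from the new binder_alloc.c.)
 *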
- */ -extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, - size_t data_size, - size_t offsets_size, - size_t extra_buffers_size, - int is_async); -extern void binder_alloc_init(struct binder_alloc *alloc); -extern void binder_alloc_vma_close(struct binder_alloc *alloc); -extern struct binder_buffer * -binder_alloc_buffer_lookup(struct binder_alloc *alloc, - uintptr_t user_ptr); -extern void binder_alloc_free_buf(struct binder_alloc *alloc, - struct binder_buffer *buffer); -extern int binder_alloc_mmap_handler(struct binder_alloc *alloc, - struct vm_area_struct *vma); -extern void binder_alloc_deferred_release(struct binder_alloc *alloc); -extern int binder_alloc_get_allocated_count(struct binder_alloc *alloc); -extern void binder_alloc_print_allocated(struct seq_file *m, - struct binder_alloc *alloc); - -static inline size_t -binder_alloc_get_free_async_space(struct binder_alloc *alloc) -{ - size_t free_async_space; - - mutex_lock(&alloc->mutex); - free_async_space = alloc->free_async_space; - mutex_unlock(&alloc->mutex); - return free_async_space; -} - -static inline ptrdiff_t -binder_alloc_get_user_buffer_offset(struct binder_alloc *alloc) -{ - /* - * user_buffer_offset is constant if vma is set and - * undefined if vma is not set. It is possible to - * get here with !alloc->vma if the target process - * is dying while a transaction is being initiated. - * Returning the old value is ok in this case and - * the transaction will fail. - */ - return alloc->user_buffer_offset; -} -/* end of binder_alloc declarations */ - static void binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); @@ -578,462 +448,6 @@ static void binder_set_nice(long nice) binder_user_error("%d RLIMIT_NICE not set\n", current->pid); } -static size_t binder_alloc_buffer_size(struct binder_alloc *alloc, - struct binder_buffer *buffer) -{ - if (list_is_last(&buffer->entry, &alloc->buffers)) - return alloc->buffer + - alloc->buffer_size - (void *)buffer->data; - return (size_t)list_entry(buffer->entry.next, - struct binder_buffer, entry) - (size_t)buffer->data; -} - -static void binder_insert_free_buffer(struct binder_alloc *alloc, - struct binder_buffer *new_buffer) -{ - struct rb_node **p = &alloc->free_buffers.rb_node; - struct rb_node *parent = NULL; - struct binder_buffer *buffer; - size_t buffer_size; - size_t new_buffer_size; - - BUG_ON(!new_buffer->free); - - new_buffer_size = binder_alloc_buffer_size(alloc, new_buffer); - - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, - "%d: add free buffer, size %zd, at %pK\n", - alloc->pid, new_buffer_size, new_buffer); - - while (*p) { - parent = *p; - buffer = rb_entry(parent, struct binder_buffer, rb_node); - BUG_ON(!buffer->free); - - buffer_size = binder_alloc_buffer_size(alloc, buffer); - - if (new_buffer_size < buffer_size) - p = &parent->rb_left; - else - p = &parent->rb_right; - } - rb_link_node(&new_buffer->rb_node, parent, p); - rb_insert_color(&new_buffer->rb_node, &alloc->free_buffers); -} - -static void binder_insert_allocated_buffer(struct binder_alloc *alloc, - struct binder_buffer *new_buffer) -{ - struct rb_node **p = &alloc->allocated_buffers.rb_node; - struct rb_node *parent = NULL; - struct binder_buffer *buffer; - - BUG_ON(new_buffer->free); - - while (*p) { - parent = *p; - buffer = rb_entry(parent, struct binder_buffer, rb_node); - BUG_ON(buffer->free); - - if (new_buffer < buffer) - p = &parent->rb_left; - else if (new_buffer > buffer) - p = &parent->rb_right; - else - BUG(); - } - 
rb_link_node(&new_buffer->rb_node, parent, p); - rb_insert_color(&new_buffer->rb_node, &alloc->allocated_buffers); -} - -static struct binder_buffer *binder_alloc_buffer_lookup_locked( - struct binder_alloc *alloc, - uintptr_t user_ptr) -{ - struct rb_node *n = alloc->allocated_buffers.rb_node; - struct binder_buffer *buffer; - struct binder_buffer *kern_ptr; - - kern_ptr = (struct binder_buffer *)(user_ptr - alloc->user_buffer_offset - - offsetof(struct binder_buffer, data)); - - while (n) { - buffer = rb_entry(n, struct binder_buffer, rb_node); - BUG_ON(buffer->free); - - if (kern_ptr < buffer) - n = n->rb_left; - else if (kern_ptr > buffer) - n = n->rb_right; - else - return buffer; - } - return NULL; -} - -struct binder_buffer *binder_alloc_buffer_lookup(struct binder_alloc *alloc, - uintptr_t user_ptr) -{ - struct binder_buffer *buffer; - - mutex_lock(&alloc->mutex); - buffer = binder_alloc_buffer_lookup_locked(alloc, user_ptr); - mutex_unlock(&alloc->mutex); - return buffer; -} - -static int binder_update_page_range(struct binder_alloc *alloc, int allocate, - void *start, void *end, - struct vm_area_struct *vma) -{ - void *page_addr; - unsigned long user_page_addr; - struct page **page; - struct mm_struct *mm; - - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, - "%d: %s pages %pK-%pK\n", alloc->pid, - allocate ? "allocate" : "free", start, end); - - if (end <= start) - return 0; - - trace_binder_update_page_range(alloc, allocate, start, end); - - if (vma) - mm = NULL; - else - mm = get_task_mm(alloc->tsk); - - if (mm) { - down_write(&mm->mmap_sem); - vma = alloc->vma; - if (vma && mm != alloc->vma_vm_mm) { - pr_err("%d: vma mm and task mm mismatch\n", - alloc->pid); - vma = NULL; - } - } - - if (allocate == 0) - goto free_range; - - if (vma == NULL) { - pr_err("%d: binder_alloc_buf failed to map pages in userspace, no vma\n", - alloc->pid); - goto err_no_vma; - } - - for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { - int ret; - - page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE]; - - BUG_ON(*page); - *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); - if (*page == NULL) { - pr_err("%d: binder_alloc_buf failed for page at %pK\n", - alloc->pid, page_addr); - goto err_alloc_page_failed; - } - ret = map_kernel_range_noflush((unsigned long)page_addr, - PAGE_SIZE, PAGE_KERNEL, page); - flush_cache_vmap((unsigned long)page_addr, - (unsigned long)page_addr + PAGE_SIZE); - if (ret != 1) { - pr_err("%d: binder_alloc_buf failed to map page at %pK in kernel\n", - alloc->pid, page_addr); - goto err_map_kernel_failed; - } - user_page_addr = - (uintptr_t)page_addr + alloc->user_buffer_offset; - ret = vm_insert_page(vma, user_page_addr, page[0]); - if (ret) { - pr_err("%d: binder_alloc_buf failed to map page at %lx in userspace\n", - alloc->pid, user_page_addr); - goto err_vm_insert_page_failed; - } - /* vm_insert_page does not seem to increment the refcount */ - } - if (mm) { - up_write(&mm->mmap_sem); - mmput(mm); - } - return 0; - -free_range: - for (page_addr = end - PAGE_SIZE; page_addr >= start; - page_addr -= PAGE_SIZE) { - page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE]; - if (vma) - zap_page_range(vma, (uintptr_t)page_addr + - alloc->user_buffer_offset, PAGE_SIZE, NULL); -err_vm_insert_page_failed: - unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); -err_map_kernel_failed: - __free_page(*page); - *page = NULL; -err_alloc_page_failed: - ; - } -err_no_vma: - if (mm) { - up_write(&mm->mmap_sem); - mmput(mm); - } - return 
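/*
 * Editorial aside: the unwinding being removed here (and recreated
 * verbatim in binder_alloc.c) is the classic kernel goto ladder — each
 * label undoes exactly the steps that succeeded before its failure
 * point, and the backwards free_range loop reuses the same labels:
 *
 *	err_vm_insert_page_failed:		// user mapping failed
 *		unmap_kernel_range(...);	// undo kernel mapping
 *	err_map_kernel_failed:			// kernel mapping failed
 *		__free_page(*page);		// undo the allocation
 *	err_alloc_page_failed:
 *		;
 *
 * so every exit leaves alloc->pages[] consistent with what is mapped.
 */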
-ENOMEM; -} - -static struct binder_buffer *binder_alloc_new_buf_locked( - struct binder_alloc *alloc, size_t data_size, - size_t offsets_size, size_t extra_buffers_size, int is_async) -{ - struct rb_node *n = alloc->free_buffers.rb_node; - struct binder_buffer *buffer; - size_t buffer_size; - struct rb_node *best_fit = NULL; - void *has_page_addr; - void *end_page_addr; - size_t size, data_offsets_size; - - if (alloc->vma == NULL) { - pr_err("%d: binder_alloc_buf, no vma\n", - alloc->pid); - return NULL; - } - - data_offsets_size = ALIGN(data_size, sizeof(void *)) + - ALIGN(offsets_size, sizeof(void *)); - - if (data_offsets_size < data_size || data_offsets_size < offsets_size) { - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, - "%d: got transaction with invalid size %zd-%zd\n", - alloc->pid, data_size, offsets_size); - return NULL; - } - size = data_offsets_size + ALIGN(extra_buffers_size, sizeof(void *)); - if (size < data_offsets_size || size < extra_buffers_size) { - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, - "%d: got transaction with invalid extra_buffers_size %zd\n", - alloc->pid, extra_buffers_size); - return NULL; - } - if (is_async && - alloc->free_async_space < size + sizeof(struct binder_buffer)) { - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, - "%d: binder_alloc_buf size %zd failed, no async space left\n", - alloc->pid, size); - return NULL; - } - - while (n) { - buffer = rb_entry(n, struct binder_buffer, rb_node); - BUG_ON(!buffer->free); - buffer_size = binder_alloc_buffer_size(alloc, buffer); - - if (size < buffer_size) { - best_fit = n; - n = n->rb_left; - } else if (size > buffer_size) - n = n->rb_right; - else { - best_fit = n; - break; - } - } - if (best_fit == NULL) { - pr_err("%d: binder_alloc_buf size %zd failed, no address space\n", - alloc->pid, size); - return NULL; - } - if (n == NULL) { - buffer = rb_entry(best_fit, struct binder_buffer, rb_node); - buffer_size = binder_alloc_buffer_size(alloc, buffer); - } - - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, - "%d: binder_alloc_buf size %zd got buffer %pK size %zd\n", - alloc->pid, size, buffer, buffer_size); - - has_page_addr = - (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK); - if (n == NULL) { - if (size + sizeof(struct binder_buffer) + 4 >= buffer_size) - buffer_size = size; /* no room for other buffers */ - else - buffer_size = size + sizeof(struct binder_buffer); - } - end_page_addr = - (void *)PAGE_ALIGN((uintptr_t)buffer->data + buffer_size); - if (end_page_addr > has_page_addr) - end_page_addr = has_page_addr; - if (binder_update_page_range(alloc, 1, - (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr, NULL)) - return NULL; - - rb_erase(best_fit, &alloc->free_buffers); - buffer->free = 0; - binder_insert_allocated_buffer(alloc, buffer); - if (buffer_size != size) { - struct binder_buffer *new_buffer = (void *)buffer->data + size; - - list_add(&new_buffer->entry, &buffer->entry); - new_buffer->free = 1; - binder_insert_free_buffer(alloc, new_buffer); - } - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, - "%d: binder_alloc_buf size %zd got %pK\n", - alloc->pid, size, buffer); - buffer->data_size = data_size; - buffer->offsets_size = offsets_size; - buffer->async_transaction = is_async; - buffer->extra_buffers_size = extra_buffers_size; - if (is_async) { - alloc->free_async_space -= size + sizeof(struct binder_buffer); - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC_ASYNC, - "%d: binder_alloc_buf size %zd async free %zd\n", - alloc->pid, size, 
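/*
 * Editorial aside (sketch): the async accounting visible here reserves
 * header plus payload against a budget set to half the mapping at mmap
 * time, so the invariants are, informally:
 *
 *	free_async_space <= alloc->buffer_size / 2;
 *	cost(async alloc of S bytes) == S + sizeof(struct binder_buffer);
 *
 * with the cost refunded in binder_free_buf. By design, a client
 * spamming one-way (TF_ONE_WAY) transactions can tie up at most half of
 * its peer's mapped space, leaving room for synchronous replies.
 */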
alloc->free_async_space); - } - - return buffer; -} - -struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, - size_t data_size, - size_t offsets_size, - size_t extra_buffers_size, - int is_async) -{ - struct binder_buffer *buffer; - - mutex_lock(&alloc->mutex); - buffer = binder_alloc_new_buf_locked(alloc, data_size, offsets_size, - extra_buffers_size, is_async); - mutex_unlock(&alloc->mutex); - return buffer; -} - -static void *buffer_start_page(struct binder_buffer *buffer) -{ - return (void *)((uintptr_t)buffer & PAGE_MASK); -} - -static void *buffer_end_page(struct binder_buffer *buffer) -{ - return (void *)(((uintptr_t)(buffer + 1) - 1) & PAGE_MASK); -} - -static void binder_delete_free_buffer(struct binder_alloc *alloc, - struct binder_buffer *buffer) -{ - struct binder_buffer *prev, *next = NULL; - int free_page_end = 1; - int free_page_start = 1; - - BUG_ON(alloc->buffers.next == &buffer->entry); - prev = list_entry(buffer->entry.prev, struct binder_buffer, entry); - BUG_ON(!prev->free); - if (buffer_end_page(prev) == buffer_start_page(buffer)) { - free_page_start = 0; - if (buffer_end_page(prev) == buffer_end_page(buffer)) - free_page_end = 0; - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, - "%d: merge free, buffer %pK share page with %pK\n", - alloc->pid, buffer, prev); - } - - if (!list_is_last(&buffer->entry, &alloc->buffers)) { - next = list_entry(buffer->entry.next, - struct binder_buffer, entry); - if (buffer_start_page(next) == buffer_end_page(buffer)) { - free_page_end = 0; - if (buffer_start_page(next) == - buffer_start_page(buffer)) - free_page_start = 0; - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, - "%d: merge free, buffer %pK share page with %pK\n", - alloc->pid, buffer, prev); - } - } - list_del(&buffer->entry); - if (free_page_start || free_page_end) { - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, - "%d: merge free, buffer %pK do not share page%s%s with %pK or %pK\n", - alloc->pid, buffer, free_page_start ? "" : " end", - free_page_end ? "" : " start", prev, next); - binder_update_page_range(alloc, 0, free_page_start ? - buffer_start_page(buffer) : buffer_end_page(buffer), - (free_page_end ? 
buffer_end_page(buffer) : - buffer_start_page(buffer)) + PAGE_SIZE, NULL); - } -} - -static void binder_free_buf_locked(struct binder_alloc *alloc, - struct binder_buffer *buffer) -{ - size_t size, buffer_size; - - buffer_size = binder_alloc_buffer_size(alloc, buffer); - - size = ALIGN(buffer->data_size, sizeof(void *)) + - ALIGN(buffer->offsets_size, sizeof(void *)) + - ALIGN(buffer->extra_buffers_size, sizeof(void *)); - - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, - "%d: binder_free_buf %pK size %zd buffer_size %zd\n", - alloc->pid, buffer, size, buffer_size); - - BUG_ON(buffer->free); - BUG_ON(size > buffer_size); - BUG_ON(buffer->transaction != NULL); - BUG_ON((void *)buffer < alloc->buffer); - BUG_ON((void *)buffer > alloc->buffer + alloc->buffer_size); - - if (buffer->async_transaction) { - alloc->free_async_space += size + sizeof(struct binder_buffer); - - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC_ASYNC, - "%d: binder_free_buf size %zd async free %zd\n", - alloc->pid, size, alloc->free_async_space); - } - - binder_update_page_range(alloc, 0, - (void *)PAGE_ALIGN((uintptr_t)buffer->data), - (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK), - NULL); - - rb_erase(&buffer->rb_node, &alloc->allocated_buffers); - buffer->free = 1; - if (!list_is_last(&buffer->entry, &alloc->buffers)) { - struct binder_buffer *next = list_entry(buffer->entry.next, - struct binder_buffer, entry); - - if (next->free) { - rb_erase(&next->rb_node, &alloc->free_buffers); - binder_delete_free_buffer(alloc, next); - } - } - if (alloc->buffers.next != &buffer->entry) { - struct binder_buffer *prev = list_entry(buffer->entry.prev, - struct binder_buffer, entry); - - if (prev->free) { - binder_delete_free_buffer(alloc, buffer); - rb_erase(&prev->rb_node, &alloc->free_buffers); - buffer = prev; - } - } - binder_insert_free_buffer(alloc, buffer); -} - -void binder_alloc_free_buf(struct binder_alloc *alloc, - struct binder_buffer *buffer) -{ - mutex_lock(&alloc->mutex); - binder_free_buf_locked(alloc, buffer); - mutex_unlock(&alloc->mutex); -} - static struct binder_node *binder_get_node(struct binder_proc *proc, binder_uintptr_t ptr) { @@ -3468,12 +2882,6 @@ static void binder_vma_open(struct vm_area_struct *vma) (unsigned long)pgprot_val(vma->vm_page_prot)); } -void binder_alloc_vma_close(struct binder_alloc *alloc) -{ - WRITE_ONCE(alloc->vma, NULL); - WRITE_ONCE(alloc->vma_vm_mm, NULL); -} - static void binder_vma_close(struct vm_area_struct *vma) { struct binder_proc *proc = vma->vm_private_data; @@ -3498,86 +2906,6 @@ static const struct vm_operations_struct binder_vm_ops = { .fault = binder_vm_fault, }; -int binder_alloc_mmap_handler(struct binder_alloc *alloc, - struct vm_area_struct *vma) -{ - int ret; - struct vm_struct *area; - const char *failure_string; - struct binder_buffer *buffer; - - mutex_lock(&binder_alloc_mmap_lock); - if (alloc->buffer) { - ret = -EBUSY; - failure_string = "already mapped"; - goto err_already_mapped; - } - - area = get_vm_area(vma->vm_end - vma->vm_start, VM_IOREMAP); - if (area == NULL) { - ret = -ENOMEM; - failure_string = "get_vm_area"; - goto err_get_vm_area_failed; - } - alloc->buffer = area->addr; - alloc->user_buffer_offset = - vma->vm_start - (uintptr_t)alloc->buffer; - mutex_unlock(&binder_alloc_mmap_lock); - -#ifdef CONFIG_CPU_CACHE_VIPT - if (cache_is_vipt_aliasing()) { - while (CACHE_COLOUR( - (vma->vm_start ^ (uint32_t)alloc->buffer))) { - pr_info("binder_mmap: %d %lx-%lx maps %pK bad alignment\n", - alloc->pid, vma->vm_start, 
vma->vm_end, - alloc->buffer); - vma->vm_start += PAGE_SIZE; - } - } -#endif - alloc->pages = kzalloc(sizeof(alloc->pages[0]) * - ((vma->vm_end - vma->vm_start) / PAGE_SIZE), - GFP_KERNEL); - if (alloc->pages == NULL) { - ret = -ENOMEM; - failure_string = "alloc page array"; - goto err_alloc_pages_failed; - } - alloc->buffer_size = vma->vm_end - vma->vm_start; - - if (binder_update_page_range(alloc, 1, alloc->buffer, - alloc->buffer + PAGE_SIZE, vma)) { - ret = -ENOMEM; - failure_string = "alloc small buf"; - goto err_alloc_small_buf_failed; - } - buffer = alloc->buffer; - INIT_LIST_HEAD(&alloc->buffers); - list_add(&buffer->entry, &alloc->buffers); - buffer->free = 1; - binder_insert_free_buffer(alloc, buffer); - alloc->free_async_space = alloc->buffer_size / 2; - barrier(); - alloc->vma = vma; - alloc->vma_vm_mm = vma->vm_mm; - - return 0; - -err_alloc_small_buf_failed: - kfree(alloc->pages); - alloc->pages = NULL; -err_alloc_pages_failed: - mutex_lock(&binder_alloc_mmap_lock); - vfree(alloc->buffer); - alloc->buffer = NULL; -err_get_vm_area_failed: -err_already_mapped: - mutex_unlock(&binder_alloc_mmap_lock); - pr_err("%s: %d %lx-%lx %s failed %d\n", __func__, - alloc->pid, vma->vm_start, vma->vm_end, failure_string, ret); - return ret; -} - static int binder_mmap(struct file *filp, struct vm_area_struct *vma) { int ret; @@ -3617,13 +2945,6 @@ err_bad_arg: return ret; } -void binder_alloc_init(struct binder_alloc *alloc) -{ - alloc->tsk = current->group_leader; - alloc->pid = current->group_leader->pid; - mutex_init(&alloc->mutex); -} - static int binder_open(struct inode *nodp, struct file *filp) { struct binder_proc *proc; @@ -3759,55 +3080,6 @@ static int binder_node_release(struct binder_node *node, int refs) return refs; } -void binder_alloc_deferred_release(struct binder_alloc *alloc) -{ - struct rb_node *n; - int buffers, page_count; - - BUG_ON(alloc->vma); - - buffers = 0; - mutex_lock(&alloc->mutex); - while ((n = rb_first(&alloc->allocated_buffers))) { - struct binder_buffer *buffer; - - buffer = rb_entry(n, struct binder_buffer, rb_node); - - /* Transaction should already have been freed */ - BUG_ON(buffer->transaction); - - binder_free_buf_locked(alloc, buffer); - buffers++; - } - - page_count = 0; - if (alloc->pages) { - int i; - - for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { - void *page_addr; - - if (!alloc->pages[i]) - continue; - - page_addr = alloc->buffer + i * PAGE_SIZE; - binder_alloc_debug(BINDER_ALLOC_DEBUG_BUFFER_ALLOC, - "%s: %d: page %d at %pK not freed\n", - __func__, alloc->pid, i, page_addr); - unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); - __free_page(alloc->pages[i]); - page_count++; - } - kfree(alloc->pages); - vfree(alloc->buffer); - } - mutex_unlock(&alloc->mutex); - - binder_alloc_debug(BINDER_ALLOC_DEBUG_OPEN_CLOSE, - "%s: %d buffers %d, pages %d\n", - __func__, alloc->pid, buffers, page_count); -} - static void binder_deferred_release(struct binder_proc *proc) { struct binder_context *context = proc->context; @@ -4052,27 +3324,6 @@ static void print_binder_ref(struct seq_file *m, struct binder_ref *ref) ref->node->debug_id, ref->strong, ref->weak, ref->death); } -static void print_binder_buffer(struct seq_file *m, const char *prefix, - struct binder_buffer *buffer) -{ - seq_printf(m, "%s %d: %pK size %zd:%zd %s\n", - prefix, buffer->debug_id, buffer->data, - buffer->data_size, buffer->offsets_size, - buffer->transaction ? 
"active" : "delivered"); -} - -void binder_alloc_print_allocated(struct seq_file *m, - struct binder_alloc *alloc) -{ - struct rb_node *n; - - mutex_lock(&alloc->mutex); - for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n)) - print_binder_buffer(m, " buffer", - rb_entry(n, struct binder_buffer, rb_node)); - mutex_unlock(&alloc->mutex); -} - static void print_binder_proc(struct seq_file *m, struct binder_proc *proc, int print_all) { @@ -4199,18 +3450,6 @@ static void print_binder_stats(struct seq_file *m, const char *prefix, } } -int binder_alloc_get_allocated_count(struct binder_alloc *alloc) -{ - struct rb_node *n; - int count = 0; - - mutex_lock(&alloc->mutex); - for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n)) - count++; - mutex_unlock(&alloc->mutex); - return count; -} - static void print_binder_proc_stats(struct seq_file *m, struct binder_proc *proc) { diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c new file mode 100644 index 000000000000..eaef8f8fb859 --- /dev/null +++ b/drivers/android/binder_alloc.c @@ -0,0 +1,759 @@ +/* binder_alloc.c + * + * Android IPC Subsystem + * + * Copyright (C) 2007-2017 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "binder_alloc.h" +#include "binder_trace.h" + +static DEFINE_MUTEX(binder_alloc_mmap_lock); + +enum { + BINDER_DEBUG_OPEN_CLOSE = 1U << 1, + BINDER_DEBUG_BUFFER_ALLOC = 1U << 2, + BINDER_DEBUG_BUFFER_ALLOC_ASYNC = 1U << 3, +}; +static uint32_t binder_alloc_debug_mask; + +module_param_named(debug_mask, binder_alloc_debug_mask, + uint, S_IWUSR | S_IRUGO); + +#define binder_alloc_debug(mask, x...) 
\ + do { \ + if (binder_alloc_debug_mask & mask) \ + pr_info(x); \ + } while (0) + +static size_t binder_alloc_buffer_size(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + if (list_is_last(&buffer->entry, &alloc->buffers)) + return alloc->buffer + + alloc->buffer_size - (void *)buffer->data; + return (size_t)list_entry(buffer->entry.next, + struct binder_buffer, entry) - (size_t)buffer->data; +} + +static void binder_insert_free_buffer(struct binder_alloc *alloc, + struct binder_buffer *new_buffer) +{ + struct rb_node **p = &alloc->free_buffers.rb_node; + struct rb_node *parent = NULL; + struct binder_buffer *buffer; + size_t buffer_size; + size_t new_buffer_size; + + BUG_ON(!new_buffer->free); + + new_buffer_size = binder_alloc_buffer_size(alloc, new_buffer); + + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: add free buffer, size %zd, at %pK\n", + alloc->pid, new_buffer_size, new_buffer); + + while (*p) { + parent = *p; + buffer = rb_entry(parent, struct binder_buffer, rb_node); + BUG_ON(!buffer->free); + + buffer_size = binder_alloc_buffer_size(alloc, buffer); + + if (new_buffer_size < buffer_size) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + rb_link_node(&new_buffer->rb_node, parent, p); + rb_insert_color(&new_buffer->rb_node, &alloc->free_buffers); +} + +static void binder_insert_allocated_buffer_locked( + struct binder_alloc *alloc, struct binder_buffer *new_buffer) +{ + struct rb_node **p = &alloc->allocated_buffers.rb_node; + struct rb_node *parent = NULL; + struct binder_buffer *buffer; + + BUG_ON(new_buffer->free); + + while (*p) { + parent = *p; + buffer = rb_entry(parent, struct binder_buffer, rb_node); + BUG_ON(buffer->free); + + if (new_buffer < buffer) + p = &parent->rb_left; + else if (new_buffer > buffer) + p = &parent->rb_right; + else + BUG(); + } + rb_link_node(&new_buffer->rb_node, parent, p); + rb_insert_color(&new_buffer->rb_node, &alloc->allocated_buffers); +} + +static struct binder_buffer *binder_alloc_buffer_lookup_locked( + struct binder_alloc *alloc, + uintptr_t user_ptr) +{ + struct rb_node *n = alloc->allocated_buffers.rb_node; + struct binder_buffer *buffer; + struct binder_buffer *kern_ptr; + + kern_ptr = (struct binder_buffer *)(user_ptr - alloc->user_buffer_offset + - offsetof(struct binder_buffer, data)); + + while (n) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + BUG_ON(buffer->free); + + if (kern_ptr < buffer) + n = n->rb_left; + else if (kern_ptr > buffer) + n = n->rb_right; + else + return buffer; + } + return NULL; +} + +/** + * binder_alloc_buffer_lookup() - get buffer given user ptr + * @alloc: binder_alloc for this proc + * @user_ptr: User pointer to buffer data + * + * Validate userspace pointer to buffer data and return buffer corresponding to + * that user pointer. Search the rb tree for buffer that matches user data + * pointer. + * + * Return: Pointer to buffer or NULL + */ +struct binder_buffer *binder_alloc_buffer_lookup(struct binder_alloc *alloc, + uintptr_t user_ptr) +{ + struct binder_buffer *buffer; + + mutex_lock(&alloc->mutex); + buffer = binder_alloc_buffer_lookup_locked(alloc, user_ptr); + mutex_unlock(&alloc->mutex); + return buffer; +} + +static int binder_update_page_range(struct binder_alloc *alloc, int allocate, + void *start, void *end, + struct vm_area_struct *vma) +{ + void *page_addr; + unsigned long user_page_addr; + struct page **page; + struct mm_struct *mm; + + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: %s pages %pK-%pK\n", alloc->pid, + allocate ? 
"allocate" : "free", start, end); + + if (end <= start) + return 0; + + trace_binder_update_page_range(alloc, allocate, start, end); + + if (vma) + mm = NULL; + else + mm = get_task_mm(alloc->tsk); + + if (mm) { + down_write(&mm->mmap_sem); + vma = alloc->vma; + if (vma && mm != alloc->vma_vm_mm) { + pr_err("%d: vma mm and task mm mismatch\n", + alloc->pid); + vma = NULL; + } + } + + if (allocate == 0) + goto free_range; + + if (vma == NULL) { + pr_err("%d: binder_alloc_buf failed to map pages in userspace, no vma\n", + alloc->pid); + goto err_no_vma; + } + + for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { + int ret; + + page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE]; + + BUG_ON(*page); + *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + if (*page == NULL) { + pr_err("%d: binder_alloc_buf failed for page at %pK\n", + alloc->pid, page_addr); + goto err_alloc_page_failed; + } + ret = map_kernel_range_noflush((unsigned long)page_addr, + PAGE_SIZE, PAGE_KERNEL, page); + flush_cache_vmap((unsigned long)page_addr, + (unsigned long)page_addr + PAGE_SIZE); + if (ret != 1) { + pr_err("%d: binder_alloc_buf failed to map page at %pK in kernel\n", + alloc->pid, page_addr); + goto err_map_kernel_failed; + } + user_page_addr = + (uintptr_t)page_addr + alloc->user_buffer_offset; + ret = vm_insert_page(vma, user_page_addr, page[0]); + if (ret) { + pr_err("%d: binder_alloc_buf failed to map page at %lx in userspace\n", + alloc->pid, user_page_addr); + goto err_vm_insert_page_failed; + } + /* vm_insert_page does not seem to increment the refcount */ + } + if (mm) { + up_write(&mm->mmap_sem); + mmput(mm); + } + return 0; + +free_range: + for (page_addr = end - PAGE_SIZE; page_addr >= start; + page_addr -= PAGE_SIZE) { + page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE]; + if (vma) + zap_page_range(vma, (uintptr_t)page_addr + + alloc->user_buffer_offset, PAGE_SIZE, NULL); +err_vm_insert_page_failed: + unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); +err_map_kernel_failed: + __free_page(*page); + *page = NULL; +err_alloc_page_failed: + ; + } +err_no_vma: + if (mm) { + up_write(&mm->mmap_sem); + mmput(mm); + } + return -ENOMEM; +} + +struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, + size_t data_size, + size_t offsets_size, + size_t extra_buffers_size, + int is_async) +{ + struct rb_node *n = alloc->free_buffers.rb_node; + struct binder_buffer *buffer; + size_t buffer_size; + struct rb_node *best_fit = NULL; + void *has_page_addr; + void *end_page_addr; + size_t size, data_offsets_size; + + if (alloc->vma == NULL) { + pr_err("%d: binder_alloc_buf, no vma\n", + alloc->pid); + return NULL; + } + + data_offsets_size = ALIGN(data_size, sizeof(void *)) + + ALIGN(offsets_size, sizeof(void *)); + + if (data_offsets_size < data_size || data_offsets_size < offsets_size) { + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: got transaction with invalid size %zd-%zd\n", + alloc->pid, data_size, offsets_size); + return NULL; + } + size = data_offsets_size + ALIGN(extra_buffers_size, sizeof(void *)); + if (size < data_offsets_size || size < extra_buffers_size) { + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: got transaction with invalid extra_buffers_size %zd\n", + alloc->pid, extra_buffers_size); + return NULL; + } + if (is_async && + alloc->free_async_space < size + sizeof(struct binder_buffer)) { + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: binder_alloc_buf size %zd failed, no async space left\n", + 
alloc->pid, size); + return NULL; + } + + while (n) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + BUG_ON(!buffer->free); + buffer_size = binder_alloc_buffer_size(alloc, buffer); + + if (size < buffer_size) { + best_fit = n; + n = n->rb_left; + } else if (size > buffer_size) + n = n->rb_right; + else { + best_fit = n; + break; + } + } + if (best_fit == NULL) { + pr_err("%d: binder_alloc_buf size %zd failed, no address space\n", + alloc->pid, size); + return NULL; + } + if (n == NULL) { + buffer = rb_entry(best_fit, struct binder_buffer, rb_node); + buffer_size = binder_alloc_buffer_size(alloc, buffer); + } + + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: binder_alloc_buf size %zd got buffer %pK size %zd\n", + alloc->pid, size, buffer, buffer_size); + + has_page_addr = + (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK); + if (n == NULL) { + if (size + sizeof(struct binder_buffer) + 4 >= buffer_size) + buffer_size = size; /* no room for other buffers */ + else + buffer_size = size + sizeof(struct binder_buffer); + } + end_page_addr = + (void *)PAGE_ALIGN((uintptr_t)buffer->data + buffer_size); + if (end_page_addr > has_page_addr) + end_page_addr = has_page_addr; + if (binder_update_page_range(alloc, 1, + (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr, NULL)) + return NULL; + + rb_erase(best_fit, &alloc->free_buffers); + buffer->free = 0; + binder_insert_allocated_buffer_locked(alloc, buffer); + if (buffer_size != size) { + struct binder_buffer *new_buffer = (void *)buffer->data + size; + + list_add(&new_buffer->entry, &buffer->entry); + new_buffer->free = 1; + binder_insert_free_buffer(alloc, new_buffer); + } + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: binder_alloc_buf size %zd got %pK\n", + alloc->pid, size, buffer); + buffer->data_size = data_size; + buffer->offsets_size = offsets_size; + buffer->async_transaction = is_async; + buffer->extra_buffers_size = extra_buffers_size; + if (is_async) { + alloc->free_async_space -= size + sizeof(struct binder_buffer); + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, + "%d: binder_alloc_buf size %zd async free %zd\n", + alloc->pid, size, alloc->free_async_space); + } + return buffer; +} + +/** + * binder_alloc_new_buf() - Allocate a new binder buffer + * @alloc: binder_alloc for this proc + * @data_size: size of user data buffer + * @offsets_size: user specified buffer offset + * @extra_buffers_size: size of extra space for meta-data (eg, security context) + * @is_async: buffer for async transaction + * + * Allocate a new buffer given the requested sizes. Returns + * the kernel version of the buffer pointer. 
The size allocated + * is the sum of the three given sizes (each rounded up to + * pointer-sized boundary) + * + * Return: The allocated buffer or %NULL if error + */ +struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, + size_t data_size, + size_t offsets_size, + size_t extra_buffers_size, + int is_async) +{ + struct binder_buffer *buffer; + + mutex_lock(&alloc->mutex); + buffer = binder_alloc_new_buf_locked(alloc, data_size, offsets_size, + extra_buffers_size, is_async); + mutex_unlock(&alloc->mutex); + return buffer; +} + +static void *buffer_start_page(struct binder_buffer *buffer) +{ + return (void *)((uintptr_t)buffer & PAGE_MASK); +} + +static void *buffer_end_page(struct binder_buffer *buffer) +{ + return (void *)(((uintptr_t)(buffer + 1) - 1) & PAGE_MASK); +} + +static void binder_delete_free_buffer(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + struct binder_buffer *prev, *next = NULL; + int free_page_end = 1; + int free_page_start = 1; + + BUG_ON(alloc->buffers.next == &buffer->entry); + prev = list_entry(buffer->entry.prev, struct binder_buffer, entry); + BUG_ON(!prev->free); + if (buffer_end_page(prev) == buffer_start_page(buffer)) { + free_page_start = 0; + if (buffer_end_page(prev) == buffer_end_page(buffer)) + free_page_end = 0; + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: merge free, buffer %pK share page with %pK\n", + alloc->pid, buffer, prev); + } + + if (!list_is_last(&buffer->entry, &alloc->buffers)) { + next = list_entry(buffer->entry.next, + struct binder_buffer, entry); + if (buffer_start_page(next) == buffer_end_page(buffer)) { + free_page_end = 0; + if (buffer_start_page(next) == + buffer_start_page(buffer)) + free_page_start = 0; + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: merge free, buffer %pK share page with %pK\n", + alloc->pid, buffer, prev); + } + } + list_del(&buffer->entry); + if (free_page_start || free_page_end) { + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: merge free, buffer %pK do not share page%s%s with %pK or %pK\n", + alloc->pid, buffer, free_page_start ? "" : " end", + free_page_end ? "" : " start", prev, next); + binder_update_page_range(alloc, 0, free_page_start ? + buffer_start_page(buffer) : buffer_end_page(buffer), + (free_page_end ? 
buffer_end_page(buffer) : + buffer_start_page(buffer)) + PAGE_SIZE, NULL); + } +} + +static void binder_free_buf_locked(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + size_t size, buffer_size; + + buffer_size = binder_alloc_buffer_size(alloc, buffer); + + size = ALIGN(buffer->data_size, sizeof(void *)) + + ALIGN(buffer->offsets_size, sizeof(void *)) + + ALIGN(buffer->extra_buffers_size, sizeof(void *)); + + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: binder_free_buf %pK size %zd buffer_size %zd\n", + alloc->pid, buffer, size, buffer_size); + + BUG_ON(buffer->free); + BUG_ON(size > buffer_size); + BUG_ON(buffer->transaction != NULL); + BUG_ON((void *)buffer < alloc->buffer); + BUG_ON((void *)buffer > alloc->buffer + alloc->buffer_size); + + if (buffer->async_transaction) { + alloc->free_async_space += size + sizeof(struct binder_buffer); + + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, + "%d: binder_free_buf size %zd async free %zd\n", + alloc->pid, size, alloc->free_async_space); + } + + binder_update_page_range(alloc, 0, + (void *)PAGE_ALIGN((uintptr_t)buffer->data), + (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK), + NULL); + + rb_erase(&buffer->rb_node, &alloc->allocated_buffers); + buffer->free = 1; + if (!list_is_last(&buffer->entry, &alloc->buffers)) { + struct binder_buffer *next = list_entry(buffer->entry.next, + struct binder_buffer, entry); + + if (next->free) { + rb_erase(&next->rb_node, &alloc->free_buffers); + binder_delete_free_buffer(alloc, next); + } + } + if (alloc->buffers.next != &buffer->entry) { + struct binder_buffer *prev = list_entry(buffer->entry.prev, + struct binder_buffer, entry); + + if (prev->free) { + binder_delete_free_buffer(alloc, buffer); + rb_erase(&prev->rb_node, &alloc->free_buffers); + buffer = prev; + } + } + binder_insert_free_buffer(alloc, buffer); +} + +/** + * binder_alloc_free_buf() - free a binder buffer + * @alloc: binder_alloc for this proc + * @buffer: kernel pointer to buffer + * + * Free the buffer allocated via binder_alloc_new_buffer() + */ +void binder_alloc_free_buf(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + mutex_lock(&alloc->mutex); + binder_free_buf_locked(alloc, buffer); + mutex_unlock(&alloc->mutex); +} + +/** + * binder_alloc_mmap_handler() - map virtual address space for proc + * @alloc: alloc structure for this proc + * @vma: vma passed to mmap() + * + * Called by binder_mmap() to initialize the space specified in + * vma for allocating binder buffers + * + * Return: + * 0 = success + * -EBUSY = address space already mapped + * -ENOMEM = failed to map memory to given address space + */ +int binder_alloc_mmap_handler(struct binder_alloc *alloc, + struct vm_area_struct *vma) +{ + int ret; + struct vm_struct *area; + const char *failure_string; + struct binder_buffer *buffer; + + mutex_lock(&binder_alloc_mmap_lock); + if (alloc->buffer) { + ret = -EBUSY; + failure_string = "already mapped"; + goto err_already_mapped; + } + + area = get_vm_area(vma->vm_end - vma->vm_start, VM_IOREMAP); + if (area == NULL) { + ret = -ENOMEM; + failure_string = "get_vm_area"; + goto err_get_vm_area_failed; + } + alloc->buffer = area->addr; + alloc->user_buffer_offset = + vma->vm_start - (uintptr_t)alloc->buffer; + mutex_unlock(&binder_alloc_mmap_lock); + +#ifdef CONFIG_CPU_CACHE_VIPT + if (cache_is_vipt_aliasing()) { + while (CACHE_COLOUR( + (vma->vm_start ^ (uint32_t)alloc->buffer))) { + pr_info("binder_mmap: %d %lx-%lx maps %pK bad alignment\n", + alloc->pid, vma->vm_start, 
vma->vm_end, + alloc->buffer); + vma->vm_start += PAGE_SIZE; + } + } +#endif + alloc->pages = kzalloc(sizeof(alloc->pages[0]) * + ((vma->vm_end - vma->vm_start) / PAGE_SIZE), + GFP_KERNEL); + if (alloc->pages == NULL) { + ret = -ENOMEM; + failure_string = "alloc page array"; + goto err_alloc_pages_failed; + } + alloc->buffer_size = vma->vm_end - vma->vm_start; + + if (binder_update_page_range(alloc, 1, alloc->buffer, + alloc->buffer + PAGE_SIZE, vma)) { + ret = -ENOMEM; + failure_string = "alloc small buf"; + goto err_alloc_small_buf_failed; + } + buffer = alloc->buffer; + INIT_LIST_HEAD(&alloc->buffers); + list_add(&buffer->entry, &alloc->buffers); + buffer->free = 1; + binder_insert_free_buffer(alloc, buffer); + alloc->free_async_space = alloc->buffer_size / 2; + barrier(); + alloc->vma = vma; + alloc->vma_vm_mm = vma->vm_mm; + + return 0; + +err_alloc_small_buf_failed: + kfree(alloc->pages); + alloc->pages = NULL; +err_alloc_pages_failed: + mutex_lock(&binder_alloc_mmap_lock); + vfree(alloc->buffer); + alloc->buffer = NULL; +err_get_vm_area_failed: +err_already_mapped: + mutex_unlock(&binder_alloc_mmap_lock); + pr_err("%s: %d %lx-%lx %s failed %d\n", __func__, + alloc->pid, vma->vm_start, vma->vm_end, failure_string, ret); + return ret; +} + + +void binder_alloc_deferred_release(struct binder_alloc *alloc) +{ + struct rb_node *n; + int buffers, page_count; + + BUG_ON(alloc->vma); + + buffers = 0; + mutex_lock(&alloc->mutex); + while ((n = rb_first(&alloc->allocated_buffers))) { + struct binder_buffer *buffer; + + buffer = rb_entry(n, struct binder_buffer, rb_node); + + /* Transaction should already have been freed */ + BUG_ON(buffer->transaction); + + binder_free_buf_locked(alloc, buffer); + buffers++; + } + + page_count = 0; + if (alloc->pages) { + int i; + + for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { + void *page_addr; + + if (!alloc->pages[i]) + continue; + + page_addr = alloc->buffer + i * PAGE_SIZE; + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%s: %d: page %d at %pK not freed\n", + __func__, alloc->pid, i, page_addr); + unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); + __free_page(alloc->pages[i]); + page_count++; + } + kfree(alloc->pages); + vfree(alloc->buffer); + } + mutex_unlock(&alloc->mutex); + + binder_alloc_debug(BINDER_DEBUG_OPEN_CLOSE, + "%s: %d buffers %d, pages %d\n", + __func__, alloc->pid, buffers, page_count); +} + +static void print_binder_buffer(struct seq_file *m, const char *prefix, + struct binder_buffer *buffer) +{ + seq_printf(m, "%s %d: %pK size %zd:%zd %s\n", + prefix, buffer->debug_id, buffer->data, + buffer->data_size, buffer->offsets_size, + buffer->transaction ? 
"active" : "delivered"); +} + +/** + * binder_alloc_print_allocated() - print buffer info + * @m: seq_file for output via seq_printf() + * @alloc: binder_alloc for this proc + * + * Prints information about every buffer associated with + * the binder_alloc state to the given seq_file + */ +void binder_alloc_print_allocated(struct seq_file *m, + struct binder_alloc *alloc) +{ + struct rb_node *n; + + mutex_lock(&alloc->mutex); + for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n)) + print_binder_buffer(m, " buffer", + rb_entry(n, struct binder_buffer, rb_node)); + mutex_unlock(&alloc->mutex); +} + +/** + * binder_alloc_get_allocated_count() - return count of buffers + * @alloc: binder_alloc for this proc + * + * Return: count of allocated buffers + */ +int binder_alloc_get_allocated_count(struct binder_alloc *alloc) +{ + struct rb_node *n; + int count = 0; + + mutex_lock(&alloc->mutex); + for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n)) + count++; + mutex_unlock(&alloc->mutex); + return count; +} + + +/** + * binder_alloc_vma_close() - invalidate address space + * @alloc: binder_alloc for this proc + * + * Called from binder_vma_close() when releasing address space. + * Clears alloc->vma to prevent new incoming transactions from + * allocating more buffers. + */ +void binder_alloc_vma_close(struct binder_alloc *alloc) +{ + WRITE_ONCE(alloc->vma, NULL); + WRITE_ONCE(alloc->vma_vm_mm, NULL); +} + +/** + * binder_alloc_init() - called by binder_open() for per-proc initialization + * @alloc: binder_alloc for this proc + * + * Called from binder_open() to initialize binder_alloc fields for + * new binder proc + */ +void binder_alloc_init(struct binder_alloc *alloc) +{ + alloc->tsk = current->group_leader; + alloc->pid = current->group_leader->pid; + mutex_init(&alloc->mutex); +} + diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h new file mode 100644 index 000000000000..721c511431f9 --- /dev/null +++ b/drivers/android/binder_alloc.h @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2017 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ */
+
+#ifndef _LINUX_BINDER_ALLOC_H
+#define _LINUX_BINDER_ALLOC_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+struct binder_transaction;
+
+/**
+ * struct binder_buffer - buffer used for binder transactions
+ * @entry:              entry in alloc->buffers
+ * @rb_node:            node for allocated_buffers/free_buffers rb trees
+ * @free:               true if buffer is free
+ * @allow_user_free:    userspace may free the buffer once it is consumed
+ * @async_transaction:  buffer is in use for an async transaction
+ * @debug_id:           unique ID for debugging
+ * @transaction:        binder transaction this buffer is attached to
+ * @target_node:        binder_node that is the target of the transaction
+ * @data_size:          size of the data area requested by the caller
+ * @offsets_size:       size of the offsets array
+ * @extra_buffers_size: size of space for other objects (e.g. sg lists)
+ * @data:               start of the buffer's data area
+ *
+ * Bookkeeping structure for binder transaction buffers
+ */
+struct binder_buffer {
+	struct list_head entry; /* free and allocated entries by address */
+	struct rb_node rb_node; /* free entry by size or allocated entry */
+				/* by address */
+	unsigned free:1;
+	unsigned allow_user_free:1;
+	unsigned async_transaction:1;
+	unsigned debug_id:29;
+
+	struct binder_transaction *transaction;
+
+	struct binder_node *target_node;
+	size_t data_size;
+	size_t offsets_size;
+	size_t extra_buffers_size;
+	uint8_t data[0];
+};
+
+/**
+ * struct binder_alloc - per-binder proc state for binder allocator
+ * @vma:                vm_area_struct passed to mmap_handler
+ *                      (invariant after mmap)
+ * @tsk:                tid for task that called init for this proc
+ *                      (invariant after init)
+ * @vma_vm_mm:          copy of vma->vm_mm (invariant after mmap)
+ * @buffer:             base of per-proc address space mapped via mmap
+ * @user_buffer_offset: offset between user and kernel VAs for buffer
+ * @buffers:            list of all buffers for this proc
+ * @free_buffers:       rb tree of buffers available for allocation
+ *                      sorted by size
+ * @allocated_buffers:  rb tree of allocated buffers sorted by address
+ * @free_async_space:   VA space available for async buffers. This is
+ *                      initialized at mmap time to 1/2 the full VA space
+ * @pages:              array of physical page addresses for each
+ *                      page of mmap'd space
+ * @buffer_size:        size of address space specified via mmap
+ * @pid:                pid for associated binder_proc (invariant after init)
+ *
+ * Bookkeeping structure for per-proc address space management for binder
+ * buffers. It is normally initialized during binder_init() and binder_mmap()
+ * calls. The address space is used for both user-visible buffers and for
+ * struct binder_buffer objects used to track the user buffers
+ */
+struct binder_alloc {
+	struct mutex mutex;
+	struct task_struct *tsk;
+	struct vm_area_struct *vma;
+	struct mm_struct *vma_vm_mm;
+	void *buffer;
+	ptrdiff_t user_buffer_offset;
+	struct list_head buffers;
+	struct rb_root free_buffers;
+	struct rb_root allocated_buffers;
+	size_t free_async_space;
+	struct page **pages;
+	size_t buffer_size;
+	uint32_t buffer_free;
+	int pid;
+};
+
+extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
+						  size_t data_size,
+						  size_t offsets_size,
+						  size_t extra_buffers_size,
+						  int is_async);
+extern void binder_alloc_init(struct binder_alloc *alloc);
+extern void binder_alloc_vma_close(struct binder_alloc *alloc);
+extern struct binder_buffer *
+binder_alloc_buffer_lookup(struct binder_alloc *alloc,
+			   uintptr_t user_ptr);
+extern void binder_alloc_free_buf(struct binder_alloc *alloc,
+				  struct binder_buffer *buffer);
+extern int binder_alloc_mmap_handler(struct binder_alloc *alloc,
+				     struct vm_area_struct *vma);
+extern void binder_alloc_deferred_release(struct binder_alloc *alloc);
+extern int binder_alloc_get_allocated_count(struct binder_alloc *alloc);
+extern void binder_alloc_print_allocated(struct seq_file *m,
+					 struct binder_alloc *alloc);
+
+/**
+ * binder_alloc_get_free_async_space() - get free space available for async
+ * @alloc:	binder_alloc for this proc
+ *
+ * Return:	the bytes remaining in the address-space for async transactions
+ */
+static inline size_t
+binder_alloc_get_free_async_space(struct binder_alloc *alloc)
+{
+	size_t free_async_space;
+
+	mutex_lock(&alloc->mutex);
+	free_async_space = alloc->free_async_space;
+	mutex_unlock(&alloc->mutex);
+	return free_async_space;
+}
+
+/**
+ * binder_alloc_get_user_buffer_offset() - get offset between kernel/user addrs
+ * @alloc:	binder_alloc for this proc
+ *
+ * Return:	the offset between kernel and user-space addresses to use for
+ * virtual address conversion
+ */
+static inline ptrdiff_t
+binder_alloc_get_user_buffer_offset(struct binder_alloc *alloc)
+{
+	/*
+	 * user_buffer_offset is constant if vma is set and
+	 * undefined if vma is not set. It is possible to
+	 * get here with !alloc->vma if the target process
+	 * is dying while a transaction is being initiated.
+	 * Returning the old value is ok in this case and
+	 * the transaction will fail.
+	 */
+	return alloc->user_buffer_offset;
+}
+
+#endif /* _LINUX_BINDER_ALLOC_H */
+

From a19f3efc4f54593d9e17f3fe7afc1ea78e0ae5f0 Mon Sep 17 00:00:00 2001
From: Todd Kjos
Date: Wed, 24 May 2017 11:53:13 -0700
Subject: [PATCH 010/176] FROMLIST: binder: remove binder_debug_no_lock mechanism

(from https://patchwork.kernel.org/patch/9817811/)

With the global lock, there was a mechanism to access binder driver
debugging information with the global lock disabled to debug deadlocks
or other issues. This mechanism is rarely (if ever) used anymore
and wasn't needed during the development of fine-grained locking
in the binder driver. Remove it.
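For context, the mechanism being removed boils down to the
conditional-locking pattern below (a condensed sketch of the debugfs
show functions touched by the diff that follows; the actual dump
logic is elided):

	/* Before: the proc_no_lock module parameter let the show
	 * functions skip the global lock.
	 */
	static int binder_state_show(struct seq_file *m, void *unused)
	{
		int do_lock = !binder_debug_no_lock;

		if (do_lock)
			binder_lock(__func__);
		/* ... dump dead nodes and per-proc state ... */
		if (do_lock)
			binder_unlock(__func__);
		return 0;
	}

	/* After: the lock is taken unconditionally and the
	 * module parameter is gone.
	 */
	static int binder_state_show(struct seq_file *m, void *unused)
	{
		binder_lock(__func__);
		/* ... dump dead nodes and per-proc state ... */
		binder_unlock(__func__);
		return 0;
	}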
Change-Id: Ie1cf45748cefa433607a97c2ef322e7906efe0f7 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 3251b7cdd717..9fdf1ed9d7f6 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -107,9 +107,6 @@ static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR | BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION; module_param_named(debug_mask, binder_debug_mask, uint, S_IWUSR | S_IRUGO); -static bool binder_debug_no_lock; -module_param_named(proc_no_lock, binder_debug_no_lock, bool, S_IWUSR | S_IRUGO); - static char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; module_param_named(devices, binder_devices_param, charp, S_IRUGO); @@ -3508,10 +3505,8 @@ static int binder_state_show(struct seq_file *m, void *unused) { struct binder_proc *proc; struct binder_node *node; - int do_lock = !binder_debug_no_lock; - if (do_lock) - binder_lock(__func__); + binder_lock(__func__); seq_puts(m, "binder state:\n"); @@ -3522,18 +3517,15 @@ static int binder_state_show(struct seq_file *m, void *unused) hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc(m, proc, 1); - if (do_lock) - binder_unlock(__func__); + binder_unlock(__func__); return 0; } static int binder_stats_show(struct seq_file *m, void *unused) { struct binder_proc *proc; - int do_lock = !binder_debug_no_lock; - if (do_lock) - binder_lock(__func__); + binder_lock(__func__); seq_puts(m, "binder stats:\n"); @@ -3541,24 +3533,20 @@ static int binder_stats_show(struct seq_file *m, void *unused) hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc_stats(m, proc); - if (do_lock) - binder_unlock(__func__); + binder_unlock(__func__); return 0; } static int binder_transactions_show(struct seq_file *m, void *unused) { struct binder_proc *proc; - int do_lock = !binder_debug_no_lock; - if (do_lock) - binder_lock(__func__); + binder_lock(__func__); seq_puts(m, "binder transactions:\n"); hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc(m, proc, 0); - if (do_lock) - binder_unlock(__func__); + binder_unlock(__func__); return 0; } @@ -3566,10 +3554,8 @@ static int binder_proc_show(struct seq_file *m, void *unused) { struct binder_proc *itr; int pid = (unsigned long)m->private; - int do_lock = !binder_debug_no_lock; - if (do_lock) - binder_lock(__func__); + binder_lock(__func__); hlist_for_each_entry(itr, &binder_procs, proc_node) { if (itr->pid == pid) { @@ -3577,8 +3563,7 @@ static int binder_proc_show(struct seq_file *m, void *unused) print_binder_proc(m, itr, 1); } } - if (do_lock) - binder_unlock(__func__); + binder_unlock(__func__); return 0; } From 3490fdcb77030b103e6a709a63191e09c6e64ac0 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 17 Oct 2016 12:33:15 -0700 Subject: [PATCH 011/176] FROMLIST: binder: add protection for non-perf cases (from https://patchwork.kernel.org/patch/9817749/) Add binder_dead_nodes_lock, binder_procs_lock, and binder_context_mgr_node_lock to protect the associated global lists Bug: 33250092 32225111 Change-Id: I9d1158536783763e0fa93b18e19fba2c488d8cfc Signed-off-by: Todd Kjos --- drivers/android/binder.c | 81 +++++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 18 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9fdf1ed9d7f6..4b9816409b54 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ 
-45,12 +45,16 @@ #include "binder_trace.h" static DEFINE_MUTEX(binder_main_lock); + +static HLIST_HEAD(binder_deferred_list); static DEFINE_MUTEX(binder_deferred_lock); static HLIST_HEAD(binder_devices); static HLIST_HEAD(binder_procs); -static HLIST_HEAD(binder_deferred_list); +static DEFINE_MUTEX(binder_procs_lock); + static HLIST_HEAD(binder_dead_nodes); +static DEFINE_SPINLOCK(binder_dead_nodes_lock); static struct dentry *binder_debugfs_dir_entry_root; static struct dentry *binder_debugfs_dir_entry_proc; @@ -219,6 +223,8 @@ static struct binder_transaction_log_entry *binder_transaction_log_add( struct binder_context { struct binder_node *binder_context_mgr_node; + struct mutex context_mgr_node_lock; + kuid_t binder_context_mgr_uid; const char *name; }; @@ -571,7 +577,9 @@ static int binder_dec_node(struct binder_node *node, int strong, int internal) "refless node %d deleted\n", node->debug_id); } else { + spin_lock(&binder_dead_nodes_lock); hlist_del(&node->dead_node); + spin_unlock(&binder_dead_nodes_lock); binder_debug(BINDER_DEBUG_INTERNAL_REFS, "dead node %d deleted\n", node->debug_id); @@ -1455,11 +1463,14 @@ static void binder_transaction(struct binder_proc *proc, } target_node = ref->node; } else { + mutex_lock(&context->context_mgr_node_lock); target_node = context->binder_context_mgr_node; if (target_node == NULL) { return_error = BR_DEAD_REPLY; + mutex_unlock(&context->context_mgr_node_lock); goto err_no_context_mgr_node; } + mutex_unlock(&context->context_mgr_node_lock); } e->to_node = target_node->debug_id; target_proc = target_node->proc; @@ -1825,22 +1836,31 @@ static int binder_thread_write(struct binder_proc *proc, case BC_RELEASE: case BC_DECREFS: { uint32_t target; - struct binder_ref *ref; + struct binder_ref *ref = NULL; const char *debug_string; if (get_user(target, (uint32_t __user *)ptr)) return -EFAULT; + ptr += sizeof(uint32_t); - if (target == 0 && context->binder_context_mgr_node && + if (target == 0 && (cmd == BC_INCREFS || cmd == BC_ACQUIRE)) { - ref = binder_get_ref_for_node(proc, - context->binder_context_mgr_node); - if (ref->desc != target) { - binder_user_error("%d:%d tried to acquire reference to desc 0, got %d instead\n", - proc->pid, thread->pid, - ref->desc); + struct binder_node *ctx_mgr_node; + + mutex_lock(&context->context_mgr_node_lock); + ctx_mgr_node = context->binder_context_mgr_node; + if (ctx_mgr_node) { + ref = binder_get_ref_for_node(proc, + ctx_mgr_node); + if (ref && ref->desc != target) { + binder_user_error("%d:%d tried to acquire reference to desc 0, got %d instead\n", + proc->pid, thread->pid, + ref->desc); + } } - } else + mutex_unlock(&context->context_mgr_node_lock); + } + if (ref == NULL) ref = binder_get_ref(proc, target, cmd == BC_ACQUIRE || cmd == BC_RELEASE); @@ -2754,9 +2774,10 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) int ret = 0; struct binder_proc *proc = filp->private_data; struct binder_context *context = proc->context; - + struct binder_node *new_node; kuid_t curr_euid = current_euid(); + mutex_lock(&context->context_mgr_node_lock); if (context->binder_context_mgr_node) { pr_err("BINDER_SET_CONTEXT_MGR already set\n"); ret = -EBUSY; @@ -2777,16 +2798,18 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) } else { context->binder_context_mgr_uid = curr_euid; } - context->binder_context_mgr_node = binder_new_node(proc, 0, 0); - if (!context->binder_context_mgr_node) { + new_node = binder_new_node(proc, 0, 0); + if (!new_node) { ret = -ENOMEM; goto out; } - 
context->binder_context_mgr_node->local_weak_refs++; - context->binder_context_mgr_node->local_strong_refs++; - context->binder_context_mgr_node->has_strong_ref = 1; - context->binder_context_mgr_node->has_weak_ref = 1; + new_node->local_weak_refs++; + new_node->local_strong_refs++; + new_node->has_strong_ref = 1; + new_node->has_weak_ref = 1; + context->binder_context_mgr_node = new_node; out: + mutex_unlock(&context->context_mgr_node_lock); return ret; } @@ -2966,13 +2989,16 @@ static int binder_open(struct inode *nodp, struct file *filp) binder_lock(__func__); binder_stats_created(BINDER_STAT_PROC); - hlist_add_head(&proc->proc_node, &binder_procs); proc->pid = current->group_leader->pid; INIT_LIST_HEAD(&proc->delivered_death); filp->private_data = proc; binder_unlock(__func__); + mutex_lock(&binder_procs_lock); + hlist_add_head(&proc->proc_node, &binder_procs); + mutex_unlock(&binder_procs_lock); + if (binder_debugfs_dir_entry_proc) { char strbuf[11]; @@ -3051,7 +3077,10 @@ static int binder_node_release(struct binder_node *node, int refs) node->proc = NULL; node->local_strong_refs = 0; node->local_weak_refs = 0; + + spin_lock(&binder_dead_nodes_lock); hlist_add_head(&node->dead_node, &binder_dead_nodes); + spin_unlock(&binder_dead_nodes_lock); hlist_for_each_entry(ref, &node->refs, node_entry) { refs++; @@ -3085,8 +3114,11 @@ static void binder_deferred_release(struct binder_proc *proc) BUG_ON(proc->files); + mutex_lock(&binder_procs_lock); hlist_del(&proc->proc_node); + mutex_unlock(&binder_procs_lock); + mutex_lock(&context->context_mgr_node_lock); if (context->binder_context_mgr_node && context->binder_context_mgr_node->proc == proc) { binder_debug(BINDER_DEBUG_DEAD_BINDER, @@ -3094,6 +3126,7 @@ static void binder_deferred_release(struct binder_proc *proc) __func__, proc->pid); context->binder_context_mgr_node = NULL; } + mutex_unlock(&context->context_mgr_node_lock); threads = 0; active_transactions = 0; @@ -3510,13 +3543,17 @@ static int binder_state_show(struct seq_file *m, void *unused) seq_puts(m, "binder state:\n"); + spin_lock(&binder_dead_nodes_lock); if (!hlist_empty(&binder_dead_nodes)) seq_puts(m, "dead nodes:\n"); hlist_for_each_entry(node, &binder_dead_nodes, dead_node) print_binder_node(m, node); + spin_unlock(&binder_dead_nodes_lock); + mutex_lock(&binder_procs_lock); hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc(m, proc, 1); + mutex_unlock(&binder_procs_lock); binder_unlock(__func__); return 0; } @@ -3531,8 +3568,10 @@ static int binder_stats_show(struct seq_file *m, void *unused) print_binder_stats(m, "", &binder_stats); + mutex_lock(&binder_procs_lock); hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc_stats(m, proc); + mutex_unlock(&binder_procs_lock); binder_unlock(__func__); return 0; } @@ -3544,8 +3583,10 @@ static int binder_transactions_show(struct seq_file *m, void *unused) binder_lock(__func__); seq_puts(m, "binder transactions:\n"); + mutex_lock(&binder_procs_lock); hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc(m, proc, 0); + mutex_unlock(&binder_procs_lock); binder_unlock(__func__); return 0; } @@ -3557,12 +3598,15 @@ static int binder_proc_show(struct seq_file *m, void *unused) binder_lock(__func__); + mutex_lock(&binder_procs_lock); hlist_for_each_entry(itr, &binder_procs, proc_node) { if (itr->pid == pid) { seq_puts(m, "binder proc state:\n"); print_binder_proc(m, itr, 1); } } + mutex_unlock(&binder_procs_lock); + binder_unlock(__func__); return 0; } @@ -3623,6 +3667,7 @@ static 
int __init init_binder_device(const char *name) binder_device->context.binder_context_mgr_uid = INVALID_UID; binder_device->context.name = name; + mutex_init(&binder_device->context.context_mgr_node_lock); ret = misc_register(&binder_device->miscdev); if (ret < 0) { From f716ecfc028f7733e8549cb2cb1f2851bc471253 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Thu, 13 Oct 2016 16:36:15 -0700 Subject: [PATCH 012/176] FROMLIST: binder: change binder_stats to atomics (from https://patchwork.kernel.org/patch/9817755/) Use atomics for stats to avoid needing to lock for increments/decrements Bug: 33250092 32225111 Change-Id: I13e69b7f0485ccf16673e25091455781e1933a98 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 48 +++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 4b9816409b54..0b6ff5f84322 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -167,22 +167,22 @@ enum binder_stat_types { }; struct binder_stats { - int br[_IOC_NR(BR_FAILED_REPLY) + 1]; - int bc[_IOC_NR(BC_REPLY_SG) + 1]; - int obj_created[BINDER_STAT_COUNT]; - int obj_deleted[BINDER_STAT_COUNT]; + atomic_t br[_IOC_NR(BR_FAILED_REPLY) + 1]; + atomic_t bc[_IOC_NR(BC_REPLY_SG) + 1]; + atomic_t obj_created[BINDER_STAT_COUNT]; + atomic_t obj_deleted[BINDER_STAT_COUNT]; }; static struct binder_stats binder_stats; static inline void binder_stats_deleted(enum binder_stat_types type) { - binder_stats.obj_deleted[type]++; + atomic_inc(&binder_stats.obj_deleted[type]); } static inline void binder_stats_created(enum binder_stat_types type) { - binder_stats.obj_created[type]++; + atomic_inc(&binder_stats.obj_created[type]); } struct binder_transaction_log_entry { @@ -1826,9 +1826,9 @@ static int binder_thread_write(struct binder_proc *proc, ptr += sizeof(uint32_t); trace_binder_command(cmd); if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.bc)) { - binder_stats.bc[_IOC_NR(cmd)]++; - proc->stats.bc[_IOC_NR(cmd)]++; - thread->stats.bc[_IOC_NR(cmd)]++; + atomic_inc(&binder_stats.bc[_IOC_NR(cmd)]); + atomic_inc(&proc->stats.bc[_IOC_NR(cmd)]); + atomic_inc(&thread->stats.bc[_IOC_NR(cmd)]); } switch (cmd) { case BC_INCREFS: @@ -2202,9 +2202,9 @@ static void binder_stat_br(struct binder_proc *proc, { trace_binder_return(cmd); if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.br)) { - binder_stats.br[_IOC_NR(cmd)]++; - proc->stats.br[_IOC_NR(cmd)]++; - thread->stats.br[_IOC_NR(cmd)]++; + atomic_inc(&binder_stats.br[_IOC_NR(cmd)]); + atomic_inc(&proc->stats.br[_IOC_NR(cmd)]); + atomic_inc(&thread->stats.br[_IOC_NR(cmd)]); } } @@ -3454,17 +3454,21 @@ static void print_binder_stats(struct seq_file *m, const char *prefix, BUILD_BUG_ON(ARRAY_SIZE(stats->bc) != ARRAY_SIZE(binder_command_strings)); for (i = 0; i < ARRAY_SIZE(stats->bc); i++) { - if (stats->bc[i]) + int temp = atomic_read(&stats->bc[i]); + + if (temp) seq_printf(m, "%s%s: %d\n", prefix, - binder_command_strings[i], stats->bc[i]); + binder_command_strings[i], temp); } BUILD_BUG_ON(ARRAY_SIZE(stats->br) != ARRAY_SIZE(binder_return_strings)); for (i = 0; i < ARRAY_SIZE(stats->br); i++) { - if (stats->br[i]) + int temp = atomic_read(&stats->br[i]); + + if (temp) seq_printf(m, "%s%s: %d\n", prefix, - binder_return_strings[i], stats->br[i]); + binder_return_strings[i], temp); } BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) != @@ -3472,11 +3476,15 @@ static void print_binder_stats(struct seq_file *m, const char *prefix, BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) != 
ARRAY_SIZE(stats->obj_deleted)); for (i = 0; i < ARRAY_SIZE(stats->obj_created); i++) { - if (stats->obj_created[i] || stats->obj_deleted[i]) - seq_printf(m, "%s%s: active %d total %d\n", prefix, + int created = atomic_read(&stats->obj_created[i]); + int deleted = atomic_read(&stats->obj_deleted[i]); + + if (created || deleted) + seq_printf(m, "%s%s: active %d total %d\n", + prefix, binder_objstat_strings[i], - stats->obj_created[i] - stats->obj_deleted[i], - stats->obj_created[i]); + created - deleted, + created); } } From be4dde1f05cda42d74035c50d944a6ec0ef365d7 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Thu, 25 May 2017 10:56:00 -0700 Subject: [PATCH 013/176] FROMLIST: binder: make binder_last_id an atomic (from https://patchwork.kernel.org/patch/9817809/) Change-Id: I12a505091d377ca9034861317b7e68c2e75f7256 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 0b6ff5f84322..edbd729f2f43 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -58,7 +58,7 @@ static DEFINE_SPINLOCK(binder_dead_nodes_lock); static struct dentry *binder_debugfs_dir_entry_root; static struct dentry *binder_debugfs_dir_entry_proc; -static int binder_last_id; +static atomic_t binder_last_id; static struct workqueue_struct *binder_deferred_workqueue; #define BINDER_DEBUG_ENTRY(name) \ @@ -496,7 +496,7 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, binder_stats_created(BINDER_STAT_NODE); rb_link_node(&node->rb_node, parent, p); rb_insert_color(&node->rb_node, &proc->nodes); - node->debug_id = ++binder_last_id; + node->debug_id = atomic_inc_return(&binder_last_id); node->proc = proc; node->ptr = ptr; node->cookie = cookie; @@ -640,7 +640,7 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, if (new_ref == NULL) return NULL; binder_stats_created(BINDER_STAT_REF); - new_ref->debug_id = ++binder_last_id; + new_ref->debug_id = atomic_inc_return(&binder_last_id); new_ref->proc = proc; new_ref->node = node; rb_link_node(&new_ref->rb_node_node, parent, p); @@ -1528,7 +1528,7 @@ static void binder_transaction(struct binder_proc *proc, } binder_stats_created(BINDER_STAT_TRANSACTION_COMPLETE); - t->debug_id = ++binder_last_id; + t->debug_id = atomic_inc_return(&binder_last_id); e->debug_id = t->debug_id; if (reply) From 0a0fdc1fdc20beec7da852d2392f3111e3555983 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 22 Mar 2017 17:19:52 -0700 Subject: [PATCH 014/176] FROMLIST: binder: add log information for binder transaction failures (from https://patchwork.kernel.org/patch/9817751/) Add additional information to determine the cause of binder failures. 
Adds the following to failed transaction log and kernel messages: return_error : value returned for transaction return_error_param : errno returned by binder allocator return_error_line : line number where error detected Also, return BR_DEAD_REPLY if an allocation error indicates a dead proc (-ESRCH) Bug: 36406078 Change-Id: Ifc8881fa5adfcced3f2d67f9030fbd3efa3e2cab Test: tested manually Signed-off-by: Todd Kjos --- drivers/android/binder.c | 87 ++++++++++++++++++++++++++++++---- drivers/android/binder_alloc.c | 20 ++++---- 2 files changed, 88 insertions(+), 19 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index edbd729f2f43..5990f7fbcbd3 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -196,6 +196,9 @@ struct binder_transaction_log_entry { int to_node; int data_size; int offsets_size; + int return_error_line; + uint32_t return_error; + uint32_t return_error_param; const char *context_name; }; struct binder_transaction_log { @@ -1143,7 +1146,7 @@ static int binder_translate_binder(struct flat_binder_object *fp, ref = binder_get_ref_for_node(target_proc, node); if (!ref) - return -EINVAL; + return -ENOMEM; if (fp->hdr.type == BINDER_TYPE_BINDER) fp->hdr.type = BINDER_TYPE_HANDLE; @@ -1200,7 +1203,7 @@ static int binder_translate_handle(struct flat_binder_object *fp, new_ref = binder_get_ref_for_node(target_proc, ref->node); if (!new_ref) - return -EINVAL; + return -ENOMEM; fp->binder = 0; fp->handle = new_ref->desc; @@ -1398,7 +1401,9 @@ static void binder_transaction(struct binder_proc *proc, wait_queue_head_t *target_wait; struct binder_transaction *in_reply_to = NULL; struct binder_transaction_log_entry *e; - uint32_t return_error; + uint32_t return_error = 0; + uint32_t return_error_param = 0; + uint32_t return_error_line = 0; struct binder_buffer_object *last_fixup_obj = NULL; binder_size_t last_fixup_min_off = 0; struct binder_context *context = proc->context; @@ -1418,6 +1423,8 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got reply transaction with no transaction stack\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; + return_error_param = -EPROTO; + return_error_line = __LINE__; goto err_empty_call_stack; } binder_set_nice(in_reply_to->saved_priority); @@ -1429,6 +1436,8 @@ static void binder_transaction(struct binder_proc *proc, in_reply_to->to_thread ? 
in_reply_to->to_thread->pid : 0); return_error = BR_FAILED_REPLY; + return_error_param = -EPROTO; + return_error_line = __LINE__; in_reply_to = NULL; goto err_bad_call_stack; } @@ -1436,6 +1445,7 @@ static void binder_transaction(struct binder_proc *proc, target_thread = in_reply_to->from; if (target_thread == NULL) { return_error = BR_DEAD_REPLY; + return_error_line = __LINE__; goto err_dead_binder; } if (target_thread->transaction_stack != in_reply_to) { @@ -1445,6 +1455,8 @@ static void binder_transaction(struct binder_proc *proc, target_thread->transaction_stack->debug_id : 0, in_reply_to->debug_id); return_error = BR_FAILED_REPLY; + return_error_param = -EPROTO; + return_error_line = __LINE__; in_reply_to = NULL; target_thread = NULL; goto err_dead_binder; @@ -1459,6 +1471,8 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got transaction to invalid handle\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_invalid_target_handle; } target_node = ref->node; @@ -1468,6 +1482,7 @@ static void binder_transaction(struct binder_proc *proc, if (target_node == NULL) { return_error = BR_DEAD_REPLY; mutex_unlock(&context->context_mgr_node_lock); + return_error_line = __LINE__; goto err_no_context_mgr_node; } mutex_unlock(&context->context_mgr_node_lock); @@ -1476,11 +1491,14 @@ static void binder_transaction(struct binder_proc *proc, target_proc = target_node->proc; if (target_proc == NULL) { return_error = BR_DEAD_REPLY; + return_error_line = __LINE__; goto err_dead_binder; } if (security_binder_transaction(proc->tsk, target_proc->tsk) < 0) { return_error = BR_FAILED_REPLY; + return_error_param = -EPERM; + return_error_line = __LINE__; goto err_invalid_target_handle; } if (!(tr->flags & TF_ONE_WAY) && thread->transaction_stack) { @@ -1494,6 +1512,8 @@ static void binder_transaction(struct binder_proc *proc, tmp->to_thread ? tmp->to_thread->pid : 0); return_error = BR_FAILED_REPLY; + return_error_param = -EPROTO; + return_error_line = __LINE__; goto err_bad_call_stack; } while (tmp) { @@ -1517,6 +1537,8 @@ static void binder_transaction(struct binder_proc *proc, t = kzalloc(sizeof(*t), GFP_KERNEL); if (t == NULL) { return_error = BR_FAILED_REPLY; + return_error_param = -ENOMEM; + return_error_line = __LINE__; goto err_alloc_t_failed; } binder_stats_created(BINDER_STAT_TRANSACTION); @@ -1524,6 +1546,8 @@ static void binder_transaction(struct binder_proc *proc, tcomplete = kzalloc(sizeof(*tcomplete), GFP_KERNEL); if (tcomplete == NULL) { return_error = BR_FAILED_REPLY; + return_error_param = -ENOMEM; + return_error_line = __LINE__; goto err_alloc_tcomplete_failed; } binder_stats_created(BINDER_STAT_TRANSACTION_COMPLETE); @@ -1566,8 +1590,15 @@ static void binder_transaction(struct binder_proc *proc, t->buffer = binder_alloc_new_buf(&target_proc->alloc, tr->data_size, tr->offsets_size, extra_buffers_size, !reply && (t->flags & TF_ONE_WAY)); - if (t->buffer == NULL) { - return_error = BR_FAILED_REPLY; + if (IS_ERR(t->buffer)) { + /* + * -ESRCH indicates VMA cleared. The target is dying. + */ + return_error_param = PTR_ERR(t->buffer); + return_error = return_error_param == -ESRCH ? 
+ BR_DEAD_REPLY : BR_FAILED_REPLY; + return_error_line = __LINE__; + t->buffer = NULL; goto err_binder_alloc_buf_failed; } t->buffer->allow_user_free = 0; @@ -1587,6 +1618,8 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got transaction with invalid data ptr\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; goto err_copy_data_failed; } if (copy_from_user(offp, (const void __user *)(uintptr_t) @@ -1594,12 +1627,16 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got transaction with invalid offsets ptr\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; goto err_copy_data_failed; } if (!IS_ALIGNED(tr->offsets_size, sizeof(binder_size_t))) { binder_user_error("%d:%d got transaction with invalid offsets size, %lld\n", proc->pid, thread->pid, (u64)tr->offsets_size); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_offset; } if (!IS_ALIGNED(extra_buffers_size, sizeof(u64))) { @@ -1607,6 +1644,8 @@ static void binder_transaction(struct binder_proc *proc, proc->pid, thread->pid, (u64)extra_buffers_size); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_offset; } off_end = (void *)off_start + tr->offsets_size; @@ -1623,6 +1662,8 @@ static void binder_transaction(struct binder_proc *proc, (u64)off_min, (u64)t->buffer->data_size); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_offset; } @@ -1637,6 +1678,8 @@ static void binder_transaction(struct binder_proc *proc, ret = binder_translate_binder(fp, t, thread); if (ret < 0) { return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; goto err_translate_failed; } } break; @@ -1648,6 +1691,8 @@ static void binder_transaction(struct binder_proc *proc, ret = binder_translate_handle(fp, t, thread); if (ret < 0) { return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; goto err_translate_failed; } } break; @@ -1659,6 +1704,8 @@ static void binder_transaction(struct binder_proc *proc, if (target_fd < 0) { return_error = BR_FAILED_REPLY; + return_error_param = target_fd; + return_error_line = __LINE__; goto err_translate_failed; } fp->pad_binder = 0; @@ -1675,6 +1722,8 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got transaction with invalid parent offset or type\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_parent; } if (!binder_validate_fixup(t->buffer, off_start, @@ -1684,12 +1733,16 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_parent; } ret = binder_translate_fd_array(fda, parent, t, thread, in_reply_to); if (ret < 0) { return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; goto err_translate_failed; } last_fixup_obj = parent; @@ -1705,6 +1758,8 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got transaction with too large buffer\n", proc->pid, thread->pid); 
return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_offset; } if (copy_from_user(sg_bufp, @@ -1712,7 +1767,9 @@ static void binder_transaction(struct binder_proc *proc, bp->buffer, bp->length)) { binder_user_error("%d:%d got transaction with invalid offsets ptr\n", proc->pid, thread->pid); + return_error_param = -EFAULT; return_error = BR_FAILED_REPLY; + return_error_line = __LINE__; goto err_copy_data_failed; } /* Fixup buffer pointer to target proc address space */ @@ -1727,6 +1784,8 @@ static void binder_transaction(struct binder_proc *proc, last_fixup_min_off); if (ret < 0) { return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; goto err_translate_failed; } last_fixup_obj = bp; @@ -1736,6 +1795,8 @@ static void binder_transaction(struct binder_proc *proc, binder_user_error("%d:%d got transaction with invalid object type, %x\n", proc->pid, thread->pid, hdr->type); return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; goto err_bad_object_type; } } @@ -1790,13 +1851,17 @@ err_dead_binder: err_invalid_target_handle: err_no_context_mgr_node: binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, - "%d:%d transaction failed %d, size %lld-%lld\n", - proc->pid, thread->pid, return_error, - (u64)tr->data_size, (u64)tr->offsets_size); + "%d:%d transaction failed %d/%d, size %lld-%lld line %d\n", + proc->pid, thread->pid, return_error, return_error_param, + (u64)tr->data_size, (u64)tr->offsets_size, + return_error_line); { struct binder_transaction_log_entry *fe; + e->return_error = return_error; + e->return_error_param = return_error_param; + e->return_error_line = return_error_line; fe = binder_transaction_log_add(&binder_transaction_log_failed); *fe = *e; } @@ -3623,11 +3688,13 @@ static void print_binder_transaction_log_entry(struct seq_file *m, struct binder_transaction_log_entry *e) { seq_printf(m, - "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d\n", + "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d\n", e->debug_id, (e->call_type == 2) ? "reply" : ((e->call_type == 1) ? "async" : "call "), e->from_proc, e->from_thread, e->to_proc, e->to_thread, e->context_name, - e->to_node, e->target_handle, e->data_size, e->offsets_size); + e->to_node, e->target_handle, e->data_size, e->offsets_size, + e->return_error, e->return_error_param, + e->return_error_line); } static int binder_transaction_log_show(struct seq_file *m, void *unused) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index eaef8f8fb859..16bc8a3b53f5 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -262,7 +262,7 @@ err_no_vma: up_write(&mm->mmap_sem); mmput(mm); } - return -ENOMEM; + return vma ? 
-ENOMEM : -ESRCH; } struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, @@ -278,11 +278,12 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, void *has_page_addr; void *end_page_addr; size_t size, data_offsets_size; + int ret; if (alloc->vma == NULL) { pr_err("%d: binder_alloc_buf, no vma\n", alloc->pid); - return NULL; + return ERR_PTR(-ESRCH); } data_offsets_size = ALIGN(data_size, sizeof(void *)) + @@ -292,21 +293,21 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: got transaction with invalid size %zd-%zd\n", alloc->pid, data_size, offsets_size); - return NULL; + return ERR_PTR(-EINVAL); } size = data_offsets_size + ALIGN(extra_buffers_size, sizeof(void *)); if (size < data_offsets_size || size < extra_buffers_size) { binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: got transaction with invalid extra_buffers_size %zd\n", alloc->pid, extra_buffers_size); - return NULL; + return ERR_PTR(-EINVAL); } if (is_async && alloc->free_async_space < size + sizeof(struct binder_buffer)) { binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: binder_alloc_buf size %zd failed, no async space left\n", alloc->pid, size); - return NULL; + return ERR_PTR(-ENOSPC); } while (n) { @@ -327,7 +328,7 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, if (best_fit == NULL) { pr_err("%d: binder_alloc_buf size %zd failed, no address space\n", alloc->pid, size); - return NULL; + return ERR_PTR(-ENOSPC); } if (n == NULL) { buffer = rb_entry(best_fit, struct binder_buffer, rb_node); @@ -350,9 +351,10 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, (void *)PAGE_ALIGN((uintptr_t)buffer->data + buffer_size); if (end_page_addr > has_page_addr) end_page_addr = has_page_addr; - if (binder_update_page_range(alloc, 1, - (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr, NULL)) - return NULL; + ret = binder_update_page_range(alloc, 1, + (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr, NULL); + if (ret) + return ERR_PTR(ret); rb_erase(best_fit, &alloc->free_buffers); buffer->free = 0; From 9b9340c58afa2cf2a76ae06f3019ad631e6f7799 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 24 May 2017 10:51:01 -0700 Subject: [PATCH 015/176] FROMLIST: binder: refactor queue management in binder_thread_read (from https://patchwork.kernel.org/patch/9817757/) In binder_thread_read, the BINDER_WORK_NODE command is used to communicate the references on the node to userspace. It can take a couple of iterations in the loop to construct the list of commands for user space. When locking is added, the lock would need to be released on each iteration, which means the state could change. The work item is not dequeued during this process, which prevents simpler queue management that could just dequeue up front and handle the work item. Fixed by changing the BINDER_WORK_NODE algorithm in binder_thread_read to determine which commands to send to userspace atomically in 1 pass so it stays consistent with the kernel view. The work item is now dequeued immediately since only 1 pass is needed.
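To make the single-pass shape concrete, here is a standalone C model of the algorithm (simplified, with made-up names; an illustrative sketch, not the driver source): the ref state is snapshotted once, the node flags are updated immediately, and only then are the BR_* commands implied by the snapshot emitted.

#include <stdbool.h>
#include <stdio.h>

struct node_model {
	int strong_refs;	/* stands in for internal/local strong refs */
	int weak_refs;		/* stands in for node->refs and local weak refs */
	bool has_strong_ref;
	bool has_weak_ref;
};

static void process_node_work(struct node_model *n)
{
	bool strong = n->strong_refs > 0;
	bool weak = n->weak_refs > 0 || strong;	/* strong implies weak */
	bool had_strong = n->has_strong_ref;	/* snapshot before updating */
	bool had_weak = n->has_weak_ref;

	/* kernel-side view is settled in one pass, before any command */
	n->has_weak_ref = weak;
	n->has_strong_ref = strong;

	/* commands are derived purely from the snapshot */
	if (weak && !had_weak)
		puts("BR_INCREFS");
	if (strong && !had_strong)
		puts("BR_ACQUIRE");
	if (!strong && had_strong)
		puts("BR_RELEASE");
	if (!weak && had_weak)
		puts("BR_DECREFS");
}

int main(void)
{
	struct node_model n = { .strong_refs = 1 };

	process_node_work(&n);	/* prints BR_INCREFS, then BR_ACQUIRE */
	return 0;
}

Because the state is settled before anything is copied to userspace, a future lock can be dropped between put_user() calls without the work item being observed half-processed.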
Change-Id: I9b4109997b2d53ba661867b14d7336cd076be06d Signed-off-by: Todd Kjos --- drivers/android/binder.c | 147 ++++++++++++++++++++++++--------------- 1 file changed, 92 insertions(+), 55 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 5990f7fbcbd3..4e7439506924 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2286,6 +2286,37 @@ static int binder_has_thread_work(struct binder_thread *thread) (thread->looper & BINDER_LOOPER_STATE_NEED_RETURN); } +static int binder_put_node_cmd(struct binder_proc *proc, + struct binder_thread *thread, + void __user **ptrp, + binder_uintptr_t node_ptr, + binder_uintptr_t node_cookie, + int node_debug_id, + uint32_t cmd, const char *cmd_name) +{ + void __user *ptr = *ptrp; + + if (put_user(cmd, (uint32_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(uint32_t); + + if (put_user(node_ptr, (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + + if (put_user(node_cookie, (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + + binder_stat_br(proc, thread, cmd); + binder_debug(BINDER_DEBUG_USER_REFS, "%d:%d %s %d u%016llx c%016llx\n", + proc->pid, thread->pid, cmd_name, node_debug_id, + (u64)node_ptr, (u64)node_cookie); + + *ptrp = ptr; + return 0; +} + static int binder_thread_read(struct binder_proc *proc, struct binder_thread *thread, binder_uintptr_t binder_buffer, size_t size, @@ -2411,72 +2442,78 @@ retry: } break; case BINDER_WORK_NODE: { struct binder_node *node = container_of(w, struct binder_node, work); - uint32_t cmd = BR_NOOP; - const char *cmd_name; - int strong = node->internal_strong_refs || node->local_strong_refs; - int weak = !hlist_empty(&node->refs) || node->local_weak_refs || strong; + int strong, weak; + binder_uintptr_t node_ptr = node->ptr; + binder_uintptr_t node_cookie = node->cookie; + int node_debug_id = node->debug_id; + int has_weak_ref; + int has_strong_ref; + void __user *orig_ptr = ptr; - if (weak && !node->has_weak_ref) { - cmd = BR_INCREFS; - cmd_name = "BR_INCREFS"; + BUG_ON(proc != node->proc); + strong = node->internal_strong_refs || + node->local_strong_refs; + weak = !hlist_empty(&node->refs) || + node->local_weak_refs || strong; + has_strong_ref = node->has_strong_ref; + has_weak_ref = node->has_weak_ref; + + if (weak && !has_weak_ref) { node->has_weak_ref = 1; node->pending_weak_ref = 1; node->local_weak_refs++; - } else if (strong && !node->has_strong_ref) { - cmd = BR_ACQUIRE; - cmd_name = "BR_ACQUIRE"; + } + if (strong && !has_strong_ref) { node->has_strong_ref = 1; node->pending_strong_ref = 1; node->local_strong_refs++; - } else if (!strong && node->has_strong_ref) { - cmd = BR_RELEASE; - cmd_name = "BR_RELEASE"; + } + if (!strong && has_strong_ref) node->has_strong_ref = 0; - } else if (!weak && node->has_weak_ref) { - cmd = BR_DECREFS; - cmd_name = "BR_DECREFS"; + if (!weak && has_weak_ref) node->has_weak_ref = 0; - } - if (cmd != BR_NOOP) { - if (put_user(cmd, (uint32_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(uint32_t); - if (put_user(node->ptr, - (binder_uintptr_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(binder_uintptr_t); - if (put_user(node->cookie, - (binder_uintptr_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(binder_uintptr_t); + list_del(&w->entry); - binder_stat_br(proc, thread, cmd); - binder_debug(BINDER_DEBUG_USER_REFS, - "%d:%d %s %d u%016llx c%016llx\n", - proc->pid, thread->pid, cmd_name, - node->debug_id, - (u64)node->ptr, (u64)node->cookie); - } 
else { - list_del_init(&w->entry); - if (!weak && !strong) { - binder_debug(BINDER_DEBUG_INTERNAL_REFS, - "%d:%d node %d u%016llx c%016llx deleted\n", - proc->pid, thread->pid, - node->debug_id, - (u64)node->ptr, - (u64)node->cookie); - rb_erase(&node->rb_node, &proc->nodes); - kfree(node); - binder_stats_deleted(BINDER_STAT_NODE); - } else { - binder_debug(BINDER_DEBUG_INTERNAL_REFS, - "%d:%d node %d u%016llx c%016llx state unchanged\n", - proc->pid, thread->pid, - node->debug_id, - (u64)node->ptr, - (u64)node->cookie); - } + if (!weak && !strong) { + binder_debug(BINDER_DEBUG_INTERNAL_REFS, + "%d:%d node %d u%016llx c%016llx deleted\n", + proc->pid, thread->pid, + node_debug_id, + (u64)node_ptr, + (u64)node_cookie); + rb_erase(&node->rb_node, &proc->nodes); + kfree(node); + binder_stats_deleted(BINDER_STAT_NODE); } + if (weak && !has_weak_ref) + ret = binder_put_node_cmd( + proc, thread, &ptr, node_ptr, + node_cookie, node_debug_id, + BR_INCREFS, "BR_INCREFS"); + if (!ret && strong && !has_strong_ref) + ret = binder_put_node_cmd( + proc, thread, &ptr, node_ptr, + node_cookie, node_debug_id, + BR_ACQUIRE, "BR_ACQUIRE"); + if (!ret && !strong && has_strong_ref) + ret = binder_put_node_cmd( + proc, thread, &ptr, node_ptr, + node_cookie, node_debug_id, + BR_RELEASE, "BR_RELEASE"); + if (!ret && !weak && has_weak_ref) + ret = binder_put_node_cmd( + proc, thread, &ptr, node_ptr, + node_cookie, node_debug_id, + BR_DECREFS, "BR_DECREFS"); + if (orig_ptr == ptr) + binder_debug(BINDER_DEBUG_INTERNAL_REFS, + "%d:%d node %d u%016llx c%016llx state unchanged\n", + proc->pid, thread->pid, + node_debug_id, + (u64)node_ptr, + (u64)node_cookie); + if (ret) + return ret; } break; case BINDER_WORK_DEAD_BINDER: case BINDER_WORK_DEAD_BINDER_AND_CLEAR: From 6ea6027115718c27e01e4459901c7938492cfa8f Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Tue, 9 May 2017 08:31:32 -0700 Subject: [PATCH 016/176] FROMLIST: binder: avoid race conditions when enqueuing txn (from https://patchwork.kernel.org/patch/9817813/) Currently, the transaction complete work item is queued after the transaction. This means that it is possible for the transaction to be handled and a reply to be enqueued in the current thread before the transaction complete is enqueued, which violates the protocol with userspace, which may not expect the transaction complete. Fixed by always enqueuing the transaction complete first. Also, once the transaction is enqueued, it is unsafe to access since it might be freed. Currently, t->flags is accessed to determine whether a sync wake is needed. Changed to access tr->flags instead.
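Condensed from the hunk below (resulting code only), the order in binder_transaction() becomes:

	tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
	list_add_tail(&tcomplete->entry, &thread->todo);	/* complete queued first */
	...
	t->work.type = BINDER_WORK_TRANSACTION;
	list_add_tail(&t->work.entry, target_list);	/* t may be freed from here on */
	if (target_wait) {
		if (reply || !(tr->flags & TF_ONE_WAY))	/* tr, not the queued t */
			wake_up_interruptible_sync(target_wait);
		else
			wake_up_interruptible(target_wait);
	}

After the second list_add_tail() only tr, the binder_transaction_data supplied by the caller, is dereferenced.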
Change-Id: I6c01566e167a39cf17c9027c3817618182e56975 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 4e7439506924..5b1e45cef89e 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1800,6 +1800,9 @@ static void binder_transaction(struct binder_proc *proc, goto err_bad_object_type; } } + tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; + list_add_tail(&tcomplete->entry, &thread->todo); + if (reply) { BUG_ON(t->buffer->async_transaction != 0); binder_pop_transaction(target_thread, in_reply_to); @@ -1819,10 +1822,8 @@ static void binder_transaction(struct binder_proc *proc, } t->work.type = BINDER_WORK_TRANSACTION; list_add_tail(&t->work.entry, target_list); - tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; - list_add_tail(&tcomplete->entry, &thread->todo); if (target_wait) { - if (reply || !(t->flags & TF_ONE_WAY)) + if (reply || !(tr->flags & TF_ONE_WAY)) wake_up_interruptible_sync(target_wait); else wake_up_interruptible(target_wait); From afda44d0aa52f4427a6867fe784997f307b7fe31 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 6 Jan 2017 14:19:25 -0800 Subject: [PATCH 017/176] FROMLIST: binder: don't modify thread->looper from other threads (from https://patchwork.kernel.org/patch/9817799/) The looper member of struct binder_thread is a bitmask of control bits. All of the existing bits are modified by the affected thread except for BINDER_LOOPER_STATE_NEED_RETURN which can be modified in binder_deferred_flush() by another thread. To avoid adding a spinlock around all read-mod-writes to modify a bit, the BINDER_LOOPER_STATE_NEED_RETURN flag is replaced by a separate field in struct binder_thread. 
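The resulting single-writer rule, as a minimal compilable C sketch (struct pared down to the two fields involved; model_deferred_flush is a hypothetical stand-in for binder_deferred_flush):

#include <stdbool.h>

struct thread_model {
	int looper;			/* bitmask, written only by the owning thread */
	bool looper_need_return;	/* may be written by another thread */
};

/* A plain store on a dedicated field replaces a read-modify-write
 * on the shared 'looper' bitmask, so looper bits set concurrently
 * by the owning thread can no longer be lost. */
static void model_deferred_flush(struct thread_model *t)
{
	t->looper_need_return = true;
}

int main(void)
{
	struct thread_model t = { 0 };

	model_deferred_flush(&t);
	return t.looper_need_return ? 0 : 1;
}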
Bug: 33250092 32225111 Change-Id: Ia4cefbdbd683c6cb17c323ba7d278de5f2ca0745 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 5b1e45cef89e..6eb9fc064846 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -334,14 +334,14 @@ enum { BINDER_LOOPER_STATE_EXITED = 0x04, BINDER_LOOPER_STATE_INVALID = 0x08, BINDER_LOOPER_STATE_WAITING = 0x10, - BINDER_LOOPER_STATE_NEED_RETURN = 0x20 }; struct binder_thread { struct binder_proc *proc; struct rb_node rb_node; int pid; - int looper; + int looper; /* only modified by this thread */ + bool looper_need_return; /* can be written by other thread */ struct binder_transaction *transaction_stack; struct list_head todo; uint32_t return_error; /* Write failed, return error code in read buf */ @@ -2277,14 +2277,13 @@ static void binder_stat_br(struct binder_proc *proc, static int binder_has_proc_work(struct binder_proc *proc, struct binder_thread *thread) { - return !list_empty(&proc->todo) || - (thread->looper & BINDER_LOOPER_STATE_NEED_RETURN); + return !list_empty(&proc->todo) || thread->looper_need_return; } static int binder_has_thread_work(struct binder_thread *thread) { return !list_empty(&thread->todo) || thread->return_error != BR_OK || - (thread->looper & BINDER_LOOPER_STATE_NEED_RETURN); + thread->looper_need_return; } static int binder_put_node_cmd(struct binder_proc *proc, @@ -2413,8 +2412,7 @@ retry: entry); } else { /* no data added */ - if (ptr - buffer == 4 && - !(thread->looper & BINDER_LOOPER_STATE_NEED_RETURN)) + if (ptr - buffer == 4 && !thread->looper_need_return) goto retry; break; } @@ -2728,7 +2726,7 @@ static struct binder_thread *binder_get_thread(struct binder_proc *proc) INIT_LIST_HEAD(&thread->todo); rb_link_node(&thread->rb_node, parent, p); rb_insert_color(&thread->rb_node, &proc->threads); - thread->looper |= BINDER_LOOPER_STATE_NEED_RETURN; + thread->looper_need_return = true; thread->return_error = BR_OK; thread->return_error2 = BR_OK; } @@ -2984,7 +2982,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ret = 0; err: if (thread) - thread->looper &= ~BINDER_LOOPER_STATE_NEED_RETURN; + thread->looper_need_return = false; binder_unlock(__func__); wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret && ret != -ERESTARTSYS) @@ -3139,7 +3137,7 @@ static void binder_deferred_flush(struct binder_proc *proc) for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) { struct binder_thread *thread = rb_entry(n, struct binder_thread, rb_node); - thread->looper |= BINDER_LOOPER_STATE_NEED_RETURN; + thread->looper_need_return = true; if (thread->looper & BINDER_LOOPER_STATE_WAITING) { wake_up_interruptible(&thread->wait); wake_count++; @@ -3400,7 +3398,9 @@ static void print_binder_thread(struct seq_file *m, size_t start_pos = m->count; size_t header_pos; - seq_printf(m, " thread %d: l %02x\n", thread->pid, thread->looper); + seq_printf(m, " thread %d: l %02x need_return %d\n", + thread->pid, thread->looper, + thread->looper_need_return); header_pos = m->count; t = thread->transaction_stack; while (t) { From ce9b7747d6a1cb1b4b772b4e2c524ba1a56b551b Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 1 May 2017 17:21:51 -0700 Subject: [PATCH 018/176] FROMLIST: binder: remove dead code in binder_get_ref_for_node (from https://patchwork.kernel.org/patch/9817819/) node is always non-NULL in 
binder_get_ref_for_node so the conditional and else clause are not needed Change-Id: I23f011ba59e1869d9577e6bf28e1f1dd38f45713 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 6eb9fc064846..812c86bae4db 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -671,18 +671,12 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, } rb_link_node(&new_ref->rb_node_desc, parent, p); rb_insert_color(&new_ref->rb_node_desc, &proc->refs_by_desc); - if (node) { - hlist_add_head(&new_ref->node_entry, &node->refs); + hlist_add_head(&new_ref->node_entry, &node->refs); - binder_debug(BINDER_DEBUG_INTERNAL_REFS, - "%d new ref %d desc %d for node %d\n", - proc->pid, new_ref->debug_id, new_ref->desc, - node->debug_id); - } else { - binder_debug(BINDER_DEBUG_INTERNAL_REFS, - "%d new ref %d desc %d for dead node\n", - proc->pid, new_ref->debug_id, new_ref->desc); - } + binder_debug(BINDER_DEBUG_INTERNAL_REFS, + "%d new ref %d desc %d for node %d\n", + proc->pid, new_ref->debug_id, new_ref->desc, + node->debug_id); return new_ref; } From db51658467e03c756b7de23eb03ebeb63e607d4c Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 21 Apr 2017 14:32:11 -0700 Subject: [PATCH 019/176] FROMLIST: binder: protect against two threads freeing buffer (from https://patchwork.kernel.org/patch/9817815/) Adds protection against malicious user code freeing the same buffer at the same time which could cause a crash. Cannot happen under normal use. Bug: 36650912 Change-Id: I43e078cbf31c0789aaff5ceaf8f1a94c75f79d45 Test: tested manually Signed-off-by: Todd Kjos --- drivers/android/binder.c | 4 ++-- drivers/android/binder_alloc.c | 22 +++++++++++++++++----- drivers/android/binder_alloc.h | 7 ++++--- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 812c86bae4db..d484655133ee 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2025,8 +2025,8 @@ static int binder_thread_write(struct binder_proc *proc, return -EFAULT; ptr += sizeof(binder_uintptr_t); - buffer = binder_alloc_buffer_lookup(&proc->alloc, - data_ptr); + buffer = binder_alloc_prepare_to_free(&proc->alloc, + data_ptr); if (buffer == NULL) { binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n", proc->pid, thread->pid, (u64)data_ptr); diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 16bc8a3b53f5..8f79df3f3613 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -116,7 +116,7 @@ static void binder_insert_allocated_buffer_locked( rb_insert_color(&new_buffer->rb_node, &alloc->allocated_buffers); } -static struct binder_buffer *binder_alloc_buffer_lookup_locked( +static struct binder_buffer *binder_alloc_prepare_to_free_locked( struct binder_alloc *alloc, uintptr_t user_ptr) { @@ -135,8 +135,19 @@ static struct binder_buffer *binder_alloc_buffer_lookup_locked( n = n->rb_left; else if (kern_ptr > buffer) n = n->rb_right; - else + else { + /* + * Guard against user threads attempting to + * free the buffer twice + */ + if (buffer->free_in_progress) { + pr_err("%d:%d FREE_BUFFER u%016llx user freed buffer twice\n", + alloc->pid, current->pid, (u64)user_ptr); + return NULL; + } + buffer->free_in_progress = 1; return buffer; + } } return NULL; } @@ -152,13 +163,13 @@ static struct binder_buffer *binder_alloc_buffer_lookup_locked( * * 
Return: Pointer to buffer or NULL */ -struct binder_buffer *binder_alloc_buffer_lookup(struct binder_alloc *alloc, - uintptr_t user_ptr) +struct binder_buffer *binder_alloc_prepare_to_free(struct binder_alloc *alloc, + uintptr_t user_ptr) { struct binder_buffer *buffer; mutex_lock(&alloc->mutex); - buffer = binder_alloc_buffer_lookup_locked(alloc, user_ptr); + buffer = binder_alloc_prepare_to_free_locked(alloc, user_ptr); mutex_unlock(&alloc->mutex); return buffer; } @@ -358,6 +369,7 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, rb_erase(best_fit, &alloc->free_buffers); buffer->free = 0; + buffer->free_in_progress = 0; binder_insert_allocated_buffer_locked(alloc, buffer); if (buffer_size != size) { struct binder_buffer *new_buffer = (void *)buffer->data + size; diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index 721c511431f9..088e4ffc6230 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -48,7 +48,8 @@ struct binder_buffer { unsigned free:1; unsigned allow_user_free:1; unsigned async_transaction:1; - unsigned debug_id:29; + unsigned free_in_progress:1; + unsigned debug_id:28; struct binder_transaction *transaction; @@ -109,8 +110,8 @@ extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, extern void binder_alloc_init(struct binder_alloc *alloc); extern void binder_alloc_vma_close(struct binder_alloc *alloc); extern struct binder_buffer * -binder_alloc_buffer_lookup(struct binder_alloc *alloc, - uintptr_t user_ptr); +binder_alloc_prepare_to_free(struct binder_alloc *alloc, + uintptr_t user_ptr); extern void binder_alloc_free_buf(struct binder_alloc *alloc, struct binder_buffer *buffer); extern int binder_alloc_mmap_handler(struct binder_alloc *alloc, From 42e1ca789418a06b5ed727f527bb0f329bc28c26 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 15 Mar 2017 18:22:52 +0100 Subject: [PATCH 020/176] FROMLIST: binder: add more debug info when allocation fails. 
(from https://patchwork.kernel.org/patch/9817797/) Bug: 36088202 Test: tested manually Change-Id: Ib526a9c375e6136669b72f341e0b54d896fd1cec Signed-off-by: Martijn Coenen Signed-off-by: Siqi Lin --- drivers/android/binder_alloc.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 8f79df3f3613..aabfebac6e57 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -337,8 +337,36 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, } } if (best_fit == NULL) { + size_t allocated_buffers = 0; + size_t largest_alloc_size = 0; + size_t total_alloc_size = 0; + size_t free_buffers = 0; + size_t largest_free_size = 0; + size_t total_free_size = 0; + + for (n = rb_first(&alloc->allocated_buffers); n != NULL; + n = rb_next(n)) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + buffer_size = binder_alloc_buffer_size(alloc, buffer); + allocated_buffers++; + total_alloc_size += buffer_size; + if (buffer_size > largest_alloc_size) + largest_alloc_size = buffer_size; + } + for (n = rb_first(&alloc->free_buffers); n != NULL; + n = rb_next(n)) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + buffer_size = binder_alloc_buffer_size(alloc, buffer); + free_buffers++; + total_free_size += buffer_size; + if (buffer_size > largest_free_size) + largest_free_size = buffer_size; + } pr_err("%d: binder_alloc_buf size %zd failed, no address space\n", alloc->pid, size); + pr_err("allocated: %zd (num: %zd largest: %zd), free: %zd (num: %zd largest: %zd)\n", + total_alloc_size, allocated_buffers, largest_alloc_size, + total_free_size, free_buffers, largest_free_size); return ERR_PTR(-ENOSPC); } if (n == NULL) { @@ -698,9 +726,10 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) static void print_binder_buffer(struct seq_file *m, const char *prefix, struct binder_buffer *buffer) { - seq_printf(m, "%s %d: %pK size %zd:%zd %s\n", + seq_printf(m, "%s %d: %pK size %zd:%zd:%zd %s\n", prefix, buffer->debug_id, buffer->data, buffer->data_size, buffer->offsets_size, + buffer->extra_buffers_size, buffer->transaction ? "active" : "delivered"); } From 0f32aeb35f8b8c3d07f1c03062cc195590145d35 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 24 May 2017 13:33:28 -0700 Subject: [PATCH 021/176] FROMLIST: binder: use atomic for transaction_log index (from https://patchwork.kernel.org/patch/9817807/) The log->next index for the transaction log was not protected when incremented. This led to a case where log->next++ resulted in an index larger than ARRAY_SIZE(log->entry) and eventually a bad access to memory. 
Fixed by making the log index an atomic_t and converting it to an array index by using "% ARRAY_SIZE(log->entry)". Also added a "complete" field to the log entry, which is written last to tell the print code whether the entry is complete. Bug: 62038227 Test: tested manually Change-Id: I1bb1c1a332a6ac458a626f5bedd05022b56b91f2 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 74 +++++++++++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 16 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d484655133ee..33d7e981324d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -187,6 +187,7 @@ static inline void binder_stats_created(enum binder_stat_types type) struct binder_transaction_log_entry { int debug_id; + int debug_id_done; int call_type; int from_proc; int from_thread; @@ -202,8 +203,8 @@ struct binder_transaction_log_entry { const char *context_name; }; struct binder_transaction_log { - int next; - int full; + atomic_t cur; + bool full; struct binder_transaction_log_entry entry[32]; }; static struct binder_transaction_log binder_transaction_log; @@ -213,14 +214,19 @@ static struct binder_transaction_log_entry *binder_transaction_log_add( struct binder_transaction_log *log) { struct binder_transaction_log_entry *e; + unsigned int cur = atomic_inc_return(&log->cur); - e = &log->entry[log->next]; - memset(e, 0, sizeof(*e)); - log->next++; - if (log->next == ARRAY_SIZE(log->entry)) { - log->next = 0; + if (cur >= ARRAY_SIZE(log->entry)) log->full = 1; - } + e = &log->entry[cur % ARRAY_SIZE(log->entry)]; + WRITE_ONCE(e->debug_id_done, 0); + /* + * write-barrier to synchronize access to e->debug_id_done. + * We make sure the initialized 0 value is seen before + * memset() other fields are zeroed by memset. + */ + smp_wmb(); + memset(e, 0, sizeof(*e)); return e; } @@ -1401,8 +1407,10 @@ static void binder_transaction(struct binder_proc *proc, struct binder_buffer_object *last_fixup_obj = NULL; binder_size_t last_fixup_min_off = 0; struct binder_context *context = proc->context; + int t_debug_id = atomic_inc_return(&binder_last_id); e = binder_transaction_log_add(&binder_transaction_log); + e->debug_id = t_debug_id; e->call_type = reply ?
2 : !!(tr->flags & TF_ONE_WAY); e->from_proc = proc->pid; e->from_thread = thread->pid; @@ -1546,8 +1554,7 @@ static void binder_transaction(struct binder_proc *proc, } binder_stats_created(BINDER_STAT_TRANSACTION_COMPLETE); - t->debug_id = atomic_inc_return(&binder_last_id); - e->debug_id = t->debug_id; + t->debug_id = t_debug_id; if (reply) binder_debug(BINDER_DEBUG_TRANSACTION, @@ -1822,6 +1829,12 @@ static void binder_transaction(struct binder_proc *proc, else wake_up_interruptible(target_wait); } + /* + * write barrier to synchronize with initialization + * of log entry + */ + smp_wmb(); + WRITE_ONCE(e->debug_id_done, t_debug_id); return; err_translate_failed: @@ -1859,6 +1872,13 @@ err_no_context_mgr_node: e->return_error_line = return_error_line; fe = binder_transaction_log_add(&binder_transaction_log_failed); *fe = *e; + /* + * write barrier to synchronize with initialization + * of log entry + */ + smp_wmb(); + WRITE_ONCE(e->debug_id_done, t_debug_id); + WRITE_ONCE(fe->debug_id_done, t_debug_id); } BUG_ON(thread->return_error != BR_OK); @@ -3719,27 +3739,47 @@ static int binder_proc_show(struct seq_file *m, void *unused) static void print_binder_transaction_log_entry(struct seq_file *m, struct binder_transaction_log_entry *e) { + int debug_id = READ_ONCE(e->debug_id_done); + /* + * read barrier to guarantee debug_id_done read before + * we print the log values + */ + smp_rmb(); seq_printf(m, - "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d\n", + "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d", e->debug_id, (e->call_type == 2) ? "reply" : ((e->call_type == 1) ? "async" : "call "), e->from_proc, e->from_thread, e->to_proc, e->to_thread, e->context_name, e->to_node, e->target_handle, e->data_size, e->offsets_size, e->return_error, e->return_error_param, e->return_error_line); + /* + * read-barrier to guarantee read of debug_id_done after + * done printing the fields of the entry + */ + smp_rmb(); + seq_printf(m, debug_id && debug_id == READ_ONCE(e->debug_id_done) ? + "\n" : " (incomplete)\n"); } static int binder_transaction_log_show(struct seq_file *m, void *unused) { struct binder_transaction_log *log = m->private; + unsigned int log_cur = atomic_read(&log->cur); + unsigned int count; + unsigned int cur; int i; - if (log->full) { - for (i = log->next; i < ARRAY_SIZE(log->entry); i++) - print_binder_transaction_log_entry(m, &log->entry[i]); + count = log_cur + 1; + cur = count < ARRAY_SIZE(log->entry) && !log->full ? 
+ 0 : count % ARRAY_SIZE(log->entry); + if (count > ARRAY_SIZE(log->entry) || log->full) + count = ARRAY_SIZE(log->entry); + for (i = 0; i < count; i++) { + unsigned int index = cur++ % ARRAY_SIZE(log->entry); + + print_binder_transaction_log_entry(m, &log->entry[index]); } - for (i = 0; i < log->next; i++) - print_binder_transaction_log_entry(m, &log->entry[i]); return 0; } @@ -3794,6 +3834,8 @@ static int __init binder_init(void) struct binder_device *device; struct hlist_node *tmp; + atomic_set(&binder_transaction_log.cur, ~0U); + atomic_set(&binder_transaction_log_failed.cur, ~0U); binder_deferred_workqueue = create_singlethread_workqueue("binder"); if (!binder_deferred_workqueue) return -ENOMEM; From 162735381739bb4f3e5ad6f35a6319d99ee7e697 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Thu, 30 Mar 2017 18:02:13 -0700 Subject: [PATCH 022/176] FROMLIST: binder: refactor binder_pop_transaction (from https://lkml.org/lkml/2017/6/29/754) binder_pop_transaction needs to be split into 2 pieces to allow the proc lock to be held on entry to dequeue the transaction stack, but no lock when kfree'ing the transaction. Split into binder_pop_transaction_locked and binder_free_transaction (the actual locks are still to be added). Change-Id: I848ae994cc27b3cd083cff2dbd1071762784f4a3 Test: tested manually Signed-off-by: Todd Kjos --- drivers/android/binder.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 33d7e981324d..2006fb7cd69a 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -769,14 +769,16 @@ static int binder_dec_ref(struct binder_ref *ref, int strong) static void binder_pop_transaction(struct binder_thread *target_thread, struct binder_transaction *t) { - if (target_thread) { - BUG_ON(target_thread->transaction_stack != t); - BUG_ON(target_thread->transaction_stack->from != target_thread); - target_thread->transaction_stack = - target_thread->transaction_stack->from_parent; - t->from = NULL; - } - t->need_reply = 0; + BUG_ON(!target_thread); + BUG_ON(target_thread->transaction_stack != t); + BUG_ON(target_thread->transaction_stack->from != target_thread); + target_thread->transaction_stack = + target_thread->transaction_stack->from_parent; + t->from = NULL; +} + +static void binder_free_transaction(struct binder_transaction *t) +{ if (t->buffer) t->buffer->transaction = NULL; kfree(t); @@ -809,6 +811,7 @@ static void binder_send_failed_reply(struct binder_transaction *t, binder_pop_transaction(target_thread, t); target_thread->return_error = error_code; wake_up_interruptible(&target_thread->wait); + binder_free_transaction(t); } else { pr_err("reply failed, target thread, %d:%d, has error code %d already\n", target_thread->proc->pid, @@ -823,7 +826,7 @@ static void binder_send_failed_reply(struct binder_transaction *t, "send failed reply for transaction %d, target dead\n", t->debug_id); - binder_pop_transaction(target_thread, t); + binder_free_transaction(t); if (next == NULL) { binder_debug(BINDER_DEBUG_DEAD_BINDER, "reply failed, no target thread at root\n"); @@ -1807,6 +1810,7 @@ static void binder_transaction(struct binder_proc *proc, if (reply) { BUG_ON(t->buffer->async_transaction != 0); binder_pop_transaction(target_thread, in_reply_to); + binder_free_transaction(in_reply_to); } else if (!(t->flags & TF_ONE_WAY)) { BUG_ON(t->buffer->async_transaction != 0); t->need_reply = 1; @@ -2636,9 +2640,7 @@ retry: t->to_thread = thread; thread->transaction_stack = 
t; } else { - t->buffer->transaction = NULL; - kfree(t); - binder_stats_deleted(BINDER_STAT_TRANSACTION); + binder_free_transaction(t); } break; } @@ -2681,9 +2683,7 @@ static void binder_release_work(struct list_head *list) binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, "undelivered transaction %d\n", t->debug_id); - t->buffer->transaction = NULL; - kfree(t); - binder_stats_deleted(BINDER_STAT_TRANSACTION); + binder_free_transaction(t); } } break; case BINDER_WORK_TRANSACTION_COMPLETE: { From 3a822b33c84bf30d2081ede2c42d53ae7ab89d85 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 21 Apr 2017 17:35:12 -0700 Subject: [PATCH 023/176] FROMLIST: binder: guarantee txn complete / errors delivered in-order (from https://patchwork.kernel.org/patch/9817805/) Since errors are tracked in the return_error/return_error2 fields of the binder_thread object and BR_TRANSACTION_COMPLETEs can be tracked either in those fields or via the thread todo work list, it is possible for errors to be reported ahead of the associated txn complete. Use the thread todo work list for errors to guarantee order. Also changed binder_send_failed_reply to pop the transaction even if it failed to send a reply. Bug: 37218618 Test: tested manually Change-Id: I196cfaeed09fdcd697f8ab25eea4e04241fdb08f Signed-off-by: Todd Kjos --- drivers/android/binder.c | 125 ++++++++++++++++++++++----------------- 1 file changed, 72 insertions(+), 53 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 2006fb7cd69a..f62919855936 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -249,6 +249,7 @@ struct binder_work { enum { BINDER_WORK_TRANSACTION = 1, BINDER_WORK_TRANSACTION_COMPLETE, + BINDER_WORK_RETURN_ERROR, BINDER_WORK_NODE, BINDER_WORK_DEAD_BINDER, BINDER_WORK_DEAD_BINDER_AND_CLEAR, @@ -256,6 +257,11 @@ struct binder_work { } type; }; +struct binder_error { + struct binder_work work; + uint32_t cmd; +}; + struct binder_node { int debug_id; struct binder_work work; @@ -350,10 +356,8 @@ struct binder_thread { bool looper_need_return; /* can be written by other thread */ struct binder_transaction *transaction_stack; struct list_head todo; - uint32_t return_error; /* Write failed, return error code in read buf */ - uint32_t return_error2; /* Write failed, return error code in read */ - /* buffer. 
Used when sending a reply to a dead process that */ - /* we are also waiting on */ + struct binder_error return_error; + struct binder_error reply_error; wait_queue_head_t wait; struct binder_stats stats; }; @@ -795,29 +799,24 @@ static void binder_send_failed_reply(struct binder_transaction *t, while (1) { target_thread = t->from; if (target_thread) { - if (target_thread->return_error != BR_OK && - target_thread->return_error2 == BR_OK) { - target_thread->return_error2 = - target_thread->return_error; - target_thread->return_error = BR_OK; - } - if (target_thread->return_error == BR_OK) { - binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, - "send failed reply for transaction %d to %d:%d\n", - t->debug_id, - target_thread->proc->pid, - target_thread->pid); + binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, + "send failed reply for transaction %d to %d:%d\n", + t->debug_id, + target_thread->proc->pid, + target_thread->pid); - binder_pop_transaction(target_thread, t); - target_thread->return_error = error_code; + binder_pop_transaction(target_thread, t); + if (target_thread->reply_error.cmd == BR_OK) { + target_thread->reply_error.cmd = error_code; + list_add_tail( + &target_thread->reply_error.work.entry, + &target_thread->todo); wake_up_interruptible(&target_thread->wait); - binder_free_transaction(t); } else { - pr_err("reply failed, target thread, %d:%d, has error code %d already\n", - target_thread->proc->pid, - target_thread->pid, - target_thread->return_error); + WARN(1, "Unexpected reply error: %u\n", + target_thread->reply_error.cmd); } + binder_free_transaction(t); return; } next = t->from_parent; @@ -1885,12 +1884,17 @@ err_no_context_mgr_node: WRITE_ONCE(fe->debug_id_done, t_debug_id); } - BUG_ON(thread->return_error != BR_OK); + BUG_ON(thread->return_error.cmd != BR_OK); if (in_reply_to) { - thread->return_error = BR_TRANSACTION_COMPLETE; + thread->return_error.cmd = BR_TRANSACTION_COMPLETE; + list_add_tail(&thread->return_error.work.entry, + &thread->todo); binder_send_failed_reply(in_reply_to, return_error); - } else - thread->return_error = return_error; + } else { + thread->return_error.cmd = return_error; + list_add_tail(&thread->return_error.work.entry, + &thread->todo); + } } static int binder_thread_write(struct binder_proc *proc, @@ -1904,7 +1908,7 @@ static int binder_thread_write(struct binder_proc *proc, void __user *ptr = buffer + *consumed; void __user *end = buffer + size; - while (ptr < end && thread->return_error == BR_OK) { + while (ptr < end && thread->return_error.cmd == BR_OK) { if (get_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); @@ -2184,7 +2188,12 @@ static int binder_thread_write(struct binder_proc *proc, } death = kzalloc(sizeof(*death), GFP_KERNEL); if (death == NULL) { - thread->return_error = BR_ERROR; + WARN_ON(thread->return_error.cmd != + BR_OK); + thread->return_error.cmd = BR_ERROR; + list_add_tail( + &thread->return_error.work.entry, + &thread->todo); binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n", proc->pid, thread->pid); @@ -2300,8 +2309,7 @@ static int binder_has_proc_work(struct binder_proc *proc, static int binder_has_thread_work(struct binder_thread *thread) { - return !list_empty(&thread->todo) || thread->return_error != BR_OK || - thread->looper_need_return; + return !list_empty(&thread->todo) || thread->looper_need_return; } static int binder_put_node_cmd(struct binder_proc *proc, @@ -2357,25 +2365,6 @@ retry: wait_for_proc_work = thread->transaction_stack == NULL 
&& list_empty(&thread->todo); - if (thread->return_error != BR_OK && ptr < end) { - if (thread->return_error2 != BR_OK) { - if (put_user(thread->return_error2, (uint32_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(uint32_t); - binder_stat_br(proc, thread, thread->return_error2); - if (ptr == end) - goto done; - thread->return_error2 = BR_OK; - } - if (put_user(thread->return_error, (uint32_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(uint32_t); - binder_stat_br(proc, thread, thread->return_error); - thread->return_error = BR_OK; - goto done; - } - - thread->looper |= BINDER_LOOPER_STATE_WAITING; if (wait_for_proc_work) proc->ready_threads++; @@ -2442,6 +2431,19 @@ retry: case BINDER_WORK_TRANSACTION: { t = container_of(w, struct binder_transaction, work); } break; + case BINDER_WORK_RETURN_ERROR: { + struct binder_error *e = container_of( + w, struct binder_error, work); + + WARN_ON(e->cmd == BR_OK); + if (put_user(e->cmd, (uint32_t __user *)ptr)) + return -EFAULT; + e->cmd = BR_OK; + ptr += sizeof(uint32_t); + + binder_stat_br(proc, thread, cmd); + list_del(&w->entry); + } break; case BINDER_WORK_TRANSACTION_COMPLETE: { cmd = BR_TRANSACTION_COMPLETE; if (put_user(cmd, (uint32_t __user *)ptr)) @@ -2686,6 +2688,14 @@ static void binder_release_work(struct list_head *list) binder_free_transaction(t); } } break; + case BINDER_WORK_RETURN_ERROR: { + struct binder_error *e = container_of( + w, struct binder_error, work); + + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "undelivered TRANSACTION_ERROR: %u\n", + e->cmd); + } break; case BINDER_WORK_TRANSACTION_COMPLETE: { binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, "undelivered TRANSACTION_COMPLETE\n"); @@ -2741,8 +2751,10 @@ static struct binder_thread *binder_get_thread(struct binder_proc *proc) rb_link_node(&thread->rb_node, parent, p); rb_insert_color(&thread->rb_node, &proc->threads); thread->looper_need_return = true; - thread->return_error = BR_OK; - thread->return_error2 = BR_OK; + thread->return_error.work.type = BINDER_WORK_RETURN_ERROR; + thread->return_error.cmd = BR_OK; + thread->reply_error.work.type = BINDER_WORK_RETURN_ERROR; + thread->reply_error.cmd = BR_OK; } return thread; } @@ -2800,7 +2812,7 @@ static unsigned int binder_poll(struct file *filp, thread = binder_get_thread(proc); wait_for_proc_work = thread->transaction_stack == NULL && - list_empty(&thread->todo) && thread->return_error == BR_OK; + list_empty(&thread->todo); binder_unlock(__func__); @@ -3379,6 +3391,13 @@ static void print_binder_work(struct seq_file *m, const char *prefix, t = container_of(w, struct binder_transaction, work); print_binder_transaction(m, transaction_prefix, t); break; + case BINDER_WORK_RETURN_ERROR: { + struct binder_error *e = container_of( + w, struct binder_error, work); + + seq_printf(m, "%stransaction error: %u\n", + prefix, e->cmd); + } break; case BINDER_WORK_TRANSACTION_COMPLETE: seq_printf(m, "%stransaction complete\n", prefix); break; From f80cbc72e1b892d9df17c84050891a6bb53cbeac Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 26 May 2017 11:56:29 -0700 Subject: [PATCH 024/176] FROMLIST: binder: make sure target_node has strong ref (from https://patchwork.kernel.org/patch/9817787/) When initiating a transaction, the target_node must have a strong ref on it. Then we take a second strong ref to make sure the node survives until the transaction is complete. 
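Condensed from the hunks below (handle case shown; the context-manager path takes the same extra reference), the pattern is to take the additional strong reference at lookup time and drop it on every failure path:

	ref = binder_get_ref(proc, tr->target.handle, true);
	if (ref) {
		binder_inc_node(ref->node, 1, 0, NULL);	/* held across the transaction */
		target_node = ref->node;
	}
	...
err_invalid_target_handle:
err_no_context_mgr_node:
	if (target_node)
		binder_dec_node(target_node, 1, 0);

On the success path the reference is kept until the transaction completes, which is why the old conditional binder_inc_node() after the buffer allocation can be removed.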
Change-Id: If7429cb43eda520ab89d45df6c19327cee97c60c Signed-off-by: Todd Kjos --- drivers/android/binder.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index f62919855936..0dec6d6c66ca 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1470,8 +1470,19 @@ static void binder_transaction(struct binder_proc *proc, if (tr->target.handle) { struct binder_ref *ref; + /* + * There must already be a strong ref + * on this node. If so, do a strong + * increment on the node to ensure it + * stays alive until the transaction is + * done. + */ ref = binder_get_ref(proc, tr->target.handle, true); - if (ref == NULL) { + if (ref) { + binder_inc_node(ref->node, 1, 0, NULL); + target_node = ref->node; + } + if (target_node == NULL) { binder_user_error("%d:%d got transaction to invalid handle\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; @@ -1479,7 +1490,6 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_invalid_target_handle; } - target_node = ref->node; } else { mutex_lock(&context->context_mgr_node_lock); target_node = context->binder_context_mgr_node; @@ -1489,6 +1499,7 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_no_context_mgr_node; } + binder_inc_node(target_node, 1, 0, NULL); mutex_unlock(&context->context_mgr_node_lock); } e->to_node = target_node->debug_id; @@ -1609,9 +1620,6 @@ static void binder_transaction(struct binder_proc *proc, t->buffer->transaction = t; t->buffer->target_node = target_node; trace_binder_transaction_alloc_buf(t->buffer); - if (target_node) - binder_inc_node(target_node, 1, 0, NULL); - off_start = (binder_size_t *)(t->buffer->data + ALIGN(tr->data_size, sizeof(void *))); offp = off_start; @@ -1847,6 +1855,7 @@ err_bad_parent: err_copy_data_failed: trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, t->buffer, offp); + target_node = NULL; t->buffer->transaction = NULL; binder_alloc_free_buf(&target_proc->alloc, t->buffer); err_binder_alloc_buf_failed: @@ -1861,6 +1870,9 @@ err_empty_call_stack: err_dead_binder: err_invalid_target_handle: err_no_context_mgr_node: + if (target_node) + binder_dec_node(target_node, 1, 0); + binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d transaction failed %d/%d, size %lld-%lld line %d\n", proc->pid, thread->pid, return_error, return_error_param, From e482ec39d635cd1bcf764f2aefbc4ab3149d2299 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 12 May 2017 14:42:55 -0700 Subject: [PATCH 025/176] FROMLIST: binder: make sure accesses to proc/thread are safe (from https://patchwork.kernel.org/patch/9817787/) binder_thread and binder_proc may be accessed by other threads when processing transaction. Therefore they must be prevented from being freed while a transaction is in progress that references them. This is done by introducing a temporary reference counter for threads and procs that indicates that the object is in use and must not be freed. binder_thread_dec_tmpref() and binder_proc_dec_tmpref() are used to decrement the temporary reference. It is safe to free a binder_thread if there is no reference and it has been released (indicated by thread->is_dead). It is safe to free a binder_proc if it has no remaining threads and no reference. 
A spinlock is added to the binder_transaction to safely access and set references for t->from and for debug code to safely access t->to_thread and t->to_proc. Change-Id: I0a00a0294c3e93aea8b3f141c6f18e77ad244078 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 233 ++++++++++++++++++++++++++++++++++----- 1 file changed, 206 insertions(+), 27 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 0dec6d6c66ca..a16375e133cc 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -325,6 +325,7 @@ struct binder_proc { struct files_struct *files; struct hlist_node deferred_work_node; int deferred_work; + bool is_dead; struct list_head todo; wait_queue_head_t wait; @@ -334,6 +335,7 @@ struct binder_proc { int requested_threads; int requested_threads_started; int ready_threads; + int tmp_ref; long default_priority; struct dentry *debugfs_entry; struct binder_alloc alloc; @@ -360,6 +362,8 @@ struct binder_thread { struct binder_error reply_error; wait_queue_head_t wait; struct binder_stats stats; + atomic_t tmp_ref; + bool is_dead; }; struct binder_transaction { @@ -379,10 +383,19 @@ struct binder_transaction { long priority; long saved_priority; kuid_t sender_euid; + /** + * @lock: protects @from, @to_proc, and @to_thread + * + * @from, @to_proc, and @to_thread can be set to NULL + * during thread teardown + */ + spinlock_t lock; }; static void binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); +static void binder_free_thread(struct binder_thread *thread); +static void binder_free_proc(struct binder_proc *proc); static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) { @@ -781,6 +794,79 @@ static void binder_pop_transaction(struct binder_thread *target_thread, t->from = NULL; } +/** + * binder_thread_dec_tmpref() - decrement thread->tmp_ref + * @thread: thread to decrement + * + * A thread needs to be kept alive while being used to create or + * handle a transaction. binder_get_txn_from() is used to safely + * extract t->from from a binder_transaction and keep the thread + * indicated by t->from from being freed. When done with that + * binder_thread, this function is called to decrement the + * tmp_ref and free if appropriate (thread has been released + * and no transaction being processed by the driver) + */ +static void binder_thread_dec_tmpref(struct binder_thread *thread) +{ + /* + * atomic is used to protect the counter value while + * it cannot reach zero or thread->is_dead is false + * + * TODO: future patch adds locking to ensure that the + * check of tmp_ref and is_dead is done with a lock held + */ + atomic_dec(&thread->tmp_ref); + if (thread->is_dead && !atomic_read(&thread->tmp_ref)) { + binder_free_thread(thread); + return; + } +} + +/** + * binder_proc_dec_tmpref() - decrement proc->tmp_ref + * @proc: proc to decrement + * + * A binder_proc needs to be kept alive while being used to create or + * handle a transaction. proc->tmp_ref is incremented when + * creating a new transaction or the binder_proc is currently in-use + * by threads that are being released. When done with the binder_proc, + * this function is called to decrement the counter and free the + * proc if appropriate (proc has been released, all threads have + * been released and not currenly in-use to process a transaction). 
+ */ +static void binder_proc_dec_tmpref(struct binder_proc *proc) +{ + proc->tmp_ref--; + if (proc->is_dead && RB_EMPTY_ROOT(&proc->threads) && + !proc->tmp_ref) { + binder_free_proc(proc); + return; + } +} + +/** + * binder_get_txn_from() - safely extract the "from" thread in transaction + * @t: binder transaction for t->from + * + * Atomically return the "from" thread and increment the tmp_ref + * count for the thread to ensure it stays alive until + * binder_thread_dec_tmpref() is called. + * + * Return: the value of t->from + */ +static struct binder_thread *binder_get_txn_from( + struct binder_transaction *t) +{ + struct binder_thread *from; + + spin_lock(&t->lock); + from = t->from; + if (from) + atomic_inc(&from->tmp_ref); + spin_unlock(&t->lock); + return from; +} + static void binder_free_transaction(struct binder_transaction *t) { if (t->buffer) @@ -797,7 +883,7 @@ static void binder_send_failed_reply(struct binder_transaction *t, BUG_ON(t->flags & TF_ONE_WAY); while (1) { - target_thread = t->from; + target_thread = binder_get_txn_from(t); if (target_thread) { binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "send failed reply for transaction %d to %d:%d\n", @@ -816,6 +902,7 @@ static void binder_send_failed_reply(struct binder_transaction *t, WARN(1, "Unexpected reply error: %u\n", target_thread->reply_error.cmd); } + binder_thread_dec_tmpref(target_thread); binder_free_transaction(t); return; } @@ -1396,7 +1483,7 @@ static void binder_transaction(struct binder_proc *proc, binder_size_t *offp, *off_end, *off_start; binder_size_t off_min; u8 *sg_bufp, *sg_buf_end; - struct binder_proc *target_proc; + struct binder_proc *target_proc = NULL; struct binder_thread *target_thread = NULL; struct binder_node *target_node = NULL; struct list_head *target_list; @@ -1433,12 +1520,14 @@ static void binder_transaction(struct binder_proc *proc, } binder_set_nice(in_reply_to->saved_priority); if (in_reply_to->to_thread != thread) { + spin_lock(&in_reply_to->lock); binder_user_error("%d:%d got reply transaction with bad transaction stack, transaction %d has target %d:%d\n", proc->pid, thread->pid, in_reply_to->debug_id, in_reply_to->to_proc ? in_reply_to->to_proc->pid : 0, in_reply_to->to_thread ? in_reply_to->to_thread->pid : 0); + spin_unlock(&in_reply_to->lock); return_error = BR_FAILED_REPLY; return_error_param = -EPROTO; return_error_line = __LINE__; @@ -1446,7 +1535,7 @@ static void binder_transaction(struct binder_proc *proc, goto err_bad_call_stack; } thread->transaction_stack = in_reply_to->to_parent; - target_thread = in_reply_to->from; + target_thread = binder_get_txn_from(in_reply_to); if (target_thread == NULL) { return_error = BR_DEAD_REPLY; return_error_line = __LINE__; @@ -1466,6 +1555,7 @@ static void binder_transaction(struct binder_proc *proc, goto err_dead_binder; } target_proc = target_thread->proc; + target_proc->tmp_ref++; } else { if (tr->target.handle) { struct binder_ref *ref; @@ -1509,6 +1599,7 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_dead_binder; } + target_proc->tmp_ref++; if (security_binder_transaction(proc->tsk, target_proc->tsk) < 0) { return_error = BR_FAILED_REPLY; @@ -1521,19 +1612,30 @@ static void binder_transaction(struct binder_proc *proc, tmp = thread->transaction_stack; if (tmp->to_thread != thread) { + spin_lock(&tmp->lock); binder_user_error("%d:%d got new transaction with bad transaction stack, transaction %d has target %d:%d\n", proc->pid, thread->pid, tmp->debug_id, tmp->to_proc ? 
tmp->to_proc->pid : 0, tmp->to_thread ? tmp->to_thread->pid : 0); + spin_unlock(&tmp->lock); return_error = BR_FAILED_REPLY; return_error_param = -EPROTO; return_error_line = __LINE__; goto err_bad_call_stack; } while (tmp) { - if (tmp->from && tmp->from->proc == target_proc) - target_thread = tmp->from; + struct binder_thread *from; + + spin_lock(&tmp->lock); + from = tmp->from; + if (from && from->proc == target_proc) { + atomic_inc(&from->tmp_ref); + target_thread = from; + spin_unlock(&tmp->lock); + break; + } + spin_unlock(&tmp->lock); tmp = tmp->from_parent; } } @@ -1557,6 +1659,7 @@ static void binder_transaction(struct binder_proc *proc, goto err_alloc_t_failed; } binder_stats_created(BINDER_STAT_TRANSACTION); + spin_lock_init(&t->lock); tcomplete = kzalloc(sizeof(*tcomplete), GFP_KERNEL); if (tcomplete == NULL) { @@ -1815,6 +1918,8 @@ static void binder_transaction(struct binder_proc *proc, list_add_tail(&tcomplete->entry, &thread->todo); if (reply) { + if (target_thread->is_dead) + goto err_dead_proc_or_thread; BUG_ON(t->buffer->async_transaction != 0); binder_pop_transaction(target_thread, in_reply_to); binder_free_transaction(in_reply_to); @@ -1823,6 +1928,11 @@ static void binder_transaction(struct binder_proc *proc, t->need_reply = 1; t->from_parent = thread->transaction_stack; thread->transaction_stack = t; + if (target_proc->is_dead || + (target_thread && target_thread->is_dead)) { + binder_pop_transaction(thread, t); + goto err_dead_proc_or_thread; + } } else { BUG_ON(target_node == NULL); BUG_ON(t->buffer->async_transaction != 1); @@ -1831,6 +1941,9 @@ static void binder_transaction(struct binder_proc *proc, target_wait = NULL; } else target_node->has_async_transaction = 1; + if (target_proc->is_dead || + (target_thread && target_thread->is_dead)) + goto err_dead_proc_or_thread; } t->work.type = BINDER_WORK_TRANSACTION; list_add_tail(&t->work.entry, target_list); @@ -1840,6 +1953,9 @@ static void binder_transaction(struct binder_proc *proc, else wake_up_interruptible(target_wait); } + if (target_thread) + binder_thread_dec_tmpref(target_thread); + binder_proc_dec_tmpref(target_proc); /* * write barrier to synchronize with initialization * of log entry @@ -1848,6 +1964,9 @@ static void binder_transaction(struct binder_proc *proc, WRITE_ONCE(e->debug_id_done, t_debug_id); return; +err_dead_proc_or_thread: + return_error = BR_DEAD_REPLY; + return_error_line = __LINE__; err_translate_failed: err_bad_object_type: err_bad_offset: @@ -1870,6 +1989,10 @@ err_empty_call_stack: err_dead_binder: err_invalid_target_handle: err_no_context_mgr_node: + if (target_thread) + binder_thread_dec_tmpref(target_thread); + if (target_proc) + binder_proc_dec_tmpref(target_proc); if (target_node) binder_dec_node(target_node, 1, 0); @@ -2422,6 +2545,7 @@ retry: struct binder_transaction_data tr; struct binder_work *w; struct binder_transaction *t = NULL; + struct binder_thread *t_from; if (!list_empty(&thread->todo)) { w = list_first_entry(&thread->todo, struct binder_work, @@ -2610,8 +2734,9 @@ retry: tr.flags = t->flags; tr.sender_euid = from_kuid(current_user_ns(), t->sender_euid); - if (t->from) { - struct task_struct *sender = t->from->proc->tsk; + t_from = binder_get_txn_from(t); + if (t_from) { + struct task_struct *sender = t_from->proc->tsk; tr.sender_pid = task_tgid_nr_ns(sender, task_active_pid_ns(current)); @@ -2628,11 +2753,17 @@ retry: ALIGN(t->buffer->data_size, sizeof(void *)); - if (put_user(cmd, (uint32_t __user *)ptr)) + if (put_user(cmd, (uint32_t __user *)ptr)) { + if (t_from) 
+ binder_thread_dec_tmpref(t_from); return -EFAULT; + } ptr += sizeof(uint32_t); - if (copy_to_user(ptr, &tr, sizeof(tr))) + if (copy_to_user(ptr, &tr, sizeof(tr))) { + if (t_from) + binder_thread_dec_tmpref(t_from); return -EFAULT; + } ptr += sizeof(tr); trace_binder_transaction_received(t); @@ -2642,11 +2773,13 @@ retry: proc->pid, thread->pid, (cmd == BR_TRANSACTION) ? "BR_TRANSACTION" : "BR_REPLY", - t->debug_id, t->from ? t->from->proc->pid : 0, - t->from ? t->from->pid : 0, cmd, + t->debug_id, t_from ? t_from->proc->pid : 0, + t_from ? t_from->pid : 0, cmd, t->buffer->data_size, t->buffer->offsets_size, (u64)tr.data.ptr.buffer, (u64)tr.data.ptr.offsets); + if (t_from) + binder_thread_dec_tmpref(t_from); list_del(&t->work.entry); t->buffer->allow_user_free = 1; if (cmd == BR_TRANSACTION && !(t->flags & TF_ONE_WAY)) { @@ -2758,6 +2891,7 @@ static struct binder_thread *binder_get_thread(struct binder_proc *proc) binder_stats_created(BINDER_STAT_THREAD); thread->proc = proc; thread->pid = current->pid; + atomic_set(&thread->tmp_ref, 0); init_waitqueue_head(&thread->wait); INIT_LIST_HEAD(&thread->todo); rb_link_node(&thread->rb_node, parent, p); @@ -2771,18 +2905,55 @@ static struct binder_thread *binder_get_thread(struct binder_proc *proc) return thread; } -static int binder_free_thread(struct binder_proc *proc, - struct binder_thread *thread) +static void binder_free_proc(struct binder_proc *proc) +{ + BUG_ON(!list_empty(&proc->todo)); + BUG_ON(!list_empty(&proc->delivered_death)); + binder_alloc_deferred_release(&proc->alloc); + put_task_struct(proc->tsk); + binder_stats_deleted(BINDER_STAT_PROC); + kfree(proc); +} + +static void binder_free_thread(struct binder_thread *thread) +{ + BUG_ON(!list_empty(&thread->todo)); + binder_stats_deleted(BINDER_STAT_THREAD); + binder_proc_dec_tmpref(thread->proc); + kfree(thread); +} + +static int binder_thread_release(struct binder_proc *proc, + struct binder_thread *thread) { struct binder_transaction *t; struct binder_transaction *send_reply = NULL; int active_transactions = 0; + struct binder_transaction *last_t = NULL; + /* + * take a ref on the proc so it survives + * after we remove this thread from proc->threads. 
+ * The corresponding dec is when we actually + * free the thread in binder_free_thread() + */ + proc->tmp_ref++; + /* + * take a ref on this thread to ensure it + * survives while we are releasing it + */ + atomic_inc(&thread->tmp_ref); rb_erase(&thread->rb_node, &proc->threads); t = thread->transaction_stack; - if (t && t->to_thread == thread) - send_reply = t; + if (t) { + spin_lock(&t->lock); + if (t->to_thread == thread) + send_reply = t; + } + thread->is_dead = true; + while (t) { + last_t = t; active_transactions++; binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, "release %d:%d transaction %d %s, still active\n", @@ -2803,12 +2974,15 @@ static int binder_free_thread(struct binder_proc *proc, t = t->from_parent; } else BUG(); + spin_unlock(&last_t->lock); + if (t) + spin_lock(&t->lock); } + if (send_reply) binder_send_failed_reply(send_reply, BR_DEAD_REPLY); binder_release_work(&thread->todo); - kfree(thread); - binder_stats_deleted(BINDER_STAT_THREAD); + binder_thread_dec_tmpref(thread); return active_transactions; } @@ -2996,7 +3170,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case BINDER_THREAD_EXIT: binder_debug(BINDER_DEBUG_THREADS, "%d:%d exit\n", proc->pid, thread->pid); - binder_free_thread(proc, thread); + binder_thread_release(proc, thread); thread = NULL; break; case BINDER_VERSION: { @@ -3266,7 +3440,13 @@ static void binder_deferred_release(struct binder_proc *proc) context->binder_context_mgr_node = NULL; } mutex_unlock(&context->context_mgr_node_lock); + /* + * Make sure proc stays alive after we + * remove all the threads + */ + proc->tmp_ref++; + proc->is_dead = true; threads = 0; active_transactions = 0; while ((n = rb_first(&proc->threads))) { @@ -3274,7 +3454,7 @@ static void binder_deferred_release(struct binder_proc *proc) thread = rb_entry(n, struct binder_thread, rb_node); threads++; - active_transactions += binder_free_thread(proc, thread); + active_transactions += binder_thread_release(proc, thread); } nodes = 0; @@ -3300,17 +3480,12 @@ static void binder_deferred_release(struct binder_proc *proc) binder_release_work(&proc->todo); binder_release_work(&proc->delivered_death); - binder_alloc_deferred_release(&proc->alloc); - binder_stats_deleted(BINDER_STAT_PROC); - - put_task_struct(proc->tsk); - binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d threads %d, nodes %d (ref %d), refs %d, active transactions %d\n", __func__, proc->pid, threads, nodes, incoming_refs, outgoing_refs, active_transactions); - kfree(proc); + binder_proc_dec_tmpref(proc); } static void binder_deferred_func(struct work_struct *work) @@ -3371,6 +3546,7 @@ binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer) static void print_binder_transaction(struct seq_file *m, const char *prefix, struct binder_transaction *t) { + spin_lock(&t->lock); seq_printf(m, "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %ld r%d", prefix, t->debug_id, t, @@ -3379,6 +3555,8 @@ static void print_binder_transaction(struct seq_file *m, const char *prefix, t->to_proc ? t->to_proc->pid : 0, t->to_thread ? 
t->to_thread->pid : 0, t->code, t->flags, t->priority, t->need_reply); + spin_unlock(&t->lock); + if (t->buffer == NULL) { seq_puts(m, " buffer free\n"); return; @@ -3443,9 +3621,10 @@ static void print_binder_thread(struct seq_file *m, size_t start_pos = m->count; size_t header_pos; - seq_printf(m, " thread %d: l %02x need_return %d\n", + seq_printf(m, " thread %d: l %02x need_return %d tr %d\n", thread->pid, thread->looper, - thread->looper_need_return); + thread->looper_need_return, + atomic_read(&thread->tmp_ref)); header_pos = m->count; t = thread->transaction_stack; while (t) { From f7d874123e4de442dcd6aca8afe0fd0b9d2854b4 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 8 May 2017 09:16:27 -0700 Subject: [PATCH 026/176] FROMLIST: binder: refactor binder ref inc/dec for thread safety (from https://patchwork.kernel.org/patch/9817781/) Once locks are added, binder_ref's will only be accessed safely with the proc lock held. Refactor the inc/dec paths to make them atomic with the binder_get_ref* paths and node inc/dec. For example, instead of: ref = binder_get_ref(proc, handle, strong); ... binder_dec_ref(ref, strong); we now have: ret = binder_dec_ref_for_handle(proc, handle, strong, &rdata); Since the actual ref is no longer exposed to callers, a new struct binder_ref_data is introduced which can be used to return a copy of ref state. Change-Id: I7de22107f8ebc967cee63251d584fceb4ea56250 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 484 ++++++++++++++++++++++++--------- drivers/android/binder_trace.h | 32 ++- 2 files changed, 379 insertions(+), 137 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index a16375e133cc..b7109a356c46 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -291,20 +291,51 @@ struct binder_ref_death { binder_uintptr_t cookie; }; +/** + * struct binder_ref_data - binder_ref counts and id + * @debug_id: unique ID for the ref + * @desc: unique userspace handle for ref + * @strong: strong ref count (debugging only if not locked) + * @weak: weak ref count (debugging only if not locked) + * + * Structure to hold ref count and ref id information. Since + * the actual ref can only be accessed with a lock, this structure + * is used to return information about the ref to callers of + * ref inc/dec functions. + */ +struct binder_ref_data { + int debug_id; + uint32_t desc; + int strong; + int weak; +}; + +/** + * struct binder_ref - struct to track references on nodes + * @data: binder_ref_data containing id, handle, and current refcounts + * @rb_node_desc: node for lookup by @data.desc in proc's rb_tree + * @rb_node_node: node for lookup by @node in proc's rb_tree + * @node_entry: list entry for node->refs list in target node + * @proc: binder_proc containing ref + * @node: binder_node of target node. When cleaning up a + * ref for deletion in binder_cleanup_ref, a non-NULL + * @node indicates the node must be freed + * @death: pointer to death notification (ref_death) if requested + * + * Structure to track references from procA to target node (on procB). This + * structure is unsafe to access without holding @proc->outer_lock. 
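+ * The counts in @data are therefore only authoritative while that
+ * lock is held; unlocked readers may use them for debugging only.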
+ */ struct binder_ref { /* Lookups needed: */ /* node + proc => ref (transaction) */ /* desc + proc => ref (transaction, inc/dec ref) */ /* node => refs + procs (proc exit) */ - int debug_id; + struct binder_ref_data data; struct rb_node rb_node_desc; struct rb_node rb_node_node; struct hlist_node node_entry; struct binder_proc *proc; struct binder_node *node; - uint32_t desc; - int strong; - int weak; struct binder_ref_death *death; }; @@ -628,11 +659,11 @@ static struct binder_ref *binder_get_ref(struct binder_proc *proc, while (n) { ref = rb_entry(n, struct binder_ref, rb_node_desc); - if (desc < ref->desc) { + if (desc < ref->data.desc) { n = n->rb_left; - } else if (desc > ref->desc) { + } else if (desc > ref->data.desc) { n = n->rb_right; - } else if (need_strong_ref && !ref->strong) { + } else if (need_strong_ref && !ref->data.strong) { binder_user_error("tried to use weak ref as strong ref\n"); return NULL; } else { @@ -642,14 +673,33 @@ static struct binder_ref *binder_get_ref(struct binder_proc *proc, return NULL; } +/** + * binder_get_ref_for_node() - get the ref associated with given node + * @proc: binder_proc that owns the ref + * @node: binder_node of target + * @new_ref: newly allocated binder_ref to be initialized or %NULL + * + * Look up the ref for the given node and return it if it exists + * + * If it doesn't exist and the caller provides a newly allocated + * ref, initialize the fields of the newly allocated ref and insert + * into the given proc rb_trees and node refs list. + * + * Return: the ref for node. It is possible that another thread + * allocated/initialized the ref first in which case the + * returned ref would be different than the passed-in + * new_ref. new_ref must be kfree'd by the caller in + * this case. + */ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, - struct binder_node *node) + struct binder_node *node, + struct binder_ref *new_ref) { - struct rb_node *n; + struct binder_context *context = proc->context; struct rb_node **p = &proc->refs_by_node.rb_node; struct rb_node *parent = NULL; - struct binder_ref *ref, *new_ref; - struct binder_context *context = proc->context; + struct binder_ref *ref; + struct rb_node *n; while (*p) { parent = *p; @@ -662,22 +712,22 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, else return ref; } - new_ref = kzalloc(sizeof(*ref), GFP_KERNEL); - if (new_ref == NULL) + if (!new_ref) return NULL; + binder_stats_created(BINDER_STAT_REF); - new_ref->debug_id = atomic_inc_return(&binder_last_id); + new_ref->data.debug_id = atomic_inc_return(&binder_last_id); new_ref->proc = proc; new_ref->node = node; rb_link_node(&new_ref->rb_node_node, parent, p); rb_insert_color(&new_ref->rb_node_node, &proc->refs_by_node); - new_ref->desc = (node == context->binder_context_mgr_node) ? 0 : 1; + new_ref->data.desc = (node == context->binder_context_mgr_node) ? 
0 : 1; for (n = rb_first(&proc->refs_by_desc); n != NULL; n = rb_next(n)) { ref = rb_entry(n, struct binder_ref, rb_node_desc); - if (ref->desc > new_ref->desc) + if (ref->data.desc > new_ref->data.desc) break; - new_ref->desc = ref->desc + 1; + new_ref->data.desc = ref->data.desc + 1; } p = &proc->refs_by_desc.rb_node; @@ -685,9 +735,9 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, parent = *p; ref = rb_entry(parent, struct binder_ref, rb_node_desc); - if (new_ref->desc < ref->desc) + if (new_ref->data.desc < ref->data.desc) p = &(*p)->rb_left; - else if (new_ref->desc > ref->desc) + else if (new_ref->data.desc > ref->data.desc) p = &(*p)->rb_right; else BUG(); @@ -698,89 +748,267 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, binder_debug(BINDER_DEBUG_INTERNAL_REFS, "%d new ref %d desc %d for node %d\n", - proc->pid, new_ref->debug_id, new_ref->desc, + proc->pid, new_ref->data.debug_id, new_ref->data.desc, node->debug_id); return new_ref; } -static void binder_delete_ref(struct binder_ref *ref) +static void binder_cleanup_ref(struct binder_ref *ref) { binder_debug(BINDER_DEBUG_INTERNAL_REFS, "%d delete ref %d desc %d for node %d\n", - ref->proc->pid, ref->debug_id, ref->desc, + ref->proc->pid, ref->data.debug_id, ref->data.desc, ref->node->debug_id); rb_erase(&ref->rb_node_desc, &ref->proc->refs_by_desc); rb_erase(&ref->rb_node_node, &ref->proc->refs_by_node); - if (ref->strong) + + if (ref->data.strong) binder_dec_node(ref->node, 1, 1); + hlist_del(&ref->node_entry); binder_dec_node(ref->node, 0, 1); + if (ref->death) { binder_debug(BINDER_DEBUG_DEAD_BINDER, "%d delete ref %d desc %d has death notification\n", - ref->proc->pid, ref->debug_id, ref->desc); + ref->proc->pid, ref->data.debug_id, + ref->data.desc); list_del(&ref->death->work.entry); - kfree(ref->death); binder_stats_deleted(BINDER_STAT_DEATH); } - kfree(ref); binder_stats_deleted(BINDER_STAT_REF); } +/** + * binder_inc_ref() - increment the ref for given handle + * @ref: ref to be incremented + * @strong: if true, strong increment, else weak + * @target_list: list to queue node work on + * + * Increment the ref. + * + * Return: 0, if successful, else errno + */ static int binder_inc_ref(struct binder_ref *ref, int strong, struct list_head *target_list) { int ret; if (strong) { - if (ref->strong == 0) { + if (ref->data.strong == 0) { ret = binder_inc_node(ref->node, 1, 1, target_list); if (ret) return ret; } - ref->strong++; + ref->data.strong++; } else { - if (ref->weak == 0) { + if (ref->data.weak == 0) { ret = binder_inc_node(ref->node, 0, 1, target_list); if (ret) return ret; } - ref->weak++; + ref->data.weak++; } return 0; } - -static int binder_dec_ref(struct binder_ref *ref, int strong) +/** + * binder_dec_ref() - dec the ref for given handle + * @ref: ref to be decremented + * @strong: if true, strong decrement, else weak + * + * Decrement the ref. + * + * TODO: kfree is avoided here since an upcoming patch + * will put this under a lock. 
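+ * Until then, when this function returns true the caller is
+ * responsible for freeing the ref via binder_free_ref().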
+ * + * Return: true if ref is cleaned up and ready to be freed + */ +static bool binder_dec_ref(struct binder_ref *ref, int strong) { if (strong) { - if (ref->strong == 0) { + if (ref->data.strong == 0) { binder_user_error("%d invalid dec strong, ref %d desc %d s %d w %d\n", - ref->proc->pid, ref->debug_id, - ref->desc, ref->strong, ref->weak); - return -EINVAL; + ref->proc->pid, ref->data.debug_id, + ref->data.desc, ref->data.strong, + ref->data.weak); + return false; } - ref->strong--; - if (ref->strong == 0) { + ref->data.strong--; + if (ref->data.strong == 0) { int ret; ret = binder_dec_node(ref->node, strong, 1); if (ret) - return ret; + return false; } } else { - if (ref->weak == 0) { + if (ref->data.weak == 0) { binder_user_error("%d invalid dec weak, ref %d desc %d s %d w %d\n", - ref->proc->pid, ref->debug_id, - ref->desc, ref->strong, ref->weak); - return -EINVAL; + ref->proc->pid, ref->data.debug_id, + ref->data.desc, ref->data.strong, + ref->data.weak); + return false; } - ref->weak--; + ref->data.weak--; } - if (ref->strong == 0 && ref->weak == 0) - binder_delete_ref(ref); - return 0; + if (ref->data.strong == 0 && ref->data.weak == 0) { + binder_cleanup_ref(ref); + /* + * TODO: we could kfree(ref) here, but an upcoming + * patch will call this with a lock held, so we + * return an indication that the ref should be + * freed. + */ + return true; + } + return false; +} + +/** + * binder_get_node_from_ref() - get the node from the given proc/desc + * @proc: proc containing the ref + * @desc: the handle associated with the ref + * @need_strong_ref: if true, only return node if ref is strong + * @rdata: the id/refcount data for the ref + * + * Given a proc and ref handle, return the associated binder_node + * + * Return: a binder_node or NULL if not found or not strong when strong required + */ +static struct binder_node *binder_get_node_from_ref( + struct binder_proc *proc, + u32 desc, bool need_strong_ref, + struct binder_ref_data *rdata) +{ + struct binder_node *node; + struct binder_ref *ref; + + ref = binder_get_ref(proc, desc, need_strong_ref); + if (!ref) + goto err_no_ref; + node = ref->node; + if (rdata) + *rdata = ref->data; + + return node; + +err_no_ref: + return NULL; +} + +/** + * binder_free_ref() - free the binder_ref + * @ref: ref to free + * + * Free the binder_ref and the binder_ref_death indicated by ref->death. + */ +static void binder_free_ref(struct binder_ref *ref) +{ + kfree(ref->death); + kfree(ref); +} + +/** + * binder_update_ref_for_handle() - inc/dec the ref for given handle + * @proc: proc containing the ref + * @desc: the handle associated with the ref + * @increment: true=inc reference, false=dec reference + * @strong: true=strong reference, false=weak reference + * @rdata: the id/refcount data for the ref + * + * Given a proc and ref handle, increment or decrement the ref + * according to "increment" arg. 
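+ * A decrement that drops both the strong and weak counts to zero
+ * cleans up the ref and frees it before returning.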
+ * + * Return: 0 if successful, else errno + */ +static int binder_update_ref_for_handle(struct binder_proc *proc, + uint32_t desc, bool increment, bool strong, + struct binder_ref_data *rdata) +{ + int ret = 0; + struct binder_ref *ref; + bool delete_ref = false; + + ref = binder_get_ref(proc, desc, strong); + if (!ref) { + ret = -EINVAL; + goto err_no_ref; + } + if (increment) + ret = binder_inc_ref(ref, strong, NULL); + else + delete_ref = binder_dec_ref(ref, strong); + + if (rdata) + *rdata = ref->data; + + if (delete_ref) + binder_free_ref(ref); + return ret; + +err_no_ref: + return ret; +} + +/** + * binder_dec_ref_for_handle() - dec the ref for given handle + * @proc: proc containing the ref + * @desc: the handle associated with the ref + * @strong: true=strong reference, false=weak reference + * @rdata: the id/refcount data for the ref + * + * Just calls binder_update_ref_for_handle() to decrement the ref. + * + * Return: 0 if successful, else errno + */ +static int binder_dec_ref_for_handle(struct binder_proc *proc, + uint32_t desc, bool strong, struct binder_ref_data *rdata) +{ + return binder_update_ref_for_handle(proc, desc, false, strong, rdata); +} + + +/** + * binder_inc_ref_for_node() - increment the ref for given proc/node + * @proc: proc containing the ref + * @node: target node + * @strong: true=strong reference, false=weak reference + * @target_list: worklist to use if node is incremented + * @rdata: the id/refcount data for the ref + * + * Given a proc and node, increment the ref. Create the ref if it + * doesn't already exist + * + * Return: 0 if successful, else errno + */ +static int binder_inc_ref_for_node(struct binder_proc *proc, + struct binder_node *node, + bool strong, + struct list_head *target_list, + struct binder_ref_data *rdata) +{ + struct binder_ref *ref; + struct binder_ref *new_ref = NULL; + int ret = 0; + + ref = binder_get_ref_for_node(proc, node, NULL); + if (!ref) { + new_ref = kzalloc(sizeof(*ref), GFP_KERNEL); + if (!new_ref) + return -ENOMEM; + ref = binder_get_ref_for_node(proc, node, new_ref); + } + ret = binder_inc_ref(ref, strong, target_list); + *rdata = ref->data; + if (new_ref && ref != new_ref) + /* + * Another thread created the ref first so + * free the one we allocated + */ + kfree(new_ref); + return ret; } static void binder_pop_transaction(struct binder_thread *target_thread, @@ -1125,20 +1353,21 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { struct flat_binder_object *fp; - struct binder_ref *ref; + struct binder_ref_data rdata; + int ret; fp = to_flat_binder_object(hdr); - ref = binder_get_ref(proc, fp->handle, - hdr->type == BINDER_TYPE_HANDLE); - if (ref == NULL) { - pr_err("transaction release %d bad handle %d\n", - debug_id, fp->handle); + ret = binder_dec_ref_for_handle(proc, fp->handle, + hdr->type == BINDER_TYPE_HANDLE, &rdata); + + if (ret) { + pr_err("transaction release %d bad handle %d, ret = %d\n", + debug_id, fp->handle, ret); break; } binder_debug(BINDER_DEBUG_TRANSACTION, - " ref %d desc %d (node %d)\n", - ref->debug_id, ref->desc, ref->node->debug_id); - binder_dec_ref(ref, hdr->type == BINDER_TYPE_HANDLE); + " ref %d desc %d\n", + rdata.debug_id, rdata.desc); } break; case BINDER_TYPE_FD: { @@ -1210,9 +1439,10 @@ static int binder_translate_binder(struct flat_binder_object *fp, struct binder_thread *thread) { struct binder_node *node; - struct binder_ref *ref; struct binder_proc *proc = thread->proc; struct binder_proc 
*target_proc = t->to_proc; + struct binder_ref_data rdata; + int ret; node = binder_get_node(proc, fp->binder); if (!node) { @@ -1233,25 +1463,25 @@ static int binder_translate_binder(struct flat_binder_object *fp, if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) return -EPERM; - ref = binder_get_ref_for_node(target_proc, node); - if (!ref) - return -ENOMEM; + ret = binder_inc_ref_for_node(target_proc, node, + fp->hdr.type == BINDER_TYPE_BINDER, + &thread->todo, &rdata); + if (ret) + return ret; if (fp->hdr.type == BINDER_TYPE_BINDER) fp->hdr.type = BINDER_TYPE_HANDLE; else fp->hdr.type = BINDER_TYPE_WEAK_HANDLE; fp->binder = 0; - fp->handle = ref->desc; + fp->handle = rdata.desc; fp->cookie = 0; - binder_inc_ref(ref, fp->hdr.type == BINDER_TYPE_HANDLE, &thread->todo); - trace_binder_transaction_node_to_ref(t, node, ref); + trace_binder_transaction_node_to_ref(t, node, &rdata); binder_debug(BINDER_DEBUG_TRANSACTION, " node %d u%016llx -> ref %d desc %d\n", node->debug_id, (u64)node->ptr, - ref->debug_id, ref->desc); - + rdata.debug_id, rdata.desc); return 0; } @@ -1259,13 +1489,14 @@ static int binder_translate_handle(struct flat_binder_object *fp, struct binder_transaction *t, struct binder_thread *thread) { - struct binder_ref *ref; struct binder_proc *proc = thread->proc; struct binder_proc *target_proc = t->to_proc; + struct binder_node *node; + struct binder_ref_data src_rdata; - ref = binder_get_ref(proc, fp->handle, - fp->hdr.type == BINDER_TYPE_HANDLE); - if (!ref) { + node = binder_get_node_from_ref(proc, fp->handle, + fp->hdr.type == BINDER_TYPE_HANDLE, &src_rdata); + if (!node) { binder_user_error("%d:%d got transaction with invalid handle, %d\n", proc->pid, thread->pid, fp->handle); return -EINVAL; @@ -1273,37 +1504,41 @@ static int binder_translate_handle(struct flat_binder_object *fp, if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) return -EPERM; - if (ref->node->proc == target_proc) { + if (node->proc == target_proc) { if (fp->hdr.type == BINDER_TYPE_HANDLE) fp->hdr.type = BINDER_TYPE_BINDER; else fp->hdr.type = BINDER_TYPE_WEAK_BINDER; - fp->binder = ref->node->ptr; - fp->cookie = ref->node->cookie; - binder_inc_node(ref->node, fp->hdr.type == BINDER_TYPE_BINDER, + fp->binder = node->ptr; + fp->cookie = node->cookie; + binder_inc_node(node, + fp->hdr.type == BINDER_TYPE_BINDER, 0, NULL); - trace_binder_transaction_ref_to_node(t, ref); + trace_binder_transaction_ref_to_node(t, node, &src_rdata); binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d -> node %d u%016llx\n", - ref->debug_id, ref->desc, ref->node->debug_id, - (u64)ref->node->ptr); + src_rdata.debug_id, src_rdata.desc, node->debug_id, + (u64)node->ptr); } else { - struct binder_ref *new_ref; + int ret; + struct binder_ref_data dest_rdata; - new_ref = binder_get_ref_for_node(target_proc, ref->node); - if (!new_ref) - return -ENOMEM; + ret = binder_inc_ref_for_node(target_proc, node, + fp->hdr.type == BINDER_TYPE_HANDLE, + NULL, &dest_rdata); + if (ret) + return ret; fp->binder = 0; - fp->handle = new_ref->desc; + fp->handle = dest_rdata.desc; fp->cookie = 0; - binder_inc_ref(new_ref, fp->hdr.type == BINDER_TYPE_HANDLE, - NULL); - trace_binder_transaction_ref_to_ref(t, ref, new_ref); + trace_binder_transaction_ref_to_ref(t, node, &src_rdata, + &dest_rdata); binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d -> ref %d desc %d (node %d)\n", - ref->debug_id, ref->desc, new_ref->debug_id, - new_ref->desc, ref->node->debug_id); + src_rdata.debug_id, src_rdata.desc, + 
dest_rdata.debug_id, dest_rdata.desc, + node->debug_id); } return 0; } @@ -2044,6 +2279,8 @@ static int binder_thread_write(struct binder_proc *proc, void __user *end = buffer + size; while (ptr < end && thread->return_error.cmd == BR_OK) { + int ret; + if (get_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); @@ -2059,62 +2296,61 @@ static int binder_thread_write(struct binder_proc *proc, case BC_RELEASE: case BC_DECREFS: { uint32_t target; - struct binder_ref *ref = NULL; const char *debug_string; + bool strong = cmd == BC_ACQUIRE || cmd == BC_RELEASE; + bool increment = cmd == BC_INCREFS || cmd == BC_ACQUIRE; + struct binder_ref_data rdata; if (get_user(target, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); - if (target == 0 && - (cmd == BC_INCREFS || cmd == BC_ACQUIRE)) { + ret = -1; + if (increment && !target) { struct binder_node *ctx_mgr_node; - mutex_lock(&context->context_mgr_node_lock); ctx_mgr_node = context->binder_context_mgr_node; - if (ctx_mgr_node) { - ref = binder_get_ref_for_node(proc, - ctx_mgr_node); - if (ref && ref->desc != target) { - binder_user_error("%d:%d tried to acquire reference to desc 0, got %d instead\n", - proc->pid, thread->pid, - ref->desc); - } - } + if (ctx_mgr_node) + ret = binder_inc_ref_for_node( + proc, ctx_mgr_node, + strong, NULL, &rdata); mutex_unlock(&context->context_mgr_node_lock); } - if (ref == NULL) - ref = binder_get_ref(proc, target, - cmd == BC_ACQUIRE || - cmd == BC_RELEASE); - if (ref == NULL) { - binder_user_error("%d:%d refcount change on invalid ref %d\n", - proc->pid, thread->pid, target); - break; + if (ret) + ret = binder_update_ref_for_handle( + proc, target, increment, strong, + &rdata); + if (!ret && rdata.desc != target) { + binder_user_error("%d:%d tried to acquire reference to desc %d, got %d instead\n", + proc->pid, thread->pid, + target, rdata.desc); } switch (cmd) { case BC_INCREFS: debug_string = "IncRefs"; - binder_inc_ref(ref, 0, NULL); break; case BC_ACQUIRE: debug_string = "Acquire"; - binder_inc_ref(ref, 1, NULL); break; case BC_RELEASE: debug_string = "Release"; - binder_dec_ref(ref, 1); break; case BC_DECREFS: default: debug_string = "DecRefs"; - binder_dec_ref(ref, 0); + break; + } + if (ret) { + binder_user_error("%d:%d %s %d refcount change on invalid ref %d ret %d\n", + proc->pid, thread->pid, debug_string, + strong, target, ret); break; } binder_debug(BINDER_DEBUG_USER_REFS, - "%d:%d %s ref %d desc %d s %d w %d for node %d\n", - proc->pid, thread->pid, debug_string, ref->debug_id, - ref->desc, ref->strong, ref->weak, ref->node->debug_id); + "%d:%d %s ref %d desc %d s %d w %d\n", + proc->pid, thread->pid, debug_string, + rdata.debug_id, rdata.desc, rdata.strong, + rdata.weak); break; } case BC_INCREFS_DONE: @@ -2312,8 +2548,9 @@ static int binder_thread_write(struct binder_proc *proc, cmd == BC_REQUEST_DEATH_NOTIFICATION ? 
"BC_REQUEST_DEATH_NOTIFICATION" : "BC_CLEAR_DEATH_NOTIFICATION", - (u64)cookie, ref->debug_id, ref->desc, - ref->strong, ref->weak, ref->node->debug_id); + (u64)cookie, ref->data.debug_id, + ref->data.desc, ref->data.strong, + ref->data.weak, ref->node->debug_id); if (cmd == BC_REQUEST_DEATH_NOTIFICATION) { if (ref->death) { @@ -3474,7 +3711,8 @@ static void binder_deferred_release(struct binder_proc *proc) ref = rb_entry(n, struct binder_ref, rb_node_desc); outgoing_refs++; - binder_delete_ref(ref); + binder_cleanup_ref(ref); + binder_free_ref(ref); } binder_release_work(&proc->todo); @@ -3676,9 +3914,11 @@ static void print_binder_node(struct seq_file *m, struct binder_node *node) static void print_binder_ref(struct seq_file *m, struct binder_ref *ref) { - seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %p\n", - ref->debug_id, ref->desc, ref->node->proc ? "" : "dead ", - ref->node->debug_id, ref->strong, ref->weak, ref->death); + seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %pK\n", + ref->data.debug_id, ref->data.desc, + ref->node->proc ? "" : "dead ", + ref->node->debug_id, ref->data.strong, + ref->data.weak, ref->death); } static void print_binder_proc(struct seq_file *m, @@ -3845,8 +4085,8 @@ static void print_binder_proc_stats(struct seq_file *m, struct binder_ref *ref = rb_entry(n, struct binder_ref, rb_node_desc); count++; - strong += ref->strong; - weak += ref->weak; + strong += ref->data.strong; + weak += ref->data.weak; } seq_printf(m, " refs: %d s %d w %d\n", count, strong, weak); diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h index 50b0d21f42cf..7967db16ba5a 100644 --- a/drivers/android/binder_trace.h +++ b/drivers/android/binder_trace.h @@ -24,7 +24,7 @@ struct binder_buffer; struct binder_node; struct binder_proc; struct binder_alloc; -struct binder_ref; +struct binder_ref_data; struct binder_thread; struct binder_transaction; @@ -147,8 +147,8 @@ TRACE_EVENT(binder_transaction_received, TRACE_EVENT(binder_transaction_node_to_ref, TP_PROTO(struct binder_transaction *t, struct binder_node *node, - struct binder_ref *ref), - TP_ARGS(t, node, ref), + struct binder_ref_data *rdata), + TP_ARGS(t, node, rdata), TP_STRUCT__entry( __field(int, debug_id) @@ -161,8 +161,8 @@ TRACE_EVENT(binder_transaction_node_to_ref, __entry->debug_id = t->debug_id; __entry->node_debug_id = node->debug_id; __entry->node_ptr = node->ptr; - __entry->ref_debug_id = ref->debug_id; - __entry->ref_desc = ref->desc; + __entry->ref_debug_id = rdata->debug_id; + __entry->ref_desc = rdata->desc; ), TP_printk("transaction=%d node=%d src_ptr=0x%016llx ==> dest_ref=%d dest_desc=%d", __entry->debug_id, __entry->node_debug_id, @@ -171,8 +171,9 @@ TRACE_EVENT(binder_transaction_node_to_ref, ); TRACE_EVENT(binder_transaction_ref_to_node, - TP_PROTO(struct binder_transaction *t, struct binder_ref *ref), - TP_ARGS(t, ref), + TP_PROTO(struct binder_transaction *t, struct binder_node *node, + struct binder_ref_data *rdata), + TP_ARGS(t, node, rdata), TP_STRUCT__entry( __field(int, debug_id) @@ -183,10 +184,10 @@ TRACE_EVENT(binder_transaction_ref_to_node, ), TP_fast_assign( __entry->debug_id = t->debug_id; - __entry->ref_debug_id = ref->debug_id; - __entry->ref_desc = ref->desc; - __entry->node_debug_id = ref->node->debug_id; - __entry->node_ptr = ref->node->ptr; + __entry->ref_debug_id = rdata->debug_id; + __entry->ref_desc = rdata->desc; + __entry->node_debug_id = node->debug_id; + __entry->node_ptr = node->ptr; ), TP_printk("transaction=%d node=%d src_ref=%d src_desc=%d ==> 
dest_ptr=0x%016llx", __entry->debug_id, __entry->node_debug_id, @@ -195,9 +196,10 @@ TRACE_EVENT(binder_transaction_ref_to_node, ); TRACE_EVENT(binder_transaction_ref_to_ref, - TP_PROTO(struct binder_transaction *t, struct binder_ref *src_ref, - struct binder_ref *dest_ref), - TP_ARGS(t, src_ref, dest_ref), + TP_PROTO(struct binder_transaction *t, struct binder_node *node, + struct binder_ref_data *src_ref, + struct binder_ref_data *dest_ref), + TP_ARGS(t, node, src_ref, dest_ref), TP_STRUCT__entry( __field(int, debug_id) @@ -209,7 +211,7 @@ TRACE_EVENT(binder_transaction_ref_to_ref, ), TP_fast_assign( __entry->debug_id = t->debug_id; - __entry->node_debug_id = src_ref->node->debug_id; + __entry->node_debug_id = node->debug_id; __entry->src_ref_debug_id = src_ref->debug_id; __entry->src_ref_desc = src_ref->desc; __entry->dest_ref_debug_id = dest_ref->debug_id; From 96dd75d9917402f0e3a8243c6152b2e5d662c37f Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Tue, 9 May 2017 11:08:05 -0700 Subject: [PATCH 027/176] FROMLIST: binder: use node->tmp_refs to ensure node safety (from https://patchwork.kernel.org/patch/9817795/) When obtaining a node via binder_get_node(), binder_get_node_from_ref() or binder_new_node(), increment node->tmp_refs to take a temporary reference on the node to ensure the node persists while being used. binder_put_node() must be called to remove the temporary reference. Change-Id: I962b39d5cd80b2d7e4786bb87236ede7914e2db7 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 124 ++++++++++++++++++++++++++++++++------- 1 file changed, 104 insertions(+), 20 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index b7109a356c46..4c636da38b2b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -274,6 +274,7 @@ struct binder_node { int internal_strong_refs; int local_weak_refs; int local_strong_refs; + int tmp_refs; binder_uintptr_t ptr; binder_uintptr_t cookie; unsigned has_strong_ref:1; @@ -427,6 +428,7 @@ static void binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); static void binder_free_thread(struct binder_thread *thread); static void binder_free_proc(struct binder_proc *proc); +static void binder_inc_node_tmpref(struct binder_node *node); static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) { @@ -521,8 +523,15 @@ static struct binder_node *binder_get_node(struct binder_proc *proc, n = n->rb_left; else if (ptr > node->ptr) n = n->rb_right; - else + else { + /* + * take an implicit weak reference + * to ensure node stays alive until + * call to binder_put_node() + */ + binder_inc_node_tmpref(node); return node; + } } return NULL; } @@ -551,6 +560,7 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, if (node == NULL) return NULL; binder_stats_created(BINDER_STAT_NODE); + node->tmp_refs++; rb_link_node(&node->rb_node, parent, p); rb_insert_color(&node->rb_node, &proc->nodes); node->debug_id = atomic_inc_return(&binder_last_id); @@ -616,7 +626,8 @@ static int binder_dec_node(struct binder_node *node, int strong, int internal) } else { if (!internal) node->local_weak_refs--; - if (node->local_weak_refs || !hlist_empty(&node->refs)) + if (node->local_weak_refs || node->tmp_refs || + !hlist_empty(&node->refs)) return 0; } if (node->proc && (node->has_strong_ref || node->has_weak_ref)) { @@ -626,7 +637,7 @@ static int binder_dec_node(struct binder_node *node, int strong, int internal) } } else { if (hlist_empty(&node->refs) && !node->local_strong_refs && - 
!node->local_weak_refs) { + !node->local_weak_refs && !node->tmp_refs) { list_del_init(&node->work.entry); if (node->proc) { rb_erase(&node->rb_node, &node->proc->nodes); @@ -649,6 +660,46 @@ static int binder_dec_node(struct binder_node *node, int strong, int internal) return 0; } +/** + * binder_inc_node_tmpref() - take a temporary reference on node + * @node: node to reference + * + * Take reference on node to prevent the node from being freed + * while referenced only by a local variable + */ +static void binder_inc_node_tmpref(struct binder_node *node) +{ + /* + * No call to binder_inc_node() is needed since we + * don't need to inform userspace of any changes to + * tmp_refs + */ + node->tmp_refs++; +} + +/** + * binder_dec_node_tmpref() - remove a temporary reference on node + * @node: node to reference + * + * Release temporary reference on node taken via binder_inc_node_tmpref() + */ +static void binder_dec_node_tmpref(struct binder_node *node) +{ + node->tmp_refs--; + BUG_ON(node->tmp_refs < 0); + /* + * Call binder_dec_node() to check if all refcounts are 0 + * and cleanup is needed. Calling with strong=0 and internal=1 + * causes no actual reference to be released in binder_dec_node(). + * If that changes, a change is needed here too. + */ + binder_dec_node(node, 0, 1); +} + +static void binder_put_node(struct binder_node *node) +{ + binder_dec_node_tmpref(node); +} static struct binder_ref *binder_get_ref(struct binder_proc *proc, u32 desc, bool need_strong_ref) @@ -889,6 +940,11 @@ static struct binder_node *binder_get_node_from_ref( if (!ref) goto err_no_ref; node = ref->node; + /* + * Take an implicit reference on the node to ensure + * it stays alive until the call to binder_put_node() + */ + binder_inc_node_tmpref(node); if (rdata) *rdata = ref->data; @@ -1349,6 +1405,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, node->debug_id, (u64)node->ptr); binder_dec_node(node, hdr->type == BINDER_TYPE_BINDER, 0); + binder_put_node(node); } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { @@ -1442,7 +1499,7 @@ static int binder_translate_binder(struct flat_binder_object *fp, struct binder_proc *proc = thread->proc; struct binder_proc *target_proc = t->to_proc; struct binder_ref_data rdata; - int ret; + int ret = 0; node = binder_get_node(proc, fp->binder); if (!node) { @@ -1458,16 +1515,19 @@ static int binder_translate_binder(struct flat_binder_object *fp, proc->pid, thread->pid, (u64)fp->binder, node->debug_id, (u64)fp->cookie, (u64)node->cookie); - return -EINVAL; + ret = -EINVAL; + goto done; + } + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + ret = -EPERM; + goto done; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) - return -EPERM; ret = binder_inc_ref_for_node(target_proc, node, fp->hdr.type == BINDER_TYPE_BINDER, &thread->todo, &rdata); if (ret) - return ret; + goto done; if (fp->hdr.type == BINDER_TYPE_BINDER) fp->hdr.type = BINDER_TYPE_HANDLE; @@ -1482,7 +1542,9 @@ static int binder_translate_binder(struct flat_binder_object *fp, " node %d u%016llx -> ref %d desc %d\n", node->debug_id, (u64)node->ptr, rdata.debug_id, rdata.desc); - return 0; +done: + binder_put_node(node); + return ret; } static int binder_translate_handle(struct flat_binder_object *fp, @@ -1493,6 +1555,7 @@ static int binder_translate_handle(struct flat_binder_object *fp, struct binder_proc *target_proc = t->to_proc; struct binder_node *node; struct binder_ref_data src_rdata; + int ret = 0; node = 
binder_get_node_from_ref(proc, fp->handle, fp->hdr.type == BINDER_TYPE_HANDLE, &src_rdata); @@ -1501,8 +1564,10 @@ static int binder_translate_handle(struct flat_binder_object *fp, proc->pid, thread->pid, fp->handle); return -EINVAL; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) - return -EPERM; + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + ret = -EPERM; + goto done; + } if (node->proc == target_proc) { if (fp->hdr.type == BINDER_TYPE_HANDLE) @@ -1527,7 +1592,7 @@ static int binder_translate_handle(struct flat_binder_object *fp, fp->hdr.type == BINDER_TYPE_HANDLE, NULL, &dest_rdata); if (ret) - return ret; + goto done; fp->binder = 0; fp->handle = dest_rdata.desc; @@ -1540,7 +1605,9 @@ static int binder_translate_handle(struct flat_binder_object *fp, dest_rdata.debug_id, dest_rdata.desc, node->debug_id); } - return 0; +done: + binder_put_node(node); + return ret; } static int binder_translate_fd(int fd, @@ -2382,6 +2449,7 @@ static int binder_thread_write(struct binder_proc *proc, "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE", (u64)node_ptr, node->debug_id, (u64)cookie, (u64)node->cookie); + binder_put_node(node); break; } if (cmd == BC_ACQUIRE_DONE) { @@ -2389,6 +2457,7 @@ static int binder_thread_write(struct binder_proc *proc, binder_user_error("%d:%d BC_ACQUIRE_DONE node %d has no pending acquire request\n", proc->pid, thread->pid, node->debug_id); + binder_put_node(node); break; } node->pending_strong_ref = 0; @@ -2397,16 +2466,19 @@ static int binder_thread_write(struct binder_proc *proc, binder_user_error("%d:%d BC_INCREFS_DONE node %d has no pending increfs request\n", proc->pid, thread->pid, node->debug_id); + binder_put_node(node); break; } node->pending_weak_ref = 0; } binder_dec_node(node, cmd == BC_ACQUIRE_DONE, 0); binder_debug(BINDER_DEBUG_USER_REFS, - "%d:%d %s node %d ls %d lw %d\n", + "%d:%d %s node %d ls %d lw %d tr %d\n", proc->pid, thread->pid, cmd == BC_INCREFS_DONE ? 
"BC_INCREFS_DONE" : "BC_ACQUIRE_DONE", - node->debug_id, node->local_strong_refs, node->local_weak_refs); + node->debug_id, node->local_strong_refs, + node->local_weak_refs, node->tmp_refs); + binder_put_node(node); break; } case BC_ATTEMPT_ACQUIRE: @@ -2846,7 +2918,8 @@ retry: strong = node->internal_strong_refs || node->local_strong_refs; weak = !hlist_empty(&node->refs) || - node->local_weak_refs || strong; + node->local_weak_refs || + node->tmp_refs || strong; has_strong_ref = node->has_strong_ref; has_weak_ref = node->has_weak_ref; @@ -3358,6 +3431,7 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) new_node->has_strong_ref = 1; new_node->has_weak_ref = 1; context->binder_context_mgr_node = new_node; + binder_put_node(new_node); out: mutex_unlock(&context->context_mgr_node_lock); return ret; @@ -3616,8 +3690,11 @@ static int binder_node_release(struct binder_node *node, int refs) list_del_init(&node->work.entry); binder_release_work(&node->async_todo); - - if (hlist_empty(&node->refs)) { + /* + * The caller must have taken a temporary ref on the node, + */ + BUG_ON(!node->tmp_refs); + if (hlist_empty(&node->refs) && node->tmp_refs == 1) { kfree(node); binder_stats_deleted(BINDER_STAT_NODE); @@ -3652,6 +3729,7 @@ static int binder_node_release(struct binder_node *node, int refs) binder_debug(BINDER_DEBUG_DEAD_BINDER, "node %d now dead, refs %d, death %d\n", node->debug_id, refs, death); + binder_put_node(node); return refs; } @@ -3701,6 +3779,12 @@ static void binder_deferred_release(struct binder_proc *proc) node = rb_entry(n, struct binder_node, rb_node); nodes++; + /* + * take a temporary ref on the node before + * calling binder_node_release() which will either + * kfree() the node or call binder_put_node() + */ + binder_inc_node_tmpref(node); rb_erase(&node->rb_node, &proc->nodes); incoming_refs = binder_node_release(node, incoming_refs); } @@ -3896,11 +3980,11 @@ static void print_binder_node(struct seq_file *m, struct binder_node *node) hlist_for_each_entry(ref, &node->refs, node_entry) count++; - seq_printf(m, " node %d: u%016llx c%016llx hs %d hw %d ls %d lw %d is %d iw %d", + seq_printf(m, " node %d: u%016llx c%016llx hs %d hw %d ls %d lw %d is %d iw %d tr %d", node->debug_id, (u64)node->ptr, (u64)node->cookie, node->has_strong_ref, node->has_weak_ref, node->local_strong_refs, node->local_weak_refs, - node->internal_strong_refs, count); + node->internal_strong_refs, count, node->tmp_refs); if (count) { seq_puts(m, " proc"); hlist_for_each_entry(ref, &node->refs, node_entry) From b0f59d6d045c31a10cc0175fdb34ba5ae067b5b2 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 29 May 2017 16:44:24 -0700 Subject: [PATCH 028/176] FROMLIST: binder: introduce locking helper functions (from https://patchwork.kernel.org/patch/9817791/) There are 3 main spinlocks which must be acquired in this order: 1) proc->outer_lock : protects most fields of binder_proc, binder_thread, and binder_ref structures. binder_proc_lock() and binder_proc_unlock() are used to acq/rel. 2) node->lock : protects most fields of binder_node. binder_node_lock() and binder_node_unlock() are used to acq/rel 3) proc->inner_lock : protects the thread and node lists (proc->threads, proc->nodes) and all todo lists associated with the binder_proc (proc->todo, thread->todo, proc->delivered_death and node->async_todo). binder_inner_proc_lock() and binder_inner_proc_unlock() are used to acq/rel Any lock under procA must never be nested under any lock at the same level or below on procB. 
Functions that require a lock held on entry indicate which lock in the suffix of the function name: foo_olocked() : requires node->outer_lock foo_nlocked() : requires node->lock foo_ilocked() : requires proc->inner_lock foo_iolocked(): requires proc->outer_lock and proc->inner_lock foo_nilocked(): requires node->lock and proc->inner_lock Change-Id: Ied42674486092a0e3bdde64356e45b2494844558 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 238 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 4c636da38b2b..86eec8db3b4f 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -15,6 +15,39 @@ * */ +/* + * Locking overview + * + * There are 3 main spinlocks which must be acquired in the + * order shown: + * + * 1) proc->outer_lock : protects binder_ref + * binder_proc_lock() and binder_proc_unlock() are + * used to acq/rel. + * 2) node->lock : protects most fields of binder_node. + * binder_node_lock() and binder_node_unlock() are + * used to acq/rel + * 3) proc->inner_lock : protects the thread and node lists + * (proc->threads, proc->nodes) and all todo lists associated + * with the binder_proc (proc->todo, thread->todo, + * proc->delivered_death and node->async_todo). + * binder_inner_proc_lock() and binder_inner_proc_unlock() + * are used to acq/rel + * + * Any lock under procA must never be nested under any lock at the same + * level or below on procB. + * + * Functions that require a lock held on entry indicate which lock + * in the suffix of the function name: + * + * foo_olocked() : requires node->outer_lock + * foo_nlocked() : requires node->lock + * foo_ilocked() : requires proc->inner_lock + * foo_oilocked(): requires proc->outer_lock and proc->inner_lock + * foo_nilocked(): requires node->lock and proc->inner_lock + * ... 
+ */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include @@ -35,6 +68,7 @@ #include #include #include +#include #ifdef CONFIG_ANDROID_BINDER_IPC_32BIT #define BINDER_IPC_32BIT 1 @@ -106,6 +140,7 @@ enum { BINDER_DEBUG_FREE_BUFFER = 1U << 11, BINDER_DEBUG_INTERNAL_REFS = 1U << 12, BINDER_DEBUG_PRIORITY_CAP = 1U << 13, + BINDER_DEBUG_SPINLOCKS = 1U << 14, }; static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR | BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION; @@ -262,8 +297,43 @@ struct binder_error { uint32_t cmd; }; +/** + * struct binder_node - binder node bookkeeping + * @debug_id: unique ID for debugging + * (invariant after initialized) + * @lock: lock for node fields + * @work: worklist element for node work + * @rb_node: element for proc->nodes tree + * @dead_node: element for binder_dead_nodes list + * (protected by binder_dead_nodes_lock) + * @proc: binder_proc that owns this node + * (invariant after initialized) + * @refs: list of references on this node + * @internal_strong_refs: used to take strong references when + * initiating a transaction + * @local_weak_refs: weak user refs from local process + * @local_strong_refs: strong user refs from local process + * @tmp_refs: temporary kernel refs + * @ptr: userspace pointer for node + * (invariant, no lock needed) + * @cookie: userspace cookie for node + * (invariant, no lock needed) + * @has_strong_ref: userspace notified of strong ref + * @pending_strong_ref: userspace has acked notification of strong ref + * @has_weak_ref: userspace notified of weak ref + * @pending_weak_ref: userspace has acked notification of weak ref + * @has_async_transaction: async transaction to node in progress + * @accept_fds: file descriptor operations supported for node + * (invariant after initialized) + * @min_priority: minimum scheduling priority + * (invariant after initialized) + * @async_todo: list of async work items + * + * Bookkeeping structure for binder nodes. + */ struct binder_node { int debug_id; + spinlock_t lock; struct binder_work work; union { struct rb_node rb_node; @@ -346,6 +416,51 @@ enum binder_deferred_state { BINDER_DEFERRED_RELEASE = 0x04, }; +/** + * struct binder_proc - binder process bookkeeping + * @proc_node: element for binder_procs list + * @threads: rbtree of binder_threads in this proc + * @nodes: rbtree of binder nodes associated with + * this proc ordered by node->ptr + * @refs_by_desc: rbtree of refs ordered by ref->desc + * @refs_by_node: rbtree of refs ordered by ref->node + * @pid PID of group_leader of process + * (invariant after initialized) + * @tsk task_struct for group_leader of process + * (invariant after initialized) + * @files files_struct for process + * (invariant after initialized) + * @deferred_work_node: element for binder_deferred_list + * (protected by binder_deferred_lock) + * @deferred_work: bitmap of deferred work to perform + * (protected by binder_deferred_lock) + * @is_dead: process is dead and awaiting free + * when outstanding transactions are cleaned up + * @todo: list of work for this process + * @wait: wait queue head to wait for proc work + * (invariant after initialized) + * @stats: per-process binder statistics + * (atomics, no lock needed) + * @delivered_death: list of delivered death notification + * @max_threads: cap on number of binder threads + * @requested_threads: number of binder threads requested but not + * yet started. In current implementation, can + * only be 0 or 1. 
+ * @requested_threads_started: number binder threads started + * @ready_threads: number of threads waiting for proc work + * @tmp_ref: temporary reference to indicate proc is in use + * @default_priority: default scheduler priority + * (invariant after initialized) + * @debugfs_entry: debugfs node + * @alloc: binder allocator bookkeeping + * @context: binder_context for this proc + * (invariant after initialized) + * @inner_lock: can nest under outer_lock and/or node lock + * @outer_lock: no nesting under innor or node lock + * Lock order: 1) outer, 2) node, 3) inner + * + * Bookkeeping structure for binder processes + */ struct binder_proc { struct hlist_node proc_node; struct rb_root threads; @@ -372,6 +487,8 @@ struct binder_proc { struct dentry *debugfs_entry; struct binder_alloc alloc; struct binder_context *context; + spinlock_t inner_lock; + spinlock_t outer_lock; }; enum { @@ -382,6 +499,33 @@ enum { BINDER_LOOPER_STATE_WAITING = 0x10, }; +/** + * struct binder_thread - binder thread bookkeeping + * @proc: binder process for this thread + * (invariant after initialization) + * @rb_node: element for proc->threads rbtree + * @pid: PID for this thread + * (invariant after initialization) + * @looper: bitmap of looping state + * (only accessed by this thread) + * @looper_needs_return: looping thread needs to exit driver + * (no lock needed) + * @transaction_stack: stack of in-progress transactions for this thread + * @todo: list of work to do for this thread + * @return_error: transaction errors reported by this thread + * (only accessed by this thread) + * @reply_error: transaction errors reported by target thread + * @wait: wait queue for thread work + * @stats: per-thread statistics + * (atomics, no lock needed) + * @tmp_ref: temporary reference to indicate thread is in use + * (atomic since @proc->inner_lock cannot + * always be acquired) + * @is_dead: thread is dead and awaiting free + * when outstanding transactions are cleaned up + * + * Bookkeeping structure for binder threads. + */ struct binder_thread { struct binder_proc *proc; struct rb_node rb_node; @@ -424,6 +568,97 @@ struct binder_transaction { spinlock_t lock; }; +/** + * binder_proc_lock() - Acquire outer lock for given binder_proc + * @proc: struct binder_proc to acquire + * + * Acquires proc->outer_lock. Used to protect binder_ref + * structures associated with the given proc. + */ +#define binder_proc_lock(proc) _binder_proc_lock(proc, __LINE__) +static void +_binder_proc_lock(struct binder_proc *proc, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_lock(&proc->outer_lock); +} + +/** + * binder_proc_unlock() - Release spinlock for given binder_proc + * @proc: struct binder_proc to acquire + * + * Release lock acquired via binder_proc_lock() + */ +#define binder_proc_unlock(_proc) _binder_proc_unlock(_proc, __LINE__) +static void +_binder_proc_unlock(struct binder_proc *proc, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_unlock(&proc->outer_lock); +} + +/** + * binder_inner_proc_lock() - Acquire inner lock for given binder_proc + * @proc: struct binder_proc to acquire + * + * Acquires proc->inner_lock. 
Used to protect todo lists + */ +#define binder_inner_proc_lock(proc) _binder_inner_proc_lock(proc, __LINE__) +static void +_binder_inner_proc_lock(struct binder_proc *proc, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_lock(&proc->inner_lock); +} + +/** + * binder_inner_proc_unlock() - Release inner lock for given binder_proc + * @proc: struct binder_proc to acquire + * + * Release lock acquired via binder_inner_proc_lock() + */ +#define binder_inner_proc_unlock(proc) _binder_inner_proc_unlock(proc, __LINE__) +static void +_binder_inner_proc_unlock(struct binder_proc *proc, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_unlock(&proc->inner_lock); +} + +/** + * binder_node_lock() - Acquire spinlock for given binder_node + * @node: struct binder_node to acquire + * + * Acquires node->lock. Used to protect binder_node fields + */ +#define binder_node_lock(node) _binder_node_lock(node, __LINE__) +static void +_binder_node_lock(struct binder_node *node, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_lock(&node->lock); +} + +/** + * binder_node_unlock() - Release spinlock for given binder_proc + * @node: struct binder_node to acquire + * + * Release lock acquired via binder_node_lock() + */ +#define binder_node_unlock(node) _binder_node_unlock(node, __LINE__) +static void +_binder_node_unlock(struct binder_node *node, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_unlock(&node->lock); +} + static void binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); static void binder_free_thread(struct binder_thread *thread); @@ -568,6 +803,7 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, node->ptr = ptr; node->cookie = cookie; node->work.type = BINDER_WORK_NODE; + spin_lock_init(&node->lock); INIT_LIST_HEAD(&node->work.entry); INIT_LIST_HEAD(&node->async_todo); binder_debug(BINDER_DEBUG_INTERNAL_REFS, @@ -3600,6 +3836,8 @@ static int binder_open(struct inode *nodp, struct file *filp) proc = kzalloc(sizeof(*proc), GFP_KERNEL); if (proc == NULL) return -ENOMEM; + spin_lock_init(&proc->inner_lock); + spin_lock_init(&proc->outer_lock); get_task_struct(current->group_leader); proc->tsk = current->group_leader; INIT_LIST_HEAD(&proc->todo); From f73f378b52e81acdb9654a5bf81661d9ae461e62 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Tue, 21 Mar 2017 13:06:01 -0700 Subject: [PATCH 029/176] FROMLIST: binder: use inner lock to sync work dq and node counts (from https://patchwork.kernel.org/patch/9817789/) For correct behavior we need to hold the inner lock when dequeuing and processing node work in binder_thread_read. We now hold the inner lock when we enter the switch statement and release it after processing anything that might be affected by other threads. We also need to hold the inner lock to protect the node weak/strong ref tracking fields as long as node->proc is non-NULL (if it is NULL then we are guaranteed that we don't have any node work queued). This means that other functions that manipulate these fields must hold the inner lock. Refactored these functions to use the inner lock. 
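The resulting wrapper pattern, sketched (node->proc can be NULL once
the owning process has exited, in which case there is no inner lock
to take):

	if (node->proc)
		binder_inner_proc_lock(node->proc);
	/* ... call the _ilocked variant to adjust the ref counts ... */
	if (node->proc)
		binder_inner_proc_unlock(node->proc);
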
Change-Id: I02c5cfdd3ab6dadea7f07f2a275faf3e27be77ad Test: tested manually Signed-off-by: Todd Kjos --- drivers/android/binder.c | 253 +++++++++++++++++++++++++++++++-------- 1 file changed, 200 insertions(+), 53 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 86eec8db3b4f..0bc8fe034234 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -311,17 +311,36 @@ struct binder_error { * @refs: list of references on this node * @internal_strong_refs: used to take strong references when * initiating a transaction + * (protected by @proc->inner_lock if @proc + * and by @lock) * @local_weak_refs: weak user refs from local process + * (protected by @proc->inner_lock if @proc + * and by @lock) * @local_strong_refs: strong user refs from local process + * (protected by @proc->inner_lock if @proc + * and by @lock) * @tmp_refs: temporary kernel refs + * (protected by @proc->inner_lock while @proc + * is valid, and by binder_dead_nodes_lock + * if @proc is NULL. During inc/dec and node release + * it is also protected by @lock to provide safety + * as the node dies and @proc becomes NULL) * @ptr: userspace pointer for node * (invariant, no lock needed) * @cookie: userspace cookie for node * (invariant, no lock needed) * @has_strong_ref: userspace notified of strong ref + * (protected by @proc->inner_lock if @proc + * and by @lock) * @pending_strong_ref: userspace has acked notification of strong ref + * (protected by @proc->inner_lock if @proc + * and by @lock) * @has_weak_ref: userspace notified of weak ref + * (protected by @proc->inner_lock if @proc + * and by @lock) * @pending_weak_ref: userspace has acked notification of weak ref + * (protected by @proc->inner_lock if @proc + * and by @lock) * @has_async_transaction: async transaction to node in progress * @accept_fds: file descriptor operations supported for node * (invariant after initialized) @@ -347,13 +366,24 @@ struct binder_node { int tmp_refs; binder_uintptr_t ptr; binder_uintptr_t cookie; - unsigned has_strong_ref:1; - unsigned pending_strong_ref:1; - unsigned has_weak_ref:1; - unsigned pending_weak_ref:1; - unsigned has_async_transaction:1; - unsigned accept_fds:1; - unsigned min_priority:8; + struct { + /* + * bitfield elements protected by + * proc inner_lock + */ + u8 has_strong_ref:1; + u8 pending_strong_ref:1; + u8 has_weak_ref:1; + u8 pending_weak_ref:1; + }; + struct { + /* + * invariant after initialization + */ + u8 accept_fds:1; + u8 min_priority; + }; + bool has_async_transaction; struct list_head async_todo; }; @@ -813,9 +843,18 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, return node; } -static int binder_inc_node(struct binder_node *node, int strong, int internal, - struct list_head *target_list) +static void binder_free_node(struct binder_node *node) { + kfree(node); + binder_stats_deleted(BINDER_STAT_NODE); +} + +static int binder_inc_node_ilocked(struct binder_node *node, int strong, + int internal, + struct list_head *target_list) +{ + if (node->proc) + BUG_ON(!spin_is_locked(&node->proc->inner_lock)); if (strong) { if (internal) { if (target_list == NULL && @@ -850,23 +889,43 @@ static int binder_inc_node(struct binder_node *node, int strong, int internal, return 0; } -static int binder_dec_node(struct binder_node *node, int strong, int internal) +static int binder_inc_node(struct binder_node *node, int strong, int internal, + struct list_head *target_list) { + int ret; + + if (node->proc) + binder_inner_proc_lock(node->proc); + ret = 
binder_inc_node_ilocked(node, strong, internal, target_list); + if (node->proc) + binder_inner_proc_unlock(node->proc); + + return ret; +} + +static bool binder_dec_node_ilocked(struct binder_node *node, + int strong, int internal) +{ + struct binder_proc *proc = node->proc; + + if (proc) + BUG_ON(!spin_is_locked(&proc->inner_lock)); if (strong) { if (internal) node->internal_strong_refs--; else node->local_strong_refs--; if (node->local_strong_refs || node->internal_strong_refs) - return 0; + return false; } else { if (!internal) node->local_weak_refs--; if (node->local_weak_refs || node->tmp_refs || !hlist_empty(&node->refs)) - return 0; + return false; } - if (node->proc && (node->has_strong_ref || node->has_weak_ref)) { + + if (proc && (node->has_strong_ref || node->has_weak_ref)) { if (list_empty(&node->work.entry)) { list_add_tail(&node->work.entry, &node->proc->todo); wake_up_interruptible(&node->proc->wait); @@ -875,35 +934,48 @@ static int binder_dec_node(struct binder_node *node, int strong, int internal) if (hlist_empty(&node->refs) && !node->local_strong_refs && !node->local_weak_refs && !node->tmp_refs) { list_del_init(&node->work.entry); - if (node->proc) { + if (proc) { rb_erase(&node->rb_node, &node->proc->nodes); binder_debug(BINDER_DEBUG_INTERNAL_REFS, "refless node %d deleted\n", node->debug_id); } else { spin_lock(&binder_dead_nodes_lock); + /* + * tmp_refs could have changed so + * check it again + */ + if (node->tmp_refs) { + spin_unlock(&binder_dead_nodes_lock); + return false; + } hlist_del(&node->dead_node); spin_unlock(&binder_dead_nodes_lock); binder_debug(BINDER_DEBUG_INTERNAL_REFS, "dead node %d deleted\n", node->debug_id); } - kfree(node); - binder_stats_deleted(BINDER_STAT_NODE); + return true; } } - - return 0; + return false; } -/** - * binder_inc_node_tmpref() - take a temporary reference on node - * @node: node to reference - * - * Take reference on node to prevent the node from being freed - * while referenced only by a local variable - */ -static void binder_inc_node_tmpref(struct binder_node *node) +static void binder_dec_node(struct binder_node *node, int strong, int internal) +{ + bool free_node; + + if (node->proc) + binder_inner_proc_lock(node->proc); + free_node = binder_dec_node_ilocked(node, strong, internal); + if (node->proc) + binder_inner_proc_unlock(node->proc); + + if (free_node) + binder_free_node(node); +} + +static void binder_inc_node_tmpref_ilocked(struct binder_node *node) { /* * No call to binder_inc_node() is needed since we @@ -913,6 +985,32 @@ static void binder_inc_node_tmpref(struct binder_node *node) node->tmp_refs++; } +/** + * binder_inc_node_tmpref() - take a temporary reference on node + * @node: node to reference + * + * Take reference on node to prevent the node from being freed + * while referenced only by a local variable. The inner lock is + * needed to serialize with the node work on the queue (which + * isn't needed after the node is dead). 
If the node is dead + * (node->proc is NULL), use binder_dead_nodes_lock to protect + * node->tmp_refs against dead-node-only cases where the node + * lock cannot be acquired (eg traversing the dead node list to + * print nodes) + */ +static void binder_inc_node_tmpref(struct binder_node *node) +{ + if (node->proc) + binder_inner_proc_lock(node->proc); + else + spin_lock(&binder_dead_nodes_lock); + binder_inc_node_tmpref_ilocked(node); + if (node->proc) + binder_inner_proc_unlock(node->proc); + else + spin_unlock(&binder_dead_nodes_lock); +} + /** * binder_dec_node_tmpref() - remove a temporary reference on node * @node: node to reference @@ -921,15 +1019,27 @@ static void binder_inc_node_tmpref(struct binder_node *node) */ static void binder_dec_node_tmpref(struct binder_node *node) { + bool free_node; + + if (node->proc) + binder_inner_proc_lock(node->proc); + else + spin_lock(&binder_dead_nodes_lock); node->tmp_refs--; BUG_ON(node->tmp_refs < 0); + if (!node->proc) + spin_unlock(&binder_dead_nodes_lock); /* * Call binder_dec_node() to check if all refcounts are 0 * and cleanup is needed. Calling with strong=0 and internal=1 * causes no actual reference to be released in binder_dec_node(). * If that changes, a change is needed here too. */ - binder_dec_node(node, 0, 1); + free_node = binder_dec_node_ilocked(node, 0, 1); + if (node->proc) + binder_inner_proc_unlock(node->proc); + if (free_node) + binder_free_node(node); } static void binder_put_node(struct binder_node *node) @@ -1042,6 +1152,9 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, static void binder_cleanup_ref(struct binder_ref *ref) { + bool delete_node = false; + struct binder_proc *node_proc = ref->node->proc; + binder_debug(BINDER_DEBUG_INTERNAL_REFS, "%d delete ref %d desc %d for node %d\n", ref->proc->pid, ref->data.debug_id, ref->data.desc, @@ -1050,11 +1163,26 @@ static void binder_cleanup_ref(struct binder_ref *ref) rb_erase(&ref->rb_node_desc, &ref->proc->refs_by_desc); rb_erase(&ref->rb_node_node, &ref->proc->refs_by_node); + if (node_proc) + binder_inner_proc_lock(node_proc); if (ref->data.strong) - binder_dec_node(ref->node, 1, 1); + binder_dec_node_ilocked(ref->node, 1, 1); hlist_del(&ref->node_entry); - binder_dec_node(ref->node, 0, 1); + delete_node = binder_dec_node_ilocked(ref->node, 0, 1); + if (node_proc) + binder_inner_proc_unlock(node_proc); + /* + * Clear ref->node unless we want the caller to free the node + */ + if (!delete_node) { + /* + * The caller uses ref->node to determine + * whether the node needs to be freed. Clear + * it since the node is still alive. + */ + ref->node = NULL; + } if (ref->death) { binder_debug(BINDER_DEBUG_DEAD_BINDER, @@ -1123,13 +1251,8 @@ static bool binder_dec_ref(struct binder_ref *ref, int strong) return false; } ref->data.strong--; - if (ref->data.strong == 0) { - int ret; - - ret = binder_dec_node(ref->node, strong, 1); - if (ret) - return false; - } + if (ref->data.strong == 0) + binder_dec_node(ref->node, strong, 1); } else { if (ref->data.weak == 0) { binder_user_error("%d invalid dec weak, ref %d desc %d s %d w %d\n", @@ -1194,10 +1317,13 @@ err_no_ref: * binder_free_ref() - free the binder_ref * @ref: ref to free * - * Free the binder_ref and the binder_ref_death indicated by ref->death. + * Free the binder_ref. Free the binder_node indicated by ref->node + * (if non-NULL) and the binder_ref_death indicated by ref->death. 
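+ * (Callers pair this with binder_cleanup_ref(), which unhooks the
+ * ref from its proc and node before it is freed.)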
*/ static void binder_free_ref(struct binder_ref *ref) { + if (ref->node) + binder_free_node(ref->node); kfree(ref->death); kfree(ref); } @@ -2688,11 +2814,13 @@ static int binder_thread_write(struct binder_proc *proc, binder_put_node(node); break; } + binder_inner_proc_lock(proc); if (cmd == BC_ACQUIRE_DONE) { if (node->pending_strong_ref == 0) { binder_user_error("%d:%d BC_ACQUIRE_DONE node %d has no pending acquire request\n", proc->pid, thread->pid, node->debug_id); + binder_inner_proc_unlock(proc); binder_put_node(node); break; } @@ -2702,11 +2830,13 @@ static int binder_thread_write(struct binder_proc *proc, binder_user_error("%d:%d BC_INCREFS_DONE node %d has no pending increfs request\n", proc->pid, thread->pid, node->debug_id); + binder_inner_proc_unlock(proc); binder_put_node(node); break; } node->pending_weak_ref = 0; } + binder_inner_proc_unlock(proc); binder_dec_node(node, cmd == BC_ACQUIRE_DONE, 0); binder_debug(BINDER_DEBUG_USER_REFS, "%d:%d %s node %d ls %d lw %d tr %d\n", @@ -3092,6 +3222,7 @@ retry: struct binder_transaction *t = NULL; struct binder_thread *t_from; + binder_inner_proc_lock(proc); if (!list_empty(&thread->todo)) { w = list_first_entry(&thread->todo, struct binder_work, entry); @@ -3105,11 +3236,15 @@ retry: break; } - if (end - ptr < sizeof(tr) + 4) + if (end - ptr < sizeof(tr) + 4) { + binder_inner_proc_unlock(proc); break; + } + list_del_init(&w->entry); switch (w->type) { case BINDER_WORK_TRANSACTION: { + binder_inner_proc_unlock(proc); t = container_of(w, struct binder_transaction, work); } break; case BINDER_WORK_RETURN_ERROR: { @@ -3117,15 +3252,16 @@ retry: w, struct binder_error, work); WARN_ON(e->cmd == BR_OK); + binder_inner_proc_unlock(proc); if (put_user(e->cmd, (uint32_t __user *)ptr)) return -EFAULT; e->cmd = BR_OK; ptr += sizeof(uint32_t); binder_stat_br(proc, thread, cmd); - list_del(&w->entry); } break; case BINDER_WORK_TRANSACTION_COMPLETE: { + binder_inner_proc_unlock(proc); cmd = BR_TRANSACTION_COMPLETE; if (put_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; @@ -3135,8 +3271,6 @@ retry: binder_debug(BINDER_DEBUG_TRANSACTION_COMPLETE, "%d:%d BR_TRANSACTION_COMPLETE\n", proc->pid, thread->pid); - - list_del(&w->entry); kfree(w); binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); } break; @@ -3173,8 +3307,6 @@ retry: node->has_strong_ref = 0; if (!weak && has_weak_ref) node->has_weak_ref = 0; - list_del(&w->entry); - if (!weak && !strong) { binder_debug(BINDER_DEBUG_INTERNAL_REFS, "%d:%d node %d u%016llx c%016llx deleted\n", @@ -3183,9 +3315,11 @@ retry: (u64)node_ptr, (u64)node_cookie); rb_erase(&node->rb_node, &proc->nodes); - kfree(node); - binder_stats_deleted(BINDER_STAT_NODE); - } + binder_inner_proc_unlock(proc); + binder_free_node(node); + } else + binder_inner_proc_unlock(proc); + if (weak && !has_weak_ref) ret = binder_put_node_cmd( proc, thread, &ptr, node_ptr, @@ -3227,6 +3361,13 @@ retry: cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE; else cmd = BR_DEAD_BINDER; + /* + * TODO: there is a race condition between + * death notification requests and delivery + * of the notifications. This will be handled + * in a later patch. 
+ */ + binder_inner_proc_unlock(proc); if (put_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); @@ -3244,11 +3385,14 @@ retry: (u64)death->cookie); if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) { - list_del(&w->entry); kfree(death); binder_stats_deleted(BINDER_STAT_DEATH); - } else - list_move(&w->entry, &proc->delivered_death); + } else { + binder_inner_proc_lock(proc); + list_add_tail(&w->entry, + &proc->delivered_death); + binder_inner_proc_unlock(proc); + } if (cmd == BR_DEAD_BINDER) goto done; /* DEAD_BINDER notifications can cause transactions */ } break; @@ -3326,7 +3470,6 @@ retry: if (t_from) binder_thread_dec_tmpref(t_from); - list_del(&t->work.entry); t->buffer->allow_user_free = 1; if (cmd == BR_TRANSACTION && !(t->flags & TF_ONE_WAY)) { t->to_parent = thread->transaction_stack; @@ -3925,16 +4068,19 @@ static int binder_node_release(struct binder_node *node, int refs) { struct binder_ref *ref; int death = 0; + struct binder_proc *proc = node->proc; - list_del_init(&node->work.entry); binder_release_work(&node->async_todo); + + binder_inner_proc_lock(proc); + list_del_init(&node->work.entry); /* * The caller must have taken a temporary ref on the node, */ BUG_ON(!node->tmp_refs); if (hlist_empty(&node->refs) && node->tmp_refs == 1) { - kfree(node); - binder_stats_deleted(BINDER_STAT_NODE); + binder_inner_proc_unlock(proc); + binder_free_node(node); return refs; } @@ -3942,6 +4088,7 @@ static int binder_node_release(struct binder_node *node, int refs) node->proc = NULL; node->local_strong_refs = 0; node->local_weak_refs = 0; + binder_inner_proc_unlock(proc); spin_lock(&binder_dead_nodes_lock); hlist_add_head(&node->dead_node, &binder_dead_nodes); From 57628830c30714d7231f7d5757b968184c8a7e3c Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Thu, 20 Oct 2016 10:33:00 -0700 Subject: [PATCH 030/176] FROMLIST: binder: add spinlocks to protect todo lists (from https://patchwork.kernel.org/patch/9817769/) The todo lists in the proc, thread, and node structures are accessed by other procs/threads to place work items on the queue. The todo lists are protected by the new proc->inner_lock. No locks should ever be nested under these locks. As the name suggests, an outer lock will be introduced in a later patch. Change-Id: I7720bacf5ebae4af177e22fcab0900d54c94c11a Signed-off-by: Todd Kjos --- drivers/android/binder.c | 355 +++++++++++++++++++++++++++++---------- 1 file changed, 269 insertions(+), 86 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 0bc8fe034234..36074ce8d1b1 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -279,8 +279,16 @@ struct binder_device { struct binder_context context; }; +/** + * struct binder_work - work enqueued on a worklist + * @entry: node enqueued on list + * @type: type of work to be performed + * + * There are separate work lists for proc, thread, and node (async). 
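+ * All of them are protected by the inner lock of the proc that
+ * owns the list.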
+ */ struct binder_work { struct list_head entry; + enum { BINDER_WORK_TRANSACTION = 1, BINDER_WORK_TRANSACTION_COMPLETE, @@ -303,6 +311,7 @@ struct binder_error { * (invariant after initialized) * @lock: lock for node fields * @work: worklist element for node work + * (protected by @proc->inner_lock) * @rb_node: element for proc->nodes tree * @dead_node: element for binder_dead_nodes list * (protected by binder_dead_nodes_lock) @@ -347,6 +356,7 @@ struct binder_error { * @min_priority: minimum scheduling priority * (invariant after initialized) * @async_todo: list of async work items + * (protected by @proc->inner_lock) * * Bookkeeping structure for binder nodes. */ @@ -388,6 +398,11 @@ struct binder_node { }; struct binder_ref_death { + /** + * @work: worklist element for death notifications + * (protected by inner_lock of the proc that + * this ref belongs to) + */ struct binder_work work; binder_uintptr_t cookie; }; @@ -467,11 +482,13 @@ enum binder_deferred_state { * @is_dead: process is dead and awaiting free * when outstanding transactions are cleaned up * @todo: list of work for this process + * (protected by @inner_lock) * @wait: wait queue head to wait for proc work * (invariant after initialized) * @stats: per-process binder statistics * (atomics, no lock needed) * @delivered_death: list of delivered death notification + * (protected by @inner_lock) * @max_threads: cap on number of binder threads * @requested_threads: number of binder threads requested but not * yet started. In current implementation, can @@ -542,6 +559,7 @@ enum { * (no lock needed) * @transaction_stack: stack of in-progress transactions for this thread * @todo: list of work to do for this thread + * (protected by @proc->inner_lock) * @return_error: transaction errors reported by this thread * (only accessed by this thread) * @reply_error: transaction errors reported by target thread @@ -689,6 +707,111 @@ _binder_node_unlock(struct binder_node *node, int line) spin_unlock(&node->lock); } +static bool binder_worklist_empty_ilocked(struct list_head *list) +{ + return list_empty(list); +} + +/** + * binder_worklist_empty() - Check if no items on the work list + * @proc: binder_proc associated with list + * @list: list to check + * + * Return: true if there are no items on list, else false + */ +static bool binder_worklist_empty(struct binder_proc *proc, + struct list_head *list) +{ + bool ret; + + binder_inner_proc_lock(proc); + ret = binder_worklist_empty_ilocked(list); + binder_inner_proc_unlock(proc); + return ret; +} + +static void +binder_enqueue_work_ilocked(struct binder_work *work, + struct list_head *target_list) +{ + BUG_ON(target_list == NULL); + BUG_ON(work->entry.next && !list_empty(&work->entry)); + list_add_tail(&work->entry, target_list); +} + +/** + * binder_enqueue_work() - Add an item to the work list + * @proc: binder_proc associated with list + * @work: struct binder_work to add to list + * @target_list: list to add work to + * + * Adds the work to the specified list. Asserts that work + * is not already on a list. 
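+ * Takes and releases @proc's inner lock around the list update;
+ * callers already holding the lock use
+ * binder_enqueue_work_ilocked() instead.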
+ */ +static void +binder_enqueue_work(struct binder_proc *proc, + struct binder_work *work, + struct list_head *target_list) +{ + binder_inner_proc_lock(proc); + binder_enqueue_work_ilocked(work, target_list); + binder_inner_proc_unlock(proc); +} + +static void +binder_dequeue_work_ilocked(struct binder_work *work) +{ + list_del_init(&work->entry); +} + +/** + * binder_dequeue_work() - Removes an item from the work list + * @proc: binder_proc associated with list + * @work: struct binder_work to remove from list + * + * Removes the specified work item from whatever list it is on. + * Can safely be called if work is not on any list. + */ +static void +binder_dequeue_work(struct binder_proc *proc, struct binder_work *work) +{ + binder_inner_proc_lock(proc); + binder_dequeue_work_ilocked(work); + binder_inner_proc_unlock(proc); +} + +static struct binder_work *binder_dequeue_work_head_ilocked( + struct list_head *list) +{ + struct binder_work *w; + + w = list_first_entry_or_null(list, struct binder_work, entry); + if (w) + list_del_init(&w->entry); + return w; +} + +/** + * binder_dequeue_work_head() - Dequeues the item at head of list + * @proc: binder_proc associated with list + * @list: list to dequeue head + * + * Removes the head of the list if there are items on the list + * + * Return: pointer dequeued binder_work, NULL if list was empty + */ +static struct binder_work *binder_dequeue_work_head( + struct binder_proc *proc, + struct list_head *list) +{ + struct binder_work *w; + + binder_inner_proc_lock(proc); + w = binder_dequeue_work_head_ilocked(list); + binder_inner_proc_unlock(proc); + return w; +} + static void binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); static void binder_free_thread(struct binder_thread *thread); @@ -871,8 +994,8 @@ static int binder_inc_node_ilocked(struct binder_node *node, int strong, } else node->local_strong_refs++; if (!node->has_strong_ref && target_list) { - list_del_init(&node->work.entry); - list_add_tail(&node->work.entry, target_list); + binder_dequeue_work_ilocked(&node->work); + binder_enqueue_work_ilocked(&node->work, target_list); } } else { if (!internal) @@ -883,7 +1006,7 @@ static int binder_inc_node_ilocked(struct binder_node *node, int strong, node->debug_id); return -EINVAL; } - list_add_tail(&node->work.entry, target_list); + binder_enqueue_work_ilocked(&node->work, target_list); } } return 0; @@ -927,19 +1050,20 @@ static bool binder_dec_node_ilocked(struct binder_node *node, if (proc && (node->has_strong_ref || node->has_weak_ref)) { if (list_empty(&node->work.entry)) { - list_add_tail(&node->work.entry, &node->proc->todo); + binder_enqueue_work_ilocked(&node->work, &proc->todo); wake_up_interruptible(&node->proc->wait); } } else { if (hlist_empty(&node->refs) && !node->local_strong_refs && !node->local_weak_refs && !node->tmp_refs) { - list_del_init(&node->work.entry); if (proc) { - rb_erase(&node->rb_node, &node->proc->nodes); + binder_dequeue_work_ilocked(&node->work); + rb_erase(&node->rb_node, &proc->nodes); binder_debug(BINDER_DEBUG_INTERNAL_REFS, "refless node %d deleted\n", node->debug_id); } else { + BUG_ON(!list_empty(&node->work.entry)); spin_lock(&binder_dead_nodes_lock); /* * tmp_refs could have changed so @@ -1189,7 +1313,7 @@ static void binder_cleanup_ref(struct binder_ref *ref) "%d delete ref %d desc %d has death notification\n", ref->proc->pid, ref->data.debug_id, ref->data.desc); - list_del(&ref->death->work.entry); + binder_dequeue_work(ref->proc, &ref->death->work); 
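+		/*
+		 * The dequeue above takes ref->proc's inner lock, so it
+		 * is safe even if the death work is still sitting on a
+		 * todo list at this point.
+		 */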
binder_stats_deleted(BINDER_STAT_DEATH); } binder_stats_deleted(BINDER_STAT_REF); @@ -1540,8 +1664,9 @@ static void binder_send_failed_reply(struct binder_transaction *t, binder_pop_transaction(target_thread, t); if (target_thread->reply_error.cmd == BR_OK) { target_thread->reply_error.cmd = error_code; - list_add_tail( - &target_thread->reply_error.work.entry, + binder_enqueue_work( + target_thread->proc, + &target_thread->reply_error.work, &target_thread->todo); wake_up_interruptible(&target_thread->wait); } else { @@ -2579,7 +2704,7 @@ static void binder_transaction(struct binder_proc *proc, } } tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; - list_add_tail(&tcomplete->entry, &thread->todo); + binder_enqueue_work(proc, tcomplete, &thread->todo); if (reply) { if (target_thread->is_dead) @@ -2610,7 +2735,7 @@ static void binder_transaction(struct binder_proc *proc, goto err_dead_proc_or_thread; } t->work.type = BINDER_WORK_TRANSACTION; - list_add_tail(&t->work.entry, target_list); + binder_enqueue_work(target_proc, &t->work, target_list); if (target_wait) { if (reply || !(tr->flags & TF_ONE_WAY)) wake_up_interruptible_sync(target_wait); @@ -2686,13 +2811,15 @@ err_no_context_mgr_node: BUG_ON(thread->return_error.cmd != BR_OK); if (in_reply_to) { thread->return_error.cmd = BR_TRANSACTION_COMPLETE; - list_add_tail(&thread->return_error.work.entry, - &thread->todo); + binder_enqueue_work(thread->proc, + &thread->return_error.work, + &thread->todo); binder_send_failed_reply(in_reply_to, return_error); } else { thread->return_error.cmd = return_error; - list_add_tail(&thread->return_error.work.entry, - &thread->todo); + binder_enqueue_work(thread->proc, + &thread->return_error.work, + &thread->todo); } } @@ -2885,11 +3012,21 @@ static int binder_thread_write(struct binder_proc *proc, buffer->transaction = NULL; } if (buffer->async_transaction && buffer->target_node) { - BUG_ON(!buffer->target_node->has_async_transaction); - if (list_empty(&buffer->target_node->async_todo)) - buffer->target_node->has_async_transaction = 0; + struct binder_node *buf_node; + struct binder_work *w; + + buf_node = buffer->target_node; + BUG_ON(!buf_node->has_async_transaction); + BUG_ON(buf_node->proc != proc); + binder_inner_proc_lock(proc); + w = binder_dequeue_work_head_ilocked( + &buf_node->async_todo); + if (!w) + buf_node->has_async_transaction = 0; else - list_move_tail(buffer->target_node->async_todo.next, &thread->todo); + binder_enqueue_work_ilocked( + w, &thread->todo); + binder_inner_proc_unlock(proc); } trace_binder_transaction_buffer_release(buffer); binder_transaction_buffer_release(proc, buffer, NULL); @@ -3001,9 +3138,10 @@ static int binder_thread_write(struct binder_proc *proc, WARN_ON(thread->return_error.cmd != BR_OK); thread->return_error.cmd = BR_ERROR; - list_add_tail( - &thread->return_error.work.entry, - &thread->todo); + binder_enqueue_work( + thread->proc, + &thread->return_error.work, + &thread->todo); binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n", proc->pid, thread->pid); @@ -3015,11 +3153,20 @@ static int binder_thread_write(struct binder_proc *proc, ref->death = death; if (ref->node->proc == NULL) { ref->death->work.type = BINDER_WORK_DEAD_BINDER; - if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) { - list_add_tail(&ref->death->work.entry, &thread->todo); - } else { - list_add_tail(&ref->death->work.entry, &proc->todo); - wake_up_interruptible(&proc->wait); + if (thread->looper & + 
(BINDER_LOOPER_STATE_REGISTERED | + BINDER_LOOPER_STATE_ENTERED)) + binder_enqueue_work( + proc, + &ref->death->work, + &thread->todo); + else { + binder_enqueue_work( + proc, + &ref->death->work, + &proc->todo); + wake_up_interruptible( + &proc->wait); } } } else { @@ -3037,18 +3184,27 @@ static int binder_thread_write(struct binder_proc *proc, break; } ref->death = NULL; + binder_inner_proc_lock(proc); if (list_empty(&death->work.entry)) { death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION; - if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) { - list_add_tail(&death->work.entry, &thread->todo); - } else { - list_add_tail(&death->work.entry, &proc->todo); - wake_up_interruptible(&proc->wait); + if (thread->looper & + (BINDER_LOOPER_STATE_REGISTERED | + BINDER_LOOPER_STATE_ENTERED)) + binder_enqueue_work_ilocked( + &death->work, + &thread->todo); + else { + binder_enqueue_work_ilocked( + &death->work, + &proc->todo); + wake_up_interruptible( + &proc->wait); } } else { BUG_ON(death->work.type != BINDER_WORK_DEAD_BINDER); death->work.type = BINDER_WORK_DEAD_BINDER_AND_CLEAR; } + binder_inner_proc_unlock(proc); } } break; case BC_DEAD_BINDER_DONE: { @@ -3060,8 +3216,13 @@ static int binder_thread_write(struct binder_proc *proc, return -EFAULT; ptr += sizeof(cookie); - list_for_each_entry(w, &proc->delivered_death, entry) { - struct binder_ref_death *tmp_death = container_of(w, struct binder_ref_death, work); + binder_inner_proc_lock(proc); + list_for_each_entry(w, &proc->delivered_death, + entry) { + struct binder_ref_death *tmp_death = + container_of(w, + struct binder_ref_death, + work); if (tmp_death->cookie == cookie) { death = tmp_death; @@ -3075,19 +3236,25 @@ static int binder_thread_write(struct binder_proc *proc, if (death == NULL) { binder_user_error("%d:%d BC_DEAD_BINDER_DONE %016llx not found\n", proc->pid, thread->pid, (u64)cookie); + binder_inner_proc_unlock(proc); break; } - - list_del_init(&death->work.entry); + binder_dequeue_work_ilocked(&death->work); if (death->work.type == BINDER_WORK_DEAD_BINDER_AND_CLEAR) { death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION; - if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) { - list_add_tail(&death->work.entry, &thread->todo); - } else { - list_add_tail(&death->work.entry, &proc->todo); + if (thread->looper & + (BINDER_LOOPER_STATE_REGISTERED | + BINDER_LOOPER_STATE_ENTERED)) + binder_enqueue_work_ilocked( + &death->work, &thread->todo); + else { + binder_enqueue_work_ilocked( + &death->work, + &proc->todo); wake_up_interruptible(&proc->wait); } } + binder_inner_proc_unlock(proc); } break; default: @@ -3114,12 +3281,14 @@ static void binder_stat_br(struct binder_proc *proc, static int binder_has_proc_work(struct binder_proc *proc, struct binder_thread *thread) { - return !list_empty(&proc->todo) || thread->looper_need_return; + return !binder_worklist_empty(proc, &proc->todo) || + thread->looper_need_return; } static int binder_has_thread_work(struct binder_thread *thread) { - return !list_empty(&thread->todo) || thread->looper_need_return; + return !binder_worklist_empty(thread->proc, &thread->todo) || + thread->looper_need_return; } static int binder_put_node_cmd(struct binder_proc *proc, @@ -3173,7 +3342,7 @@ static int binder_thread_read(struct binder_proc *proc, retry: wait_for_proc_work = thread->transaction_stack == NULL && - list_empty(&thread->todo); + binder_worklist_empty(proc, &thread->todo); thread->looper |= BINDER_LOOPER_STATE_WAITING; if 
(wait_for_proc_work) @@ -3183,7 +3352,7 @@ retry: trace_binder_wait_for_work(wait_for_proc_work, !!thread->transaction_stack, - !list_empty(&thread->todo)); + !binder_worklist_empty(proc, &thread->todo)); if (wait_for_proc_work) { if (!(thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED))) { @@ -3218,18 +3387,20 @@ retry: while (1) { uint32_t cmd; struct binder_transaction_data tr; - struct binder_work *w; + struct binder_work *w = NULL; + struct list_head *list = NULL; struct binder_transaction *t = NULL; struct binder_thread *t_from; binder_inner_proc_lock(proc); - if (!list_empty(&thread->todo)) { - w = list_first_entry(&thread->todo, struct binder_work, - entry); - } else if (!list_empty(&proc->todo) && wait_for_proc_work) { - w = list_first_entry(&proc->todo, struct binder_work, - entry); - } else { + if (!binder_worklist_empty_ilocked(&thread->todo)) + list = &thread->todo; + else if (!binder_worklist_empty_ilocked(&proc->todo) && + wait_for_proc_work) + list = &proc->todo; + else { + binder_inner_proc_unlock(proc); + /* no data added */ if (ptr - buffer == 4 && !thread->looper_need_return) goto retry; @@ -3240,7 +3411,7 @@ retry: binder_inner_proc_unlock(proc); break; } - list_del_init(&w->entry); + w = binder_dequeue_work_head_ilocked(list); switch (w->type) { case BINDER_WORK_TRANSACTION: { @@ -3389,8 +3560,8 @@ retry: binder_stats_deleted(BINDER_STAT_DEATH); } else { binder_inner_proc_lock(proc); - list_add_tail(&w->entry, - &proc->delivered_death); + binder_enqueue_work_ilocked( + w, &proc->delivered_death); binder_inner_proc_unlock(proc); } if (cmd == BR_DEAD_BINDER) @@ -3500,13 +3671,16 @@ done: return 0; } -static void binder_release_work(struct list_head *list) +static void binder_release_work(struct binder_proc *proc, + struct list_head *list) { struct binder_work *w; - while (!list_empty(list)) { - w = list_first_entry(list, struct binder_work, entry); - list_del_init(&w->entry); + while (1) { + w = binder_dequeue_work_head(proc, list); + if (!w) + return; + switch (w->type) { case BINDER_WORK_TRANSACTION: { struct binder_transaction *t; @@ -3670,7 +3844,7 @@ static int binder_thread_release(struct binder_proc *proc, if (send_reply) binder_send_failed_reply(send_reply, BR_DEAD_REPLY); - binder_release_work(&thread->todo); + binder_release_work(proc, &thread->todo); binder_thread_dec_tmpref(thread); return active_transactions; } @@ -3687,7 +3861,7 @@ static unsigned int binder_poll(struct file *filp, thread = binder_get_thread(proc); wait_for_proc_work = thread->transaction_stack == NULL && - list_empty(&thread->todo); + binder_worklist_empty(proc, &thread->todo); binder_unlock(__func__); @@ -3750,7 +3924,7 @@ static int binder_ioctl_write_read(struct file *filp, &bwr.read_consumed, filp->f_flags & O_NONBLOCK); trace_binder_read_done(ret); - if (!list_empty(&proc->todo)) + if (!binder_worklist_empty(proc, &proc->todo)) wake_up_interruptible(&proc->wait); if (ret < 0) { if (copy_to_user(ubuf, &bwr, sizeof(bwr))) @@ -4070,10 +4244,10 @@ static int binder_node_release(struct binder_node *node, int refs) int death = 0; struct binder_proc *proc = node->proc; - binder_release_work(&node->async_todo); + binder_release_work(proc, &node->async_todo); binder_inner_proc_lock(proc); - list_del_init(&node->work.entry); + binder_dequeue_work_ilocked(&node->work); /* * The caller must have taken a temporary ref on the node, */ @@ -4102,13 +4276,15 @@ static int binder_node_release(struct binder_node *node, int refs) death++; + binder_inner_proc_lock(ref->proc); 
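+			/*
+			 * The list_empty() check and the enqueue below
+			 * must be atomic with respect to ref->proc's todo
+			 * list, hence ref->proc's inner lock.
+			 */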
if (list_empty(&ref->death->work.entry)) { ref->death->work.type = BINDER_WORK_DEAD_BINDER; - list_add_tail(&ref->death->work.entry, - &ref->proc->todo); + binder_enqueue_work_ilocked(&ref->death->work, + &ref->proc->todo); wake_up_interruptible(&ref->proc->wait); } else BUG(); + binder_inner_proc_unlock(ref->proc); } binder_debug(BINDER_DEBUG_DEAD_BINDER, @@ -4184,8 +4360,8 @@ static void binder_deferred_release(struct binder_proc *proc) binder_free_ref(ref); } - binder_release_work(&proc->todo); - binder_release_work(&proc->delivered_death); + binder_release_work(proc, &proc->todo); + binder_release_work(proc, &proc->delivered_death); binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d threads %d, nodes %d (ref %d), refs %d, active transactions %d\n", @@ -4276,9 +4452,9 @@ static void print_binder_transaction(struct seq_file *m, const char *prefix, t->buffer->data); } -static void print_binder_work(struct seq_file *m, const char *prefix, - const char *transaction_prefix, - struct binder_work *w) +static void print_binder_work_ilocked(struct seq_file *m, const char *prefix, + const char *transaction_prefix, + struct binder_work *w) { struct binder_node *node; struct binder_transaction *t; @@ -4319,15 +4495,16 @@ static void print_binder_work(struct seq_file *m, const char *prefix, } } -static void print_binder_thread(struct seq_file *m, - struct binder_thread *thread, - int print_always) +static void print_binder_thread_ilocked(struct seq_file *m, + struct binder_thread *thread, + int print_always) { struct binder_transaction *t; struct binder_work *w; size_t start_pos = m->count; size_t header_pos; + WARN_ON(!spin_is_locked(&thread->proc->inner_lock)); seq_printf(m, " thread %d: l %02x need_return %d tr %d\n", thread->pid, thread->looper, thread->looper_need_return, @@ -4349,7 +4526,8 @@ static void print_binder_thread(struct seq_file *m, } } list_for_each_entry(w, &thread->todo, entry) { - print_binder_work(m, " ", " pending transaction", w); + print_binder_work_ilocked(m, " ", + " pending transaction", w); } if (!print_always && m->count == header_pos) m->count = start_pos; @@ -4376,9 +4554,13 @@ static void print_binder_node(struct seq_file *m, struct binder_node *node) seq_printf(m, " %d", ref->proc->pid); } seq_puts(m, "\n"); - list_for_each_entry(w, &node->async_todo, entry) - print_binder_work(m, " ", - " pending async transaction", w); + if (node->proc) { + binder_inner_proc_lock(node->proc); + list_for_each_entry(w, &node->async_todo, entry) + print_binder_work_ilocked(m, " ", + " pending async transaction", w); + binder_inner_proc_unlock(node->proc); + } } static void print_binder_ref(struct seq_file *m, struct binder_ref *ref) @@ -4402,9 +4584,11 @@ static void print_binder_proc(struct seq_file *m, seq_printf(m, "context %s\n", proc->context->name); header_pos = m->count; + binder_inner_proc_lock(proc); for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) - print_binder_thread(m, rb_entry(n, struct binder_thread, + print_binder_thread_ilocked(m, rb_entry(n, struct binder_thread, rb_node), print_all); + binder_inner_proc_unlock(proc); for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) { struct binder_node *node = rb_entry(n, struct binder_node, rb_node); @@ -4419,12 +4603,14 @@ static void print_binder_proc(struct seq_file *m, rb_node_desc)); } binder_alloc_print_allocated(m, &proc->alloc); + binder_inner_proc_lock(proc); list_for_each_entry(w, &proc->todo, entry) - print_binder_work(m, " ", " pending transaction", w); + print_binder_work_ilocked(m, " ", " pending 
transaction", w); list_for_each_entry(w, &proc->delivered_death, entry) { seq_puts(m, " has delivered dead binder\n"); break; } + binder_inner_proc_unlock(proc); if (!print_all && m->count == header_pos) m->count = start_pos; } @@ -4563,15 +4749,12 @@ static void print_binder_proc_stats(struct seq_file *m, seq_printf(m, " buffers: %d\n", count); count = 0; + binder_inner_proc_lock(proc); list_for_each_entry(w, &proc->todo, entry) { - switch (w->type) { - case BINDER_WORK_TRANSACTION: + if (w->type == BINDER_WORK_TRANSACTION) count++; - break; - default: - break; - } } + binder_inner_proc_unlock(proc); seq_printf(m, " pending transactions: %d\n", count); print_binder_stats(m, " ", &proc->stats); From 14c312e9262393410ed351b04a44057220e31f6a Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Thu, 8 Jun 2017 13:45:59 -0700 Subject: [PATCH 031/176] FROMLIST: binder: add spinlock to protect binder_node (from https://patchwork.kernel.org/patch/9817769/) node->node_lock is used to protect elements of node. No need to acquire for fields that are invariant: debug_id, ptr, cookie. Change-Id: Ib7738e52fa7689767f17136e18cc05ff548b5717 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 220 +++++++++++++++++++++++++++++---------- 1 file changed, 165 insertions(+), 55 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 36074ce8d1b1..2ca40f4d0bb5 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -318,6 +318,7 @@ struct binder_error { * @proc: binder_proc that owns this node * (invariant after initialized) * @refs: list of references on this node + * (protected by @lock) * @internal_strong_refs: used to take strong references when * initiating a transaction * (protected by @proc->inner_lock if @proc @@ -351,6 +352,7 @@ struct binder_error { * (protected by @proc->inner_lock if @proc * and by @lock) * @has_async_transaction: async transaction to node in progress + * (protected by @lock) * @accept_fds: file descriptor operations supported for node * (invariant after initialized) * @min_priority: minimum scheduling priority @@ -432,6 +434,7 @@ struct binder_ref_data { * @rb_node_desc: node for lookup by @data.desc in proc's rb_tree * @rb_node_node: node for lookup by @node in proc's rb_tree * @node_entry: list entry for node->refs list in target node + * (protected by @node->lock) * @proc: binder_proc containing ref * @node: binder_node of target node. When cleaning up a * ref for deletion in binder_cleanup_ref, a non-NULL @@ -707,6 +710,43 @@ _binder_node_unlock(struct binder_node *node, int line) spin_unlock(&node->lock); } +/** + * binder_node_inner_lock() - Acquire node and inner locks + * @node: struct binder_node to acquire + * + * Acquires node->lock. If node->proc also acquires + * proc->inner_lock. 
Used to protect binder_node fields + */ +#define binder_node_inner_lock(node) _binder_node_inner_lock(node, __LINE__) +static void +_binder_node_inner_lock(struct binder_node *node, int line) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_lock(&node->lock); + if (node->proc) + binder_inner_proc_lock(node->proc); +} + +/** + * binder_node_unlock() - Release node and inner locks + * @node: struct binder_node to acquire + * + * Release lock acquired via binder_node_lock() + */ +#define binder_node_inner_unlock(node) _binder_node_inner_unlock(node, __LINE__) +static void +_binder_node_inner_unlock(struct binder_node *node, int line) +{ + struct binder_proc *proc = node->proc; + + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + if (proc) + binder_inner_proc_unlock(proc); + spin_unlock(&node->lock); +} + static bool binder_worklist_empty_ilocked(struct list_head *list) { return list_empty(list); @@ -925,12 +965,14 @@ static struct binder_node *binder_get_node(struct binder_proc *proc, } static struct binder_node *binder_new_node(struct binder_proc *proc, - binder_uintptr_t ptr, - binder_uintptr_t cookie) + struct flat_binder_object *fp) { struct rb_node **p = &proc->nodes.rb_node; struct rb_node *parent = NULL; struct binder_node *node; + binder_uintptr_t ptr = fp ? fp->binder : 0; + binder_uintptr_t cookie = fp ? fp->cookie : 0; + __u32 flags = fp ? fp->flags : 0; while (*p) { parent = *p; @@ -956,6 +998,8 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, node->ptr = ptr; node->cookie = cookie; node->work.type = BINDER_WORK_NODE; + node->min_priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK; + node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); spin_lock_init(&node->lock); INIT_LIST_HEAD(&node->work.entry); INIT_LIST_HEAD(&node->async_todo); @@ -972,12 +1016,15 @@ static void binder_free_node(struct binder_node *node) binder_stats_deleted(BINDER_STAT_NODE); } -static int binder_inc_node_ilocked(struct binder_node *node, int strong, - int internal, - struct list_head *target_list) +static int binder_inc_node_nilocked(struct binder_node *node, int strong, + int internal, + struct list_head *target_list) { - if (node->proc) - BUG_ON(!spin_is_locked(&node->proc->inner_lock)); + struct binder_proc *proc = node->proc; + + BUG_ON(!spin_is_locked(&node->lock)); + if (proc) + BUG_ON(!spin_is_locked(&proc->inner_lock)); if (strong) { if (internal) { if (target_list == NULL && @@ -1017,20 +1064,19 @@ static int binder_inc_node(struct binder_node *node, int strong, int internal, { int ret; - if (node->proc) - binder_inner_proc_lock(node->proc); - ret = binder_inc_node_ilocked(node, strong, internal, target_list); - if (node->proc) - binder_inner_proc_unlock(node->proc); + binder_node_inner_lock(node); + ret = binder_inc_node_nilocked(node, strong, internal, target_list); + binder_node_inner_unlock(node); return ret; } -static bool binder_dec_node_ilocked(struct binder_node *node, - int strong, int internal) +static bool binder_dec_node_nilocked(struct binder_node *node, + int strong, int internal) { struct binder_proc *proc = node->proc; + BUG_ON(!spin_is_locked(&node->lock)); if (proc) BUG_ON(!spin_is_locked(&proc->inner_lock)); if (strong) { @@ -1089,12 +1135,9 @@ static void binder_dec_node(struct binder_node *node, int strong, int internal) { bool free_node; - if (node->proc) - binder_inner_proc_lock(node->proc); - free_node = binder_dec_node_ilocked(node, strong, internal); - if (node->proc) - 
binder_inner_proc_unlock(node->proc); - + binder_node_inner_lock(node); + free_node = binder_dec_node_nilocked(node, strong, internal); + binder_node_inner_unlock(node); if (free_node) binder_free_node(node); } @@ -1124,6 +1167,7 @@ static void binder_inc_node_tmpref_ilocked(struct binder_node *node) */ static void binder_inc_node_tmpref(struct binder_node *node) { + binder_node_lock(node); if (node->proc) binder_inner_proc_lock(node->proc); else @@ -1133,6 +1177,7 @@ static void binder_inc_node_tmpref(struct binder_node *node) binder_inner_proc_unlock(node->proc); else spin_unlock(&binder_dead_nodes_lock); + binder_node_unlock(node); } /** @@ -1145,9 +1190,8 @@ static void binder_dec_node_tmpref(struct binder_node *node) { bool free_node; - if (node->proc) - binder_inner_proc_lock(node->proc); - else + binder_node_inner_lock(node); + if (!node->proc) spin_lock(&binder_dead_nodes_lock); node->tmp_refs--; BUG_ON(node->tmp_refs < 0); @@ -1159,9 +1203,8 @@ static void binder_dec_node_tmpref(struct binder_node *node) * causes no actual reference to be released in binder_dec_node(). * If that changes, a change is needed here too. */ - free_node = binder_dec_node_ilocked(node, 0, 1); - if (node->proc) - binder_inner_proc_unlock(node->proc); + free_node = binder_dec_node_nilocked(node, 0, 1); + binder_node_inner_unlock(node); if (free_node) binder_free_node(node); } @@ -1265,19 +1308,21 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, } rb_link_node(&new_ref->rb_node_desc, parent, p); rb_insert_color(&new_ref->rb_node_desc, &proc->refs_by_desc); + + binder_node_lock(node); hlist_add_head(&new_ref->node_entry, &node->refs); binder_debug(BINDER_DEBUG_INTERNAL_REFS, "%d new ref %d desc %d for node %d\n", proc->pid, new_ref->data.debug_id, new_ref->data.desc, node->debug_id); + binder_node_unlock(node); return new_ref; } static void binder_cleanup_ref(struct binder_ref *ref) { bool delete_node = false; - struct binder_proc *node_proc = ref->node->proc; binder_debug(BINDER_DEBUG_INTERNAL_REFS, "%d delete ref %d desc %d for node %d\n", @@ -1287,15 +1332,13 @@ static void binder_cleanup_ref(struct binder_ref *ref) rb_erase(&ref->rb_node_desc, &ref->proc->refs_by_desc); rb_erase(&ref->rb_node_node, &ref->proc->refs_by_node); - if (node_proc) - binder_inner_proc_lock(node_proc); + binder_node_inner_lock(ref->node); if (ref->data.strong) - binder_dec_node_ilocked(ref->node, 1, 1); + binder_dec_node_nilocked(ref->node, 1, 1); hlist_del(&ref->node_entry); - delete_node = binder_dec_node_ilocked(ref->node, 0, 1); - if (node_proc) - binder_inner_proc_unlock(node_proc); + delete_node = binder_dec_node_nilocked(ref->node, 0, 1); + binder_node_inner_unlock(ref->node); /* * Clear ref->node unless we want the caller to free the node */ @@ -1990,12 +2033,9 @@ static int binder_translate_binder(struct flat_binder_object *fp, node = binder_get_node(proc, fp->binder); if (!node) { - node = binder_new_node(proc, fp->binder, fp->cookie); + node = binder_new_node(proc, fp); if (!node) return -ENOMEM; - - node->min_priority = fp->flags & FLAT_BINDER_FLAG_PRIORITY_MASK; - node->accept_fds = !!(fp->flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); } if (fp->cookie != node->cookie) { binder_user_error("%d:%d sending u%016llx node %d, cookie mismatch %016llx != %016llx\n", @@ -2056,6 +2096,7 @@ static int binder_translate_handle(struct flat_binder_object *fp, goto done; } + binder_node_lock(node); if (node->proc == target_proc) { if (fp->hdr.type == BINDER_TYPE_HANDLE) fp->hdr.type = BINDER_TYPE_BINDER; @@ 
-2063,18 +2104,24 @@ static int binder_translate_handle(struct flat_binder_object *fp, fp->hdr.type = BINDER_TYPE_WEAK_BINDER; fp->binder = node->ptr; fp->cookie = node->cookie; - binder_inc_node(node, - fp->hdr.type == BINDER_TYPE_BINDER, - 0, NULL); + if (node->proc) + binder_inner_proc_lock(node->proc); + binder_inc_node_nilocked(node, + fp->hdr.type == BINDER_TYPE_BINDER, + 0, NULL); + if (node->proc) + binder_inner_proc_unlock(node->proc); trace_binder_transaction_ref_to_node(t, node, &src_rdata); binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d -> node %d u%016llx\n", src_rdata.debug_id, src_rdata.desc, node->debug_id, (u64)node->ptr); + binder_node_unlock(node); } else { int ret; struct binder_ref_data dest_rdata; + binder_node_unlock(node); ret = binder_inc_ref_for_node(target_proc, node, fp->hdr.type == BINDER_TYPE_HANDLE, NULL, &dest_rdata); @@ -2382,13 +2429,16 @@ static void binder_transaction(struct binder_proc *proc, mutex_unlock(&context->context_mgr_node_lock); } e->to_node = target_node->debug_id; + binder_node_lock(target_node); target_proc = target_node->proc; if (target_proc == NULL) { + binder_node_unlock(target_node); return_error = BR_DEAD_REPLY; return_error_line = __LINE__; goto err_dead_binder; } target_proc->tmp_ref++; + binder_node_unlock(target_node); if (security_binder_transaction(proc->tsk, target_proc->tsk) < 0) { return_error = BR_FAILED_REPLY; @@ -2705,6 +2755,7 @@ static void binder_transaction(struct binder_proc *proc, } tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; binder_enqueue_work(proc, tcomplete, &thread->todo); + t->work.type = BINDER_WORK_TRANSACTION; if (reply) { if (target_thread->is_dead) @@ -2712,6 +2763,7 @@ static void binder_transaction(struct binder_proc *proc, BUG_ON(t->buffer->async_transaction != 0); binder_pop_transaction(target_thread, in_reply_to); binder_free_transaction(in_reply_to); + binder_enqueue_work(target_proc, &t->work, target_list); } else if (!(t->flags & TF_ONE_WAY)) { BUG_ON(t->buffer->async_transaction != 0); t->need_reply = 1; @@ -2722,20 +2774,29 @@ static void binder_transaction(struct binder_proc *proc, binder_pop_transaction(thread, t); goto err_dead_proc_or_thread; } + binder_enqueue_work(target_proc, &t->work, target_list); } else { BUG_ON(target_node == NULL); BUG_ON(t->buffer->async_transaction != 1); + binder_node_lock(target_node); if (target_node->has_async_transaction) { target_list = &target_node->async_todo; target_wait = NULL; } else target_node->has_async_transaction = 1; + /* + * Test/set of has_async_transaction + * must be atomic with enqueue on + * async_todo + */ if (target_proc->is_dead || - (target_thread && target_thread->is_dead)) + (target_thread && target_thread->is_dead)) { + binder_node_unlock(target_node); goto err_dead_proc_or_thread; + } + binder_enqueue_work(target_proc, &t->work, target_list); + binder_node_unlock(target_node); } - t->work.type = BINDER_WORK_TRANSACTION; - binder_enqueue_work(target_proc, &t->work, target_list); if (target_wait) { if (reply || !(tr->flags & TF_ONE_WAY)) wake_up_interruptible_sync(target_wait); @@ -2914,6 +2975,7 @@ static int binder_thread_write(struct binder_proc *proc, binder_uintptr_t node_ptr; binder_uintptr_t cookie; struct binder_node *node; + bool free_node; if (get_user(node_ptr, (binder_uintptr_t __user *)ptr)) return -EFAULT; @@ -2941,13 +3003,13 @@ static int binder_thread_write(struct binder_proc *proc, binder_put_node(node); break; } - binder_inner_proc_lock(proc); + binder_node_inner_lock(node); if (cmd == BC_ACQUIRE_DONE) 
{ if (node->pending_strong_ref == 0) { binder_user_error("%d:%d BC_ACQUIRE_DONE node %d has no pending acquire request\n", proc->pid, thread->pid, node->debug_id); - binder_inner_proc_unlock(proc); + binder_node_inner_unlock(node); binder_put_node(node); break; } @@ -2957,20 +3019,22 @@ static int binder_thread_write(struct binder_proc *proc, binder_user_error("%d:%d BC_INCREFS_DONE node %d has no pending increfs request\n", proc->pid, thread->pid, node->debug_id); - binder_inner_proc_unlock(proc); + binder_node_inner_unlock(node); binder_put_node(node); break; } node->pending_weak_ref = 0; } - binder_inner_proc_unlock(proc); - binder_dec_node(node, cmd == BC_ACQUIRE_DONE, 0); + free_node = binder_dec_node_nilocked(node, + cmd == BC_ACQUIRE_DONE, 0); + WARN_ON(free_node); binder_debug(BINDER_DEBUG_USER_REFS, "%d:%d %s node %d ls %d lw %d tr %d\n", proc->pid, thread->pid, cmd == BC_INCREFS_DONE ? "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE", node->debug_id, node->local_strong_refs, node->local_weak_refs, node->tmp_refs); + binder_node_inner_unlock(node); binder_put_node(node); break; } @@ -3016,9 +3080,9 @@ static int binder_thread_write(struct binder_proc *proc, struct binder_work *w; buf_node = buffer->target_node; + binder_node_inner_lock(buf_node); BUG_ON(!buf_node->has_async_transaction); BUG_ON(buf_node->proc != proc); - binder_inner_proc_lock(proc); w = binder_dequeue_work_head_ilocked( &buf_node->async_todo); if (!w) @@ -3026,7 +3090,7 @@ static int binder_thread_write(struct binder_proc *proc, else binder_enqueue_work_ilocked( w, &thread->todo); - binder_inner_proc_unlock(proc); + binder_node_inner_unlock(buf_node); } trace_binder_transaction_buffer_release(buffer); binder_transaction_buffer_release(proc, buffer, NULL); @@ -3151,6 +3215,7 @@ static int binder_thread_write(struct binder_proc *proc, INIT_LIST_HEAD(&death->work.entry); death->cookie = cookie; ref->death = death; + binder_node_lock(ref->node); if (ref->node->proc == NULL) { ref->death->work.type = BINDER_WORK_DEAD_BINDER; if (thread->looper & @@ -3169,10 +3234,13 @@ static int binder_thread_write(struct binder_proc *proc, &proc->wait); } } + binder_node_unlock(ref->node); } else { + binder_node_lock(ref->node); if (ref->death == NULL) { binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification not active\n", proc->pid, thread->pid); + binder_node_unlock(ref->node); break; } death = ref->death; @@ -3181,6 +3249,7 @@ static int binder_thread_write(struct binder_proc *proc, proc->pid, thread->pid, (u64)death->cookie, (u64)cookie); + binder_node_unlock(ref->node); break; } ref->death = NULL; @@ -3205,6 +3274,7 @@ static int binder_thread_write(struct binder_proc *proc, death->work.type = BINDER_WORK_DEAD_BINDER_AND_CLEAR; } binder_inner_proc_unlock(proc); + binder_node_unlock(ref->node); } } break; case BC_DEAD_BINDER_DONE: { @@ -3487,6 +3557,17 @@ retry: (u64)node_cookie); rb_erase(&node->rb_node, &proc->nodes); binder_inner_proc_unlock(proc); + binder_node_lock(node); + /* + * Acquire the node lock before freeing the + * node to serialize with other threads that + * may have been holding the node lock while + * decrementing this node (avoids race where + * this thread frees while the other thread + * is unlocking the node after the final + * decrement) + */ + binder_node_unlock(node); binder_free_node(node); } else binder_inner_proc_unlock(proc); @@ -3974,16 +4055,18 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) } else { context->binder_context_mgr_uid = curr_euid; } - new_node = binder_new_node(proc, 
0, 0); + new_node = binder_new_node(proc, NULL); if (!new_node) { ret = -ENOMEM; goto out; } + binder_node_lock(new_node); new_node->local_weak_refs++; new_node->local_strong_refs++; new_node->has_strong_ref = 1; new_node->has_weak_ref = 1; context->binder_context_mgr_node = new_node; + binder_node_unlock(new_node); binder_put_node(new_node); out: mutex_unlock(&context->context_mgr_node_lock); @@ -4246,6 +4329,7 @@ static int binder_node_release(struct binder_node *node, int refs) binder_release_work(proc, &node->async_todo); + binder_node_lock(node); binder_inner_proc_lock(proc); binder_dequeue_work_ilocked(&node->work); /* @@ -4254,6 +4338,7 @@ static int binder_node_release(struct binder_node *node, int refs) BUG_ON(!node->tmp_refs); if (hlist_empty(&node->refs) && node->tmp_refs == 1) { binder_inner_proc_unlock(proc); + binder_node_unlock(node); binder_free_node(node); return refs; @@ -4290,6 +4375,7 @@ static int binder_node_release(struct binder_node *node, int refs) binder_debug(BINDER_DEBUG_DEAD_BINDER, "node %d now dead, refs %d, death %d\n", node->debug_id, refs, death); + binder_node_unlock(node); binder_put_node(node); return refs; @@ -4533,12 +4619,15 @@ static void print_binder_thread_ilocked(struct seq_file *m, m->count = start_pos; } -static void print_binder_node(struct seq_file *m, struct binder_node *node) +static void print_binder_node_nlocked(struct seq_file *m, + struct binder_node *node) { struct binder_ref *ref; struct binder_work *w; int count; + WARN_ON(!spin_is_locked(&node->lock)); + count = 0; hlist_for_each_entry(ref, &node->refs, node_entry) count++; @@ -4565,11 +4654,13 @@ static void print_binder_node(struct seq_file *m, struct binder_node *node) static void print_binder_ref(struct seq_file *m, struct binder_ref *ref) { + binder_node_lock(ref->node); seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %pK\n", ref->data.debug_id, ref->data.desc, ref->node->proc ? "" : "dead ", ref->node->debug_id, ref->data.strong, ref->data.weak, ref->death); + binder_node_unlock(ref->node); } static void print_binder_proc(struct seq_file *m, @@ -4592,8 +4683,10 @@ static void print_binder_proc(struct seq_file *m, for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) { struct binder_node *node = rb_entry(n, struct binder_node, rb_node); + binder_node_lock(node); if (print_all || node->has_async_transaction) - print_binder_node(m, node); + print_binder_node_nlocked(m, node); + binder_node_unlock(node); } if (print_all) { for (n = rb_first(&proc->refs_by_desc); @@ -4765,6 +4858,7 @@ static int binder_state_show(struct seq_file *m, void *unused) { struct binder_proc *proc; struct binder_node *node; + struct binder_node *last_node = NULL; binder_lock(__func__); @@ -4773,9 +4867,25 @@ static int binder_state_show(struct seq_file *m, void *unused) spin_lock(&binder_dead_nodes_lock); if (!hlist_empty(&binder_dead_nodes)) seq_puts(m, "dead nodes:\n"); - hlist_for_each_entry(node, &binder_dead_nodes, dead_node) - print_binder_node(m, node); + hlist_for_each_entry(node, &binder_dead_nodes, dead_node) { + /* + * take a temporary reference on the node so it + * survives and isn't removed from the list + * while we print it. 
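+		 * binder_dead_nodes_lock must be dropped before the node
+		 * is printed: binder_put_node() on the previous node can
+		 * itself take binder_dead_nodes_lock (via
+		 * binder_dec_node_tmpref()), and printing takes the node
+		 * lock.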
+ */ + node->tmp_refs++; + spin_unlock(&binder_dead_nodes_lock); + if (last_node) + binder_put_node(last_node); + binder_node_lock(node); + print_binder_node_nlocked(m, node); + binder_node_unlock(node); + last_node = node; + spin_lock(&binder_dead_nodes_lock); + } spin_unlock(&binder_dead_nodes_lock); + if (last_node) + binder_put_node(last_node); mutex_lock(&binder_procs_lock); hlist_for_each_entry(proc, &binder_procs, proc_node) From 46655970b9673b2b60eb67bd902743455dfdebe3 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 12 Jun 2017 12:07:26 -0700 Subject: [PATCH 032/176] FROMLIST: binder: protect proc->nodes with inner lock (from https://patchwork.kernel.org/patch/9817783/) When locks for binder_ref handling are added, proc->nodes will need to be modified while holding the outer lock Change-Id: I17b39e981c55130c14a62fe49900eceff6e3642b Signed-off-by: Todd Kjos --- drivers/android/binder.c | 112 +++++++++++++++++++++++++++++++-------- 1 file changed, 89 insertions(+), 23 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 2ca40f4d0bb5..a0c0bd05a885 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -313,6 +313,7 @@ struct binder_error { * @work: worklist element for node work * (protected by @proc->inner_lock) * @rb_node: element for proc->nodes tree + * (protected by @proc->inner_lock) * @dead_node: element for binder_dead_nodes list * (protected by binder_dead_nodes_lock) * @proc: binder_proc that owns this node @@ -470,6 +471,7 @@ enum binder_deferred_state { * @threads: rbtree of binder_threads in this proc * @nodes: rbtree of binder nodes associated with * this proc ordered by node->ptr + * (protected by @inner_lock) * @refs_by_desc: rbtree of refs ordered by ref->desc * @refs_by_node: rbtree of refs ordered by ref->node * @pid PID of group_leader of process @@ -856,7 +858,7 @@ static void binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); static void binder_free_thread(struct binder_thread *thread); static void binder_free_proc(struct binder_proc *proc); -static void binder_inc_node_tmpref(struct binder_node *node); +static void binder_inc_node_tmpref_ilocked(struct binder_node *node); static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) { @@ -938,12 +940,14 @@ static void binder_set_nice(long nice) binder_user_error("%d RLIMIT_NICE not set\n", current->pid); } -static struct binder_node *binder_get_node(struct binder_proc *proc, - binder_uintptr_t ptr) +static struct binder_node *binder_get_node_ilocked(struct binder_proc *proc, + binder_uintptr_t ptr) { struct rb_node *n = proc->nodes.rb_node; struct binder_node *node; + BUG_ON(!spin_is_locked(&proc->inner_lock)); + while (n) { node = rb_entry(n, struct binder_node, rb_node); @@ -957,15 +961,28 @@ static struct binder_node *binder_get_node(struct binder_proc *proc, * to ensure node stays alive until * call to binder_put_node() */ - binder_inc_node_tmpref(node); + binder_inc_node_tmpref_ilocked(node); return node; } } return NULL; } -static struct binder_node *binder_new_node(struct binder_proc *proc, - struct flat_binder_object *fp) +static struct binder_node *binder_get_node(struct binder_proc *proc, + binder_uintptr_t ptr) +{ + struct binder_node *node; + + binder_inner_proc_lock(proc); + node = binder_get_node_ilocked(proc, ptr); + binder_inner_proc_unlock(proc); + return node; +} + +static struct binder_node *binder_init_node_ilocked( + struct binder_proc *proc, + struct binder_node *new_node, + struct flat_binder_object 
*fp) { struct rb_node **p = &proc->nodes.rb_node; struct rb_node *parent = NULL; @@ -974,7 +991,9 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, binder_uintptr_t cookie = fp ? fp->cookie : 0; __u32 flags = fp ? fp->flags : 0; + BUG_ON(!spin_is_locked(&proc->inner_lock)); while (*p) { + parent = *p; node = rb_entry(parent, struct binder_node, rb_node); @@ -982,13 +1001,17 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, p = &(*p)->rb_left; else if (ptr > node->ptr) p = &(*p)->rb_right; - else - return NULL; + else { + /* + * A matching node is already in + * the rb tree. Abandon the init + * and return it. + */ + binder_inc_node_tmpref_ilocked(node); + return node; + } } - - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (node == NULL) - return NULL; + node = new_node; binder_stats_created(BINDER_STAT_NODE); node->tmp_refs++; rb_link_node(&node->rb_node, parent, p); @@ -1007,6 +1030,27 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, "%d:%d node %d u%016llx c%016llx created\n", proc->pid, current->pid, node->debug_id, (u64)node->ptr, (u64)node->cookie); + + return node; +} + +static struct binder_node *binder_new_node(struct binder_proc *proc, + struct flat_binder_object *fp) +{ + struct binder_node *node; + struct binder_node *new_node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (!new_node) + return NULL; + binder_inner_proc_lock(proc); + node = binder_init_node_ilocked(proc, new_node, fp); + binder_inner_proc_unlock(proc); + if (node != new_node) + /* + * The node was already added by another thread + */ + kfree(new_node); + return node; } @@ -4421,6 +4465,7 @@ static void binder_deferred_release(struct binder_proc *proc) nodes = 0; incoming_refs = 0; + binder_inner_proc_lock(proc); while ((n = rb_first(&proc->nodes))) { struct binder_node *node; @@ -4431,10 +4476,13 @@ static void binder_deferred_release(struct binder_proc *proc) * calling binder_node_release() which will either * kfree() the node or call binder_put_node() */ - binder_inc_node_tmpref(node); + binder_inc_node_tmpref_ilocked(node); rb_erase(&node->rb_node, &proc->nodes); + binder_inner_proc_unlock(proc); incoming_refs = binder_node_release(node, incoming_refs); + binder_inner_proc_lock(proc); } + binder_inner_proc_unlock(proc); outgoing_refs = 0; while ((n = rb_first(&proc->refs_by_desc))) { @@ -4619,14 +4667,16 @@ static void print_binder_thread_ilocked(struct seq_file *m, m->count = start_pos; } -static void print_binder_node_nlocked(struct seq_file *m, - struct binder_node *node) +static void print_binder_node_nilocked(struct seq_file *m, + struct binder_node *node) { struct binder_ref *ref; struct binder_work *w; int count; WARN_ON(!spin_is_locked(&node->lock)); + if (node->proc) + WARN_ON(!spin_is_locked(&node->proc->inner_lock)); count = 0; hlist_for_each_entry(ref, &node->refs, node_entry) @@ -4644,11 +4694,9 @@ static void print_binder_node_nlocked(struct seq_file *m, } seq_puts(m, "\n"); if (node->proc) { - binder_inner_proc_lock(node->proc); list_for_each_entry(w, &node->async_todo, entry) print_binder_work_ilocked(m, " ", " pending async transaction", w); - binder_inner_proc_unlock(node->proc); } } @@ -4670,6 +4718,7 @@ static void print_binder_proc(struct seq_file *m, struct rb_node *n; size_t start_pos = m->count; size_t header_pos; + struct binder_node *last_node = NULL; seq_printf(m, "proc %d\n", proc->pid); seq_printf(m, "context %s\n", proc->context->name); @@ -4679,15 +4728,30 @@ static void print_binder_proc(struct seq_file *m, for 
(n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) print_binder_thread_ilocked(m, rb_entry(n, struct binder_thread, rb_node), print_all); - binder_inner_proc_unlock(proc); + for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) { struct binder_node *node = rb_entry(n, struct binder_node, rb_node); - binder_node_lock(node); - if (print_all || node->has_async_transaction) - print_binder_node_nlocked(m, node); - binder_node_unlock(node); + /* + * take a temporary reference on the node so it + * survives and isn't removed from the tree + * while we print it. + */ + binder_inc_node_tmpref_ilocked(node); + /* Need to drop inner lock to take node lock */ + binder_inner_proc_unlock(proc); + if (last_node) + binder_put_node(last_node); + binder_node_inner_lock(node); + print_binder_node_nilocked(m, node); + binder_node_inner_unlock(node); + last_node = node; + binder_inner_proc_lock(proc); } + binder_inner_proc_unlock(proc); + if (last_node) + binder_put_node(last_node); + if (print_all) { for (n = rb_first(&proc->refs_by_desc); n != NULL; @@ -4823,8 +4887,10 @@ static void print_binder_proc_stats(struct seq_file *m, proc->ready_threads, binder_alloc_get_free_async_space(&proc->alloc)); count = 0; + binder_inner_proc_lock(proc); for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) count++; + binder_inner_proc_unlock(proc); seq_printf(m, " nodes: %d\n", count); count = 0; strong = 0; @@ -4878,7 +4944,7 @@ static int binder_state_show(struct seq_file *m, void *unused) if (last_node) binder_put_node(last_node); binder_node_lock(node); - print_binder_node_nlocked(m, node); + print_binder_node_nilocked(m, node); binder_node_unlock(node); last_node = node; spin_lock(&binder_dead_nodes_lock); From e495123304a5949848881496ee23990791a57db3 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Thu, 25 May 2017 15:52:17 -0700 Subject: [PATCH 033/176] FROMLIST: binder: protect proc->threads with inner_lock (from https://patchwork.kernel.org/patch/9817775/) proc->threads will need to be accessed with higher locks of other processes held so use proc->inner_lock to protect it. proc->tmp_ref now needs to be protected by proc->inner_lock. 
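The resulting pattern for temporary references, condensed from
binder_proc_dec_tmpref() in the diff below (an illustrative excerpt,
not a complete function): the decrement and the is-dead test happen
under the inner lock, and the lock is dropped before the proc is
actually freed, so no spinlock is held across the free.

    binder_inner_proc_lock(proc);
    proc->tmp_ref--;
    if (proc->is_dead && RB_EMPTY_ROOT(&proc->threads) &&
        !proc->tmp_ref) {
            binder_inner_proc_unlock(proc);
            binder_free_proc(proc);  /* freed outside the lock */
            return;
    }
    binder_inner_proc_unlock(proc);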
Change-Id: I176cfeca16bf7c9b34b428c16405f93db81d2ff8 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 87 +++++++++++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 24 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index a0c0bd05a885..8735f9a6c018 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -469,6 +469,7 @@ enum binder_deferred_state { * struct binder_proc - binder process bookkeeping * @proc_node: element for binder_procs list * @threads: rbtree of binder_threads in this proc + * (protected by @inner_lock) * @nodes: rbtree of binder nodes associated with * this proc ordered by node->ptr * (protected by @inner_lock) @@ -486,6 +487,7 @@ enum binder_deferred_state { * (protected by binder_deferred_lock) * @is_dead: process is dead and awaiting free * when outstanding transactions are cleaned up + * (protected by @inner_lock) * @todo: list of work for this process * (protected by @inner_lock) * @wait: wait queue head to wait for proc work @@ -501,6 +503,7 @@ enum binder_deferred_state { * @requested_threads_started: number binder threads started * @ready_threads: number of threads waiting for proc work * @tmp_ref: temporary reference to indicate proc is in use + * (protected by @inner_lock) * @default_priority: default scheduler priority * (invariant after initialized) * @debugfs_entry: debugfs node @@ -556,6 +559,7 @@ enum { * @proc: binder process for this thread * (invariant after initialization) * @rb_node: element for proc->threads rbtree + * (protected by @proc->inner_lock) * @pid: PID for this thread * (invariant after initialization) * @looper: bitmap of looping state @@ -576,6 +580,7 @@ enum { * always be acquired) * @is_dead: thread is dead and awaiting free * when outstanding transactions are cleaned up + * (protected by @proc->inner_lock) * * Bookkeeping structure for binder threads. 
*/ @@ -1668,15 +1673,15 @@ static void binder_thread_dec_tmpref(struct binder_thread *thread) /* * atomic is used to protect the counter value while * it cannot reach zero or thread->is_dead is false - * - * TODO: future patch adds locking to ensure that the - * check of tmp_ref and is_dead is done with a lock held */ + binder_inner_proc_lock(thread->proc); atomic_dec(&thread->tmp_ref); if (thread->is_dead && !atomic_read(&thread->tmp_ref)) { + binder_inner_proc_unlock(thread->proc); binder_free_thread(thread); return; } + binder_inner_proc_unlock(thread->proc); } /** @@ -1693,12 +1698,15 @@ static void binder_thread_dec_tmpref(struct binder_thread *thread) */ static void binder_proc_dec_tmpref(struct binder_proc *proc) { + binder_inner_proc_lock(proc); proc->tmp_ref--; if (proc->is_dead && RB_EMPTY_ROOT(&proc->threads) && !proc->tmp_ref) { + binder_inner_proc_unlock(proc); binder_free_proc(proc); return; } + binder_inner_proc_unlock(proc); } /** @@ -2481,7 +2489,9 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_dead_binder; } + binder_inner_proc_lock(target_proc); target_proc->tmp_ref++; + binder_inner_proc_unlock(target_proc); binder_node_unlock(target_node); if (security_binder_transaction(proc->tsk, target_proc->tsk) < 0) { @@ -3855,7 +3865,8 @@ static void binder_release_work(struct binder_proc *proc, } -static struct binder_thread *binder_get_thread(struct binder_proc *proc) +static struct binder_thread *binder_get_thread_ilocked( + struct binder_proc *proc, struct binder_thread *new_thread) { struct binder_thread *thread = NULL; struct rb_node *parent = NULL; @@ -3870,25 +3881,45 @@ static struct binder_thread *binder_get_thread(struct binder_proc *proc) else if (current->pid > thread->pid) p = &(*p)->rb_right; else - break; + return thread; } - if (*p == NULL) { - thread = kzalloc(sizeof(*thread), GFP_KERNEL); - if (thread == NULL) + if (!new_thread) + return NULL; + thread = new_thread; + binder_stats_created(BINDER_STAT_THREAD); + thread->proc = proc; + thread->pid = current->pid; + atomic_set(&thread->tmp_ref, 0); + init_waitqueue_head(&thread->wait); + INIT_LIST_HEAD(&thread->todo); + rb_link_node(&thread->rb_node, parent, p); + rb_insert_color(&thread->rb_node, &proc->threads); + thread->looper_need_return = true; + thread->return_error.work.type = BINDER_WORK_RETURN_ERROR; + thread->return_error.cmd = BR_OK; + thread->reply_error.work.type = BINDER_WORK_RETURN_ERROR; + thread->reply_error.cmd = BR_OK; + + return thread; +} + +static struct binder_thread *binder_get_thread(struct binder_proc *proc) +{ + struct binder_thread *thread; + struct binder_thread *new_thread; + + binder_inner_proc_lock(proc); + thread = binder_get_thread_ilocked(proc, NULL); + binder_inner_proc_unlock(proc); + if (!thread) { + new_thread = kzalloc(sizeof(*thread), GFP_KERNEL); + if (new_thread == NULL) return NULL; - binder_stats_created(BINDER_STAT_THREAD); - thread->proc = proc; - thread->pid = current->pid; - atomic_set(&thread->tmp_ref, 0); - init_waitqueue_head(&thread->wait); - INIT_LIST_HEAD(&thread->todo); - rb_link_node(&thread->rb_node, parent, p); - rb_insert_color(&thread->rb_node, &proc->threads); - thread->looper_need_return = true; - thread->return_error.work.type = BINDER_WORK_RETURN_ERROR; - thread->return_error.cmd = BR_OK; - thread->reply_error.work.type = BINDER_WORK_RETURN_ERROR; - thread->reply_error.cmd = BR_OK; + binder_inner_proc_lock(proc); + thread = binder_get_thread_ilocked(proc, new_thread); + 
binder_inner_proc_unlock(proc); + if (thread != new_thread) + kfree(new_thread); } return thread; } @@ -3919,6 +3950,7 @@ static int binder_thread_release(struct binder_proc *proc, int active_transactions = 0; struct binder_transaction *last_t = NULL; + binder_inner_proc_lock(thread->proc); /* * take a ref on the proc so it survives * after we remove this thread from proc->threads. @@ -3966,6 +3998,7 @@ static int binder_thread_release(struct binder_proc *proc, if (t) spin_lock(&t->lock); } + binder_inner_proc_unlock(thread->proc); if (send_reply) binder_send_failed_reply(send_reply, BR_DEAD_REPLY); @@ -4339,6 +4372,7 @@ static void binder_deferred_flush(struct binder_proc *proc) struct rb_node *n; int wake_count = 0; + binder_inner_proc_lock(proc); for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) { struct binder_thread *thread = rb_entry(n, struct binder_thread, rb_node); @@ -4348,6 +4382,7 @@ static void binder_deferred_flush(struct binder_proc *proc) wake_count++; } } + binder_inner_proc_unlock(proc); wake_up_interruptible_all(&proc->wait); binder_debug(BINDER_DEBUG_OPEN_CLOSE, @@ -4446,6 +4481,7 @@ static void binder_deferred_release(struct binder_proc *proc) context->binder_context_mgr_node = NULL; } mutex_unlock(&context->context_mgr_node_lock); + binder_inner_proc_lock(proc); /* * Make sure proc stays alive after we * remove all the threads @@ -4459,13 +4495,14 @@ static void binder_deferred_release(struct binder_proc *proc) struct binder_thread *thread; thread = rb_entry(n, struct binder_thread, rb_node); + binder_inner_proc_unlock(proc); threads++; active_transactions += binder_thread_release(proc, thread); + binder_inner_proc_lock(proc); } nodes = 0; incoming_refs = 0; - binder_inner_proc_lock(proc); while ((n = rb_first(&proc->nodes))) { struct binder_node *node; @@ -4873,10 +4910,13 @@ static void print_binder_proc_stats(struct seq_file *m, struct binder_work *w; struct rb_node *n; int count, strong, weak; + size_t free_async_space = + binder_alloc_get_free_async_space(&proc->alloc); seq_printf(m, "proc %d\n", proc->pid); seq_printf(m, "context %s\n", proc->context->name); count = 0; + binder_inner_proc_lock(proc); for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) count++; seq_printf(m, " threads: %d\n", count); @@ -4885,9 +4925,8 @@ static void print_binder_proc_stats(struct seq_file *m, " free async space %zd\n", proc->requested_threads, proc->requested_threads_started, proc->max_threads, proc->ready_threads, - binder_alloc_get_free_async_space(&proc->alloc)); + free_async_space); count = 0; - binder_inner_proc_lock(proc); for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) count++; binder_inner_proc_unlock(proc); From 89b657e0d7de20459820d7edf341bc888b69aa83 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 2 Jun 2017 13:36:52 -0700 Subject: [PATCH 034/176] FROMLIST: binder: protect transaction_stack with inner lock. (from https://patchwork.kernel.org/patch/9817779/) This makes future changes to priority inheritance easier, since we want to be able to look at a thread's transaction stack when selecting a thread to inherit priority for. It also allows us to take just a single lock in a few paths, where we used to take two in succession. 
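The key new idiom is acquire-then-revalidate when resolving t->from.
Condensed as a sketch (illustrative only, mirroring the
binder_get_txn_from_and_acq_inner() helper added in the hunks below):

	/*
	 * Sketch: take a tmp reference on the sender first, then take
	 * its proc->inner_lock and re-check t->from under the lock. If
	 * the transaction was popped in the meantime, undo and fail.
	 */
	struct binder_thread *from = binder_get_txn_from(t); /* +tmp_ref */

	if (!from)
		return NULL;
	binder_inner_proc_lock(from->proc);
	if (t->from) {
		BUG_ON(from != t->from);
		return from;	/* caller unlocks and drops the ref */
	}
	binder_inner_proc_unlock(from->proc);
	binder_thread_dec_tmpref(from);	/* raced with a pop: undo */
	return NULL;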
Change-Id: Idb1b6e9faa5c669978b2b3011fe326be8aece586 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 96 +++++++++++++++++++++++++++++++++------- 1 file changed, 79 insertions(+), 17 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 8735f9a6c018..328ec2cd7956 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -30,7 +30,8 @@ * 3) proc->inner_lock : protects the thread and node lists * (proc->threads, proc->nodes) and all todo lists associated * with the binder_proc (proc->todo, thread->todo, - * proc->delivered_death and node->async_todo). + * proc->delivered_death and node->async_todo), as well as + * thread->transaction_stack * binder_inner_proc_lock() and binder_inner_proc_unlock() * are used to acq/rel * @@ -567,11 +568,13 @@ enum { * @looper_needs_return: looping thread needs to exit driver * (no lock needed) * @transaction_stack: stack of in-progress transactions for this thread + * (protected by @proc->inner_lock) * @todo: list of work to do for this thread * (protected by @proc->inner_lock) * @return_error: transaction errors reported by this thread * (only accessed by this thread) * @reply_error: transaction errors reported by target thread + * (protected by @proc->inner_lock) * @wait: wait queue for thread work * @stats: per-thread statistics * (atomics, no lock needed) @@ -1645,10 +1648,11 @@ static int binder_inc_ref_for_node(struct binder_proc *proc, return ret; } -static void binder_pop_transaction(struct binder_thread *target_thread, - struct binder_transaction *t) +static void binder_pop_transaction_ilocked(struct binder_thread *target_thread, + struct binder_transaction *t) { BUG_ON(!target_thread); + BUG_ON(!spin_is_locked(&target_thread->proc->inner_lock)); BUG_ON(target_thread->transaction_stack != t); BUG_ON(target_thread->transaction_stack->from != target_thread); target_thread->transaction_stack = @@ -1732,6 +1736,35 @@ static struct binder_thread *binder_get_txn_from( return from; } +/** + * binder_get_txn_from_and_acq_inner() - get t->from and acquire inner lock + * @t: binder transaction for t->from + * + * Same as binder_get_txn_from() except it also acquires the proc->inner_lock + * to guarantee that the thread cannot be released while operating on it. + * The caller must call binder_inner_proc_unlock() to release the inner lock + * as well as call binder_dec_thread_txn() to release the reference. 
+ * + * Return: the value of t->from + */ +static struct binder_thread *binder_get_txn_from_and_acq_inner( + struct binder_transaction *t) +{ + struct binder_thread *from; + + from = binder_get_txn_from(t); + if (!from) + return NULL; + binder_inner_proc_lock(from->proc); + if (t->from) { + BUG_ON(from != t->from); + return from; + } + binder_inner_proc_unlock(from->proc); + binder_thread_dec_tmpref(from); + return NULL; +} + static void binder_free_transaction(struct binder_transaction *t) { if (t->buffer) @@ -1748,7 +1781,7 @@ static void binder_send_failed_reply(struct binder_transaction *t, BUG_ON(t->flags & TF_ONE_WAY); while (1) { - target_thread = binder_get_txn_from(t); + target_thread = binder_get_txn_from_and_acq_inner(t); if (target_thread) { binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "send failed reply for transaction %d to %d:%d\n", @@ -1756,11 +1789,10 @@ static void binder_send_failed_reply(struct binder_transaction *t, target_thread->proc->pid, target_thread->pid); - binder_pop_transaction(target_thread, t); + binder_pop_transaction_ilocked(target_thread, t); if (target_thread->reply_error.cmd == BR_OK) { target_thread->reply_error.cmd = error_code; - binder_enqueue_work( - target_thread->proc, + binder_enqueue_work_ilocked( &target_thread->reply_error.work, &target_thread->todo); wake_up_interruptible(&target_thread->wait); @@ -1768,6 +1800,7 @@ static void binder_send_failed_reply(struct binder_transaction *t, WARN(1, "Unexpected reply error: %u\n", target_thread->reply_error.cmd); } + binder_inner_proc_unlock(target_thread->proc); binder_thread_dec_tmpref(target_thread); binder_free_transaction(t); return; @@ -2397,8 +2430,10 @@ static void binder_transaction(struct binder_proc *proc, e->context_name = proc->context->name; if (reply) { + binder_inner_proc_lock(proc); in_reply_to = thread->transaction_stack; if (in_reply_to == NULL) { + binder_inner_proc_unlock(proc); binder_user_error("%d:%d got reply transaction with no transaction stack\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; @@ -2406,7 +2441,6 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_empty_call_stack; } - binder_set_nice(in_reply_to->saved_priority); if (in_reply_to->to_thread != thread) { spin_lock(&in_reply_to->lock); binder_user_error("%d:%d got reply transaction with bad transaction stack, transaction %d has target %d:%d\n", @@ -2416,6 +2450,7 @@ static void binder_transaction(struct binder_proc *proc, in_reply_to->to_thread ? in_reply_to->to_thread->pid : 0); spin_unlock(&in_reply_to->lock); + binder_inner_proc_unlock(proc); return_error = BR_FAILED_REPLY; return_error_param = -EPROTO; return_error_line = __LINE__; @@ -2423,7 +2458,9 @@ static void binder_transaction(struct binder_proc *proc, goto err_bad_call_stack; } thread->transaction_stack = in_reply_to->to_parent; - target_thread = binder_get_txn_from(in_reply_to); + binder_inner_proc_unlock(proc); + binder_set_nice(in_reply_to->saved_priority); + target_thread = binder_get_txn_from_and_acq_inner(in_reply_to); if (target_thread == NULL) { return_error = BR_DEAD_REPLY; return_error_line = __LINE__; @@ -2435,6 +2472,7 @@ static void binder_transaction(struct binder_proc *proc, target_thread->transaction_stack ? 
target_thread->transaction_stack->debug_id : 0, in_reply_to->debug_id); + binder_inner_proc_unlock(target_thread->proc); return_error = BR_FAILED_REPLY; return_error_param = -EPROTO; return_error_line = __LINE__; @@ -2444,6 +2482,7 @@ static void binder_transaction(struct binder_proc *proc, } target_proc = target_thread->proc; target_proc->tmp_ref++; + binder_inner_proc_unlock(target_thread->proc); } else { if (tr->target.handle) { struct binder_ref *ref; @@ -2500,6 +2539,7 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_invalid_target_handle; } + binder_inner_proc_lock(proc); if (!(tr->flags & TF_ONE_WAY) && thread->transaction_stack) { struct binder_transaction *tmp; @@ -2512,6 +2552,7 @@ static void binder_transaction(struct binder_proc *proc, tmp->to_thread ? tmp->to_thread->pid : 0); spin_unlock(&tmp->lock); + binder_inner_proc_unlock(proc); return_error = BR_FAILED_REPLY; return_error_param = -EPROTO; return_error_line = __LINE__; @@ -2532,6 +2573,7 @@ static void binder_transaction(struct binder_proc *proc, tmp = tmp->from_parent; } } + binder_inner_proc_unlock(proc); } if (target_thread) { e->to_thread = target_thread->pid; @@ -2812,23 +2854,34 @@ static void binder_transaction(struct binder_proc *proc, t->work.type = BINDER_WORK_TRANSACTION; if (reply) { - if (target_thread->is_dead) + binder_inner_proc_lock(target_proc); + if (target_thread->is_dead) { + binder_inner_proc_unlock(target_proc); goto err_dead_proc_or_thread; + } BUG_ON(t->buffer->async_transaction != 0); - binder_pop_transaction(target_thread, in_reply_to); + binder_pop_transaction_ilocked(target_thread, in_reply_to); + binder_enqueue_work_ilocked(&t->work, target_list); + binder_inner_proc_unlock(target_proc); binder_free_transaction(in_reply_to); - binder_enqueue_work(target_proc, &t->work, target_list); } else if (!(t->flags & TF_ONE_WAY)) { BUG_ON(t->buffer->async_transaction != 0); + binder_inner_proc_lock(proc); t->need_reply = 1; t->from_parent = thread->transaction_stack; thread->transaction_stack = t; + binder_inner_proc_unlock(proc); + binder_inner_proc_lock(target_proc); if (target_proc->is_dead || (target_thread && target_thread->is_dead)) { - binder_pop_transaction(thread, t); + binder_inner_proc_unlock(target_proc); + binder_inner_proc_lock(proc); + binder_pop_transaction_ilocked(thread, t); + binder_inner_proc_unlock(proc); goto err_dead_proc_or_thread; } - binder_enqueue_work(target_proc, &t->work, target_list); + binder_enqueue_work_ilocked(&t->work, target_list); + binder_inner_proc_unlock(target_proc); } else { BUG_ON(target_node == NULL); BUG_ON(t->buffer->async_transaction != 1); @@ -2843,12 +2896,15 @@ static void binder_transaction(struct binder_proc *proc, * must be atomic with enqueue on * async_todo */ + binder_inner_proc_lock(target_proc); if (target_proc->is_dead || (target_thread && target_thread->is_dead)) { + binder_inner_proc_unlock(target_proc); binder_node_unlock(target_node); goto err_dead_proc_or_thread; } - binder_enqueue_work(target_proc, &t->work, target_list); + binder_enqueue_work_ilocked(&t->work, target_list); + binder_inner_proc_unlock(target_proc); binder_node_unlock(target_node); } if (target_wait) { @@ -3465,8 +3521,10 @@ static int binder_thread_read(struct binder_proc *proc, } retry: + binder_inner_proc_lock(proc); wait_for_proc_work = thread->transaction_stack == NULL && - binder_worklist_empty(proc, &thread->todo); + binder_worklist_empty_ilocked(&thread->todo); + binder_inner_proc_unlock(proc); thread->looper |= 
BINDER_LOOPER_STATE_WAITING; if (wait_for_proc_work) @@ -3778,9 +3836,11 @@ retry: binder_thread_dec_tmpref(t_from); t->buffer->allow_user_free = 1; if (cmd == BR_TRANSACTION && !(t->flags & TF_ONE_WAY)) { + binder_inner_proc_lock(thread->proc); t->to_parent = thread->transaction_stack; t->to_thread = thread; thread->transaction_stack = t; + binder_inner_proc_unlock(thread->proc); } else { binder_free_transaction(t); } @@ -4018,8 +4078,10 @@ static unsigned int binder_poll(struct file *filp, thread = binder_get_thread(proc); + binder_inner_proc_lock(thread->proc); wait_for_proc_work = thread->transaction_stack == NULL && - binder_worklist_empty(proc, &thread->todo); + binder_worklist_empty_ilocked(&thread->todo); + binder_inner_proc_unlock(thread->proc); binder_unlock(__func__); From 814ce251cb48216fd254fb78db26fba9e363b953 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Thu, 25 May 2017 17:35:02 -0700 Subject: [PATCH 035/176] FROMLIST: binder: use inner lock to protect thread accounting (from https://patchwork.kernel.org/patch/9817763/) Use the inner lock to protect thread accounting fields in proc structure: max_threads, requested_threads, requested_threads_started and ready_threads. Change-Id: I5a17eb68812702f803d4e2806e7887de0b3af18e Signed-off-by: Todd Kjos --- drivers/android/binder.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 328ec2cd7956..fdafd7df9171 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -498,11 +498,15 @@ enum binder_deferred_state { * @delivered_death: list of delivered death notification * (protected by @inner_lock) * @max_threads: cap on number of binder threads + * (protected by @inner_lock) * @requested_threads: number of binder threads requested but not * yet started. In current implementation, can * only be 0 or 1. 
+ * (protected by @inner_lock)
 * @requested_threads_started: number binder threads started
+ * (protected by @inner_lock)
 * @ready_threads: number of threads waiting for proc work
+ * (protected by @inner_lock)
 * @tmp_ref: temporary reference to indicate proc is in use
 * (protected by @inner_lock)
 * @default_priority: default scheduler priority
@@ -3235,6 +3239,7 @@ static int binder_thread_write(struct binder_proc *proc,
 			binder_debug(BINDER_DEBUG_THREADS,
 				     "%d:%d BC_REGISTER_LOOPER\n",
 				     proc->pid, thread->pid);
+			binder_inner_proc_lock(proc);
 			if (thread->looper & BINDER_LOOPER_STATE_ENTERED) {
 				thread->looper |= BINDER_LOOPER_STATE_INVALID;
 				binder_user_error("%d:%d ERROR: BC_REGISTER_LOOPER called after BC_ENTER_LOOPER\n",
@@ -3248,6 +3253,7 @@
 				proc->requested_threads_started++;
 			}
 			thread->looper |= BINDER_LOOPER_STATE_REGISTERED;
+			binder_inner_proc_unlock(proc);
 			break;
 		case BC_ENTER_LOOPER:
 			binder_debug(BINDER_DEBUG_THREADS,
@@ -3524,11 +3530,11 @@ retry:
 	binder_inner_proc_lock(proc);
 	wait_for_proc_work = thread->transaction_stack == NULL &&
 		binder_worklist_empty_ilocked(&thread->todo);
+	if (wait_for_proc_work)
+		proc->ready_threads++;
 	binder_inner_proc_unlock(proc);
 	thread->looper |= BINDER_LOOPER_STATE_WAITING;
-	if (wait_for_proc_work)
-		proc->ready_threads++;

 	binder_unlock(__func__);

@@ -3559,8 +3565,10 @@ retry:

 	binder_lock(__func__);

+	binder_inner_proc_lock(proc);
 	if (wait_for_proc_work)
 		proc->ready_threads--;
+	binder_inner_proc_unlock(proc);
 	thread->looper &= ~BINDER_LOOPER_STATE_WAITING;

 	if (ret)
@@ -3850,19 +3858,22 @@ retry:
 done:

 	*consumed = ptr - buffer;
+	binder_inner_proc_lock(proc);
 	if (proc->requested_threads + proc->ready_threads == 0 &&
 	    proc->requested_threads_started < proc->max_threads &&
 	    (thread->looper & (BINDER_LOOPER_STATE_REGISTERED |
 	     BINDER_LOOPER_STATE_ENTERED)) /* the user-space code fails to */
 	     /*spawn a new thread if we leave this out */) {
 		proc->requested_threads++;
+		binder_inner_proc_unlock(proc);
 		binder_debug(BINDER_DEBUG_THREADS,
 			     "%d:%d BR_SPAWN_LOOPER\n",
 			     proc->pid, thread->pid);
 		if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer))
 			return -EFAULT;
 		binder_stat_br(proc, thread, BR_SPAWN_LOOPER);
-	}
+	} else
+		binder_inner_proc_unlock(proc);
 	return 0;
 }

@@ -4242,12 +4253,19 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		if (ret)
 			goto err;
 		break;
-	case BINDER_SET_MAX_THREADS:
-		if (copy_from_user(&proc->max_threads, ubuf, sizeof(proc->max_threads))) {
+	case BINDER_SET_MAX_THREADS: {
+		int max_threads;
+
+		if (copy_from_user(&max_threads, ubuf,
+				   sizeof(max_threads))) {
 			ret = -EINVAL;
 			goto err;
 		}
+		binder_inner_proc_lock(proc);
+		proc->max_threads = max_threads;
+		binder_inner_proc_unlock(proc);
 		break;
+	}
 	case BINDER_SET_CONTEXT_MGR:
 		ret = binder_ioctl_set_ctx_mgr(filp);
 		if (ret)

From 6fcb2b9ac4aed1946358073c3a2802aa8bd57c3d Mon Sep 17 00:00:00 2001
From: Todd Kjos
Date: Thu, 20 Oct 2016 16:43:34 -0700
Subject: [PATCH 036/176] FROMLIST: binder: protect binder_ref with outer lock

(from https://patchwork.kernel.org/patch/9817771/)

Use proc->outer_lock to protect the binder_ref structure.
The outer lock allows functions operating on the binder_ref
to do nested acquires of node and inner locks as necessary
to attach refs to nodes atomically.

Binder refs must never be accessed without holding the outer lock.
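The resulting pattern for attaching a ref condenses to the sketch below
(illustrative only, mirroring the reworked binder_inc_ref_for_node() in
the hunks that follow; as with binder_get_thread(), the sleeping
allocation must sit outside the lock):

	/*
	 * Sketch: look up the ref under the outer lock; if absent, drop
	 * the lock to kzalloc, retake it, and retry so that ref creation
	 * and the increment happen atomically with respect to the rbtrees.
	 */
	binder_proc_lock(proc);			/* outer lock */
	ref = binder_get_ref_for_node_olocked(proc, node, NULL);
	if (!ref) {
		binder_proc_unlock(proc);	/* kzalloc may sleep */
		new_ref = kzalloc(sizeof(*ref), GFP_KERNEL);
		if (!new_ref)
			return -ENOMEM;
		binder_proc_lock(proc);
		ref = binder_get_ref_for_node_olocked(proc, node, new_ref);
	}
	ret = binder_inc_ref_olocked(ref, strong, target_list);
	*rdata = ref->data;
	binder_proc_unlock(proc);
	if (new_ref && ref != new_ref)
		kfree(new_ref);		/* another thread attached one first */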
Change-Id: Icf6add0eddf70473b39239960b2d9a524775b53a Signed-off-by: Todd Kjos --- drivers/android/binder.c | 133 ++++++++++++++++++++++++--------------- 1 file changed, 83 insertions(+), 50 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index fdafd7df9171..2ef3022d969d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -475,7 +475,9 @@ enum binder_deferred_state { * this proc ordered by node->ptr * (protected by @inner_lock) * @refs_by_desc: rbtree of refs ordered by ref->desc + * (protected by @outer_lock) * @refs_by_node: rbtree of refs ordered by ref->node + * (protected by @outer_lock) * @pid PID of group_leader of process * (invariant after initialized) * @tsk task_struct for group_leader of process @@ -1270,8 +1272,8 @@ static void binder_put_node(struct binder_node *node) binder_dec_node_tmpref(node); } -static struct binder_ref *binder_get_ref(struct binder_proc *proc, - u32 desc, bool need_strong_ref) +static struct binder_ref *binder_get_ref_olocked(struct binder_proc *proc, + u32 desc, bool need_strong_ref) { struct rb_node *n = proc->refs_by_desc.rb_node; struct binder_ref *ref; @@ -1294,7 +1296,7 @@ static struct binder_ref *binder_get_ref(struct binder_proc *proc, } /** - * binder_get_ref_for_node() - get the ref associated with given node + * binder_get_ref_for_node_olocked() - get the ref associated with given node * @proc: binder_proc that owns the ref * @node: binder_node of target * @new_ref: newly allocated binder_ref to be initialized or %NULL @@ -1311,9 +1313,10 @@ static struct binder_ref *binder_get_ref(struct binder_proc *proc, * new_ref. new_ref must be kfree'd by the caller in * this case. */ -static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, - struct binder_node *node, - struct binder_ref *new_ref) +static struct binder_ref *binder_get_ref_for_node_olocked( + struct binder_proc *proc, + struct binder_node *node, + struct binder_ref *new_ref) { struct binder_context *context = proc->context; struct rb_node **p = &proc->refs_by_node.rb_node; @@ -1376,7 +1379,7 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, return new_ref; } -static void binder_cleanup_ref(struct binder_ref *ref) +static void binder_cleanup_ref_olocked(struct binder_ref *ref) { bool delete_node = false; @@ -1419,17 +1422,17 @@ static void binder_cleanup_ref(struct binder_ref *ref) } /** - * binder_inc_ref() - increment the ref for given handle + * binder_inc_ref_olocked() - increment the ref for given handle * @ref: ref to be incremented * @strong: if true, strong increment, else weak * @target_list: list to queue node work on * - * Increment the ref. + * Increment the ref. @ref->proc->outer_lock must be held on entry * * Return: 0, if successful, else errno */ -static int binder_inc_ref(struct binder_ref *ref, int strong, - struct list_head *target_list) +static int binder_inc_ref_olocked(struct binder_ref *ref, int strong, + struct list_head *target_list) { int ret; @@ -1458,12 +1461,9 @@ static int binder_inc_ref(struct binder_ref *ref, int strong, * * Decrement the ref. * - * TODO: kfree is avoided here since an upcoming patch - * will put this under a lock. 
- * * Return: true if ref is cleaned up and ready to be freed */ -static bool binder_dec_ref(struct binder_ref *ref, int strong) +static bool binder_dec_ref_olocked(struct binder_ref *ref, int strong) { if (strong) { if (ref->data.strong == 0) { @@ -1487,13 +1487,7 @@ static bool binder_dec_ref(struct binder_ref *ref, int strong) ref->data.weak--; } if (ref->data.strong == 0 && ref->data.weak == 0) { - binder_cleanup_ref(ref); - /* - * TODO: we could kfree(ref) here, but an upcoming - * patch will call this with a lock held, so we - * return an indication that the ref should be - * freed. - */ + binder_cleanup_ref_olocked(ref); return true; } return false; @@ -1518,7 +1512,8 @@ static struct binder_node *binder_get_node_from_ref( struct binder_node *node; struct binder_ref *ref; - ref = binder_get_ref(proc, desc, need_strong_ref); + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, desc, need_strong_ref); if (!ref) goto err_no_ref; node = ref->node; @@ -1529,10 +1524,12 @@ static struct binder_node *binder_get_node_from_ref( binder_inc_node_tmpref(node); if (rdata) *rdata = ref->data; + binder_proc_unlock(proc); return node; err_no_ref: + binder_proc_unlock(proc); return NULL; } @@ -1572,24 +1569,27 @@ static int binder_update_ref_for_handle(struct binder_proc *proc, struct binder_ref *ref; bool delete_ref = false; - ref = binder_get_ref(proc, desc, strong); + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, desc, strong); if (!ref) { ret = -EINVAL; goto err_no_ref; } if (increment) - ret = binder_inc_ref(ref, strong, NULL); + ret = binder_inc_ref_olocked(ref, strong, NULL); else - delete_ref = binder_dec_ref(ref, strong); + delete_ref = binder_dec_ref_olocked(ref, strong); if (rdata) *rdata = ref->data; + binder_proc_unlock(proc); if (delete_ref) binder_free_ref(ref); return ret; err_no_ref: + binder_proc_unlock(proc); return ret; } @@ -1634,15 +1634,19 @@ static int binder_inc_ref_for_node(struct binder_proc *proc, struct binder_ref *new_ref = NULL; int ret = 0; - ref = binder_get_ref_for_node(proc, node, NULL); + binder_proc_lock(proc); + ref = binder_get_ref_for_node_olocked(proc, node, NULL); if (!ref) { + binder_proc_unlock(proc); new_ref = kzalloc(sizeof(*ref), GFP_KERNEL); if (!new_ref) return -ENOMEM; - ref = binder_get_ref_for_node(proc, node, new_ref); + binder_proc_lock(proc); + ref = binder_get_ref_for_node_olocked(proc, node, new_ref); } - ret = binder_inc_ref(ref, strong, target_list); + ret = binder_inc_ref_olocked(ref, strong, target_list); *rdata = ref->data; + binder_proc_unlock(proc); if (new_ref && ref != new_ref) /* * Another thread created the ref first so @@ -2498,11 +2502,14 @@ static void binder_transaction(struct binder_proc *proc, * stays alive until the transaction is * done. 
*/ - ref = binder_get_ref(proc, tr->target.handle, true); + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, tr->target.handle, + true); if (ref) { binder_inc_node(ref->node, 1, 0, NULL); target_node = ref->node; } + binder_proc_unlock(proc); if (target_node == NULL) { binder_user_error("%d:%d got transaction to invalid handle\n", proc->pid, thread->pid); @@ -3278,7 +3285,7 @@ static int binder_thread_write(struct binder_proc *proc, uint32_t target; binder_uintptr_t cookie; struct binder_ref *ref; - struct binder_ref_death *death; + struct binder_ref_death *death = NULL; if (get_user(target, (uint32_t __user *)ptr)) return -EFAULT; @@ -3286,7 +3293,29 @@ static int binder_thread_write(struct binder_proc *proc, if (get_user(cookie, (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(binder_uintptr_t); - ref = binder_get_ref(proc, target, false); + if (cmd == BC_REQUEST_DEATH_NOTIFICATION) { + /* + * Allocate memory for death notification + * before taking lock + */ + death = kzalloc(sizeof(*death), GFP_KERNEL); + if (death == NULL) { + WARN_ON(thread->return_error.cmd != + BR_OK); + thread->return_error.cmd = BR_ERROR; + binder_enqueue_work( + thread->proc, + &thread->return_error.work, + &thread->todo); + binder_debug( + BINDER_DEBUG_FAILED_TRANSACTION, + "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n", + proc->pid, thread->pid); + break; + } + } + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, target, false); if (ref == NULL) { binder_user_error("%d:%d %s invalid ref %d\n", proc->pid, thread->pid, @@ -3294,6 +3323,8 @@ static int binder_thread_write(struct binder_proc *proc, "BC_REQUEST_DEATH_NOTIFICATION" : "BC_CLEAR_DEATH_NOTIFICATION", target); + binder_proc_unlock(proc); + kfree(death); break; } @@ -3311,20 +3342,8 @@ static int binder_thread_write(struct binder_proc *proc, if (ref->death) { binder_user_error("%d:%d BC_REQUEST_DEATH_NOTIFICATION death notification already set\n", proc->pid, thread->pid); - break; - } - death = kzalloc(sizeof(*death), GFP_KERNEL); - if (death == NULL) { - WARN_ON(thread->return_error.cmd != - BR_OK); - thread->return_error.cmd = BR_ERROR; - binder_enqueue_work( - thread->proc, - &thread->return_error.work, - &thread->todo); - binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, - "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n", - proc->pid, thread->pid); + binder_proc_unlock(proc); + kfree(death); break; } binder_stats_created(BINDER_STAT_DEATH); @@ -3357,6 +3376,7 @@ static int binder_thread_write(struct binder_proc *proc, binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification not active\n", proc->pid, thread->pid); binder_node_unlock(ref->node); + binder_proc_unlock(proc); break; } death = ref->death; @@ -3366,6 +3386,7 @@ static int binder_thread_write(struct binder_proc *proc, (u64)death->cookie, (u64)cookie); binder_node_unlock(ref->node); + binder_proc_unlock(proc); break; } ref->death = NULL; @@ -3392,6 +3413,7 @@ static int binder_thread_write(struct binder_proc *proc, binder_inner_proc_unlock(proc); binder_node_unlock(ref->node); } + binder_proc_unlock(proc); } break; case BC_DEAD_BINDER_DONE: { struct binder_work *w; @@ -4602,14 +4624,18 @@ static void binder_deferred_release(struct binder_proc *proc) binder_inner_proc_unlock(proc); outgoing_refs = 0; + binder_proc_lock(proc); while ((n = rb_first(&proc->refs_by_desc))) { struct binder_ref *ref; ref = rb_entry(n, struct binder_ref, rb_node_desc); outgoing_refs++; - binder_cleanup_ref(ref); + binder_cleanup_ref_olocked(ref); + 
binder_proc_unlock(proc);
 		binder_free_ref(ref);
+		binder_proc_lock(proc);
 	}
+	binder_proc_unlock(proc);

 	binder_release_work(proc, &proc->todo);
 	binder_release_work(proc, &proc->delivered_death);

@@ -4817,8 +4843,10 @@ static void print_binder_node_nilocked(struct seq_file *m,
 	}
 }

-static void print_binder_ref(struct seq_file *m, struct binder_ref *ref)
+static void print_binder_ref_olocked(struct seq_file *m,
+				     struct binder_ref *ref)
 {
+	WARN_ON(!spin_is_locked(&ref->proc->outer_lock));
 	binder_node_lock(ref->node);
 	seq_printf(m, "  ref %d: desc %d %snode %d s %d w %d d %pK\n",
 		   ref->data.debug_id, ref->data.desc,
@@ -4870,11 +4898,14 @@ static void print_binder_proc(struct seq_file *m,
 		binder_put_node(last_node);

 	if (print_all) {
+		binder_proc_lock(proc);
 		for (n = rb_first(&proc->refs_by_desc);
 		     n != NULL;
 		     n = rb_next(n))
-			print_binder_ref(m, rb_entry(n, struct binder_ref,
-						     rb_node_desc));
+			print_binder_ref_olocked(m, rb_entry(n,
+					     struct binder_ref,
+					     rb_node_desc));
+		binder_proc_unlock(proc);
 	}
 	binder_alloc_print_allocated(m, &proc->alloc);
 	binder_inner_proc_lock(proc);
@@ -5014,6 +5045,7 @@ static void print_binder_proc_stats(struct seq_file *m,
 	count = 0;
 	strong = 0;
 	weak = 0;
+	binder_proc_lock(proc);
 	for (n = rb_first(&proc->refs_by_desc); n != NULL; n = rb_next(n)) {
 		struct binder_ref *ref = rb_entry(n, struct binder_ref,
 						  rb_node_desc);
@@ -5021,6 +5053,7 @@
 		strong += ref->data.strong;
 		weak += ref->data.weak;
 	}
+	binder_proc_unlock(proc);
 	seq_printf(m, "  refs: %d s %d w %d\n", count, strong, weak);

 	count = binder_alloc_get_allocated_count(&proc->alloc);

From da957e45dd21fee345544dadf0fc426abe503d28 Mon Sep 17 00:00:00 2001
From: Todd Kjos
Date: Fri, 21 Apr 2017 14:32:11 -0700
Subject: [PATCH 037/176] FROMLIST: binder: protect against stale pointers in
 print_binder_transaction

(from https://patchwork.kernel.org/patch/9817761/)

When printing transactions there were several race conditions
that could cause a stale pointer to be dereferenced. Fixed by
reading the pointer once and using it if valid (which is safe).
The transaction buffer also needed protection via proc lock,
so it is only printed if we are holding the correct lock.

Bug: 36650912
Test: tested manually
Change-Id: I78240f99cc1a070d70a841c0d84d4306e2fd528d
Signed-off-by: Todd Kjos
---
 drivers/android/binder.c | 60 ++++++++++++++++++++++++++--------------
 1 file changed, 40 insertions(+), 20 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 2ef3022d969d..2e5016879654 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -4703,35 +4703,52 @@ binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer)
 	mutex_unlock(&binder_deferred_lock);
 }

-static void print_binder_transaction(struct seq_file *m, const char *prefix,
-				     struct binder_transaction *t)
+static void print_binder_transaction_ilocked(struct seq_file *m,
+					     struct binder_proc *proc,
+					     const char *prefix,
+					     struct binder_transaction *t)
 {
+	struct binder_proc *to_proc;
+	struct binder_buffer *buffer = t->buffer;
+
+	WARN_ON(!spin_is_locked(&proc->inner_lock));
 	spin_lock(&t->lock);
+	to_proc = t->to_proc;
 	seq_printf(m,
 		   "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %ld r%d",
 		   prefix, t->debug_id, t,
 		   t->from ? t->from->proc->pid : 0,
 		   t->from ? t->from->pid : 0,
-		   t->to_proc ? t->to_proc->pid : 0,
+		   to_proc ? to_proc->pid : 0,
 		   t->to_thread ?
t->to_thread->pid : 0, t->code, t->flags, t->priority, t->need_reply); spin_unlock(&t->lock); - if (t->buffer == NULL) { + if (proc != to_proc) { + /* + * Can only safely deref buffer if we are holding the + * correct proc inner lock for this node + */ + seq_puts(m, "\n"); + return; + } + + if (buffer == NULL) { seq_puts(m, " buffer free\n"); return; } - if (t->buffer->target_node) - seq_printf(m, " node %d", - t->buffer->target_node->debug_id); + if (buffer->target_node) + seq_printf(m, " node %d", buffer->target_node->debug_id); seq_printf(m, " size %zd:%zd data %p\n", - t->buffer->data_size, t->buffer->offsets_size, - t->buffer->data); + buffer->data_size, buffer->offsets_size, + buffer->data); } -static void print_binder_work_ilocked(struct seq_file *m, const char *prefix, - const char *transaction_prefix, - struct binder_work *w) +static void print_binder_work_ilocked(struct seq_file *m, + struct binder_proc *proc, + const char *prefix, + const char *transaction_prefix, + struct binder_work *w) { struct binder_node *node; struct binder_transaction *t; @@ -4739,7 +4756,8 @@ static void print_binder_work_ilocked(struct seq_file *m, const char *prefix, switch (w->type) { case BINDER_WORK_TRANSACTION: t = container_of(w, struct binder_transaction, work); - print_binder_transaction(m, transaction_prefix, t); + print_binder_transaction_ilocked( + m, proc, transaction_prefix, t); break; case BINDER_WORK_RETURN_ERROR: { struct binder_error *e = container_of( @@ -4790,20 +4808,21 @@ static void print_binder_thread_ilocked(struct seq_file *m, t = thread->transaction_stack; while (t) { if (t->from == thread) { - print_binder_transaction(m, - " outgoing transaction", t); + print_binder_transaction_ilocked(m, thread->proc, + " outgoing transaction", t); t = t->from_parent; } else if (t->to_thread == thread) { - print_binder_transaction(m, + print_binder_transaction_ilocked(m, thread->proc, " incoming transaction", t); t = t->to_parent; } else { - print_binder_transaction(m, " bad transaction", t); + print_binder_transaction_ilocked(m, thread->proc, + " bad transaction", t); t = NULL; } } list_for_each_entry(w, &thread->todo, entry) { - print_binder_work_ilocked(m, " ", + print_binder_work_ilocked(m, thread->proc, " ", " pending transaction", w); } if (!print_always && m->count == header_pos) @@ -4838,7 +4857,7 @@ static void print_binder_node_nilocked(struct seq_file *m, seq_puts(m, "\n"); if (node->proc) { list_for_each_entry(w, &node->async_todo, entry) - print_binder_work_ilocked(m, " ", + print_binder_work_ilocked(m, node->proc, " ", " pending async transaction", w); } } @@ -4910,7 +4929,8 @@ static void print_binder_proc(struct seq_file *m, binder_alloc_print_allocated(m, &proc->alloc); binder_inner_proc_lock(proc); list_for_each_entry(w, &proc->todo, entry) - print_binder_work_ilocked(m, " ", " pending transaction", w); + print_binder_work_ilocked(m, proc, " ", + " pending transaction", w); list_for_each_entry(w, &proc->delivered_death, entry) { seq_puts(m, " has delivered dead binder\n"); break; From 6c8ad5b3f0366b8b175d8ee4223fc8dbafc39991 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Mon, 22 May 2017 11:26:23 -0700 Subject: [PATCH 038/176] FROMLIST: binder: fix death race conditions (from https://patchwork.kernel.org/patch/9817765/) A race existed where one thread could register a death notification for a node, while another thread was cleaning up that node and sending out death notifications for its references, causing simultaneous access to ref->death because different locks were 
held. Test: boots, manual testing Change-Id: Iff73312f34f70374f417beba4c4c82dd33cac119 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 64 ++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 2e5016879654..a161e55d1373 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -442,6 +442,7 @@ struct binder_ref_data { * ref for deletion in binder_cleanup_ref, a non-NULL * @node indicates the node must be freed * @death: pointer to death notification (ref_death) if requested + * (protected by @node->lock) * * Structure to track references from procA to target node (on procB). This * structure is unsafe to access without holding @proc->outer_lock. @@ -3338,10 +3339,12 @@ static int binder_thread_write(struct binder_proc *proc, ref->data.desc, ref->data.strong, ref->data.weak, ref->node->debug_id); + binder_node_lock(ref->node); if (cmd == BC_REQUEST_DEATH_NOTIFICATION) { if (ref->death) { binder_user_error("%d:%d BC_REQUEST_DEATH_NOTIFICATION death notification already set\n", proc->pid, thread->pid); + binder_node_unlock(ref->node); binder_proc_unlock(proc); kfree(death); break; @@ -3350,7 +3353,6 @@ static int binder_thread_write(struct binder_proc *proc, INIT_LIST_HEAD(&death->work.entry); death->cookie = cookie; ref->death = death; - binder_node_lock(ref->node); if (ref->node->proc == NULL) { ref->death->work.type = BINDER_WORK_DEAD_BINDER; if (thread->looper & @@ -3369,9 +3371,7 @@ static int binder_thread_write(struct binder_proc *proc, &proc->wait); } } - binder_node_unlock(ref->node); } else { - binder_node_lock(ref->node); if (ref->death == NULL) { binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification not active\n", proc->pid, thread->pid); @@ -3411,8 +3411,8 @@ static int binder_thread_write(struct binder_proc *proc, death->work.type = BINDER_WORK_DEAD_BINDER_AND_CLEAR; } binder_inner_proc_unlock(proc); - binder_node_unlock(ref->node); } + binder_node_unlock(ref->node); binder_proc_unlock(proc); } break; case BC_DEAD_BINDER_DONE: { @@ -3749,44 +3749,39 @@ retry: case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: { struct binder_ref_death *death; uint32_t cmd; + binder_uintptr_t cookie; death = container_of(w, struct binder_ref_death, work); if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE; else cmd = BR_DEAD_BINDER; - /* - * TODO: there is a race condition between - * death notification requests and delivery - * of the notifications. This will be handled - * in a later patch. - */ - binder_inner_proc_unlock(proc); - if (put_user(cmd, (uint32_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(uint32_t); - if (put_user(death->cookie, - (binder_uintptr_t __user *)ptr)) - return -EFAULT; - ptr += sizeof(binder_uintptr_t); - binder_stat_br(proc, thread, cmd); + cookie = death->cookie; + binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION, "%d:%d %s %016llx\n", proc->pid, thread->pid, cmd == BR_DEAD_BINDER ? 
"BR_DEAD_BINDER" : "BR_CLEAR_DEATH_NOTIFICATION_DONE", - (u64)death->cookie); - + (u64)cookie); if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) { + binder_inner_proc_unlock(proc); kfree(death); binder_stats_deleted(BINDER_STAT_DEATH); } else { - binder_inner_proc_lock(proc); binder_enqueue_work_ilocked( w, &proc->delivered_death); binder_inner_proc_unlock(proc); } + if (put_user(cmd, (uint32_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(uint32_t); + if (put_user(cookie, + (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + binder_stat_br(proc, thread, cmd); if (cmd == BR_DEAD_BINDER) goto done; /* DEAD_BINDER notifications can cause transactions */ } break; @@ -4536,20 +4531,25 @@ static int binder_node_release(struct binder_node *node, int refs) hlist_for_each_entry(ref, &node->refs, node_entry) { refs++; - - if (!ref->death) + /* + * Need the node lock to synchronize + * with new notification requests and the + * inner lock to synchronize with queued + * death notifications. + */ + binder_inner_proc_lock(ref->proc); + if (!ref->death) { + binder_inner_proc_unlock(ref->proc); continue; + } death++; - binder_inner_proc_lock(ref->proc); - if (list_empty(&ref->death->work.entry)) { - ref->death->work.type = BINDER_WORK_DEAD_BINDER; - binder_enqueue_work_ilocked(&ref->death->work, - &ref->proc->todo); - wake_up_interruptible(&ref->proc->wait); - } else - BUG(); + BUG_ON(!list_empty(&ref->death->work.entry)); + ref->death->work.type = BINDER_WORK_DEAD_BINDER; + binder_enqueue_work_ilocked(&ref->death->work, + &ref->proc->todo); + wake_up_interruptible(&ref->proc->wait); binder_inner_proc_unlock(ref->proc); } From 8881f118f526914c4705db04e6ea2e8e5f7023d4 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 14 Nov 2016 11:37:41 -0800 Subject: [PATCH 039/176] FROMLIST: binder: remove global binder lock (from https://patchwork.kernel.org/patch/9817773/) Remove global mutex and rely on fine-grained locking Change-Id: Idd1ae2e52d654e5dd76d443a1ff97522e687fd4c Signed-off-by: Todd Kjos --- drivers/android/binder.c | 46 +++------------------------------------- 1 file changed, 3 insertions(+), 43 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index a161e55d1373..5753665f76ee 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -79,8 +79,6 @@ #include "binder_alloc.h" #include "binder_trace.h" -static DEFINE_MUTEX(binder_main_lock); - static HLIST_HEAD(binder_deferred_list); static DEFINE_MUTEX(binder_deferred_lock); @@ -924,19 +922,6 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) return retval; } -static inline void binder_lock(const char *tag) -{ - trace_binder_lock(tag); - mutex_lock(&binder_main_lock); - trace_binder_locked(tag); -} - -static inline void binder_unlock(const char *tag) -{ - trace_binder_unlock(tag); - mutex_unlock(&binder_main_lock); -} - static void binder_set_nice(long nice) { long min_nice; @@ -3558,8 +3543,6 @@ retry: thread->looper |= BINDER_LOOPER_STATE_WAITING; - binder_unlock(__func__); - trace_binder_wait_for_work(wait_for_proc_work, !!thread->transaction_stack, !binder_worklist_empty(proc, &thread->todo)); @@ -3585,8 +3568,6 @@ retry: ret = wait_event_freezable(thread->wait, binder_has_thread_work(thread)); } - binder_lock(__func__); - binder_inner_proc_lock(proc); if (wait_for_proc_work) proc->ready_threads--; @@ -4102,8 +4083,6 @@ static unsigned int binder_poll(struct file *filp, struct binder_thread *thread = NULL; int wait_for_proc_work; - 
binder_lock(__func__); - thread = binder_get_thread(proc); binder_inner_proc_lock(thread->proc); @@ -4111,8 +4090,6 @@ static unsigned int binder_poll(struct file *filp, binder_worklist_empty_ilocked(&thread->todo); binder_inner_proc_unlock(thread->proc); - binder_unlock(__func__); - if (wait_for_proc_work) { if (binder_has_proc_work(proc, thread)) return POLLIN; @@ -4257,7 +4234,6 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (ret) goto err_unlocked; - binder_lock(__func__); thread = binder_get_thread(proc); if (thread == NULL) { ret = -ENOMEM; @@ -4316,7 +4292,6 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) err: if (thread) thread->looper_need_return = false; - binder_unlock(__func__); wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret && ret != -ERESTARTSYS) pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret); @@ -4422,15 +4397,11 @@ static int binder_open(struct inode *nodp, struct file *filp) proc->context = &binder_dev->context; binder_alloc_init(&proc->alloc); - binder_lock(__func__); - binder_stats_created(BINDER_STAT_PROC); proc->pid = current->group_leader->pid; INIT_LIST_HEAD(&proc->delivered_death); filp->private_data = proc; - binder_unlock(__func__); - mutex_lock(&binder_procs_lock); hlist_add_head(&proc->proc_node, &binder_procs); mutex_unlock(&binder_procs_lock); @@ -4656,7 +4627,6 @@ static void binder_deferred_func(struct work_struct *work) int defer; do { - binder_lock(__func__); mutex_lock(&binder_deferred_lock); if (!hlist_empty(&binder_deferred_list)) { proc = hlist_entry(binder_deferred_list.first, @@ -4683,7 +4653,6 @@ static void binder_deferred_func(struct work_struct *work) if (defer & BINDER_DEFERRED_RELEASE) binder_deferred_release(proc); /* frees proc */ - binder_unlock(__func__); if (files) put_files_struct(files); } while (proc); @@ -5098,8 +5067,6 @@ static int binder_state_show(struct seq_file *m, void *unused) struct binder_node *node; struct binder_node *last_node = NULL; - binder_lock(__func__); - seq_puts(m, "binder state:\n"); spin_lock(&binder_dead_nodes_lock); @@ -5129,7 +5096,7 @@ static int binder_state_show(struct seq_file *m, void *unused) hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc(m, proc, 1); mutex_unlock(&binder_procs_lock); - binder_unlock(__func__); + return 0; } @@ -5137,8 +5104,6 @@ static int binder_stats_show(struct seq_file *m, void *unused) { struct binder_proc *proc; - binder_lock(__func__); - seq_puts(m, "binder stats:\n"); print_binder_stats(m, "", &binder_stats); @@ -5147,7 +5112,7 @@ static int binder_stats_show(struct seq_file *m, void *unused) hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc_stats(m, proc); mutex_unlock(&binder_procs_lock); - binder_unlock(__func__); + return 0; } @@ -5155,14 +5120,12 @@ static int binder_transactions_show(struct seq_file *m, void *unused) { struct binder_proc *proc; - binder_lock(__func__); - seq_puts(m, "binder transactions:\n"); mutex_lock(&binder_procs_lock); hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc(m, proc, 0); mutex_unlock(&binder_procs_lock); - binder_unlock(__func__); + return 0; } @@ -5171,8 +5134,6 @@ static int binder_proc_show(struct seq_file *m, void *unused) struct binder_proc *itr; int pid = (unsigned long)m->private; - binder_lock(__func__); - mutex_lock(&binder_procs_lock); hlist_for_each_entry(itr, &binder_procs, proc_node) { if (itr->pid == pid) { @@ 
-5182,7 +5143,6 @@ static int binder_proc_show(struct seq_file *m, void *unused) } mutex_unlock(&binder_procs_lock); - binder_unlock(__func__); return 0; } From c9cd6356f9631c9489b850c3e04d15071899fa6b Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 2 Jun 2017 11:15:44 -0700 Subject: [PATCH 040/176] ANDROID: binder: remove proc waitqueue Removes the process waitqueue, so that threads can only wait on the thread waitqueue. Whenever there is process work to do, pick a thread and wake it up. This also fixes an issue with using epoll(), since we no longer have to block on different waitqueues. Bug: 34461621 Change-Id: I2950b9de6fa078ee72d53c667a03cbaf587f0849 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 257 +++++++++++++++++++++++++++------------ 1 file changed, 182 insertions(+), 75 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 5753665f76ee..05185b7b1343 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -28,10 +28,10 @@ * binder_node_lock() and binder_node_unlock() are * used to acq/rel * 3) proc->inner_lock : protects the thread and node lists - * (proc->threads, proc->nodes) and all todo lists associated - * with the binder_proc (proc->todo, thread->todo, - * proc->delivered_death and node->async_todo), as well as - * thread->transaction_stack + * (proc->threads, proc->waiting_threads, proc->nodes) + * and all todo lists associated with the binder_proc + * (proc->todo, thread->todo, proc->delivered_death and + * node->async_todo), as well as thread->transaction_stack * binder_inner_proc_lock() and binder_inner_proc_unlock() * are used to acq/rel * @@ -477,6 +477,8 @@ enum binder_deferred_state { * (protected by @outer_lock) * @refs_by_node: rbtree of refs ordered by ref->node * (protected by @outer_lock) + * @waiting_threads: threads currently waiting for proc work + * (protected by @inner_lock) * @pid PID of group_leader of process * (invariant after initialized) * @tsk task_struct for group_leader of process @@ -506,8 +508,6 @@ enum binder_deferred_state { * (protected by @inner_lock) * @requested_threads_started: number binder threads started * (protected by @inner_lock) - * @ready_threads: number of threads waiting for proc work - * (protected by @inner_lock) * @tmp_ref: temporary reference to indicate proc is in use * (protected by @inner_lock) * @default_priority: default scheduler priority @@ -528,6 +528,7 @@ struct binder_proc { struct rb_root nodes; struct rb_root refs_by_desc; struct rb_root refs_by_node; + struct list_head waiting_threads; int pid; struct task_struct *tsk; struct files_struct *files; @@ -542,7 +543,6 @@ struct binder_proc { int max_threads; int requested_threads; int requested_threads_started; - int ready_threads; int tmp_ref; long default_priority; struct dentry *debugfs_entry; @@ -558,6 +558,7 @@ enum { BINDER_LOOPER_STATE_EXITED = 0x04, BINDER_LOOPER_STATE_INVALID = 0x08, BINDER_LOOPER_STATE_WAITING = 0x10, + BINDER_LOOPER_STATE_POLL = 0x20, }; /** @@ -566,6 +567,8 @@ enum { * (invariant after initialization) * @rb_node: element for proc->threads rbtree * (protected by @proc->inner_lock) + * @waiting_thread_node: element for @proc->waiting_threads list + * (protected by @proc->inner_lock) * @pid: PID for this thread * (invariant after initialization) * @looper: bitmap of looping state @@ -595,6 +598,7 @@ enum { struct binder_thread { struct binder_proc *proc; struct rb_node rb_node; + struct list_head waiting_thread_node; int pid; int looper; /* only modified by this thread */ 
bool looper_need_return; /* can be written by other thread */ @@ -922,6 +926,86 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) return retval; } +static bool binder_has_work_ilocked(struct binder_thread *thread, + bool do_proc_work) +{ + return !binder_worklist_empty_ilocked(&thread->todo) || + thread->looper_need_return || + (do_proc_work && + !binder_worklist_empty_ilocked(&thread->proc->todo)); +} + +static bool binder_has_work(struct binder_thread *thread, bool do_proc_work) +{ + bool has_work; + + binder_inner_proc_lock(thread->proc); + has_work = binder_has_work_ilocked(thread, do_proc_work); + binder_inner_proc_unlock(thread->proc); + + return has_work; +} + +static bool binder_available_for_proc_work_ilocked(struct binder_thread *thread) +{ + return !thread->transaction_stack && + binder_worklist_empty_ilocked(&thread->todo) && + (thread->looper & (BINDER_LOOPER_STATE_ENTERED | + BINDER_LOOPER_STATE_REGISTERED)); +} + +static void binder_wakeup_poll_threads_ilocked(struct binder_proc *proc, + bool sync) +{ + struct rb_node *n; + struct binder_thread *thread; + + for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) { + thread = rb_entry(n, struct binder_thread, rb_node); + if (thread->looper & BINDER_LOOPER_STATE_POLL && + binder_available_for_proc_work_ilocked(thread)) { + if (sync) + wake_up_interruptible_sync(&thread->wait); + else + wake_up_interruptible(&thread->wait); + } + } +} + +static void binder_wakeup_proc_ilocked(struct binder_proc *proc, bool sync) +{ + struct binder_thread *thread; + + BUG_ON(!spin_is_locked(&proc->inner_lock)); + thread = list_first_entry_or_null(&proc->waiting_threads, + struct binder_thread, + waiting_thread_node); + + if (thread) { + list_del_init(&thread->waiting_thread_node); + if (sync) + wake_up_interruptible_sync(&thread->wait); + else + wake_up_interruptible(&thread->wait); + return; + } + + /* Didn't find a thread waiting for proc work; this can happen + * in two scenarios: + * 1. All threads are busy handling transactions + * In that case, one of those threads should call back into + * the kernel driver soon and pick up this work. + * 2. Threads are using the (e)poll interface, in which case + * they may be blocked on the waitqueue without having been + * added to waiting_threads. For this case, we just iterate + * over all threads not handling transaction work, and + * wake them all up. We wake all because we don't know whether + * a thread that called into (e)poll is handling non-binder + * work currently. 
+ */ + binder_wakeup_poll_threads_ilocked(proc, sync); +} + static void binder_set_nice(long nice) { long min_nice; @@ -1141,7 +1225,7 @@ static bool binder_dec_node_nilocked(struct binder_node *node, if (proc && (node->has_strong_ref || node->has_weak_ref)) { if (list_empty(&node->work.entry)) { binder_enqueue_work_ilocked(&node->work, &proc->todo); - wake_up_interruptible(&node->proc->wait); + binder_wakeup_proc_ilocked(proc, false); } } else { if (hlist_empty(&node->refs) && !node->local_strong_refs && @@ -2402,7 +2486,6 @@ static void binder_transaction(struct binder_proc *proc, struct binder_thread *target_thread = NULL; struct binder_node *target_node = NULL; struct list_head *target_list; - wait_queue_head_t *target_wait; struct binder_transaction *in_reply_to = NULL; struct binder_transaction_log_entry *e; uint32_t return_error = 0; @@ -2412,6 +2495,7 @@ static void binder_transaction(struct binder_proc *proc, binder_size_t last_fixup_min_off = 0; struct binder_context *context = proc->context; int t_debug_id = atomic_inc_return(&binder_last_id); + bool wakeup_for_proc_work = false; e = binder_transaction_log_add(&binder_transaction_log); e->debug_id = t_debug_id; @@ -2575,10 +2659,9 @@ static void binder_transaction(struct binder_proc *proc, if (target_thread) { e->to_thread = target_thread->pid; target_list = &target_thread->todo; - target_wait = &target_thread->wait; } else { target_list = &target_proc->todo; - target_wait = &target_proc->wait; + wakeup_for_proc_work = true; } e->to_proc = target_proc->pid; @@ -2885,7 +2968,7 @@ static void binder_transaction(struct binder_proc *proc, binder_node_lock(target_node); if (target_node->has_async_transaction) { target_list = &target_node->async_todo; - target_wait = NULL; + wakeup_for_proc_work = false; } else target_node->has_async_transaction = 1; /* @@ -2904,11 +2987,13 @@ static void binder_transaction(struct binder_proc *proc, binder_inner_proc_unlock(target_proc); binder_node_unlock(target_node); } - if (target_wait) { - if (reply || !(tr->flags & TF_ONE_WAY)) - wake_up_interruptible_sync(target_wait); - else - wake_up_interruptible(target_wait); + if (target_thread) { + wake_up_interruptible_sync(&target_thread->wait); + } else if (wakeup_for_proc_work) { + binder_inner_proc_lock(target_proc); + binder_wakeup_proc_ilocked(target_proc, + !(tr->flags & TF_ONE_WAY)); + binder_inner_proc_unlock(target_proc); } if (target_thread) binder_thread_dec_tmpref(target_thread); @@ -3348,12 +3433,14 @@ static int binder_thread_write(struct binder_proc *proc, &ref->death->work, &thread->todo); else { - binder_enqueue_work( - proc, + binder_inner_proc_lock(proc); + binder_enqueue_work_ilocked( &ref->death->work, &proc->todo); - wake_up_interruptible( - &proc->wait); + binder_wakeup_proc_ilocked( + proc, + false); + binder_inner_proc_unlock(proc); } } } else { @@ -3388,8 +3475,9 @@ static int binder_thread_write(struct binder_proc *proc, binder_enqueue_work_ilocked( &death->work, &proc->todo); - wake_up_interruptible( - &proc->wait); + binder_wakeup_proc_ilocked( + proc, + false); } } else { BUG_ON(death->work.type != BINDER_WORK_DEAD_BINDER); @@ -3444,7 +3532,8 @@ static int binder_thread_write(struct binder_proc *proc, binder_enqueue_work_ilocked( &death->work, &proc->todo); - wake_up_interruptible(&proc->wait); + binder_wakeup_proc_ilocked( + proc, false); } } binder_inner_proc_unlock(proc); @@ -3471,13 +3560,6 @@ static void binder_stat_br(struct binder_proc *proc, } } -static int binder_has_proc_work(struct binder_proc *proc, - struct 
binder_thread *thread) -{ - return !binder_worklist_empty(proc, &proc->todo) || - thread->looper_need_return; -} - static int binder_has_thread_work(struct binder_thread *thread) { return !binder_worklist_empty(thread->proc, &thread->todo) || @@ -3515,6 +3597,38 @@ static int binder_put_node_cmd(struct binder_proc *proc, return 0; } +static int binder_wait_for_work(struct binder_thread *thread, + bool do_proc_work) +{ + DEFINE_WAIT(wait); + struct binder_proc *proc = thread->proc; + int ret = 0; + + freezer_do_not_count(); + binder_inner_proc_lock(proc); + for (;;) { + prepare_to_wait(&thread->wait, &wait, TASK_INTERRUPTIBLE); + if (binder_has_work_ilocked(thread, do_proc_work)) + break; + if (do_proc_work) + list_add(&thread->waiting_thread_node, + &proc->waiting_threads); + binder_inner_proc_unlock(proc); + schedule(); + binder_inner_proc_lock(proc); + list_del_init(&thread->waiting_thread_node); + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + } + finish_wait(&thread->wait, &wait); + binder_inner_proc_unlock(proc); + freezer_count(); + + return ret; +} + static int binder_thread_read(struct binder_proc *proc, struct binder_thread *thread, binder_uintptr_t binder_buffer, size_t size, @@ -3535,10 +3649,7 @@ static int binder_thread_read(struct binder_proc *proc, retry: binder_inner_proc_lock(proc); - wait_for_proc_work = thread->transaction_stack == NULL && - binder_worklist_empty_ilocked(&thread->todo); - if (wait_for_proc_work) - proc->ready_threads++; + wait_for_proc_work = binder_available_for_proc_work_ilocked(thread); binder_inner_proc_unlock(proc); thread->looper |= BINDER_LOOPER_STATE_WAITING; @@ -3555,23 +3666,15 @@ retry: binder_stop_on_user_error < 2); } binder_set_nice(proc->default_priority); - if (non_block) { - if (!binder_has_proc_work(proc, thread)) - ret = -EAGAIN; - } else - ret = wait_event_freezable_exclusive(proc->wait, binder_has_proc_work(proc, thread)); - } else { - if (non_block) { - if (!binder_has_thread_work(thread)) - ret = -EAGAIN; - } else - ret = wait_event_freezable(thread->wait, binder_has_thread_work(thread)); } - binder_inner_proc_lock(proc); - if (wait_for_proc_work) - proc->ready_threads--; - binder_inner_proc_unlock(proc); + if (non_block) { + if (!binder_has_work(thread, wait_for_proc_work)) + ret = -EAGAIN; + } else { + ret = binder_wait_for_work(thread, wait_for_proc_work); + } + thread->looper &= ~BINDER_LOOPER_STATE_WAITING; if (ret) @@ -3857,7 +3960,8 @@ done: *consumed = ptr - buffer; binder_inner_proc_lock(proc); - if (proc->requested_threads + proc->ready_threads == 0 && + if (proc->requested_threads == 0 && + list_empty(&thread->proc->waiting_threads) && proc->requested_threads_started < proc->max_threads && (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) /* the user-space code fails to */ @@ -3968,7 +4072,7 @@ static struct binder_thread *binder_get_thread_ilocked( thread->return_error.cmd = BR_OK; thread->reply_error.work.type = BINDER_WORK_RETURN_ERROR; thread->reply_error.cmd = BR_OK; - + INIT_LIST_HEAD(&new_thread->waiting_thread_node); return thread; } @@ -4081,28 +4185,24 @@ static unsigned int binder_poll(struct file *filp, { struct binder_proc *proc = filp->private_data; struct binder_thread *thread = NULL; - int wait_for_proc_work; + bool wait_for_proc_work; thread = binder_get_thread(proc); binder_inner_proc_lock(thread->proc); - wait_for_proc_work = thread->transaction_stack == NULL && - binder_worklist_empty_ilocked(&thread->todo); + thread->looper |= 
BINDER_LOOPER_STATE_POLL; + wait_for_proc_work = binder_available_for_proc_work_ilocked(thread); + binder_inner_proc_unlock(thread->proc); - if (wait_for_proc_work) { - if (binder_has_proc_work(proc, thread)) - return POLLIN; - poll_wait(filp, &proc->wait, wait); - if (binder_has_proc_work(proc, thread)) - return POLLIN; - } else { - if (binder_has_thread_work(thread)) - return POLLIN; - poll_wait(filp, &thread->wait, wait); - if (binder_has_thread_work(thread)) - return POLLIN; - } + if (binder_has_work(thread, wait_for_proc_work)) + return POLLIN; + + poll_wait(filp, &thread->wait, wait); + + if (binder_has_thread_work(thread)) + return POLLIN; + return 0; } @@ -4149,8 +4249,10 @@ static int binder_ioctl_write_read(struct file *filp, &bwr.read_consumed, filp->f_flags & O_NONBLOCK); trace_binder_read_done(ret); - if (!binder_worklist_empty(proc, &proc->todo)) - wake_up_interruptible(&proc->wait); + binder_inner_proc_lock(proc); + if (!binder_worklist_empty_ilocked(&proc->todo)) + binder_wakeup_proc_ilocked(proc, false); + binder_inner_proc_unlock(proc); if (ret < 0) { if (copy_to_user(ubuf, &bwr, sizeof(bwr))) ret = -EFAULT; @@ -4390,7 +4492,6 @@ static int binder_open(struct inode *nodp, struct file *filp) get_task_struct(current->group_leader); proc->tsk = current->group_leader; INIT_LIST_HEAD(&proc->todo); - init_waitqueue_head(&proc->wait); proc->default_priority = task_nice(current); binder_dev = container_of(filp->private_data, struct binder_device, miscdev); @@ -4400,6 +4501,7 @@ static int binder_open(struct inode *nodp, struct file *filp) binder_stats_created(BINDER_STAT_PROC); proc->pid = current->group_leader->pid; INIT_LIST_HEAD(&proc->delivered_death); + INIT_LIST_HEAD(&proc->waiting_threads); filp->private_data = proc; mutex_lock(&binder_procs_lock); @@ -4451,7 +4553,6 @@ static void binder_deferred_flush(struct binder_proc *proc) } } binder_inner_proc_unlock(proc); - wake_up_interruptible_all(&proc->wait); binder_debug(BINDER_DEBUG_OPEN_CLOSE, "binder_flush: %d woke %d threads\n", proc->pid, @@ -4520,7 +4621,7 @@ static int binder_node_release(struct binder_node *node, int refs) ref->death->work.type = BINDER_WORK_DEAD_BINDER; binder_enqueue_work_ilocked(&ref->death->work, &ref->proc->todo); - wake_up_interruptible(&ref->proc->wait); + binder_wakeup_proc_ilocked(ref->proc, false); binder_inner_proc_unlock(ref->proc); } @@ -5008,23 +5109,29 @@ static void print_binder_proc_stats(struct seq_file *m, struct binder_proc *proc) { struct binder_work *w; + struct binder_thread *thread; struct rb_node *n; - int count, strong, weak; + int count, strong, weak, ready_threads; size_t free_async_space = binder_alloc_get_free_async_space(&proc->alloc); seq_printf(m, "proc %d\n", proc->pid); seq_printf(m, "context %s\n", proc->context->name); count = 0; + ready_threads = 0; binder_inner_proc_lock(proc); for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) count++; + + list_for_each_entry(thread, &proc->waiting_threads, waiting_thread_node) + ready_threads++; + seq_printf(m, " threads: %d\n", count); seq_printf(m, " requested threads: %d+%d/%d\n" " ready threads %d\n" " free async space %zd\n", proc->requested_threads, proc->requested_threads_started, proc->max_threads, - proc->ready_threads, + ready_threads, free_async_space); count = 0; for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) From 5347bf52735ec797553aa11189f03742f4015606 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Tue, 6 Jun 2017 15:17:46 -0700 Subject: [PATCH 041/176] ANDROID: binder: push new 
transactions to waiting threads. Instead of pushing new transactions to the process waitqueue, select a thread that is waiting on proc work to handle the transaction. This will make it easier to improve priority inheritance in future patches, by setting the priority before we wake up a thread. If we can't find a waiting thread, submit the work to the proc waitqueue instead as we did previously. Change-Id: I23cbfcca867bed7b86007e22137d0a8fad4b4001 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 181 +++++++++++++++++++++++++++------------ 1 file changed, 127 insertions(+), 54 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 05185b7b1343..e2daf6b7cbd0 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -972,7 +972,20 @@ static void binder_wakeup_poll_threads_ilocked(struct binder_proc *proc, } } -static void binder_wakeup_proc_ilocked(struct binder_proc *proc, bool sync) +/** + * binder_select_thread_ilocked() - selects a thread for doing proc work. + * @proc: process to select a thread from + * + * Note that calling this function moves the thread off the waiting_threads + * list, so it can only be woken up by the caller of this function, or a + * signal. Therefore, callers *should* always wake up the thread this function + * returns. + * + * Return: If there's a thread currently waiting for process work, + * returns that thread. Otherwise returns NULL. + */ +static struct binder_thread * +binder_select_thread_ilocked(struct binder_proc *proc) { struct binder_thread *thread; @@ -981,8 +994,35 @@ static void binder_wakeup_proc_ilocked(struct binder_proc *proc, bool sync) struct binder_thread, waiting_thread_node); - if (thread) { + if (thread) list_del_init(&thread->waiting_thread_node); + + return thread; +} + +/** + * binder_wakeup_thread_ilocked() - wakes up a thread for doing proc work. + * @proc: process to wake up a thread in + * @thread: specific thread to wake-up (may be NULL) + * @sync: whether to do a synchronous wake-up + * + * This function wakes up a thread in the @proc process. + * The caller may provide a specific thread to wake-up in + * the @thread parameter. If @thread is NULL, this function + * will wake up threads that have called poll(). + * + * Note that for this function to work as expected, callers + * should first call binder_select_thread() to find a thread + * to handle the work (if they don't have a thread already), + * and pass the result into the @thread parameter. 
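+ * + * binder_wakeup_proc_ilocked() below shows the intended pairing: it calls binder_select_thread_ilocked() and passes the result (possibly NULL) straight into this function.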
+ */ +static void binder_wakeup_thread_ilocked(struct binder_proc *proc, + struct binder_thread *thread, + bool sync) +{ + BUG_ON(!spin_is_locked(&proc->inner_lock)); + + if (thread) { if (sync) wake_up_interruptible_sync(&thread->wait); else @@ -1006,6 +1046,13 @@ static void binder_wakeup_proc_ilocked(struct binder_proc *proc, bool sync) binder_wakeup_poll_threads_ilocked(proc, sync); } +static void binder_wakeup_proc_ilocked(struct binder_proc *proc) +{ + struct binder_thread *thread = binder_select_thread_ilocked(proc); + + binder_wakeup_thread_ilocked(proc, thread, /* sync = */false); +} + static void binder_set_nice(long nice) { long min_nice; @@ -1225,7 +1272,7 @@ static bool binder_dec_node_nilocked(struct binder_node *node, if (proc && (node->has_strong_ref || node->has_weak_ref)) { if (list_empty(&node->work.entry)) { binder_enqueue_work_ilocked(&node->work, &proc->todo); - binder_wakeup_proc_ilocked(proc, false); + binder_wakeup_proc_ilocked(proc); } } else { if (hlist_empty(&node->refs) && !node->local_strong_refs && @@ -2471,6 +2518,73 @@ static int binder_fixup_parent(struct binder_transaction *t, return 0; } +/** + * binder_proc_transaction() - sends a transaction to a process and wakes it up + * @t: transaction to send + * @proc: process to send the transaction to + * @thread: thread in @proc to send the transaction to (may be NULL) + * + * This function queues a transaction to the specified process. It will try + * to find a thread in the target process to handle the transaction and + * wake it up. If no thread is found, the work is queued to the proc + * waitqueue. + * + * If the @thread parameter is not NULL, the transaction is always queued + * to the waitlist of that specific thread. + * + * Return: true if the transactions was successfully queued + * false if the target process or thread is dead + */ +static bool binder_proc_transaction(struct binder_transaction *t, + struct binder_proc *proc, + struct binder_thread *thread) +{ + struct list_head *target_list = NULL; + struct binder_node *node = t->buffer->target_node; + bool oneway = !!(t->flags & TF_ONE_WAY); + bool wakeup = true; + + BUG_ON(!node); + binder_node_lock(node); + if (oneway) { + BUG_ON(thread); + if (node->has_async_transaction) { + target_list = &node->async_todo; + wakeup = false; + } else { + node->has_async_transaction = 1; + } + } + + binder_inner_proc_lock(proc); + + if (proc->is_dead || (thread && thread->is_dead)) { + binder_inner_proc_unlock(proc); + binder_node_unlock(node); + return false; + } + + if (!thread && !target_list) + thread = binder_select_thread_ilocked(proc); + + if (thread) + target_list = &thread->todo; + else if (!target_list) + target_list = &proc->todo; + else + BUG_ON(target_list != &node->async_todo); + + binder_enqueue_work_ilocked(&t->work, target_list); + + if (wakeup) + binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync */); + + binder_inner_proc_unlock(proc); + binder_node_unlock(node); + + return true; +} + static void binder_transaction(struct binder_proc *proc, struct binder_thread *thread, struct binder_transaction_data *tr, int reply, @@ -2485,7 +2599,6 @@ static void binder_transaction(struct binder_proc *proc, struct binder_proc *target_proc = NULL; struct binder_thread *target_thread = NULL; struct binder_node *target_node = NULL; - struct list_head *target_list; struct binder_transaction *in_reply_to = NULL; struct binder_transaction_log_entry *e; uint32_t return_error = 0; @@ -2495,7 +2608,6 @@ static void binder_transaction(struct binder_proc 
*proc, binder_size_t last_fixup_min_off = 0; struct binder_context *context = proc->context; int t_debug_id = atomic_inc_return(&binder_last_id); - bool wakeup_for_proc_work = false; e = binder_transaction_log_add(&binder_transaction_log); e->debug_id = t_debug_id; @@ -2656,13 +2768,8 @@ static void binder_transaction(struct binder_proc *proc, } binder_inner_proc_unlock(proc); } - if (target_thread) { + if (target_thread) e->to_thread = target_thread->pid; - target_list = &target_thread->todo; - } else { - target_list = &target_proc->todo; - wakeup_for_proc_work = true; - } e->to_proc = target_proc->pid; /* TODO: reuse incoming transaction for reply */ @@ -2941,8 +3048,9 @@ static void binder_transaction(struct binder_proc *proc, } BUG_ON(t->buffer->async_transaction != 0); binder_pop_transaction_ilocked(target_thread, in_reply_to); - binder_enqueue_work_ilocked(&t->work, target_list); + binder_enqueue_work_ilocked(&t->work, &target_thread->todo); binder_inner_proc_unlock(target_proc); + wake_up_interruptible_sync(&target_thread->wait); binder_free_transaction(in_reply_to); } else if (!(t->flags & TF_ONE_WAY)) { BUG_ON(t->buffer->async_transaction != 0); @@ -2951,49 +3059,17 @@ static void binder_transaction(struct binder_proc *proc, t->from_parent = thread->transaction_stack; thread->transaction_stack = t; binder_inner_proc_unlock(proc); - binder_inner_proc_lock(target_proc); - if (target_proc->is_dead || - (target_thread && target_thread->is_dead)) { - binder_inner_proc_unlock(target_proc); + if (!binder_proc_transaction(t, target_proc, target_thread)) { binder_inner_proc_lock(proc); binder_pop_transaction_ilocked(thread, t); binder_inner_proc_unlock(proc); goto err_dead_proc_or_thread; } - binder_enqueue_work_ilocked(&t->work, target_list); - binder_inner_proc_unlock(target_proc); } else { BUG_ON(target_node == NULL); BUG_ON(t->buffer->async_transaction != 1); - binder_node_lock(target_node); - if (target_node->has_async_transaction) { - target_list = &target_node->async_todo; - wakeup_for_proc_work = false; - } else - target_node->has_async_transaction = 1; - /* - * Test/set of has_async_transaction - * must be atomic with enqueue on - * async_todo - */ - binder_inner_proc_lock(target_proc); - if (target_proc->is_dead || - (target_thread && target_thread->is_dead)) { - binder_inner_proc_unlock(target_proc); - binder_node_unlock(target_node); + if (!binder_proc_transaction(t, target_proc, NULL)) goto err_dead_proc_or_thread; - } - binder_enqueue_work_ilocked(&t->work, target_list); - binder_inner_proc_unlock(target_proc); - binder_node_unlock(target_node); - } - if (target_thread) { - wake_up_interruptible_sync(&target_thread->wait); - } else if (wakeup_for_proc_work) { - binder_inner_proc_lock(target_proc); - binder_wakeup_proc_ilocked(target_proc, - !(tr->flags & TF_ONE_WAY)); - binder_inner_proc_unlock(target_proc); } if (target_thread) binder_thread_dec_tmpref(target_thread); @@ -3438,8 +3514,7 @@ static int binder_thread_write(struct binder_proc *proc, &ref->death->work, &proc->todo); binder_wakeup_proc_ilocked( - proc, - false); + proc); binder_inner_proc_unlock(proc); } } @@ -3476,8 +3551,7 @@ static int binder_thread_write(struct binder_proc *proc, &death->work, &proc->todo); binder_wakeup_proc_ilocked( - proc, - false); + proc); } } else { BUG_ON(death->work.type != BINDER_WORK_DEAD_BINDER); @@ -3532,8 +3606,7 @@ static int binder_thread_write(struct binder_proc *proc, binder_enqueue_work_ilocked( &death->work, &proc->todo); - binder_wakeup_proc_ilocked( - proc, false); + 
binder_wakeup_proc_ilocked(proc); } } binder_inner_proc_unlock(proc); @@ -4251,7 +4324,7 @@ static int binder_ioctl_write_read(struct file *filp, trace_binder_read_done(ret); binder_inner_proc_lock(proc); if (!binder_worklist_empty_ilocked(&proc->todo)) - binder_wakeup_proc_ilocked(proc, false); + binder_wakeup_proc_ilocked(proc); binder_inner_proc_unlock(proc); if (ret < 0) { if (copy_to_user(ubuf, &bwr, sizeof(bwr))) @@ -4621,7 +4694,7 @@ static int binder_node_release(struct binder_node *node, int refs) ref->death->work.type = BINDER_WORK_DEAD_BINDER; binder_enqueue_work_ilocked(&ref->death->work, &ref->proc->todo); - binder_wakeup_proc_ilocked(ref->proc, false); + binder_wakeup_proc_ilocked(ref->proc); binder_inner_proc_unlock(ref->proc); } From d30e6a877a84916efb17836566aeecf2ce0d6e9f Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Tue, 6 Jun 2017 17:04:42 -0700 Subject: [PATCH 042/176] ANDROID: binder: add support for RT prio inheritance. Adds support for SCHED_BATCH/SCHED_FIFO/SCHED_RR priority inheritance. Change-Id: I71f356e476be2933713a0ecfa2cc31aa141e2dc6 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 163 ++++++++++++++++++++++++++++++++------- 1 file changed, 134 insertions(+), 29 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index e2daf6b7cbd0..85a4651361b6 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -465,6 +465,22 @@ enum binder_deferred_state { BINDER_DEFERRED_RELEASE = 0x04, }; +/** + * struct binder_priority - scheduler policy and priority + * @sched_policy scheduler policy + * @prio [100..139] for SCHED_NORMAL, [0..99] for FIFO/RT + * + * The binder driver supports inheriting the following scheduler policies: + * SCHED_NORMAL + * SCHED_BATCH + * SCHED_FIFO + * SCHED_RR + */ +struct binder_priority { + unsigned int sched_policy; + int prio; +}; + /** * struct binder_proc - binder process bookkeeping * @proc_node: element for binder_procs list @@ -544,7 +560,7 @@ struct binder_proc { int requested_threads; int requested_threads_started; int tmp_ref; - long default_priority; + struct binder_priority default_priority; struct dentry *debugfs_entry; struct binder_alloc alloc; struct binder_context *context; @@ -626,8 +642,8 @@ struct binder_transaction { struct binder_buffer *buffer; unsigned int code; unsigned int flags; - long priority; - long saved_priority; + struct binder_priority priority; + struct binder_priority saved_priority; kuid_t sender_euid; /** * @lock: protects @from, @to_proc, and @to_thread @@ -1053,22 +1069,93 @@ static void binder_wakeup_proc_ilocked(struct binder_proc *proc) binder_wakeup_thread_ilocked(proc, thread, /* sync = */false); } -static void binder_set_nice(long nice) +static bool is_rt_policy(int policy) { - long min_nice; + return policy == SCHED_FIFO || policy == SCHED_RR; +} - if (can_nice(current, nice)) { - set_user_nice(current, nice); +static bool is_fair_policy(int policy) +{ + return policy == SCHED_NORMAL || policy == SCHED_BATCH; +} + +static bool binder_supported_policy(int policy) +{ + return is_fair_policy(policy) || is_rt_policy(policy); +} + +static int to_userspace_prio(int policy, int kernel_priority) +{ + if (is_fair_policy(policy)) + return PRIO_TO_NICE(kernel_priority); + else + return MAX_USER_RT_PRIO - 1 - kernel_priority; +} + +static int to_kernel_prio(int policy, int user_priority) +{ + if (is_fair_policy(policy)) + return NICE_TO_PRIO(user_priority); + else + return MAX_USER_RT_PRIO - 1 - user_priority; +} + +static void 
binder_set_priority(struct task_struct *task, + struct binder_priority desired) +{ + int priority; /* user-space prio value */ + bool has_cap_nice; + unsigned int policy = desired.sched_policy; + + if (task->policy == policy && task->normal_prio == desired.prio) return; + + has_cap_nice = has_capability_noaudit(task, CAP_SYS_NICE); + + priority = to_userspace_prio(policy, desired.prio); + + if (is_rt_policy(policy) && !has_cap_nice) { + long max_rtprio = task_rlimit(task, RLIMIT_RTPRIO); + + if (max_rtprio == 0) { + policy = SCHED_NORMAL; + priority = MIN_NICE; + } else if (priority > max_rtprio) { + priority = max_rtprio; + } } - min_nice = rlimit_to_nice(current->signal->rlim[RLIMIT_NICE].rlim_cur); - binder_debug(BINDER_DEBUG_PRIORITY_CAP, - "%d: nice value %ld not allowed use %ld instead\n", - current->pid, nice, min_nice); - set_user_nice(current, min_nice); - if (min_nice <= MAX_NICE) - return; - binder_user_error("%d RLIMIT_NICE not set\n", current->pid); + + if (is_fair_policy(policy) && !has_cap_nice) { + long min_nice = rlimit_to_nice(task_rlimit(task, RLIMIT_NICE)); + + if (min_nice > MAX_NICE) { + binder_user_error("%d RLIMIT_NICE not set\n", + task->pid); + return; + } else if (priority < min_nice) { + priority = min_nice; + } + } + + if (policy != desired.sched_policy || + to_kernel_prio(policy, priority) != desired.prio) + binder_debug(BINDER_DEBUG_PRIORITY_CAP, + "%d: priority %d not allowed, using %d instead\n", + task->pid, desired.prio, + to_kernel_prio(policy, priority)); + + /* Set the actual priority */ + if (task->policy != policy || is_rt_policy(policy)) { + struct sched_param params; + + params.sched_priority = is_rt_policy(policy) ? priority : 0; + + sched_setscheduler_nocheck(task, + policy | SCHED_RESET_ON_FORK, + ¶ms); + } + if (is_fair_policy(policy)) + set_user_nice(task, priority); } static struct binder_node *binder_get_node_ilocked(struct binder_proc *proc, @@ -1152,7 +1239,8 @@ static struct binder_node *binder_init_node_ilocked( node->ptr = ptr; node->cookie = cookie; node->work.type = BINDER_WORK_NODE; - node->min_priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK; + node->min_priority = NICE_TO_PRIO( + flags & FLAT_BINDER_FLAG_PRIORITY_MASK); node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); spin_lock_init(&node->lock); INIT_LIST_HEAD(&node->work.entry); @@ -2649,7 +2737,7 @@ static void binder_transaction(struct binder_proc *proc, } thread->transaction_stack = in_reply_to->to_parent; binder_inner_proc_unlock(proc); - binder_set_nice(in_reply_to->saved_priority); + binder_set_priority(current, in_reply_to->saved_priority); target_thread = binder_get_txn_from_and_acq_inner(in_reply_to); if (target_thread == NULL) { return_error = BR_DEAD_REPLY; @@ -2822,7 +2910,15 @@ static void binder_transaction(struct binder_proc *proc, t->to_thread = target_thread; t->code = tr->code; t->flags = tr->flags; - t->priority = task_nice(current); + if (!(t->flags & TF_ONE_WAY) && + binder_supported_policy(current->policy)) { + /* Inherit supported policies for synchronous transactions */ + t->priority.sched_policy = current->policy; + t->priority.prio = current->normal_prio; + } else { + /* Otherwise, fall back to the default priority */ + t->priority = target_proc->default_priority; + } trace_binder_transaction(reply, t, target_node); @@ -3738,7 +3834,7 @@ retry: wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); } - binder_set_nice(proc->default_priority); + binder_set_priority(current, proc->default_priority); } if (non_block) { 
@@ -3950,16 +4046,17 @@ retry: BUG_ON(t->buffer == NULL); if (t->buffer->target_node) { struct binder_node *target_node = t->buffer->target_node; + struct binder_priority prio = t->priority; tr.target.ptr = target_node->ptr; tr.cookie = target_node->cookie; - t->saved_priority = task_nice(current); - if (t->priority < target_node->min_priority && - !(t->flags & TF_ONE_WAY)) - binder_set_nice(t->priority); - else if (!(t->flags & TF_ONE_WAY) || - t->saved_priority > target_node->min_priority) - binder_set_nice(target_node->min_priority); + t->saved_priority.sched_policy = current->policy; + t->saved_priority.prio = current->normal_prio; + if (target_node->min_priority < t->priority.prio) { + prio.sched_policy = SCHED_NORMAL; + prio.prio = target_node->min_priority; + } + binder_set_priority(current, prio); cmd = BR_TRANSACTION; } else { tr.target.ptr = 0; @@ -4565,7 +4662,14 @@ static int binder_open(struct inode *nodp, struct file *filp) get_task_struct(current->group_leader); proc->tsk = current->group_leader; INIT_LIST_HEAD(&proc->todo); - proc->default_priority = task_nice(current); + if (binder_supported_policy(current->policy)) { + proc->default_priority.sched_policy = current->policy; + proc->default_priority.prio = current->normal_prio; + } else { + proc->default_priority.sched_policy = SCHED_NORMAL; + proc->default_priority.prio = NICE_TO_PRIO(0); + } + binder_dev = container_of(filp->private_data, struct binder_device, miscdev); proc->context = &binder_dev->context; @@ -4858,13 +4962,14 @@ static void print_binder_transaction_ilocked(struct seq_file *m, spin_lock(&t->lock); to_proc = t->to_proc; seq_printf(m, - "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %ld r%d", + "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %d:%d r%d", prefix, t->debug_id, t, t->from ? t->from->proc->pid : 0, t->from ? t->from->pid : 0, to_proc ? to_proc->pid : 0, t->to_thread ? t->to_thread->pid : 0, - t->code, t->flags, t->priority, t->need_reply); + t->code, t->flags, t->priority.sched_policy, + t->priority.prio, t->need_reply); spin_unlock(&t->lock); if (proc != to_proc) { From adb685439e768b120a5bdae15f333967d1e239ed Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 7 Jun 2017 09:29:14 -0700 Subject: [PATCH 043/176] ANDROID: binder: add min sched_policy to node. This change adds flags to flat_binder_object.flags to allow indicating a minimum scheduling policy for the node. It also clarifies the valid value range for the priority bits in the flags. Internally, we use the priority map that the kernel uses, e.g. [0..99] for real-time policies and [100..139] for the SCHED_NORMAL/SCHED_BATCH policies. 
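As a rough illustration (this helper is not part of the patch; the wrapper and its name are invented), a userspace sender could pack the minimum policy and priority into flat_binder_object.flags with the UAPI constants added below, where policy takes the UAPI scheduler values (SCHED_NORMAL=0, SCHED_FIFO=1, SCHED_RR=2, SCHED_BATCH=3):

	/* prio is a nice value [-20..19] for SCHED_NORMAL/SCHED_BATCH or an
	 * RT priority [1..99] for SCHED_FIFO/SCHED_RR; it travels in the low
	 * byte of flags (two's complement for negative nice values).
	 */
	static __u32 make_min_sched_flags(__u32 policy, int prio)
	{
		return ((policy << FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT) &
			FLAT_BINDER_FLAG_SCHED_POLICY_MASK) |
		       ((__u32)prio & FLAT_BINDER_FLAG_PRIORITY_MASK);
	}

On the receiving side, binder_init_node_ilocked() in the diff below decodes the same bits back into kernel form with to_kernel_prio().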
Bug: 34461621 Bug: 37293077 Change-Id: I12438deecb53df432da18c6fc77460768ae726d2 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 26 ++++++++++++++---- include/uapi/linux/android/binder.h | 41 ++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 85a4651361b6..838f4dbc9af5 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -353,6 +353,8 @@ struct binder_error { * and by @lock) * @has_async_transaction: async transaction to node in progress * (protected by @lock) + * @sched_policy: minimum scheduling policy for node + * (invariant after initialized) * @accept_fds: file descriptor operations supported for node * (invariant after initialized) * @min_priority: minimum scheduling priority @@ -392,6 +394,7 @@ struct binder_node { /* * invariant after initialization */ + u8 sched_policy:2; u8 accept_fds:1; u8 min_priority; }; @@ -1208,6 +1211,7 @@ static struct binder_node *binder_init_node_ilocked( binder_uintptr_t ptr = fp ? fp->binder : 0; binder_uintptr_t cookie = fp ? fp->cookie : 0; __u32 flags = fp ? fp->flags : 0; + s8 priority; BUG_ON(!spin_is_locked(&proc->inner_lock)); while (*p) { @@ -1239,8 +1243,10 @@ static struct binder_node *binder_init_node_ilocked( node->ptr = ptr; node->cookie = cookie; node->work.type = BINDER_WORK_NODE; - node->min_priority = NICE_TO_PRIO( - flags & FLAT_BINDER_FLAG_PRIORITY_MASK); + priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK; + node->sched_policy = (flags & FLAT_BINDER_FLAG_SCHED_POLICY_MASK) >> + FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT; + node->min_priority = to_kernel_prio(node->sched_policy, priority); node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); spin_lock_init(&node->lock); INIT_LIST_HEAD(&node->work.entry); @@ -4052,8 +4058,17 @@ retry: tr.cookie = target_node->cookie; t->saved_priority.sched_policy = current->policy; t->saved_priority.prio = current->normal_prio; - if (target_node->min_priority < t->priority.prio) { - prio.sched_policy = SCHED_NORMAL; + if (target_node->min_priority < t->priority.prio || + (target_node->min_priority == t->priority.prio && + target_node->sched_policy == SCHED_FIFO)) { + /* + * In case the minimum priority on the node is + * higher (lower value), use that priority. If + * the priority is the same, but the node uses + * SCHED_FIFO, prefer SCHED_FIFO, since it can + * run unbounded, unlike SCHED_RR.
+ */ + prio.sched_policy = target_node->sched_policy; prio.prio = target_node->min_priority; } binder_set_priority(current, prio); @@ -5092,8 +5107,9 @@ static void print_binder_node_nilocked(struct seq_file *m, hlist_for_each_entry(ref, &node->refs, node_entry) count++; - seq_printf(m, " node %d: u%016llx c%016llx hs %d hw %d ls %d lw %d is %d iw %d tr %d", + seq_printf(m, " node %d: u%016llx c%016llx pri %d:%d hs %d hw %d ls %d lw %d is %d iw %d tr %d", node->debug_id, (u64)node->ptr, (u64)node->cookie, + node->sched_policy, node->min_priority, node->has_strong_ref, node->has_weak_ref, node->local_strong_refs, node->local_weak_refs, node->internal_strong_refs, count, node->tmp_refs); diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 7668b5791c91..026558ac254d 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -37,9 +37,48 @@ enum { BINDER_TYPE_PTR = B_PACK_CHARS('p', 't', '*', B_TYPE_LARGE), }; -enum { +/** + * enum flat_binder_object_shifts: shift values for flat_binder_object_flags + * @FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT: shift for getting scheduler policy. + * + */ +enum flat_binder_object_shifts { + FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT = 9, +}; + +/** + * enum flat_binder_object_flags - flags for use in flat_binder_object.flags + */ +enum flat_binder_object_flags { + /** + * @FLAT_BINDER_FLAG_PRIORITY_MASK: bit-mask for min scheduler priority + * + * These bits can be used to set the minimum scheduler priority + * at which transactions into this node should run. Valid values + * in these bits depend on the scheduler policy encoded in + * @FLAT_BINDER_FLAG_SCHED_POLICY_MASK. + * + * For SCHED_NORMAL/SCHED_BATCH, the valid range is between [-20..19] + * For SCHED_FIFO/SCHED_RR, the value can run between [1..99] + */ FLAT_BINDER_FLAG_PRIORITY_MASK = 0xff, + /** + * @FLAT_BINDER_FLAG_ACCEPTS_FDS: whether the node accepts fds. + */ FLAT_BINDER_FLAG_ACCEPTS_FDS = 0x100, + /** + * @FLAT_BINDER_FLAG_SCHED_POLICY_MASK: bit-mask for scheduling policy + * + * These two bits can be used to set the min scheduling policy at which + * transactions on this node should run. These match the UAPI + * scheduler policy values, eg: + * 00b: SCHED_NORMAL + * 01b: SCHED_FIFO + * 10b: SCHED_RR + * 11b: SCHED_BATCH + */ + FLAT_BINDER_FLAG_SCHED_POLICY_MASK = + 3U << FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT, }; #ifdef BINDER_IPC_32BIT From 7230f99185e5200b959ea3c4387bbea24e30bd9b Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 7 Jun 2017 10:02:12 -0700 Subject: [PATCH 044/176] ANDROID: binder: improve priority inheritance. By raising the priority of a thread selected for a transaction *before* we wake it up. Delay restoring the priority when doing a reply until after we wake-up the process receiving the reply. Change-Id: Ic332e4e0ed7d2d3ca6ab1034da4629c9eadd3405 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 74 ++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 21 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 838f4dbc9af5..d046e7798b19 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -611,6 +611,7 @@ enum { * @is_dead: thread is dead and awaiting free * when outstanding transactions are cleaned up * (protected by @proc->inner_lock) + * @task: struct task_struct for this thread * * Bookkeeping structure for binder threads. 
*/ @@ -629,6 +630,7 @@ struct binder_thread { struct binder_stats stats; atomic_t tmp_ref; bool is_dead; + struct task_struct *task; }; struct binder_transaction { @@ -647,6 +649,7 @@ struct binder_transaction { unsigned int flags; struct binder_priority priority; struct binder_priority saved_priority; + bool set_priority_called; kuid_t sender_euid; /** * @lock: protects @from, @to_proc, and @to_thread @@ -1161,6 +1164,38 @@ static void binder_set_priority(struct task_struct *task, set_user_nice(task, priority); } +static void binder_transaction_priority(struct task_struct *task, + struct binder_transaction *t, + struct binder_priority node_prio) +{ + struct binder_priority desired_prio; + + if (t->set_priority_called) + return; + + t->set_priority_called = true; + t->saved_priority.sched_policy = task->policy; + t->saved_priority.prio = task->normal_prio; + + desired_prio.prio = t->priority.prio; + desired_prio.sched_policy = t->priority.sched_policy; + + if (node_prio.prio < t->priority.prio || + (node_prio.prio == t->priority.prio && + node_prio.sched_policy == SCHED_FIFO)) { + /* + * In case the minimum priority on the node is + * higher (lower value), use that priority. If + * the priority is the same, but the node uses + * SCHED_FIFO, prefer SCHED_FIFO, since it can + * run unbounded, unlike SCHED_RR. + */ + desired_prio = node_prio; + } + + binder_set_priority(task, desired_prio); +} + static struct binder_node *binder_get_node_ilocked(struct binder_proc *proc, binder_uintptr_t ptr) { @@ -2635,11 +2670,15 @@ static bool binder_proc_transaction(struct binder_transaction *t, { struct list_head *target_list = NULL; struct binder_node *node = t->buffer->target_node; + struct binder_priority node_prio; bool oneway = !!(t->flags & TF_ONE_WAY); bool wakeup = true; BUG_ON(!node); binder_node_lock(node); + node_prio.prio = node->min_priority; + node_prio.sched_policy = node->sched_policy; + if (oneway) { BUG_ON(thread); if (node->has_async_transaction) { @@ -2661,12 +2700,14 @@ static bool binder_proc_transaction(struct binder_transaction *t, if (!thread && !target_list) thread = binder_select_thread_ilocked(proc); - if (thread) + if (thread) { target_list = &thread->todo; - else if (!target_list) + binder_transaction_priority(thread->task, t, node_prio); + } else if (!target_list) { target_list = &proc->todo; - else + } else { BUG_ON(target_list != &node->async_todo); + } binder_enqueue_work_ilocked(&t->work, target_list); @@ -2743,7 +2784,6 @@ static void binder_transaction(struct binder_proc *proc, } thread->transaction_stack = in_reply_to->to_parent; binder_inner_proc_unlock(proc); - binder_set_priority(current, in_reply_to->saved_priority); target_thread = binder_get_txn_from_and_acq_inner(in_reply_to); if (target_thread == NULL) { return_error = BR_DEAD_REPLY; @@ -3153,6 +3193,7 @@ static void binder_transaction(struct binder_proc *proc, binder_enqueue_work_ilocked(&t->work, &target_thread->todo); binder_inner_proc_unlock(target_proc); wake_up_interruptible_sync(&target_thread->wait); + binder_set_priority(current, in_reply_to->saved_priority); binder_free_transaction(in_reply_to); } else if (!(t->flags & TF_ONE_WAY)) { BUG_ON(t->buffer->async_transaction != 0); @@ -3241,6 +3282,7 @@ err_no_context_mgr_node: BUG_ON(thread->return_error.cmd != BR_OK); if (in_reply_to) { + binder_set_priority(current, in_reply_to->saved_priority); thread->return_error.cmd = BR_TRANSACTION_COMPLETE; binder_enqueue_work(thread->proc, &thread->return_error.work, @@ -4052,26 +4094,13 @@ retry: 
BUG_ON(t->buffer == NULL); if (t->buffer->target_node) { struct binder_node *target_node = t->buffer->target_node; - struct binder_priority prio = t->priority; + struct binder_priority node_prio; tr.target.ptr = target_node->ptr; tr.cookie = target_node->cookie; - t->saved_priority.sched_policy = current->policy; - t->saved_priority.prio = current->normal_prio; - if (target_node->min_priority < t->priority.prio || - (target_node->min_priority == t->priority.prio && - target_node->sched_policy == SCHED_FIFO)) { - /* - * In case the minimum priority on the node is - * higher (lower value), use that priority. If - * the priority is the same, but the node uses - * SCHED_FIFO, prefer SCHED_FIFO, since it can - * run unbounded, unlike SCHED_RR. - */ - prio.sched_policy = target_node->sched_policy; - prio.prio = target_node->min_priority; - } - binder_set_priority(current, prio); + node_prio.sched_policy = target_node->sched_policy; + node_prio.prio = target_node->min_priority; + binder_transaction_priority(current, t, node_prio); cmd = BR_TRANSACTION; } else { tr.target.ptr = 0; @@ -4247,6 +4276,8 @@ static struct binder_thread *binder_get_thread_ilocked( binder_stats_created(BINDER_STAT_THREAD); thread->proc = proc; thread->pid = current->pid; + get_task_struct(current); + thread->task = current; atomic_set(&thread->tmp_ref, 0); init_waitqueue_head(&thread->wait); INIT_LIST_HEAD(&thread->todo); @@ -4297,6 +4328,7 @@ static void binder_free_thread(struct binder_thread *thread) BUG_ON(!list_empty(&thread->todo)); binder_stats_deleted(BINDER_STAT_THREAD); binder_proc_dec_tmpref(thread->proc); + put_task_struct(thread->task); kfree(thread); } From 39140a0f385c456a43c2dd25bc01503cc3841b97 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 23 Jun 2017 10:13:43 -0700 Subject: [PATCH 045/176] ANDROID: binder: add RT inheritance flag to node. Allows a binder node to specify whether it wants to inherit real-time scheduling policy from a caller. 
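As a sketch of the intended use (not part of the patch; the object layout assumes a kernel whose flat_binder_object carries a binder_object_header, and the pointer values are placeholders), a server that wants RT callers serviced at their RT priority would publish its node with the new flag set:

	struct flat_binder_object obj = {
		.hdr.type = BINDER_TYPE_BINDER,
		.flags    = FLAT_BINDER_FLAG_ACCEPTS_FDS |
			    FLAT_BINDER_FLAG_INHERIT_RT,
		.binder   = (binder_uintptr_t)local_object,  /* placeholder */
		.cookie   = (binder_uintptr_t)local_cookie,  /* placeholder */
	};

Nodes that leave the flag clear are instead handled at SCHED_NORMAL, which the new check in binder_transaction_priority() enforces for synchronous transactions from RT callers.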
Change-Id: I375b6094bf441c19f19cba06d5a6be02cd07d714 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 22 +++++++++++++++++----- include/uapi/linux/android/binder.h | 8 ++++++++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d046e7798b19..6b24f70dbff8 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -359,6 +359,8 @@ struct binder_error { * (invariant after initialized) * @min_priority: minimum scheduling priority * (invariant after initialized) + * @inherit_rt: inherit RT scheduling policy from caller + * (invariant after initialized) * @async_todo: list of async work items * (protected by @proc->inner_lock) * @@ -395,6 +397,7 @@ struct binder_node { * invariant after initialization */ u8 sched_policy:2; + u8 inherit_rt:1; u8 accept_fds:1; u8 min_priority; }; @@ -1166,7 +1169,8 @@ static void binder_set_priority(struct task_struct *task, static void binder_transaction_priority(struct task_struct *task, struct binder_transaction *t, - struct binder_priority node_prio) + struct binder_priority node_prio, + bool inherit_rt) { struct binder_priority desired_prio; @@ -1177,8 +1181,13 @@ static void binder_transaction_priority(struct task_struct *task, t->saved_priority.sched_policy = task->policy; t->saved_priority.prio = task->normal_prio; - desired_prio.prio = t->priority.prio; - desired_prio.sched_policy = t->priority.sched_policy; + if (!inherit_rt && is_rt_policy(desired_prio.sched_policy)) { + desired_prio.prio = NICE_TO_PRIO(0); + desired_prio.sched_policy = SCHED_NORMAL; + } else { + desired_prio.prio = t->priority.prio; + desired_prio.sched_policy = t->priority.sched_policy; + } if (node_prio.prio < t->priority.prio || (node_prio.prio == t->priority.prio && @@ -1283,6 +1292,7 @@ static struct binder_node *binder_init_node_ilocked( FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT; node->min_priority = to_kernel_prio(node->sched_policy, priority); node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); + node->inherit_rt = !!(flags & FLAT_BINDER_FLAG_INHERIT_RT); spin_lock_init(&node->lock); INIT_LIST_HEAD(&node->work.entry); INIT_LIST_HEAD(&node->async_todo); @@ -2702,7 +2712,8 @@ static bool binder_proc_transaction(struct binder_transaction *t, if (thread) { target_list = &thread->todo; - binder_transaction_priority(thread->task, t, node_prio); + binder_transaction_priority(thread->task, t, node_prio, + node->inherit_rt); } else if (!target_list) { target_list = &proc->todo; } else { @@ -4100,7 +4111,8 @@ retry: tr.cookie = target_node->cookie; node_prio.sched_policy = target_node->sched_policy; node_prio.prio = target_node->min_priority; - binder_transaction_priority(current, t, node_prio); + binder_transaction_priority(current, t, node_prio, + target_node->inherit_rt); cmd = BR_TRANSACTION; } else { tr.target.ptr = 0; diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 026558ac254d..70e252bf0be0 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -79,6 +79,14 @@ enum flat_binder_object_flags { */ FLAT_BINDER_FLAG_SCHED_POLICY_MASK = 3U << FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT, + + /** + * @FLAT_BINDER_FLAG_INHERIT_RT: whether the node inherits RT policy + * + * Only when set, calls into this node will inherit a real-time + * scheduling policy from the caller (for synchronous transactions). 
+ */ + FLAT_BINDER_FLAG_INHERIT_RT = 0x800, }; #ifdef BINDER_IPC_32BIT From ccb973e681d8f9fedb398b87a115b48ed96a6214 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Apr 2017 09:42:24 +0200 Subject: [PATCH 046/176] fs: add a VALID_OPEN_FLAGS commit 80f18379a7c350c011d30332658aa15fe49a8fa5 upstream. Add a central define for all valid open flags, and use it in the uniqueness check. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/fcntl.c | 14 ++++---------- include/linux/fcntl.h | 6 ++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index ee85cd4e136a..62376451bbce 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -740,16 +740,10 @@ static int __init fcntl_init(void) * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. */ - BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( - O_RDONLY | O_WRONLY | O_RDWR | - O_CREAT | O_EXCL | O_NOCTTY | - O_TRUNC | O_APPEND | /* O_NONBLOCK | */ - __O_SYNC | O_DSYNC | FASYNC | - O_DIRECT | O_LARGEFILE | O_DIRECTORY | - O_NOFOLLOW | O_NOATIME | O_CLOEXEC | - __FMODE_EXEC | O_PATH | __O_TMPFILE | - __FMODE_NONOTIFY - )); + BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != + HWEIGHT32( + (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | + __FMODE_EXEC | __FMODE_NONOTIFY)); fasync_cache = kmem_cache_create("fasync_cache", sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index 76ce329e656d..1b48d9c9a561 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h @@ -3,6 +3,12 @@ #include +/* list of all valid flags for the open/openat flags argument: */ +#define VALID_OPEN_FLAGS \ + (O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | \ + O_APPEND | O_NDELAY | O_NONBLOCK | O_NDELAY | __O_SYNC | O_DSYNC | \ + FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \ + O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE) #ifndef force_o_largefile #define force_o_largefile() (BITS_PER_LONG != 32) From 4043d5bca587eab2dfcca5e2da7e770d16b127c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Apr 2017 09:42:25 +0200 Subject: [PATCH 047/176] fs: completely ignore unknown open flags commit 629e014bb8349fcf7c1e4df19a842652ece1c945 upstream. Currently we just stash anything we got into file->f_flags, and the report it in fcntl(F_GETFD). This patch just clears out all unknown flags so that we don't pass them to the fs or report them. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/open.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/open.c b/fs/open.c index 157b9940dd73..fbc5c7b230b3 100644 --- a/fs/open.c +++ b/fs/open.c @@ -885,6 +885,12 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o int lookup_flags = 0; int acc_mode; + /* + * Clear out all open flags we don't know about so that we don't report + * them in fcntl(F_GETFD) or similar interfaces. + */ + flags &= VALID_OPEN_FLAGS; + if (flags & (O_CREAT | __O_TMPFILE)) op->mode = (mode & S_IALLUGO) | S_IFREG; else From c01ace719ebe6353f0c96e56f6c75c22ad3f67b0 Mon Sep 17 00:00:00 2001 From: Adrian Salido Date: Tue, 25 Apr 2017 16:55:26 -0700 Subject: [PATCH 048/176] driver core: platform: fix race condition with driver_override commit 6265539776a0810b7ce6398c27866ddb9c6bd154 upstream. 
The driver_override implementation is susceptible to race condition when different threads are reading vs storing a different driver override. Add locking to avoid race condition. Fixes: 3d713e0e382e ("driver core: platform: add device binding path 'driver_override'") Cc: stable@vger.kernel.org Signed-off-by: Adrian Salido Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index ba66330cea67..cb4ad6e98b28 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -807,7 +807,7 @@ static ssize_t driver_override_store(struct device *dev, const char *buf, size_t count) { struct platform_device *pdev = to_platform_device(dev); - char *driver_override, *old = pdev->driver_override, *cp; + char *driver_override, *old, *cp; if (count > PATH_MAX) return -EINVAL; @@ -820,12 +820,15 @@ static ssize_t driver_override_store(struct device *dev, if (cp) *cp = '\0'; + device_lock(dev); + old = pdev->driver_override; if (strlen(driver_override)) { pdev->driver_override = driver_override; } else { kfree(driver_override); pdev->driver_override = NULL; } + device_unlock(dev); kfree(old); @@ -836,8 +839,12 @@ static ssize_t driver_override_show(struct device *dev, struct device_attribute *attr, char *buf) { struct platform_device *pdev = to_platform_device(dev); + ssize_t len; - return sprintf(buf, "%s\n", pdev->driver_override); + device_lock(dev); + len = sprintf(buf, "%s\n", pdev->driver_override); + device_unlock(dev); + return len; } static DEVICE_ATTR_RW(driver_override); From 1f97e170cfc54da377ff4e375a0f7226d06534c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 12 Apr 2016 13:30:45 +0200 Subject: [PATCH 049/176] bgmac: reset & enable Ethernet core before using it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b4dfd8e92956b396d3438212bc9a0be6267b8b34 upstream. This fixes Ethernet on D-Link DIR-885L with BCM47094 SoC. Felix reported similar fix was needed for his BCM4709 device (Buffalo WXR-1900DHP?). I tested this for regressions on BCM4706, BCM4708A0 and BCM47081A0. Cc: Felix Fietkau Signed-off-by: Rafał Miłecki Signed-off-by: David S. Miller Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bgmac.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 70da30095b89..a5e4b4b93d1b 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1583,6 +1583,11 @@ static int bgmac_probe(struct bcma_device *core) dev_warn(&core->dev, "Using random MAC: %pM\n", mac); } + /* This (reset &) enable is not preset in specs or reference driver but + * Broadcom does it in arch PCI code when enabling fake PCI device. 
+ */ + bcma_core_enable(core, 0); + /* Allocation and references */ net_dev = alloc_etherdev(sizeof(*bgmac)); if (!net_dev) From 78f20db86418dcda1e60f9ebe4be5086d00d2006 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 4 Jul 2017 10:45:43 +0200 Subject: [PATCH 050/176] mm: fix classzone_idx underflow in shrink_zones() [Not upstream as that would take 34+ patches] We've got reported a BUG in do_try_to_free_pages(): BUG: unable to handle kernel paging request at ffff8ffffff28990 IP: [] do_try_to_free_pages+0x140/0x490 PGD 0 Oops: 0000 [#1] SMP megaraid_sas sg scsi_mod efivarfs autofs4 Supported: No, Unsupported modules are loaded Workqueue: kacpi_hotplug acpi_hotplug_work_fn task: ffff88ffd0d4c540 ti: ffff88ffd0e48000 task.ti: ffff88ffd0e48000 RIP: 0010:[] [] do_try_to_free_pages+0x140/0x490 RSP: 0018:ffff88ffd0e4ba60 EFLAGS: 00010206 RAX: 000006fffffff900 RBX: 00000000ffffffff RCX: ffff88fffff29000 RDX: 000000ffffffff00 RSI: 0000000000000003 RDI: 00000000024200c8 RBP: 0000000001320122 R08: 0000000000000000 R09: ffff88ffd0e4bbac R10: 0000000000000000 R11: 0000000000000000 R12: ffff88ffd0e4bae0 R13: 0000000000000e00 R14: ffff88fffff2a500 R15: ffff88fffff2b300 FS: 0000000000000000(0000) GS:ffff88ffe6440000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff8ffffff28990 CR3: 0000000001c0a000 CR4: 00000000003406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Stack: 00000002db570a80 024200c80000001e ffff88fffff2b300 0000000000000000 ffff88fffffd5700 ffff88ffd0d4c540 ffff88ffd0d4c540 ffffffff0000000c 0000000000000000 0000000000000040 00000000024200c8 ffff88ffd0e4bae0 Call Trace: [] try_to_free_pages+0xba/0x170 [] __alloc_pages_nodemask+0x53f/0xb20 [] alloc_pages_current+0x7f/0x100 [] migrate_pages+0x202/0x710 [] __offline_pages.constprop.23+0x4ba/0x790 [] memory_subsys_offline+0x43/0x70 [] device_offline+0x7d/0xa0 [] acpi_bus_offline+0xa5/0xef [] acpi_device_hotplug+0x21b/0x41f [] acpi_hotplug_work_fn+0x1a/0x23 [] process_one_work+0x14e/0x410 [] worker_thread+0x116/0x490 [] kthread+0xbd/0xe0 [] ret_from_fork+0x3f/0x70 This translates to the loop in shrink_zone(): classzone_idx = requested_highidx; while (!populated_zone(zone->zone_pgdat->node_zones + classzone_idx)) classzone_idx--; where no zone is populated, so classzone_idx becomes -1 (in RBX). Added debugging output reveals that we enter the function with sc->gfp_mask == GFP_NOFS|__GFP_NOFAIL|__GFP_HARDWALL|__GFP_MOVABLE requested_highidx = gfp_zone(sc->gfp_mask) == 2 (ZONE_NORMAL) Inside the for loop, however: gfp_zone(sc->gfp_mask) == 3 (ZONE_MOVABLE) This means we have gone through this branch: if (buffer_heads_over_limit) sc->gfp_mask |= __GFP_HIGHMEM; This changes the gfp_zone() result, but requested_highidx remains unchanged. On nodes where the only populated zone is movable, the inner while loop will check only lower zones, which are not populated, and underflow classzone_idx. To sum up, the bug occurs in configurations with ZONE_MOVABLE (such as when booted with the movable_node parameter) and only in situations when buffer_heads_over_limit is true, and there's an allocation with __GFP_MOVABLE and without __GFP_HIGHMEM performing direct reclaim. This patch makes sure that classzone_idx starts with the correct zone. Mainline has been affected in versions 4.6 and 4.7, but the culprit commit has been also included in stable trees. 
In mainline, this has been fixed accidentally as part of 34-patch series (plus follow-up fixes) "Move LRU page reclaim from zones to nodes", which makes the mainline commit unsuitable for stable backport, unfortunately. Fixes: 7bf52fb891b6 ("mm: vmscan: reclaim highmem zone if buffer_heads is over limit") Obsoleted-by: b2e18757f2c9 ("mm, vmscan: begin reclaiming pages on a per-node basis") Debugged-by: Michal Hocko Signed-off-by: Vlastimil Babka Cc: Minchan Kim Cc: Johannes Weiner Acked-by: Mel Gorman Acked-by: Michal Hocko Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index bfc5050cbd01..440c2df9be82 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2529,7 +2529,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) if (!populated_zone(zone)) continue; - classzone_idx = requested_highidx; + classzone_idx = gfp_zone(sc->gfp_mask); while (!populated_zone(zone->zone_pgdat->node_zones + classzone_idx)) classzone_idx--; From fe0bb2ac167b21843d74465e196e9f9ea5852d07 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 22 Jun 2017 11:24:42 +0200 Subject: [PATCH 051/176] tracing/kprobes: Allow to create probe with a module name starting with a digit commit 9e52b32567126fe146f198971364f68d3bc5233f upstream. Always try to parse an address, since kstrtoul() will safely fail when given a symbol as input. If that fails (which will be the case for a symbol), try to parse a symbol instead. This allows creating a probe such as: p:probe/vlan_gro_receive 8021q:vlan_gro_receive+0 Which is necessary for this command to work: perf probe -m 8021q -a vlan_gro_receive Link: http://lkml.kernel.org/r/fd72d666f45b114e2c5b9cf7e27b91de1ec966f1.1498122881.git.sd@queasysnail.net Fixes: 413d37d1e ("tracing: Add kprobe-based event tracer") Acked-by: Masami Hiramatsu Signed-off-by: Sabrina Dubroca Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_kprobe.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 12ea4ea619ee..e9092a0247bf 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -659,30 +659,25 @@ static int create_trace_kprobe(int argc, char **argv) pr_info("Probe point is not specified.\n"); return -EINVAL; } - if (isdigit(argv[1][0])) { - if (is_return) { - pr_info("Return probe point must be a symbol.\n"); - return -EINVAL; - } - /* an address specified */ - ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr); - if (ret) { - pr_info("Failed to parse address.\n"); - return ret; - } - } else { + + /* try to parse an address. if that fails, try to read the + * input as a symbol. 
*/ + if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) { /* a symbol specified */ symbol = argv[1]; /* TODO: support .init module functions */ ret = traceprobe_split_symbol_offset(symbol, &offset); if (ret) { - pr_info("Failed to parse symbol.\n"); + pr_info("Failed to parse either an address or a symbol.\n"); return ret; } if (offset && is_return) { pr_info("Return probe must be used without offset.\n"); return -EINVAL; } + } else if (is_return) { + pr_info("Return probe point must be a symbol.\n"); + return -EINVAL; } argc -= 2; argv += 2; From a080ec466c3412dcf58cff69d6c05d1e670adc0c Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 6 Apr 2017 17:59:40 +0200 Subject: [PATCH 052/176] drm/virtio: don't leak bo on drm_gem_object_init failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 385aee965b4e4c36551c362a334378d2985b722a upstream. Reported-by: 李强 Signed-off-by: Gerd Hoffmann Link: http://patchwork.freedesktop.org/patch/msgid/20170406155941.458-1-kraxel@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/virtio/virtgpu_object.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index f300eba95bb1..1244cdf52859 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -81,8 +81,10 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, return -ENOMEM; size = roundup(size, PAGE_SIZE); ret = drm_gem_object_init(vgdev->ddev, &bo->gem_base, size); - if (ret != 0) + if (ret != 0) { + kfree(bo); return ret; + } bo->dumb = false; virtio_gpu_init_ttm_placement(bo, pinned); From c8130087ff4d3e21c9971708cec71ba42075d77d Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 17 May 2017 15:57:45 +0300 Subject: [PATCH 053/176] usb: dwc3: replace %p with %pK commit 04fb365c453e14ff9e8a28f1c46050d920a27a4a upstream. %p will leak kernel pointers, so let's not expose the information on dmesg and instead use %pK. %pK will only show the actual addresses if explicitly enabled under /proc/sys/kernel/kptr_restrict. 
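For a rough feel of the effect (the device name, endpoint name and address below are invented), setting kptr_restrict to 2 makes every %pK render as zeros regardless of who reads the log:

	# echo 2 > /proc/sys/kernel/kptr_restrict

so a line that previously leaked a heap address:

	dwc3 dwc3.0.auto: request ffff88003d2e5a00 was not queued to ep1in

now comes out as:

	dwc3 dwc3.0.auto: request 0000000000000000 was not queued to ep1in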
Acked-by: Greg Kroah-Hartman Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-st.c | 2 +- drivers/usb/dwc3/gadget.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c index 5c0adb9c6fb2..81db2fa08cad 100644 --- a/drivers/usb/dwc3/dwc3-st.c +++ b/drivers/usb/dwc3/dwc3-st.c @@ -224,7 +224,7 @@ static int st_dwc3_probe(struct platform_device *pdev) dwc3_data->syscfg_reg_off = res->start; - dev_vdbg(&pdev->dev, "glue-logic addr 0x%p, syscfg-reg offset 0x%x\n", + dev_vdbg(&pdev->dev, "glue-logic addr 0x%pK, syscfg-reg offset 0x%x\n", dwc3_data->glue_base, dwc3_data->syscfg_reg_off); dwc3_data->rstc_pwrdn = devm_reset_control_get(dev, "powerdown"); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index ec7a50f98f57..d3bd1afd6302 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1219,7 +1219,7 @@ static int dwc3_gadget_ep_queue(struct usb_ep *ep, struct usb_request *request, goto out; } - if (WARN(req->dep != dep, "request %p belongs to '%s'\n", + if (WARN(req->dep != dep, "request %pK belongs to '%s'\n", request, req->dep->name)) { ret = -EINVAL; goto out; @@ -1264,7 +1264,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, dwc3_stop_active_transfer(dwc, dep->number, true); goto out1; } - dev_err(dwc->dev, "request %p was not queued to %s\n", + dev_err(dwc->dev, "request %pK was not queued to %s\n", request, ep->name); ret = -EINVAL; goto out0; @@ -1866,7 +1866,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, * would help. Lets hope that if this occurs, someone * fixes the root cause instead of looking away :) */ - dev_err(dwc->dev, "%s's TRB (%p) still owned by HW\n", + dev_err(dwc->dev, "%s's TRB (%pK) still owned by HW\n", dep->name, trb); count = trb->size & DWC3_TRB_SIZE_MASK; From 45c73d0c7a96890e6a4e45576d4c544e15bed6e6 Mon Sep 17 00:00:00 2001 From: Jeremie Rapin Date: Wed, 28 Jun 2017 18:23:25 +0200 Subject: [PATCH 054/176] USB: serial: cp210x: add ID for CEL EM3588 USB ZigBee stick commit fd90f73a9925f248d696bde1cfc836d9fda5570d upstream. Added the USB serial device ID for the CEL ZigBee EM3588 radio stick. Signed-off-by: Jeremie Rapin Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 33cec50978b8..b0dc6da3d970 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -134,6 +134,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8977) }, /* CEL MeshWorks DevKit Device */ { USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ + { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ From ab96add78bfe764a4994545993ac1c32978f8dc0 Mon Sep 17 00:00:00 2001 From: Devin Heitmueller Date: Tue, 27 Jun 2017 13:08:51 -0400 Subject: [PATCH 055/176] Add USB quirk for HVR-950q to avoid intermittent device resets commit 6836796de4019944f4ba4c99a360e8250fd2e735 upstream. 
The USB core and sysfs will attempt to enumerate certain parameters which are unsupported by the au0828 - causing inconsistent behavior and sometimes causing the chip to reset. Avoid making these calls. This problem manifested as intermittent cases where the au8522 would be reset on analog video startup, in particular when starting up ALSA audio streaming in parallel - the sysfs entries created by snd-usb-audio on streaming startup would result in unsupported control messages being sent during tuning, which would put the chip into an unknown state. Signed-off-by: Devin Heitmueller Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 96b21b0dac1e..3116edfcdc18 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -223,6 +223,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Blackmagic Design UltraStudio SDI */ { USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM }, + /* Hauppauge HVR-950q */ + { USB_DEVICE(0x2040, 0x7200), .driver_info = + USB_QUIRK_CONFIG_INTF_STRINGS }, + /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, From c6bf62a36175fceeb4b83caf8791cd1085aeb9cc Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Mon, 22 May 2017 13:02:44 +0200 Subject: [PATCH 056/176] usb: usbip: set buffer pointers to NULL after free commit b3b51417d0af63fb9a06662dc292200aed9ea53f upstream. The usbip stack dynamically allocates the transfer_buffer and setup_packet of each urb that is generated by the tcp-to-usb stub code. As these pointers are only ever used once, we set them to NULL after use. This mirrors the free_urb code in vudc_dev.c. This patch fixes double-kfree situations where the usbip remote side added the URB_FREE_BUFFER flag. Signed-off-by: Michael Grzeschik Acked-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_main.c | 4 ++++ drivers/usb/usbip/stub_tx.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index 44ab43fc4fcc..af10f7b131a4 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -262,7 +262,11 @@ void stub_device_cleanup_urbs(struct stub_device *sdev) kmem_cache_free(stub_priv_cache, priv); kfree(urb->transfer_buffer); + urb->transfer_buffer = NULL; + kfree(urb->setup_packet); + urb->setup_packet = NULL; + usb_free_urb(urb); } } diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c index dbcabc9dbe0d..021003c4de53 100644 --- a/drivers/usb/usbip/stub_tx.c +++ b/drivers/usb/usbip/stub_tx.c @@ -28,7 +28,11 @@ static void stub_free_priv_and_urb(struct stub_priv *priv) struct urb *urb = priv->urb; kfree(urb->setup_packet); + urb->setup_packet = NULL; + kfree(urb->transfer_buffer); + urb->transfer_buffer = NULL; + list_del(&priv->list); kmem_cache_free(stub_priv_cache, priv); usb_free_urb(urb); From 93f526ffe8ae1e870d511aa574e7b4670255124b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 13 Jun 2017 16:01:13 +1000 Subject: [PATCH 057/176] usb: Fix typo in the definition of Endpoint[out]Request commit 7cf916bd639bd26db7214f2205bccdb4b9306256 upstream. The current definition is wrong. This breaks my upcoming Aspeed virtual hub driver.
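[ Illustration, not part of the patch: host-side code dispatches control requests on a composite of bRequestType and bRequest, so the wrong recipient bits make the endpoint cases unreachable. A sketch with a hypothetical handler:

    u16 typeReq = (ctrlreq->bRequestType << 8) | ctrlreq->bRequest;

    switch (typeReq) {
    case EndpointOutRequest | USB_REQ_CLEAR_FEATURE:
        /* with the old definition this composite encoded USB_RECIP_INTERFACE,
         * so endpoint ClearFeature requests could never match here */
        break;
    }
]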
Signed-off-by: Benjamin Herrenschmidt Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 3993b21f3d11..757c554408ce 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -560,9 +560,9 @@ extern void usb_ep0_reinit(struct usb_device *); ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8) #define EndpointRequest \ - ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8) + ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8) #define EndpointOutRequest \ - ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8) + ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8) /* class requests from the USB 2.0 hub spec, table 11-15 */ /* GetBusState and SetHubDescriptor are optional, omitted */ From 2e42225b85ee59e3fea83895256386c1544adfa6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Feb 2017 10:27:37 +0000 Subject: [PATCH 058/176] mac80211_hwsim: Replace bogus hrtimer clockid commit 8fbcfeb8a9cc803464d6c166e7991913711c612c upstream. mac80211_hwsim initializes a hrtimer with clockid CLOCK_MONOTONIC_RAW. That's not supported. Use CLOCK_MONOTONIC instead. Signed-off-by: Thomas Gleixner Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index d59769e858f4..019d7165a045 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2539,7 +2539,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, tasklet_hrtimer_init(&data->beacon_timer, mac80211_hwsim_beacon, - CLOCK_MONOTONIC_RAW, HRTIMER_MODE_ABS); + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); spin_lock_bh(&hwsim_radio_lock); list_add_tail(&data->list, &hwsim_radios); From e8505e6432c2eb7296d9c7724146d1ba1966a040 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Fri, 7 Apr 2017 23:51:06 +0800 Subject: [PATCH 059/176] sysctl: don't print negative flag for proc_douintvec commit 5380e5644afbba9e3d229c36771134976f05c91e upstream. I saw some very confusing sysctl output on my system: # cat /proc/sys/net/core/xfrm_aevent_rseqth -2 # cat /proc/sys/net/core/xfrm_aevent_etime -10 # cat /proc/sys/net/ipv4/tcp_notsent_lowat -4294967295 Because we forgot to set the *negp flag in proc_douintvec, it ends up containing a garbage value. Since the value handled by proc_douintvec is always an unsigned integer, we can set *negp to false explicitly to fix this issue. Fixes: e7d316a02f68 ("sysctl: handle error writing UINT_MAX to u32 fields") Signed-off-by: Liping Zhang Cc: Subash Abhinov Kasiviswanathan Cc: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/sysctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 464a7864e4c5..5f3a51614ecf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2070,6 +2070,7 @@ static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp, *valp = *lvalp; } else { unsigned int val = *valp; + *negp = false; *lvalp = (unsigned long)val; } return 0; From a2148222e3751fe2e02c71533dced8f030e94295 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Fri, 7 Apr 2017 23:51:07 +0800 Subject: [PATCH 060/176] sysctl: report EINVAL if value is larger than UINT_MAX for proc_douintvec commit 425fffd886bae3d127a08fa6a17f2e31e24ed7ff upstream.
Currently, inputting the following command will succeed but actually the value will be truncated: # echo 0x12ffffffff > /proc/sys/net/ipv4/tcp_notsent_lowat This is not friendly to the user, so instead, we should report an error when the value is larger than UINT_MAX. Fixes: e7d316a02f68 ("sysctl: handle error writing UINT_MAX to u32 fields") Signed-off-by: Liping Zhang Cc: Subash Abhinov Kasiviswanathan Cc: Andrew Morton Cc: Eric W. Biederman Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/sysctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5f3a51614ecf..002ec084124b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2067,6 +2067,8 @@ static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp, if (write) { if (*negp) return -EINVAL; + if (*lvalp > UINT_MAX) + return -EINVAL; *valp = *lvalp; } else { unsigned int val = *valp; From 5d030f597f4728776b98337bd639fe4638386b78 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 30 Mar 2017 23:20:48 +0300 Subject: [PATCH 061/176] pinctrl: sh-pfc: r8a7791: Fix SCIF2 pinmux data commit 58439280f84e6b39fd7d61f25ab30489c1aaf0a9 upstream. The PINMUX_IPSR_MSEL() macro invocation for the TX2 signal apparently has a wrong 1st argument -- most probably a result of cut&paste programming... Fixes: 508845196238 ("pinctrl: sh-pfc: r8a7791 PFC support") Signed-off-by: Sergei Shtylyov Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/sh-pfc/pfc-r8a7791.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c index 87a4f44147c1..42ffa8708abc 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c @@ -1102,7 +1102,7 @@ static const u16 pinmux_data[] = { PINMUX_IPSR_MSEL(IP6_5_3, FMIN_E, SEL_FM_4), PINMUX_IPSR_DATA(IP6_7_6, AUDIO_CLKOUT), PINMUX_IPSR_MSEL(IP6_7_6, MSIOF1_SS1_B, SEL_SOF1_1), - PINMUX_IPSR_MSEL(IP6_5_3, TX2, SEL_SCIF2_0), + PINMUX_IPSR_MSEL(IP6_7_6, TX2, SEL_SCIF2_0), PINMUX_IPSR_MSEL(IP6_7_6, SCIFA2_TXD, SEL_SCIFA2_0), PINMUX_IPSR_DATA(IP6_9_8, IRQ0), PINMUX_IPSR_MSEL(IP6_9_8, SCIFB1_RXD_D, SEL_SCIFB1_3), From 3c4720871bdd48645d831cdc5619a7282cfaa0de Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 1 Apr 2017 15:59:21 +0200 Subject: [PATCH 062/176] pinctrl: meson: meson8b: fix the NAND DQS pins commit 97ba26b8a9343008504d4e3a87d212bc07b05212 upstream. The nand_groups table uses different names for the NAND DQS pins than the GROUP() definition in meson8b_cbus_groups (nand_dqs_0 vs nand_dqs0). This prevents using the NAND DQS pins in the devicetree. Fix this by ensuring that the GROUP() definition and the meson8b_cbus_groups use the same name for these pins.
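[ Illustration, not part of the patch: the pinctrl core resolves group names by plain string comparison, so the function table and the GROUP() definitions must agree exactly. A minimal runnable sketch of the failing lookup, using the two names from this patch:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        const char *registered = "nand_dqs_0"; /* GROUP() definition */
        const char *requested = "nand_dqs0";   /* old nand_groups[] entry */

        /* the core's lookup, in miniature: no match, pin unusable from DT */
        printf("match: %s\n", strcmp(registered, requested) ? "no" : "yes");
        return 0;
    }
]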
Fixes: 0fefcb6876d0 ("pinctrl: Add support for Meson8b") Signed-off-by: Martin Blumenstingl Acked-by: Kevin Hilman Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/meson/pinctrl-meson8b.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson8b.c b/drivers/pinctrl/meson/pinctrl-meson8b.c index 9677807db364..b505b87661f8 100644 --- a/drivers/pinctrl/meson/pinctrl-meson8b.c +++ b/drivers/pinctrl/meson/pinctrl-meson8b.c @@ -732,8 +732,8 @@ static const char * const sdxc_c_groups[] = { static const char * const nand_groups[] = { "nand_io", "nand_io_ce0", "nand_io_ce1", "nand_io_rb0", "nand_ale", "nand_cle", - "nand_wen_clk", "nand_ren_clk", "nand_dqs0", - "nand_dqs1" + "nand_wen_clk", "nand_ren_clk", "nand_dqs_0", + "nand_dqs_1" }; static const char * const nor_groups[] = { From 79565741fce0e70e5b208125adf331f26839ad37 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 22 May 2017 14:25:49 +0800 Subject: [PATCH 063/176] pinctrl: sunxi: Fix SPDIF function name for A83T commit 7903d4f5e1dec53963cba9b1bc472a76a3532e07 upstream. We use well known standard names for functions that have one, such as I2C, SPI, SPDIF, etc.. Fix the function name of SPDIF, which was named OWA (One Wire Audio) based on Allwinner datasheets. Fixes: 4730f33f0d82 ("pinctrl: sunxi: add allwinner A83T PIO controller support") Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c index 90b973e15982..a7c81e988656 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c @@ -394,7 +394,7 @@ static const struct sunxi_desc_pin sun8i_a83t_pins[] = { SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 18), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x3, "owa")), /* DOUT */ + SUNXI_FUNCTION(0x3, "spdif")), /* DOUT */ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 19), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out")), From 45424528a184f8d3b698268db4bb58eaa52cbd6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 18 May 2017 11:23:55 +0200 Subject: [PATCH 064/176] pinctrl: mxs: atomically switch mux and drive strength config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit da6c2addf66d7ff7d0b090d6267d4292f951e4e6 upstream. To set the mux mode of a pin two bits must be set. Up to now this is implemented using the following idiom: writel(mask, reg + CLR); writel(value, reg + SET); This however results in the mux mode being 0 between the two writes. On my machine there is an IC's reset pin connected to LCD_D20. The bootloader configures this pin as GPIO output-high (i.e. not holding the IC in reset). When Linux reconfigures the pin to GPIO, the short time during which LCD_D20 is muxed as LCD_D20 instead of GPIO_1_20 is enough to confuse the connected IC. The same problem is present for the pin's drive strength setting, which is reset to low drive strength before the right value is set. So instead of relying on the hardware to modify the register setting using two writes, implement the bit toggling using read-modify-write.
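[ Illustration, not part of the patch: the difference between the two approaches, sketched with hypothetical reg/mask/shift/value/tmp variables; the read-modify-write variant is what the new mxs_pinctrl_rmwl() helper in the diff below implements.

    /* before: the field reads back as 0 between the two MMIO writes */
    writel(mask << shift, reg + CLR);
    writel(value << shift, reg + SET);

    /* after: one write, the field goes straight from the old value to the new */
    tmp = readl(reg);
    tmp &= ~(mask << shift);
    tmp |= value << shift;
    writel(tmp, reg);
]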
Fixes: 17723111e64f ("pinctrl: add pinctrl-mxs support") Signed-off-by: Uwe Kleine-König Acked-by: Shawn Guo Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/freescale/pinctrl-mxs.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/freescale/pinctrl-mxs.c b/drivers/pinctrl/freescale/pinctrl-mxs.c index 6bbda6b4ab50..5da9c95dccb7 100644 --- a/drivers/pinctrl/freescale/pinctrl-mxs.c +++ b/drivers/pinctrl/freescale/pinctrl-mxs.c @@ -195,6 +195,16 @@ static int mxs_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } +static void mxs_pinctrl_rmwl(u32 value, u32 mask, u8 shift, void __iomem *reg) +{ + u32 tmp; + + tmp = readl(reg); + tmp &= ~(mask << shift); + tmp |= value << shift; + writel(tmp, reg); +} + static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector, unsigned group) { @@ -212,8 +222,7 @@ static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector, reg += bank * 0x20 + pin / 16 * 0x10; shift = pin % 16 * 2; - writel(0x3 << shift, reg + CLR); - writel(g->muxsel[i] << shift, reg + SET); + mxs_pinctrl_rmwl(g->muxsel[i], 0x3, shift, reg); } return 0; @@ -280,8 +289,7 @@ static int mxs_pinconf_group_set(struct pinctrl_dev *pctldev, /* mA */ if (config & MA_PRESENT) { shift = pin % 8 * 4; - writel(0x3 << shift, reg + CLR); - writel(ma << shift, reg + SET); + mxs_pinctrl_rmwl(ma, 0x3, shift, reg); } /* vol */ From 174ddbcc69e239481ba43799a2e6118dbbbf2016 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 9 Mar 2017 19:20:48 +0100 Subject: [PATCH 065/176] pinctrl: sh-pfc: Update info pointer after SoC-specific init commit 3091ae775fae17084013021d01513bc1ad274e6a upstream. Update the sh_pfc_soc_info pointer after calling the SoC-specific initialization function, as it may have been updated to e.g. handle different SoC revisions. This makes sure the correct subdriver name is printed later. Fixes: 0c151062f32c9db8 ("sh-pfc: Add support for SoC-specific initialization") Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/sh-pfc/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/sh-pfc/core.c b/drivers/pinctrl/sh-pfc/core.c index 2b0d70217bbd..699efb1a8c45 100644 --- a/drivers/pinctrl/sh-pfc/core.c +++ b/drivers/pinctrl/sh-pfc/core.c @@ -543,6 +543,9 @@ static int sh_pfc_probe(struct platform_device *pdev) ret = info->ops->init(pfc); if (ret < 0) return ret; + + /* .init() may have overridden pfc->info */ + info = pfc->info; } /* Enable dummy states for those platforms without pinctrl support */ From 1a04853de06986936ca6853dbfbe9152e928a0a0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 12 Jun 2017 16:30:16 +0200 Subject: [PATCH 066/176] USB: serial: option: add two Longcheer device ids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8fb060da715ad10fe956d7c0077b2fb0c12bb9d7 upstream. Add two Longcheer device-id entries which specifically enables a Telewell TW-3G HSPA+ branded modem (0x9801). 
Reported-by: Teemu Likonen Reported-by: Bjørn Mork Reported-by: Lars Melin Tested-by: Teemu Likonen Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 3bf61acfc26b..ebe51f11105d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1877,6 +1877,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&four_g_w100_blacklist }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) }, + { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9801, 0xff), + .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, + { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9803, 0xff), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) }, { USB_DEVICE(HAIER_VENDOR_ID, HAIER_PRODUCT_CE100) }, From ac12d6abed6cf495908c896b1c4357a906cc7303 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 13 Jun 2017 19:11:42 +0200 Subject: [PATCH 067/176] USB: serial: qcserial: new Sierra Wireless EM7305 device ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 996fab55d864ed604158f71724ff52db1c2454a3 upstream. Add a new Sierra Wireless EM7305 device ID, used in a Toshiba laptop. Reported-by: Petr Kloc Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index fd509ed6cf70..652b4334b26d 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -158,6 +158,7 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9056)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9060)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9061)}, /* Sierra Wireless Modem */ + {DEVICE_SWI(0x1199, 0x9063)}, /* Sierra Wireless EM7305 */ {DEVICE_SWI(0x1199, 0x9070)}, /* Sierra Wireless MC74xx */ {DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx */ {DEVICE_SWI(0x1199, 0x9078)}, /* Sierra Wireless EM74xx */ From e952c291df099fe71d3d1737c68063734907f418 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 7 Jul 2017 13:22:05 -0500 Subject: [PATCH 068/176] gfs2: Fix glock rhashtable rcu bug commit 961ae1d83d055a4b9ebbfb4cc8ca62ec1a7a3b74 upstream. Before commit 88ffbf3e03 "GFS2: Use resizable hash table for glocks", glocks were freed via call_rcu to allow reading the glock hashtable locklessly using rcu. This was then changed to free glocks immediately, which made reading the glock hashtable unsafe. Bring back the original code for freeing glocks via call_rcu.
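[ Illustration, not part of the patch: the general RCU deferred-free pattern being restored, as a simplified kernel-style sketch with hypothetical names. Readers walking the hash table under rcu_read_lock() can then never observe freed memory:

    struct obj {
        struct rcu_head rcu;
        /* ... payload ... */
    };

    static void obj_dealloc(struct rcu_head *rcu)
    {
        struct obj *o = container_of(rcu, struct obj, rcu);

        kfree(o);                       /* runs only after a grace period */
    }

    static void obj_free(struct obj *o)
    {
        call_rcu(&o->rcu, obj_dealloc); /* instead of freeing immediately */
    }
]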
Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/glock.c | 11 +++++++++-- fs/gfs2/incore.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9cd8c92b953d..070901e76653 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -80,9 +80,9 @@ static struct rhashtable_params ht_parms = { static struct rhashtable gl_hash_table; -void gfs2_glock_free(struct gfs2_glock *gl) +static void gfs2_glock_dealloc(struct rcu_head *rcu) { - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); if (gl->gl_ops->go_flags & GLOF_ASPACE) { kmem_cache_free(gfs2_glock_aspace_cachep, gl); @@ -90,6 +90,13 @@ void gfs2_glock_free(struct gfs2_glock *gl) kfree(gl->gl_lksb.sb_lvbptr); kmem_cache_free(gfs2_glock_cachep, gl); } +} + +void gfs2_glock_free(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); if (atomic_dec_and_test(&sdp->sd_glock_disposal)) wake_up(&sdp->sd_glock_wait); } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index be519416c112..4a9077ec9313 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -367,6 +367,7 @@ struct gfs2_glock { loff_t end; } gl_vm; }; + struct rcu_head gl_rcu; struct rhash_head gl_node; }; From 0214a8ca61e2e26c488ec6c0a5f4de91063681a2 Mon Sep 17 00:00:00 2001 From: Markus Trippelsdorf Date: Thu, 15 Dec 2016 13:45:13 +0100 Subject: [PATCH 069/176] x86/tools: Fix gcc-7 warning in relocs.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7ebb916782949621ff6819acf373a06902df7679 upstream. gcc-7 warns: In file included from arch/x86/tools/relocs_64.c:17:0: arch/x86/tools/relocs.c: In function ‘process_64’: arch/x86/tools/relocs.c:953:2: warning: argument 1 null where non-null expected [-Wnonnull] qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from arch/x86/tools/relocs.h:6:0, from arch/x86/tools/relocs_64.c:1: /usr/include/stdlib.h:741:13: note: in a call to function ‘qsort’ declared here extern void qsort This happens because relocs16 is not used for ELF_BITS == 64, so there is no point in trying to sort it. Make the sort_relocs(&relocs16) call 32bit only. Signed-off-by: Markus Trippelsdorf Link: http://lkml.kernel.org/r/20161215124513.GA289@x4 Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/tools/relocs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 0c2fae8d929d..73eb7fd4aec4 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -992,11 +992,12 @@ static void emit_relocs(int as_text, int use_real_mode) die("Segment relocations found but --realmode not specified\n"); /* Order the relocations for more efficient processing */ - sort_relocs(&relocs16); sort_relocs(&relocs32); #if ELF_BITS == 64 sort_relocs(&relocs32neg); sort_relocs(&relocs64); +#else + sort_relocs(&relocs16); #endif /* Print the relocations */ From e47e717666408553b5b057a515b33ba6c4a3d2ca Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 29 Jun 2017 15:55:58 +0200 Subject: [PATCH 070/176] x86/uaccess: Optimize copy_user_enhanced_fast_string() for short strings commit 236222d39347e0e486010f10c1493e83dbbdfba8 upstream. 
According to the Intel datasheet, the REP MOVSB instruction exposes a pretty heavy setup cost (50 ticks), which hurts short string copy operations. This change tries to avoid this cost by calling the explicit loop available in the unrolled code for strings shorter than 64 bytes. The 64-byte cutoff value is arbitrary from the code logic point of view - it has been selected based on measurements, as the largest value that still ensures a measurable gain. Micro benchmarks of the __copy_from_user() function with lengths in the [0-63] range show this performance gain (the shorter the string, the larger the gain): - in the [55%-4%] range on Intel Xeon(R) CPU E5-2690 v4 - in the [72%-9%] range on Intel Core i7-4810MQ Other tested CPUs - namely Intel Atom S1260 and AMD Opteron 8216 - show no difference, because they do not expose the ERMS feature bit. Signed-off-by: Paolo Abeni Acked-by: Linus Torvalds Cc: Alan Cox Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Hannes Frederic Sowa Cc: Josh Poimboeuf Cc: Kees Cook Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4533a1d101fd460f80e21329a34928fad521c1d4.1498744345.git.pabeni@redhat.com [ Clarified the changelog. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Mel Gorman --- arch/x86/lib/copy_user_64.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 27f89c79a44b..423644c230e7 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -80,7 +80,7 @@ ENTRY(copy_user_generic_unrolled) movl %edx,%ecx andl $63,%edx shrl $6,%ecx - jz 17f + jz .L_copy_short_string 1: movq (%rsi),%r8 2: movq 1*8(%rsi),%r9 3: movq 2*8(%rsi),%r10 @@ -101,7 +101,8 @@ ENTRY(copy_user_generic_unrolled) leaq 64(%rdi),%rdi decl %ecx jnz 1b -17: movl %edx,%ecx +.L_copy_short_string: + movl %edx,%ecx andl $7,%edx shrl $3,%ecx jz 20f @@ -215,6 +216,8 @@ ENDPROC(copy_user_generic_string) */ ENTRY(copy_user_enhanced_fast_string) ASM_STAC + cmpl $64,%edx + jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */ movl %edx,%ecx 1: rep movsb From 483b1c69655d61a4c15648cf96011cfb20aa2000 Mon Sep 17 00:00:00 2001 From: Bartosz Markowski Date: Thu, 15 Dec 2016 11:23:22 +0200 Subject: [PATCH 071/176] ath10k: override CE5 config for QCA9377 commit b08b5b53a1ed2bd7a883f8fd29232c8f03604671 upstream. Similarly to QCA6174, QCA9377 requires the CE5 configuration to be available for other features. Use ath10k_pci_override_ce_config() for it as well. This is required for TF2.0 firmware. Previous FW revisions were working fine without this patch.
Fixes: a70587b3389a ("ath10k: configure copy engine 5 for HTT messages") Signed-off-by: Bartosz Markowski Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 930785a724e1..907fd60c4241 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -3050,7 +3050,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev, goto err_core_destroy; } - if (QCA_REV_6174(ar)) + if (QCA_REV_6174(ar) || QCA_REV_9377(ar)) ath10k_pci_override_ce_config(ar); ret = ath10k_pci_alloc_pipes(ar); From 92e90c20dc38fe01e817b537c59182ea1ec03b16 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 9 Feb 2017 17:17:52 +0000 Subject: [PATCH 072/176] KEYS: Fix an error code in request_master_key() commit 57cb17e764ba0aaa169d07796acce54ccfbc6cae upstream. This function has two callers and neither is able to handle a NULL return. Really, -EINVAL is the correct thing to return here anyway. This fixes some static checker warnings like: security/keys/encrypted-keys/encrypted.c:709 encrypted_key_decrypt() error: uninitialized symbol 'master_key'. Fixes: 7e70cb497850 ("keys: add new key-type encrypted") Signed-off-by: Dan Carpenter Acked-by: Mimi Zohar Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- security/keys/encrypted-keys/encrypted.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 696ccfa08d10..31898856682e 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -428,7 +428,7 @@ static int init_blkcipher_desc(struct blkcipher_desc *desc, const u8 *key, static struct key *request_master_key(struct encrypted_key_payload *epayload, const u8 **master_key, size_t *master_keylen) { - struct key *mkey = NULL; + struct key *mkey = ERR_PTR(-EINVAL); if (!strncmp(epayload->master_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) { From 5fd2bba08c17c30d74b26a8b9f6ce67026b254b4 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Tue, 27 Jun 2017 15:09:13 +0300 Subject: [PATCH 073/176] RDMA/uverbs: Check port number supplied by user verbs cmds commit 5ecce4c9b17bed4dc9cb58bfb10447307569b77b upstream. The ib_uverbs_create_ah() and ib_uverbs_modify_qp() calls receive the port number from user input as part of their attributes and assume it is valid. Down on the stack, that parameter is used to access kernel data structures. If the value is invalid, the kernel accesses memory it should not. To prevent this, verify the port number before using it.
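[ Illustration, not part of the patch: port numbers coming from user space are 1-based indices into per-device kernel arrays, so the fix is a plain range check before first use, as in the hunks below:

    if (cmd.port_num < rdma_start_port(ib_dev) ||
        cmd.port_num > rdma_end_port(ib_dev))
        return -EINVAL;
]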
BUG: KASAN: use-after-free in ib_uverbs_create_ah+0x6d5/0x7b0 Read of size 4 at addr ffff880018d67ab8 by task syz-executor/313 BUG: KASAN: slab-out-of-bounds in modify_qp.isra.4+0x19d0/0x1ef0 Read of size 4 at addr ffff88006c40ec58 by task syz-executor/819 Fixes: 67cdb40ca444 ("[IB] uverbs: Implement more commands") Cc: Yevgeny Kliteynik Cc: Tziporet Koren Cc: Alex Polak Signed-off-by: Boris Pismenny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/uverbs_cmd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 1c02deab068f..9eca4b41fa0a 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2287,6 +2287,10 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + if (cmd.port_num < rdma_start_port(ib_dev) || + cmd.port_num > rdma_end_port(ib_dev)) + return -EINVAL; + INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); @@ -2827,6 +2831,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + if (cmd.attr.port_num < rdma_start_port(ib_dev) || + cmd.attr.port_num > rdma_end_port(ib_dev)) + return -EINVAL; + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; From 034e10b4f8348c2c267e491d1e2872023c35f310 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 9 Jul 2017 13:19:55 -0700 Subject: [PATCH 074/176] mqueue: fix a use-after-free in sys_mq_notify() commit f991af3daabaecff34684fd51fac80319d1baad1 upstream. The retry logic for netlink_attachskb() inside sys_mq_notify() is nasty and vulnerable: 1) The sock refcnt is already released when retry is needed 2) The fd is controllable by user-space, because we already release the file refcnt. So when we retry and the fd has just been closed by user-space during this small window, we end up calling netlink_detachskb() on the error path, which releases the sock again; later, when user-space closes this socket, a use-after-free could be triggered. Setting 'sock' to NULL here should be sufficient to fix it. Reported-by: GeneBlue Signed-off-by: Cong Wang Cc: Andrew Morton Cc: Manfred Spraul Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/mqueue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 161a1807e6ef..5e24eb0ab5dd 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -1251,8 +1251,10 @@ retry: timeo = MAX_SCHEDULE_TIMEOUT; ret = netlink_attachskb(sock, nc, &timeo, NULL); - if (ret == 1) + if (ret == 1) { + sock = NULL; goto retry; + } if (ret) { sock = NULL; nc = NULL; From e1b7b00a1bd575d37919780a2694622019c028f2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 8 Feb 2017 17:01:46 -0300 Subject: [PATCH 075/176] tools include: Add a __fallthrough statement commit b5bf1733d6a391c4e90ea8f8468d83023be74a2a upstream. For cases where implicit fall through between case labels is intended, so let us inform that to gcc >= 7: CC /tmp/build/perf/util/string.o util/string.c: In function 'perf_atoll': util/string.c:22:7: error: this statement may fall through [-Werror=implicit-fallthrough=] if (*p) ^ util/string.c:24:3: note: here case '\0': ^~~~ So we introduce: #define __fallthrough __attribute__ ((fallthrough)) And use it in such cases.
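[ Illustration, not part of the patch: a minimal, compilable userspace example of the annotation, using a made-up classify() function; with gcc >= 7 and -Wimplicit-fallthrough the attribute both documents and silences the intended fall-through:

    #include <linux/compiler.h> /* the tools/include copy, defines __fallthrough */

    static int classify(char c)
    {
        switch (c) {
        case 'b':
        case 'B':
            if (c == 'B')
                return 2;
            __fallthrough;      /* intended fall-through to the '\0' case */
        case '\0':
            return 1;
        default:
            return 0;
        }
    }
]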
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: William Cohen Link: http://lkml.kernel.org/n/tip-qnpig0xfop4hwv6k4mv1wts5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/include/linux/compiler.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index fa7208a32d76..8a679b21f0c4 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -115,4 +115,13 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s #define WRITE_ONCE(x, val) \ ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + +#ifndef __fallthrough +# if defined(__GNUC__) && __GNUC__ >= 7 +# define __fallthrough __attribute__ ((fallthrough)) +# else +# define __fallthrough +# endif +#endif + #endif /* _TOOLS_LINUX_COMPILER_H */ From 57a4c2504b77b7223934a78e67a7de18fb2c1fc6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 8 Feb 2017 17:01:46 -0300 Subject: [PATCH 076/176] tools string: Use __fallthrough in perf_atoll() commit 94bdd5edb34e472980d1e18b4600d6fb92bd6b0a upstream. The implicit fall through case label here is intended, so let us inform that to gcc >= 7: CC /tmp/build/perf/util/string.o util/string.c: In function 'perf_atoll': util/string.c:22:7: error: this statement may fall through [-Werror=implicit-fallthrough=] if (*p) ^ util/string.c:24:3: note: here case '\0': ^~~~ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-0ophb30v9apkk6o95el0rqlq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/string.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index fc8781de62db..accb7ece1d3c 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -21,6 +21,8 @@ s64 perf_atoll(const char *str) case 'b': case 'B': if (*p) goto out_err; + + __fallthrough; case '\0': return length; default: From 1ca0a40980050023aca49c96764bb30de36111db Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 8 Feb 2017 17:01:46 -0300 Subject: [PATCH 077/176] tools strfilter: Use __fallthrough commit d64b721d27aef3fbeb16ecda9dd22ee34818ff70 upstream. 
The implicit fall through case label here is intended, so let us inform that to gcc >= 7: util/strfilter.c: In function 'strfilter_node__sprint': util/strfilter.c:270:6: error: this statement may fall through [-Werror=implicit-fallthrough=] if (len < 0) ^ util/strfilter.c:272:2: note: here case '!': ^~~~ cc1: all warnings being treated as errors Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-z2dpywg7u8fim000hjfbpyfm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/strfilter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c index bcae659b6546..efb53772e0ec 100644 --- a/tools/perf/util/strfilter.c +++ b/tools/perf/util/strfilter.c @@ -269,6 +269,7 @@ static int strfilter_node__sprint(struct strfilter_node *node, char *buf) len = strfilter_node__sprint_pt(node->l, buf); if (len < 0) return len; + __fallthrough; case '!': if (buf) { *(buf + len++) = *node->p; From 0586f5cd419596674ec015174c8b1055008e4243 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 8 Feb 2017 17:01:46 -0300 Subject: [PATCH 078/176] perf top: Use __fallthrough commit 7b0214b702ad8e124e039a317beeebb3f020d125 upstream. The implicit fall through case label here is intended, so let us inform that to gcc >= 7: CC /tmp/build/perf/builtin-top.o builtin-top.c: In function 'display_thread': builtin-top.c:644:7: error: this statement may fall through [-Werror=implicit-fallthrough=] if (errno == EINTR) ^ builtin-top.c:647:3: note: here default: ^~~~~~~ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-lmcfnnyx9ic0m6j0aud98p4e@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 7e2e72e6d9d1..4a8a02c302d2 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -636,7 +636,7 @@ repeat: case -1: if (errno == EINTR) continue; - /* Fall trhu */ + __fallthrough; default: c = getc(stdin); tcsetattr(0, TCSAFLUSH, &save); From e5a9078a8e0e50681210b08ecfd67b53ca0262f7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 9 Feb 2017 15:22:22 -0300 Subject: [PATCH 079/176] perf intel-pt: Use __fallthrough commit 7ea6856d6f5629d742edc23b8b76e6263371ef45 upstream. 
To address new warnings emitted by gcc 7, e.g.: CC /tmp/build/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.o CC /tmp/build/perf/tests/parse-events.o util/intel-pt-decoder/intel-pt-pkt-decoder.c: In function 'intel_pt_pkt_desc': util/intel-pt-decoder/intel-pt-pkt-decoder.c:499:6: error: this statement may fall through [-Werror=implicit-fallthrough=] if (!(packet->count)) ^ util/intel-pt-decoder/intel-pt-pkt-decoder.c:501:2: note: here case INTEL_PT_CYC: ^~~~ CC /tmp/build/perf/util/intel-pt-decoder/intel-pt-decoder.o cc1: all warnings being treated as errors Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-mf0hw789pu9x855us5l32c83@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 5 +++++ tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c | 2 ++ 2 files changed, 7 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 71df7acf8643..933a509a90f8 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "../cache.h" #include "../util.h" @@ -1708,6 +1709,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) switch (decoder->packet.type) { case INTEL_PT_TIP_PGD: decoder->continuous_period = false; + __fallthrough; case INTEL_PT_TIP_PGE: case INTEL_PT_TIP: intel_pt_log("ERROR: Unexpected packet\n"); @@ -1762,6 +1764,8 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) decoder->pge = false; decoder->continuous_period = false; intel_pt_clear_tx_flags(decoder); + __fallthrough; + case INTEL_PT_TNT: decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); @@ -1802,6 +1806,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) switch (decoder->packet.type) { case INTEL_PT_TIP_PGD: decoder->continuous_period = false; + __fallthrough; case INTEL_PT_TIP_PGE: case INTEL_PT_TIP: decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index b1257c816310..9b2fce25162b 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "intel-pt-pkt-decoder.h" @@ -488,6 +489,7 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, case INTEL_PT_FUP: if (!(packet->count)) return snprintf(buf, buf_len, "%s no ip", name); + __fallthrough; case INTEL_PT_CYC: case INTEL_PT_VMCS: case INTEL_PT_MTC: From c9cf7ce0bbc4e9184ac656c5fe3be99ae16655df Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 8 Feb 2017 17:01:46 -0300 Subject: [PATCH 080/176] perf thread_map: Correctly size buffer used with dirent->dt_name commit bdf23a9a190d7ecea092fd5c4aabb7d4bd0a9980 upstream.
The size of dirent->dt_name is NAME_MAX + 1, but the size for the 'path' buffer is hard coded at 256, which may truncate it because we also prepend "/proc/", so take all that into account, and thank gcc 7 for this warning: /git/linux/tools/perf/util/thread_map.c: In function 'thread_map__new_by_uid': /git/linux/tools/perf/util/thread_map.c:119:39: error: '%s' directive output may be truncated writing up to 255 bytes into a region of size 250 [-Werror=format-truncation=] snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); ^~ In file included from /usr/include/stdio.h:939:0, from /git/linux/tools/perf/util/thread_map.c:5: /usr/include/bits/stdio2.h:64:10: note: '__builtin___snprintf_chk' output between 7 and 262 bytes into a destination of size 256 return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ __bos (__s), __fmt, __va_arg_pack ()); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-csy0r8zrvz5efccgd4k12c82@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/thread_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 6ec3c5ca438f..fd8d4384454c 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -92,7 +92,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) { DIR *proc; int max_threads = 32, items, i; - char path[256]; + char path[NAME_MAX + 1 + 6]; struct dirent dirent, *next, **namelist = NULL; struct thread_map *threads = thread_map__alloc(max_threads); From 071ca0bcf337e4c058bc9844a4aa6fe1b67b612b Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Sun, 12 Feb 2017 10:46:55 +0800 Subject: [PATCH 081/176] perf scripting perl: Fix compile error with some perl5 versions commit d7dd112ea5cacf91ae72c0714c3b911eb6016fea upstream.
Fix the below compile error: CC util/scripting-engines/trace-event-perl.o In file included from /usr/lib/perl5/5.22.2/i686-linux/CORE/perl.h:5673:0, from util/scripting-engines/trace-event-perl.c:31: /usr/lib/perl5/5.22.2/i686-linux/CORE/inline.h: In function 'S__is_utf8_char_slow': /usr/lib/perl5/5.22.2/i686-linux/CORE/inline.h:270:5: error: nested extern declaration of 'Perl___notused' [-Werror=nested-externs] dTHX; /* The function called below requires thread context */ ^ cc1: all warnings being treated as errors After digging through the perl5 repository, I found out that this compile error occurs with perl versions from v5.21.1 to v5.25.4. Signed-off-by: Wang YanQing Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170212024655.GA15997@udknight Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/scripting-engines/Build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build index 6516e220c247..82d28c67e0f3 100644 --- a/tools/perf/util/scripting-engines/Build +++ b/tools/perf/util/scripting-engines/Build @@ -1,6 +1,6 @@ libperf-$(CONFIG_LIBPERL) += trace-event-perl.o libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o -CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default +CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default From fb3c230b7ec28fc540e210e16e0b40bcff441b16 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 9 Feb 2017 14:48:46 -0300 Subject: [PATCH 082/176] perf tests: Avoid possible truncation with dirent->d_name + snprintf commit 2e2bbc039fad9eabad6c4c1a473c8b2554cdd2d4 upstream.
Addressing a few cases spotted by a new warning in gcc 7: tests/parse-events.c: In function 'test_pmu_events': tests/parse-events.c:1790:39: error: '%s' directive output may be truncated writing up to 255 bytes into a region of size 90 [-Werror=format-truncation=] snprintf(name, MAX_NAME, "cpu/event=%s/u", ent->d_name); ^~ In file included from /usr/include/stdio.h:939:0, from /git/linux/tools/perf/util/map.h:9, from /git/linux/tools/perf/util/symbol.h:7, from /git/linux/tools/perf/util/evsel.h:10, from tests/parse-events.c:3: /usr/include/bits/stdio2.h:64:10: note: '__builtin___snprintf_chk' output between 13 and 268 bytes into a destination of size 100 return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ __bos (__s), __fmt, __va_arg_pack ()); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ tests/parse-events.c:1798:29: error: '%s' directive output may be truncated writing up to 255 bytes into a region of size 100 [-Werror=format-truncation=] snprintf(name, MAX_NAME, "%s:u,cpu/event=%s/u", ent->d_name, ent->d_name); Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 945aea220bb8 ("perf tests: Move test objects into 'tests' directory") Link: http://lkml.kernel.org/n/tip-ty4q2p8zp1dp3mskvubxskm5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/tests/parse-events.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 636d7b42d844..54af2f2e2ee4 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1727,15 +1727,14 @@ static int test_pmu_events(void) } while (!ret && (ent = readdir(dir))) { -#define MAX_NAME 100 struct evlist_test e; - char name[MAX_NAME]; + char name[2 * NAME_MAX + 1 + 12 + 3]; if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) continue; - snprintf(name, MAX_NAME, "cpu/event=%s/u", ent->d_name); + snprintf(name, sizeof(name), "cpu/event=%s/u", ent->d_name); e.name = name; e.check = test__checkevent_pmu_events; @@ -1743,11 +1742,10 @@ static int test_pmu_events(void) ret = test_event(&e); if (ret) break; - snprintf(name, MAX_NAME, "%s:u,cpu/event=%s/u", ent->d_name, ent->d_name); + snprintf(name, sizeof(name), "%s:u,cpu/event=%s/u", ent->d_name, ent->d_name); e.name = name; e.check = test__checkevent_pmu_events_mix; ret = test_event(&e); -#undef MAX_NAME } closedir(dir); From 144eb3e8e4d938a3c0f341a7b10b035b3d63595c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 9 Feb 2017 14:39:42 -0300 Subject: [PATCH 083/176] perf bench numa: Avoid possible truncation when using snprintf() commit 3aff8ba0a4c9c9191bb788171a1c54778e1246a2 upstream. 
Addressing this warning from gcc 7: CC /tmp/build/perf/bench/numa.o bench/numa.c: In function '__bench_numa': bench/numa.c:1582:42: error: '%d' directive output may be truncated writing between 1 and 10 bytes into a region of size between 8 and 17 [-Werror=format-truncation=] snprintf(tname, 32, "process%d:thread%d", p, t); ^~ bench/numa.c:1582:25: note: directive argument in the range [0, 2147483647] snprintf(tname, 32, "process%d:thread%d", p, t); ^~~~~~~~~~~~~~~~~~~~ In file included from /usr/include/stdio.h:939:0, from bench/../util/util.h:47, from bench/../builtin.h:4, from bench/numa.c:11: /usr/include/bits/stdio2.h:64:10: note: '__builtin___snprintf_chk' output between 17 and 35 bytes into a destination of size 32 return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ __bos (__s), __fmt, __va_arg_pack ()); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Petr Holasek Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-twa37vsfqcie5gwpqwnjuuz9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/bench/numa.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 492df2752a2d..b4eb5b679081 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1570,13 +1570,13 @@ static int __bench_numa(const char *name) "GB/sec,", "total-speed", "GB/sec total speed"); if (g->p.show_details >= 2) { - char tname[32]; + char tname[14 + 2 * 10 + 1]; struct thread_data *td; for (p = 0; p < g->p.nr_proc; p++) { for (t = 0; t < g->p.nr_threads; t++) { - memset(tname, 0, 32); + memset(tname, 0, sizeof(tname)); td = g->threads + p*g->p.nr_threads + t; - snprintf(tname, 32, "process%d:thread%d", p, t); + snprintf(tname, sizeof(tname), "process%d:thread%d", p, t); print_res(tname, td->speed_gbs, "GB/sec", "thread-speed", "GB/sec/thread speed"); print_res(tname, td->system_time_ns / 1e9, From 055ce585a864a999f6c22c2af75c424122c2ee1a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 8 Apr 2016 11:32:15 -0300 Subject: [PATCH 084/176] perf tools: Use readdir() instead of deprecated readdir_r() commit 7093b4c963cc4e344e490c774924a180602a7092 upstream. The readdir() function is thread safe as long as just one thread uses a DIR, which is the case when synthesizing events for pre-existing threads by traversing /proc, so, to avoid breaking the build with glibc-2.23.90 (upcoming 2.24), use it instead of readdir_r(). See: http://man7.org/linux/man-pages/man3/readdir.3.html "However, in modern implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external synchronization is still preferable to the use of the deprecated readdir_r(3) function." Noticed while building on a Fedora Rawhide docker container. 
CC /tmp/build/perf/util/event.o util/event.c: In function '__event__synthesize_thread': util/event.c:466:2: error: 'readdir_r' is deprecated [-Werror=deprecated-declarations] while (!readdir_r(tasks, &dirent, &next) && next) { ^~~~~ In file included from /usr/include/features.h:368:0, from /usr/include/stdint.h:25, from /usr/lib/gcc/x86_64-redhat-linux/6.0.0/include/stdint.h:9, from /git/linux/tools/include/linux/types.h:6, from util/event.c:1: /usr/include/dirent.h:189:12: note: declared here Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-i1vj7nyjp2p750rirxgrfd3c@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/event.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 956187bf1a85..26cba64345e3 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -416,7 +416,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, { char filename[PATH_MAX]; DIR *tasks; - struct dirent dirent, *next; + struct dirent *dirent; pid_t tgid, ppid; int rc = 0; @@ -445,11 +445,11 @@ static int __event__synthesize_thread(union perf_event *comm_event, return 0; } - while (!readdir_r(tasks, &dirent, &next) && next) { + while ((dirent = readdir(tasks)) != NULL) { char *end; pid_t _pid; - _pid = strtol(dirent.d_name, &end, 10); + _pid = strtol(dirent->d_name, &end, 10); if (*end) continue; @@ -558,7 +558,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, { DIR *proc; char proc_path[PATH_MAX]; - struct dirent dirent, *next; + struct dirent *dirent; union perf_event *comm_event, *mmap_event, *fork_event; int err = -1; @@ -583,9 +583,9 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (proc == NULL) goto out_free_fork; - while (!readdir_r(proc, &dirent, &next) && next) { + while ((dirent = readdir(proc)) != NULL) { char *end; - pid_t pid = strtol(dirent.d_name, &end, 10); + pid_t pid = strtol(dirent->d_name, &end, 10); if (*end) /* only interested in proper numerical dirents */ continue; From 56e75cee2af34900e60656a5b727640bbb5135f1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 8 Apr 2016 11:31:24 -0300 Subject: [PATCH 085/176] perf thread_map: Use readdir() instead of deprecated readdir_r() commit 3354cf71104de49326d19d2f9bdb1f66eea52ef4 upstream. The readdir() function is thread safe as long as just one thread uses a DIR, which is the case in thread_map, so, to avoid breaking the build with glibc-2.23.90 (upcoming 2.24), use it instead of readdir_r(). See: http://man7.org/linux/man-pages/man3/readdir.3.html "However, in modern implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external synchronization is still preferable to the use of the deprecated readdir_r(3) function." Noticed while building on a Fedora Rawhide docker container. 
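[ Illustration, not part of the patch: the conversion pattern, as a small runnable userspace example; one DIR stream per thread is exactly the case the man page calls safe for plain readdir():

    #include <dirent.h>
    #include <stdio.h>

    int main(void)
    {
        DIR *dir = opendir("/proc");
        struct dirent *ent;     /* no caller-supplied buffer needed */

        if (dir == NULL)
            return 1;
        while ((ent = readdir(dir)) != NULL)
            puts(ent->d_name);
        closedir(dir);
        return 0;
    }
]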
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-del8h2a0f40z75j4r42l96l0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/thread_map.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index fd8d4384454c..4e666b95b87e 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -93,7 +93,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) DIR *proc; int max_threads = 32, items, i; char path[NAME_MAX + 1 + 6]; - struct dirent dirent, *next, **namelist = NULL; + struct dirent *dirent, **namelist = NULL; struct thread_map *threads = thread_map__alloc(max_threads); if (threads == NULL) @@ -106,16 +106,16 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) threads->nr = 0; atomic_set(&threads->refcnt, 1); - while (!readdir_r(proc, &dirent, &next) && next) { + while ((dirent = readdir(proc)) != NULL) { char *end; bool grow = false; struct stat st; - pid_t pid = strtol(dirent.d_name, &end, 10); + pid_t pid = strtol(dirent->d_name, &end, 10); if (*end) /* only interested in proper numerical dirents */ continue; - snprintf(path, sizeof(path), "/proc/%s", dirent.d_name); + snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); if (stat(path, &st) != 0) continue; From c7b8d98bff822922ae0fa579c7fbdf07903bcad1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 8 Apr 2016 11:25:59 -0300 Subject: [PATCH 086/176] perf script: Use readdir() instead of deprecated readdir_r() commit a5e8e825bd1704c488bf6a46936aaf3b9f203d6a upstream. The readdir() function is thread safe as long as just one thread uses a DIR, which is the case in 'perf script', so, to avoid breaking the build with glibc-2.23.90 (upcoming 2.24), use it instead of readdir_r(). See: http://man7.org/linux/man-pages/man3/readdir.3.html "However, in modern implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external synchronization is still preferable to the use of the deprecated readdir_r(3) function." Noticed while building on a Fedora Rawhide docker container. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-mt3xz7n2hl49ni2vx7kuq74g@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/builtin-script.c | 68 ++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 72b5deb4bd79..20f0e27918dd 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1252,21 +1252,19 @@ static int is_directory(const char *base_path, const struct dirent *dent) return S_ISDIR(st.st_mode); } -#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\ - while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \ - lang_next) \ - if ((lang_dirent.d_type == DT_DIR || \ - (lang_dirent.d_type == DT_UNKNOWN && \ - is_directory(scripts_path, &lang_dirent))) && \ - (strcmp(lang_dirent.d_name, ".")) && \ - (strcmp(lang_dirent.d_name, ".."))) +#define for_each_lang(scripts_path, scripts_dir, lang_dirent) \ + while ((lang_dirent = readdir(scripts_dir)) != NULL) \ + if ((lang_dirent->d_type == DT_DIR || \ + (lang_dirent->d_type == DT_UNKNOWN && \ + is_directory(scripts_path, lang_dirent))) && \ + (strcmp(lang_dirent->d_name, ".")) && \ + (strcmp(lang_dirent->d_name, ".."))) -#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\ - while (!readdir_r(lang_dir, &script_dirent, &script_next) && \ - script_next) \ - if (script_dirent.d_type != DT_DIR && \ - (script_dirent.d_type != DT_UNKNOWN || \ - !is_directory(lang_path, &script_dirent))) +#define for_each_script(lang_path, lang_dir, script_dirent) \ + while ((script_dirent = readdir(lang_dir)) != NULL) \ + if (script_dirent->d_type != DT_DIR && \ + (script_dirent->d_type != DT_UNKNOWN || \ + !is_directory(lang_path, script_dirent))) #define RECORD_SUFFIX "-record" @@ -1412,7 +1410,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused, const char *s __maybe_unused, int unset __maybe_unused) { - struct dirent *script_next, *lang_next, script_dirent, lang_dirent; + struct dirent *script_dirent, *lang_dirent; char scripts_path[MAXPATHLEN]; DIR *scripts_dir, *lang_dir; char script_path[MAXPATHLEN]; @@ -1427,19 +1425,19 @@ static int list_available_scripts(const struct option *opt __maybe_unused, if (!scripts_dir) return -1; - for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { + for_each_lang(scripts_path, scripts_dir, lang_dirent) { snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, - lang_dirent.d_name); + lang_dirent->d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; - for_each_script(lang_path, lang_dir, script_dirent, script_next) { - script_root = get_script_root(&script_dirent, REPORT_SUFFIX); + for_each_script(lang_path, lang_dir, script_dirent) { + script_root = get_script_root(script_dirent, REPORT_SUFFIX); if (script_root) { desc = script_desc__findnew(script_root); snprintf(script_path, MAXPATHLEN, "%s/%s", - lang_path, script_dirent.d_name); + lang_path, script_dirent->d_name); read_script_info(desc, script_path); free(script_root); } @@ -1527,7 +1525,7 @@ static int check_ev_match(char *dir_name, char *scriptname, */ int find_scripts(char **scripts_array, char **scripts_path_array) { - struct dirent *script_next, *lang_next, script_dirent, lang_dirent; + struct dirent *script_dirent, *lang_dirent; char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN]; DIR *scripts_dir, *lang_dir; struct 
perf_session *session; @@ -1550,9 +1548,9 @@ int find_scripts(char **scripts_array, char **scripts_path_array) return -1; } - for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { + for_each_lang(scripts_path, scripts_dir, lang_dirent) { snprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, - lang_dirent.d_name); + lang_dirent->d_name); #ifdef NO_LIBPERL if (strstr(lang_path, "perl")) continue; @@ -1566,16 +1564,16 @@ int find_scripts(char **scripts_array, char **scripts_path_array) if (!lang_dir) continue; - for_each_script(lang_path, lang_dir, script_dirent, script_next) { + for_each_script(lang_path, lang_dir, script_dirent) { /* Skip those real time scripts: xxxtop.p[yl] */ - if (strstr(script_dirent.d_name, "top.")) + if (strstr(script_dirent->d_name, "top.")) continue; sprintf(scripts_path_array[i], "%s/%s", lang_path, - script_dirent.d_name); - temp = strchr(script_dirent.d_name, '.'); + script_dirent->d_name); + temp = strchr(script_dirent->d_name, '.'); snprintf(scripts_array[i], - (temp - script_dirent.d_name) + 1, - "%s", script_dirent.d_name); + (temp - script_dirent->d_name) + 1, + "%s", script_dirent->d_name); if (check_ev_match(lang_path, scripts_array[i], session)) @@ -1593,7 +1591,7 @@ int find_scripts(char **scripts_array, char **scripts_path_array) static char *get_script_path(const char *script_root, const char *suffix) { - struct dirent *script_next, *lang_next, script_dirent, lang_dirent; + struct dirent *script_dirent, *lang_dirent; char scripts_path[MAXPATHLEN]; char script_path[MAXPATHLEN]; DIR *scripts_dir, *lang_dir; @@ -1606,21 +1604,21 @@ static char *get_script_path(const char *script_root, const char *suffix) if (!scripts_dir) return NULL; - for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { + for_each_lang(scripts_path, scripts_dir, lang_dirent) { snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, - lang_dirent.d_name); + lang_dirent->d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; - for_each_script(lang_path, lang_dir, script_dirent, script_next) { - __script_root = get_script_root(&script_dirent, suffix); + for_each_script(lang_path, lang_dir, script_dirent) { + __script_root = get_script_root(script_dirent, suffix); if (__script_root && !strcmp(script_root, __script_root)) { free(__script_root); closedir(lang_dir); closedir(scripts_dir); snprintf(script_path, MAXPATHLEN, "%s/%s", - lang_path, script_dirent.d_name); + lang_path, script_dirent->d_name); return strdup(script_path); } free(__script_root); From 0717b5dfab93ea418a9ab9ea49177a65b957d7cf Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Mon, 25 Apr 2016 10:47:54 +0100 Subject: [PATCH 087/176] perf tools: Remove duplicate const qualifier commit 3b556bced46aa6b1873da7faa18eff235e896adc upstream. 
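[ Editor's note: "duplicate const" means the same qualifier appears twice in one declaration, as in the hunk below. A minimal compilable illustration, with names invented for this note:

#include <stdio.h>

typedef unsigned char u8;

static const u8 const cpumodes_dup[] = { 0, 1, 2, 3 }; /* redundant second const */
static const u8 cpumodes_ok[] = { 0, 1, 2, 3 };        /* the form the patch keeps */

int main(void)
{
	printf("%u %u\n", cpumodes_dup[0], cpumodes_ok[1]);
	return 0;
}

Both declarations mean the same thing; later compiler releases diagnose the first (e.g. gcc's -Wduplicate-decl-specifier). Note this differs from "const u8 *const", where each const qualifies a different type. ]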
Signed-off-by: Eric Engestrom Cc: Adrian Hunter Cc: David Ahern Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1461577678-29517-1-git-send-email-eric.engestrom@imgtec.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/thread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 0a9ae8014729..829508a21448 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -227,7 +227,7 @@ void thread__find_cpumode_addr_location(struct thread *thread, struct addr_location *al) { size_t i; - const u8 const cpumodes[] = { + const u8 cpumodes[] = { PERF_RECORD_MISC_USER, PERF_RECORD_MISC_KERNEL, PERF_RECORD_MISC_GUEST_USER, From 0ba2f070bf4556b732633591c3a38f0248a8c3ec Mon Sep 17 00:00:00 2001 From: Markus Trippelsdorf Date: Mon, 14 Dec 2015 16:44:03 +0100 Subject: [PATCH 088/176] perf annotate browser: Fix behaviour of Shift-Tab with nothing focussed commit d4913cbd05bab685e49c8174896e563b2487d054 upstream. The issue was pointed out by gcc-6's -Wmisleading-indentation. Signed-off-by: Markus Trippelsdorf Acked-by: Ingo Molnar Cc: Ben Hutchings Cc: Matt Fleming Cc: Peter Zijlstra Fixes: c97cf42219b7 ("perf top: Live TUI Annotation") Link: http://lkml.kernel.org/r/20151214154403.GB1409@x4 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/ui/browsers/annotate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index d4d7cc27252f..718bd46d47fa 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -755,11 +755,11 @@ static int annotate_browser__run(struct annotate_browser *browser, nd = browser->curr_hot; break; case K_UNTAB: - if (nd != NULL) + if (nd != NULL) { nd = rb_next(nd); if (nd == NULL) nd = rb_first(&browser->entries); - else + } else nd = browser->curr_hot; break; case K_F1: From 300d4fea8de01d33001c08268b890923f2d2e68b Mon Sep 17 00:00:00 2001 From: Markus Trippelsdorf Date: Mon, 14 Dec 2015 16:44:40 +0100 Subject: [PATCH 089/176] perf pmu: Fix misleadingly indented assignment (whitespace) commit d85ce830eef6c10d1e9617172dea4681f02b8424 upstream. One line in perf_pmu__parse_unit() is indented wrongly, leading to a warning (=> error) from gcc 6: util/pmu.c:156:3: error: statement is indented as if it were guarded by... 
[-Werror=misleading-indentation] sret = read(fd, alias->unit, UNIT_MAX_LEN); ^~~~ util/pmu.c:153:2: note: ...this 'if' clause, but it is not if (fd == -1) ^~ Signed-off-by: Markus Trippelsdorf Acked-by: Ingo Molnar Cc: Ben Hutchings Cc: Matt Fleming Cc: Peter Zijlstra Fixes: 410136f5dd96 ("tools/perf/stat: Add event unit and scale support") Link: http://lkml.kernel.org/r/20151214154440.GC1409@x4 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 6f2a0279476c..593066c68e3d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -153,7 +153,7 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n if (fd == -1) return -1; - sret = read(fd, alias->unit, UNIT_MAX_LEN); + sret = read(fd, alias->unit, UNIT_MAX_LEN); if (sret < 0) goto error; From 5a25a08ad5e68792f32da5c934fe5f50a6791a20 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 8 Apr 2016 12:04:29 -0300 Subject: [PATCH 090/176] perf dwarf: Guard !x86_64 definitions under #ifdef else clause commit 62aa0e177d278462145a29c30d3c8501ae57e200 upstream. To fix the build on Fedora Rawhide (gcc 6.0.0 20160311 (Red Hat 6.0.0-0.17): CC /tmp/build/perf/arch/x86/util/dwarf-regs.o arch/x86/util/dwarf-regs.c:66:36: error: 'x86_32_regoffset_table' defined but not used [-Werror=unused-const-variable=] static const struct pt_regs_offset x86_32_regoffset_table[] = { ^~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-fghuksc1u8ln82bof4lwcj0o@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/x86/util/dwarf-regs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c index 9223c164e545..1f86ee8fb831 100644 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ b/tools/perf/arch/x86/util/dwarf-regs.c @@ -63,6 +63,8 @@ struct pt_regs_offset { # define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} #endif +/* TODO: switching by dwarf address size */ +#ifndef __x86_64__ static const struct pt_regs_offset x86_32_regoffset_table[] = { REG_OFFSET_NAME_32("%ax", eax), REG_OFFSET_NAME_32("%cx", ecx), @@ -75,6 +77,8 @@ static const struct pt_regs_offset x86_32_regoffset_table[] = { REG_OFFSET_END, }; +#define regoffset_table x86_32_regoffset_table +#else static const struct pt_regs_offset x86_64_regoffset_table[] = { REG_OFFSET_NAME_64("%ax", rax), REG_OFFSET_NAME_64("%dx", rdx), @@ -95,11 +99,7 @@ static const struct pt_regs_offset x86_64_regoffset_table[] = { REG_OFFSET_END, }; -/* TODO: switching by dwarf address size */ -#ifdef __x86_64__ #define regoffset_table x86_64_regoffset_table -#else -#define regoffset_table x86_32_regoffset_table #endif /* Minus 1 for the ending REG_OFFSET_END */ From 1991cbd2c4862abb34d2ae86cd768fb5a3ed47f7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 30 Mar 2016 16:51:17 -0300 Subject: [PATCH 091/176] perf trace: Do not process PERF_RECORD_LOST twice commit 3ed5ca2efff70e9f589087c2013789572901112d upstream. We catch this record to provide a visual indication that events are getting lost, then call the default method to allow extra logging shared with the other tools to take place. 
This extra logging was done twice because we were continuing to the "default" clause where machine__process_event() will end up calling machine__process_lost_event() again, fix it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-wus2zlhw3qo24ye84ewu4aqw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/builtin-trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c783d8fd3a80..ebe7115c751a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1617,6 +1617,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine, color_fprintf(trace->output, PERF_COLOR_RED, "LOST %" PRIu64 " events!\n", event->lost.lost); ret = machine__process_lost_event(machine, event, sample); + break; default: ret = machine__process_event(machine, event, sample); break; From c04b8bbd64be97047d277c871f82d79f03375718 Mon Sep 17 00:00:00 2001 From: Markus Trippelsdorf Date: Mon, 14 Dec 2015 16:43:35 +0100 Subject: [PATCH 092/176] perf tests: Remove wrong semicolon in while loop in CQM test commit cf89813a5b514bff9b3b5e7eaf2090f22fba62e0 upstream. The while loop was spinning. Fix by removing a semicolon. The issue was pointed out by gcc-6's -Wmisleading-indentation. Signed-off-by: Markus Trippelsdorf Reviewed-by: Matt Fleming Acked-by: Ingo Molnar Cc: Ben Hutchings Cc: Peter Zijlstra Fixes: 035827e9f2bd ("perf tests: Add Intel CQM test") Link: http://lkml.kernel.org/r/20151214154335.GA1409@x4 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/x86/tests/intel-cqm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index d28c1b6a3b54..fa5d17af88b7 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -17,7 +17,7 @@ static pid_t spawn(void) if (pid) return pid; - while(1); + while(1) sleep(5); return 0; } From 2b4f81fb56cf7044278fc0e6c46351ac3da94148 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 8 Apr 2016 11:53:02 -0300 Subject: [PATCH 093/176] perf tools: Use readdir() instead of deprecated readdir_r() again commit 22a9f41b555673e7499b97acf3ffb07bf0af31ad upstream. The readdir() function is thread safe as long as just one thread uses a DIR, which is the case when parsing tracepoint event definitions, to avoid breaking the build with glibc-2.23.90 (upcoming 2.24), use it instead of readdir_r(). See: http://man7.org/linux/man-pages/man3/readdir.3.html "However, in modern implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external synchronization is still preferable to the use of the deprecated readdir_r(3) function." Noticed while building on a Fedora Rawhide docker container. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-wddn49r6bz6wq4ee3dxbl7lo@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/parse-events.c | 60 +++++++++++++++++----------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a35db828bd0d..38304b7e4f81 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -138,11 +138,11 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) -#define for_each_subsystem(sys_dir, sys_dirent, sys_next) \ - while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next) \ - if (sys_dirent.d_type == DT_DIR && \ - (strcmp(sys_dirent.d_name, ".")) && \ - (strcmp(sys_dirent.d_name, ".."))) +#define for_each_subsystem(sys_dir, sys_dirent) \ + while ((sys_dirent = readdir(sys_dir)) != NULL) \ + if (sys_dirent->d_type == DT_DIR && \ + (strcmp(sys_dirent->d_name, ".")) && \ + (strcmp(sys_dirent->d_name, ".."))) static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) { @@ -159,12 +159,12 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) return 0; } -#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) \ - while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ - if (evt_dirent.d_type == DT_DIR && \ - (strcmp(evt_dirent.d_name, ".")) && \ - (strcmp(evt_dirent.d_name, "..")) && \ - (!tp_event_has_id(&sys_dirent, &evt_dirent))) +#define for_each_event(sys_dirent, evt_dir, evt_dirent) \ + while ((evt_dirent = readdir(evt_dir)) != NULL) \ + if (evt_dirent->d_type == DT_DIR && \ + (strcmp(evt_dirent->d_name, ".")) && \ + (strcmp(evt_dirent->d_name, "..")) && \ + (!tp_event_has_id(sys_dirent, evt_dirent))) #define MAX_EVENT_LENGTH 512 @@ -173,7 +173,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) { struct tracepoint_path *path = NULL; DIR *sys_dir, *evt_dir; - struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; + struct dirent *sys_dirent, *evt_dirent; char id_buf[24]; int fd; u64 id; @@ -184,18 +184,18 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) if (!sys_dir) return NULL; - for_each_subsystem(sys_dir, sys_dirent, sys_next) { + for_each_subsystem(sys_dir, sys_dirent) { snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent.d_name); + sys_dirent->d_name); evt_dir = opendir(dir_path); if (!evt_dir) continue; - for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { + for_each_event(sys_dirent, evt_dir, evt_dirent) { snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, - evt_dirent.d_name); + evt_dirent->d_name); fd = open(evt_path, O_RDONLY); if (fd < 0) continue; @@ -220,9 +220,9 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) free(path); return NULL; } - strncpy(path->system, sys_dirent.d_name, + strncpy(path->system, sys_dirent->d_name, MAX_EVENT_LENGTH); - strncpy(path->name, evt_dirent.d_name, + strncpy(path->name, evt_dirent->d_name, MAX_EVENT_LENGTH); return path; } @@ -1629,7 +1629,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only) { DIR *sys_dir, *evt_dir; - struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; + struct dirent *sys_dirent, *evt_dirent; char evt_path[MAXPATHLEN]; char 
dir_path[MAXPATHLEN]; char **evt_list = NULL; @@ -1647,20 +1647,20 @@ restart: goto out_close_sys_dir; } - for_each_subsystem(sys_dir, sys_dirent, sys_next) { + for_each_subsystem(sys_dir, sys_dirent) { if (subsys_glob != NULL && - !strglobmatch(sys_dirent.d_name, subsys_glob)) + !strglobmatch(sys_dirent->d_name, subsys_glob)) continue; snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent.d_name); + sys_dirent->d_name); evt_dir = opendir(dir_path); if (!evt_dir) continue; - for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { + for_each_event(sys_dirent, evt_dir, evt_dirent) { if (event_glob != NULL && - !strglobmatch(evt_dirent.d_name, event_glob)) + !strglobmatch(evt_dirent->d_name, event_glob)) continue; if (!evt_num_known) { @@ -1669,7 +1669,7 @@ restart: } snprintf(evt_path, MAXPATHLEN, "%s:%s", - sys_dirent.d_name, evt_dirent.d_name); + sys_dirent->d_name, evt_dirent->d_name); evt_list[evt_i] = strdup(evt_path); if (evt_list[evt_i] == NULL) @@ -1722,7 +1722,7 @@ out_close_sys_dir: int is_valid_tracepoint(const char *event_string) { DIR *sys_dir, *evt_dir; - struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; + struct dirent *sys_dirent, *evt_dirent; char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; @@ -1730,17 +1730,17 @@ int is_valid_tracepoint(const char *event_string) if (!sys_dir) return 0; - for_each_subsystem(sys_dir, sys_dirent, sys_next) { + for_each_subsystem(sys_dir, sys_dirent) { snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent.d_name); + sys_dirent->d_name); evt_dir = opendir(dir_path); if (!evt_dir) continue; - for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { + for_each_event(sys_dirent, evt_dir, evt_dirent) { snprintf(evt_path, MAXPATHLEN, "%s:%s", - sys_dirent.d_name, evt_dirent.d_name); + sys_dirent->d_name, evt_dirent->d_name); if (!strcmp(evt_path, event_string)) { closedir(evt_dir); closedir(sys_dir); From e20894aad9e875e4aec2c73e7badcd440f02cde4 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Mar 2017 11:49:12 +0800 Subject: [PATCH 094/176] md: fix incorrect use of lexx_to_cpu in does_sb_need_changing commit 1345921393ba23b60d3fcf15933e699232ad25ae upstream. The sb->layout is of type __le32, so we should use le32_to_cpu. Signed-off-by: Jason Yan Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index eff554a12fb4..060a79a7ef4e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2273,7 +2273,7 @@ static bool does_sb_need_changing(struct mddev *mddev) /* Check if any mddev parameters have changed */ if ((mddev->dev_sectors != le64_to_cpu(sb->size)) || (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) || - (mddev->layout != le64_to_cpu(sb->layout)) || + (mddev->layout != le32_to_cpu(sb->layout)) || (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) || (mddev->chunk_sectors != le32_to_cpu(sb->chunksize))) return true; From ff62c63109d9a10fa9cb34bcf933fc2ce89dfdaa Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Mar 2017 11:27:23 +0800 Subject: [PATCH 095/176] md: fix super_offset endianness in super_1_rdev_size_change commit 3fb632e40d7667d8bedfabc28850ac06d5493f54 upstream. The sb->super_offset should be little-endian (the on-disk field is __le64), but the rdev->sb_start is in host byte order, so fix this by adding cpu_to_le64.
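[ Editor's note: both md fixes above enforce the rule that __le32/__le64 on-disk fields must pass through a width-matching conversion helper. A user-space analogue for this note, using the glibc <endian.h> helpers in place of the kernel's cpu_to_le64()/le32_to_cpu() (the struct and values are invented):

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct ondisk_sb {
	uint64_t super_offset;	/* stored little-endian on disk */
	uint32_t layout;	/* stored little-endian on disk */
};

int main(void)
{
	struct ondisk_sb sb;
	uint64_t sb_start = 4096;	/* host byte order */

	sb.super_offset = htole64(sb_start);	/* like cpu_to_le64() */
	sb.layout = htole32(2);			/* like cpu_to_le32() */

	/* Reads convert back with the helper matching the field width. */
	printf("offset=%llu layout=%u\n",
	       (unsigned long long)le64toh(sb.super_offset),
	       (unsigned)le32toh(sb.layout));
	return 0;
}

In the kernel, the __le32/__le64 annotations let sparse catch both kinds of bug fixed above: a missing conversion (patch 095) and a helper of the wrong width (patch 094). ]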
Signed-off-by: Jason Yan Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 060a79a7ef4e..0a856cb181e9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1866,7 +1866,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) } sb = page_address(rdev->sb_page); sb->data_size = cpu_to_le64(num_sectors); - sb->super_offset = rdev->sb_start; + sb->super_offset = cpu_to_le64(rdev->sb_start); sb->sb_csum = calc_sb_1_csum(sb); md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, rdev->sb_page); From 627f3abeeac1354a4b15eaf0f22b8ffb901029cb Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 25 Jan 2016 14:01:53 -0800 Subject: [PATCH 096/176] tcp: fix tcp_mark_head_lost to check skb len before fragmenting commit d88270eef4b56bd7973841dd1fed387ccfa83709 upstream. This commit fixes a corner case in tcp_mark_head_lost() which was causing the WARN_ON(len > skb->len) in tcp_fragment() to fire. tcp_mark_head_lost() was assuming that if a packet has tcp_skb_pcount(skb) of N, then it's safe to fragment off a prefix of M*mss bytes, for any M < N. But with the tricky way TCP pcounts are maintained, this is not always true. For example, suppose the sender sends 4 1-byte packets and has the last 3 packets sacked. It will merge the last 3 packets in the write queue into an skb with pcount = 3 and len = 3 bytes. If another recovery happens after a sack reneging event, tcp_mark_head_lost() may attempt to split the skb assuming it has more than 2*MSS bytes. This sounds very counterintuitive, but as the commit description for the related commit c0638c247f55 ("tcp: don't fragment SACKed skbs in tcp_mark_head_lost()") notes, this is because tcp_shifted_skb() coalesces adjacent regions of SACKed skbs, and when doing this it preserves the sum of their packet counts in order to reflect the real-world dynamics on the wire. The c0638c247f55 commit tried to avoid problems by not fragmenting SACKed skbs, since SACKed skbs are where the non-proportionality between pcount and skb->len/mss is known to be possible. However, that commit did not handle the case where during a reneging event one of these weird SACKed skbs becomes an un-SACKed skb, which tcp_mark_head_lost() can then try to fragment. The fix is to simply mark the entire skb lost when this happens. This makes the recovery slightly more aggressive in such corner cases before we detect reordering. But once we detect reordering this code path is by-passed because FACK is disabled. Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Cc: Vinson Lee Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 87791f803627..8f13b2eaabf8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2165,8 +2165,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int cnt, oldcnt; - int err; + int cnt, oldcnt, lost; unsigned int mss; /* Use SACK to deduce losses of new sequences sent during recovery */ const u32 loss_high = tcp_is_sack(tp) ? 
tp->snd_nxt : tp->high_seq; @@ -2206,9 +2205,10 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) break; mss = tcp_skb_mss(skb); - err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, - mss, GFP_ATOMIC); - if (err < 0) + /* If needed, chop off the prefix to mark as lost. */ + lost = (packets - oldcnt) * mss; + if (lost < skb->len && + tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0) break; cnt = packets; } From 370d06c6b075f3617e957fa697065107df3962b5 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Sat, 29 Apr 2017 13:03:44 +0100 Subject: [PATCH 097/176] staging: vt6656: vnt_start Fix missing call to vnt_key_init_table. commit dc32190f2cd41c7dba25363ea7d618d4f5172b4e upstream. The key table is not initialized correctly without this call. Signed-off-by: Malcolm Priestley Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/main_usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index 01e642db311e..f35ee85f61b5 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -529,6 +529,9 @@ static int vnt_start(struct ieee80211_hw *hw) goto free_all; } + if (vnt_key_init_table(priv)) + goto free_all; + priv->int_interval = 1; /* bInterval is set to 1 */ vnt_int_start_interrupt(priv); From cb2bc13d0b556427cb99524aea274b8ddc941130 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 16 Jun 2017 19:35:34 +0100 Subject: [PATCH 098/176] staging: comedi: fix clean-up of comedi_class in comedi_init() commit a9332e9ad09c2644c99058fcf6ae2f355e93ce74 upstream. There is a clean-up bug in the core comedi module initialization function, `comedi_init()`. If the `comedi_num_legacy_minors` module parameter is non-zero (and valid), it creates that many "legacy" devices and registers them in SysFS. A failure causes the function to clean up and return an error. Unfortunately, it fails to destroy the "comedi" class that was created earlier. Fix it by adding a call to `class_destroy(comedi_class)` at the appropriate place in the clean-up sequence. Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedi_fops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index 7b4af519e17e..8fed55342b0f 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -2911,6 +2911,7 @@ static int __init comedi_init(void) dev = comedi_alloc_board_minor(NULL); if (IS_ERR(dev)) { comedi_cleanup_board_minors(); + class_destroy(comedi_class); cdev_del(&comedi_cdev); unregister_chrdev_region(MKDEV(COMEDI_MAJOR, 0), COMEDI_NUM_MINORS); From ad5a88c54cd299f3bb2bf53810dd5bb5805369c3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 23 Jun 2017 01:08:22 -0400 Subject: [PATCH 099/176] ext4: check return value of kstrtoull correctly in reserved_clusters_store commit 1ea1516fbbab2b30bf98c534ecaacba579a35208 upstream. kstrtoull returns 0 on success; however, in reserved_clusters_store we will return -EINVAL if kstrtoull returns 0, which makes us fail to update the reserved_clusters value through sysfs.
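[ Editor's note: the bug is an inverted test of the kernel's 0-on-success convention; the fixed check is "if (ret || ...)". A user-space sketch of the same convention for this note, where parse_u64() is a made-up helper that mimics kstrtoull():

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Returns 0 on success, -EINVAL on malformed input, as kstrtoull() does. */
static int parse_u64(const char *s, uint64_t *val)
{
	char *end;

	errno = 0;
	*val = strtoull(s, &end, 0);
	if (errno || end == s || *end != '\0')
		return -EINVAL;
	return 0;
}

int main(void)
{
	uint64_t val, clusters = 1000;	/* illustrative limit */
	int ret = parse_u64("128", &val);

	/* Correct: reject on a non-zero (error) return, not on zero. */
	if (ret || val >= clusters)
		return EXIT_FAILURE;
	printf("accepted %llu\n", (unsigned long long)val);
	return EXIT_SUCCESS;
}
]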
Fixes: 76d33bca5581b1dd5c3157fa168db849a784ada4 Signed-off-by: Chao Yu Signed-off-by: Miao Xie Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 5d09ea585840..c2ee23acf359 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -100,7 +100,7 @@ static ssize_t reserved_clusters_store(struct ext4_attr *a, int ret; ret = kstrtoull(skip_spaces(buf), 0, &val); - if (!ret || val >= clusters) + if (ret || val >= clusters) return -EINVAL; atomic64_set(&sbi->s_resv_clusters, val); From 646b65808b0da1fb2f72daf4dabdd9bc8e398f49 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 4 Jul 2017 19:04:23 -0400 Subject: [PATCH 100/176] x86/mm/pat: Don't report PAT on CPUs that don't support it commit 99c13b8c8896d7bcb92753bf0c63a8de4326e78d upstream. The pat_enabled() logic is broken on CPUs which do not support PAT and where the initialization code fails to call pat_init(). Due to that the enabled flag stays true and pat_enabled() returns true wrongfully. As a consequence the mappings, e.g. for Xorg, are set up with the wrong caching mode and the required MTRR setups are omitted. To cure this the following changes are required: 1) Make pat_enabled() return true only if PAT initialization was invoked and successful. 2) Invoke init_cache_modes() unconditionally in setup_arch() and remove the extra callsites in pat_disable() and the pat disabled code path in pat_init(). Also rename __pat_enabled to pat_disabled to reflect the real purpose of this variable. Fixes: 9cd25aac1f44 ("x86/mm/pat: Emulate PAT when it is disabled") Signed-off-by: Mikulas Patocka Signed-off-by: Thomas Gleixner Cc: Bernhard Held Cc: Denys Vlasenko Cc: Peter Zijlstra Cc: Brian Gerst Cc: "Luis R. Rodriguez" Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Andrew Morton Cc: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1707041749300.3456@file01.intranet.prod.int.rdu2.redhat.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pat.h | 1 + arch/x86/kernel/setup.c | 7 +++++++ arch/x86/mm/pat.c | 28 ++++++++++++---------------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 0b1ff4c1c14e..fffb2794dd89 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -7,6 +7,7 @@ bool pat_enabled(void); void pat_disable(const char *reason); extern void pat_init(void); +extern void init_cache_modes(void); extern int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d2bbe343fda7..e67b834279b2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1048,6 +1048,13 @@ void __init setup_arch(char **cmdline_p) if (mtrr_trim_uncached_memory(max_pfn)) max_pfn = e820_end_of_ram_pfn(); + /* + * This call is required when the CPU does not support PAT. If + * mtrr_bp_init() invoked it already via pat_init() the call has no + * effect. 
+ */ + init_cache_modes(); + #ifdef CONFIG_X86_32 /* max_low_pfn get updated here */ find_low_pfn_range(); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 6ad687d104ca..3f1bb4f93a5a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -36,14 +36,14 @@ #undef pr_fmt #define pr_fmt(fmt) "" fmt -static bool boot_cpu_done; - -static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT); -static void init_cache_modes(void); +static bool __read_mostly boot_cpu_done; +static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); +static bool __read_mostly pat_initialized; +static bool __read_mostly init_cm_done; void pat_disable(const char *reason) { - if (!__pat_enabled) + if (pat_disabled) return; if (boot_cpu_done) { @@ -51,10 +51,8 @@ void pat_disable(const char *reason) return; } - __pat_enabled = 0; + pat_disabled = true; pr_info("x86/PAT: %s\n", reason); - - init_cache_modes(); } static int __init nopat(char *str) @@ -66,7 +64,7 @@ early_param("nopat", nopat); bool pat_enabled(void) { - return !!__pat_enabled; + return pat_initialized; } EXPORT_SYMBOL_GPL(pat_enabled); @@ -204,6 +202,8 @@ static void __init_cache_modes(u64 pat) update_cache_mode_entry(i, cache); } pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg); + + init_cm_done = true; } #define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) @@ -224,6 +224,7 @@ static void pat_bsp_init(u64 pat) } wrmsrl(MSR_IA32_CR_PAT, pat); + pat_initialized = true; __init_cache_modes(pat); } @@ -241,10 +242,9 @@ static void pat_ap_init(u64 pat) wrmsrl(MSR_IA32_CR_PAT, pat); } -static void init_cache_modes(void) +void init_cache_modes(void) { u64 pat = 0; - static int init_cm_done; if (init_cm_done) return; @@ -286,8 +286,6 @@ static void init_cache_modes(void) } __init_cache_modes(pat); - - init_cm_done = 1; } /** @@ -305,10 +303,8 @@ void pat_init(void) u64 pat; struct cpuinfo_x86 *c = &boot_cpu_data; - if (!pat_enabled()) { - init_cache_modes(); + if (pat_disabled) return; - } if ((c->x86_vendor == X86_VENDOR_INTEL) && (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || From 52b9815d37fca7d87fc1ff44b4916b4804c8383d Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Sat, 2 Jul 2016 20:27:46 -0300 Subject: [PATCH 101/176] saa7134: fix warm Medion 7134 EEPROM read commit 5a91206ff0d0548939f3e85a65fb76b400fb0e89 upstream. When the saa7134 module driving a Medion 7134 card is reloaded, reads of this card's EEPROM (required for automatic detection of the tuner model) will be corrupted due to the I2C gate in the DVB-T demod being left closed. This sometimes also happens on the first saa7134 module load after a warm reboot. Fix this by opening this I2C gate before doing the EEPROM read during i2c initialization. Signed-off-by: Maciej S. 
Szmigiero Signed-off-by: Mauro Carvalho Chehab Cc: Oliver Hartkopp Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/saa7134/saa7134-i2c.c | 31 +++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/media/pci/saa7134/saa7134-i2c.c b/drivers/media/pci/saa7134/saa7134-i2c.c index 8ef6399d794f..bc957528f69f 100644 --- a/drivers/media/pci/saa7134/saa7134-i2c.c +++ b/drivers/media/pci/saa7134/saa7134-i2c.c @@ -355,12 +355,43 @@ static struct i2c_client saa7134_client_template = { /* ----------------------------------------------------------- */ +/* On Medion 7134 reading EEPROM needs DVB-T demod i2c gate open */ +static void saa7134_i2c_eeprom_md7134_gate(struct saa7134_dev *dev) +{ + u8 subaddr = 0x7, dmdregval; + u8 data[2]; + int ret; + struct i2c_msg i2cgatemsg_r[] = { {.addr = 0x08, .flags = 0, + .buf = &subaddr, .len = 1}, + {.addr = 0x08, + .flags = I2C_M_RD, + .buf = &dmdregval, .len = 1} + }; + struct i2c_msg i2cgatemsg_w[] = { {.addr = 0x08, .flags = 0, + .buf = data, .len = 2} }; + + ret = i2c_transfer(&dev->i2c_adap, i2cgatemsg_r, 2); + if ((ret == 2) && (dmdregval & 0x2)) { + pr_debug("%s: DVB-T demod i2c gate was left closed\n", + dev->name); + + data[0] = subaddr; + data[1] = (dmdregval & ~0x2); + if (i2c_transfer(&dev->i2c_adap, i2cgatemsg_w, 1) != 1) + pr_err("%s: EEPROM i2c gate open failure\n", + dev->name); + } +} + static int saa7134_i2c_eeprom(struct saa7134_dev *dev, unsigned char *eedata, int len) { unsigned char buf; int i,err; + if (dev->board == SAA7134_BOARD_MD7134) + saa7134_i2c_eeprom_md7134_gate(dev); + dev->i2c_client.addr = 0xa0 >> 1; buf = 0; if (1 != (err = i2c_master_send(&dev->i2c_client,&buf,1))) { From 9e0499d7d30c6a91eb381e604c31519af88737d7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 15 Jul 2017 11:58:03 +0200 Subject: [PATCH 102/176] Linux 4.4.77 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 902ab134446e..bf49a61d02e2 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 76 +SUBLEVEL = 77 EXTRAVERSION = NAME = Blurry Fish Butt From bab9c2fbe48e9ebe6af7fa19ade1bf558dcc2eac Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Thu, 25 May 2017 15:04:04 +0100 Subject: [PATCH 103/176] UPSTREAM: cpufreq: schedutil: Avoid indented labels Switch to the more common practice of writing labels. Suggested-by: Peter Zijlstra Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki (cherry picked from commit 8e2ddb03643eb9d0bc4926946d7ce0d308eef0a5) Signed-off-by: Chris Redpath Change-Id: Ida75c99cf3dff5cae24d3866454c83bcdb3385b9 --- kernel/sched/cpufreq_schedutil.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 75bfbb336722..ff9da5144d8a 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -623,13 +623,13 @@ static int sugov_init(struct cpufreq_policy *policy) if (ret) goto fail; - out: +out: mutex_unlock(&global_tunables_lock); cpufreq_enable_fast_switch(policy); return 0; - fail: +fail: policy->governor_data = NULL; sugov_tunables_free(tunables); From ceed1eb2b409d74ca694cbf4cdee536476a280c8 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Thu, 25 May 2017 15:06:27 +0100 Subject: [PATCH 104/176] UPSTREAM: cpufreq: schedutil: enable fast switch earlier The fast_switch_enabled flag will be used by both sugov_policy_alloc() and sugov_policy_free() with a later patch. 
Prepare for that by moving the calls to enable and disable it to the beginning of sugov_init() and end of sugov_exit(). Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki (cherry picked from commit 4a71ce4348bb61740d411822357061f8bf870f4c) Signed-off-by: Chris Redpath Change-Id: Ia174f423ca02d59360657ac2e77a5098ce5cf99c --- kernel/sched/cpufreq_schedutil.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index ff9da5144d8a..de24e1983614 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -578,9 +578,13 @@ static int sugov_init(struct cpufreq_policy *policy) if (policy->governor_data) return -EBUSY; + cpufreq_enable_fast_switch(policy); + sg_policy = sugov_policy_alloc(policy); - if (!sg_policy) - return -ENOMEM; + if (!sg_policy) { + ret = -ENOMEM; + goto disable_fast_switch; + } ret = sugov_kthread_create(sg_policy); if (ret) @@ -625,8 +629,6 @@ static int sugov_init(struct cpufreq_policy *policy) out: mutex_unlock(&global_tunables_lock); - - cpufreq_enable_fast_switch(policy); return 0; fail: @@ -640,6 +642,10 @@ free_sg_policy: mutex_unlock(&global_tunables_lock); sugov_policy_free(sg_policy); + +disable_fast_switch: + cpufreq_disable_fast_switch(policy); + pr_err("initialization failed (error %d)\n", ret); return ret; } @@ -650,8 +656,6 @@ static int sugov_exit(struct cpufreq_policy *policy) struct sugov_tunables *tunables = sg_policy->tunables; unsigned int count; - cpufreq_disable_fast_switch(policy); - mutex_lock(&global_tunables_lock); count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); @@ -664,6 +668,7 @@ static int sugov_exit(struct cpufreq_policy *policy) sugov_kthread_stop(sg_policy); sugov_policy_free(sg_policy); + cpufreq_disable_fast_switch(policy); return 0; } From d9e7d036e7897f0c523fafe596316248783f61f7 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Thu, 25 May 2017 15:10:26 +0100 Subject: [PATCH 105/176] UPSTREAM: cpufreq: schedutil: irq-work and mutex are only used in slow path Execute the irq-work specific initialization/exit code only when the fast path isn't available. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki (cherry picked from commit 21ef57297b15a49b0c4dd4e7135c1a08e9a29a1c) Signed-off-by: Chris Redpath Change-Id: Icfd68f455ef71846d799fcd2d8ec6aa1bf59573e --- kernel/sched/cpufreq_schedutil.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index de24e1983614..8de497e64a13 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -489,15 +489,12 @@ static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) return NULL; sg_policy->policy = policy; - init_irq_work(&sg_policy->irq_work, sugov_irq_work); - mutex_init(&sg_policy->work_lock); raw_spin_lock_init(&sg_policy->update_lock); return sg_policy; } static void sugov_policy_free(struct sugov_policy *sg_policy) { - mutex_destroy(&sg_policy->work_lock); kfree(sg_policy); } @@ -531,6 +528,9 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) sg_policy->thread = thread; kthread_bind_mask(thread, policy->related_cpus); + init_irq_work(&sg_policy->irq_work, sugov_irq_work); + mutex_init(&sg_policy->work_lock); + wake_up_process(thread); return 0; @@ -544,6 +544,7 @@ static void sugov_kthread_stop(struct sugov_policy *sg_policy) flush_kthread_worker(&sg_policy->worker); kthread_stop(sg_policy->thread); + mutex_destroy(&sg_policy->work_lock); } static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) @@ -719,9 +720,10 @@ static int sugov_stop(struct cpufreq_policy *policy) synchronize_sched(); - irq_work_sync(&sg_policy->irq_work); - kthread_cancel_work_sync(&sg_policy->work); - + if (!policy->fast_switch_enabled) { + irq_work_sync(&sg_policy->irq_work); + kthread_cancel_work_sync(&sg_policy->work); + } return 0; } From 69fc75780d1521448ba5288545a5d522a43ad062 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 24 Nov 2016 13:51:11 +0530 Subject: [PATCH 106/176] UPSTREAM: cpufreq: schedutil: Rectify comment in sugov_irq_work() function This patch rectifies a comment present in sugov_irq_work() function to follow proper grammar. Suggested-by: Ingo Molnar Signed-off-by: Viresh Kumar Acked-by: Ingo Molnar Signed-off-by: Rafael J. Wysocki (cherry picked from commit d06e622d3d9206e6a2cc45a0f9a3256da8773ff4) Signed-off-by: Chris Redpath Change-Id: Iaf996445d411725639d511432cc424086892a146 --- kernel/sched/cpufreq_schedutil.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 8de497e64a13..b76286a8e118 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -371,15 +371,15 @@ static void sugov_irq_work(struct irq_work *irq_work) sg_policy = container_of(irq_work, struct sugov_policy, irq_work); /* - * For Real Time and Deadline tasks, schedutil governor shoots the - * frequency to maximum. And special care must be taken to ensure that - * this kthread doesn't result in that. + * For RT and deadline tasks, the schedutil governor shoots the + * frequency to maximum. Special care must be taken to ensure that this + * kthread doesn't result in the same behavior. * * This is (mostly) guaranteed by the work_in_progress flag. The flag is - * updated only at the end of the sugov_work() and before that schedutil - * rejects all other frequency scaling requests. + * updated only at the end of the sugov_work() function and before that + * the schedutil governor rejects all other frequency scaling requests. 
* - * Though there is a very rare case where the RT thread yields right + * There is a very rare case though, where the RT thread yields right * after the work_in_progress flag is cleared. The effects of that are * neglected for now. */ From 0646dd35928e446d8d2f8075a349b3abb521fa06 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Thu, 25 May 2017 15:12:38 +0100 Subject: [PATCH 107/176] UPSTREAM: cpufreq: schedutil: move cached_raw_freq to struct sugov_policy cached_raw_freq applies to the entire cpufreq policy and not individual CPUs. Apart from wasting per-cpu memory, it is actually wrong to keep it in struct sugov_cpu as we may end up comparing next_freq with a stale cached_raw_freq of a random CPU. Move cached_raw_freq to struct sugov_policy. Fixes: 5cbea46984d6 (cpufreq: schedutil: map raw required frequency to driver frequency) Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki (cherry-picked from 6c4f0fa643cb9e775dcc976e3db00d649468ff1d) Signed-off-by: Chris Redpath Change-Id: Ie91420f710819b383947f9031da9be1f3bb7f636 --- kernel/sched/cpufreq_schedutil.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index b76286a8e118..a8ca00a02ded 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -47,6 +47,7 @@ struct sugov_policy { s64 up_rate_delay_ns; s64 down_rate_delay_ns; unsigned int next_freq; + unsigned int cached_raw_freq; /* The next fields are only needed if fast switch cannot be used. */ struct irq_work irq_work; @@ -63,7 +64,6 @@ struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; - unsigned int cached_raw_freq; unsigned long iowait_boost; unsigned long iowait_boost_max; u64 last_update; @@ -180,9 +180,9 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, freq = (freq + (freq >> 2)) * util / max; - if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) + if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX) return sg_policy->next_freq; - sg_cpu->cached_raw_freq = freq; + sg_policy->cached_raw_freq = freq; return cpufreq_driver_resolve_freq(policy, freq); } @@ -687,6 +687,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->next_freq = UINT_MAX; sg_policy->work_in_progress = false; sg_policy->need_freq_update = false; + sg_policy->cached_raw_freq = 0; for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); @@ -697,7 +698,6 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->max = 0; sg_cpu->flags = SCHED_CPUFREQ_DL; sg_cpu->last_update = 0; - sg_cpu->cached_raw_freq = 0; sg_cpu->iowait_boost = 0; sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, From cbaccedead5cc87296194c6a668ea38de8d7c17c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 2 Mar 2017 14:03:21 +0530 Subject: [PATCH 108/176] UPSTREAM: cpufreq: schedutil: Pass sg_policy to get_next_freq() get_next_freq() uses sg_cpu only to get sg_policy, which the callers of get_next_freq() already have. Pass sg_policy instead of sg_cpu to get_next_freq(), to make it more efficient. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki (cherry picked from commit 655cb1ebff4b7918fc560502c3297af2d3c7d114) Signed-off-by: Chris Redpath Change-Id: Ia210058da32930a6cdb18258aa679cd1a44a747e --- kernel/sched/cpufreq_schedutil.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index a8ca00a02ded..9bf525a960f3 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -150,7 +150,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, /** * get_next_freq - Compute a new frequency for a given cpufreq policy. - * @sg_cpu: schedutil cpu object to compute the new frequency for. + * @sg_policy: schedutil policy object to compute the new frequency for. * @util: Current CPU utilization. * @max: CPU capacity. * @@ -170,10 +170,9 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, * next_freq (as calculated above) is returned, subject to policy min/max and * cpufreq driver limitations. */ -static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, - unsigned long max) +static unsigned int get_next_freq(struct sugov_policy *sg_policy, + unsigned long util, unsigned long max) { - struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; unsigned int freq = arch_scale_freq_invariant() ? policy->cpuinfo.max_freq : policy->cur; @@ -268,7 +267,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, } else { sugov_get_util(&util, &max, time); sugov_iowait_boost(sg_cpu, &util, &max); - next_f = get_next_freq(sg_cpu, util, max); + next_f = get_next_freq(sg_policy, util, max); } sugov_update_commit(sg_policy, time, next_f); } @@ -322,7 +321,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, sugov_iowait_boost(j_sg_cpu, &util, &max); } - return get_next_freq(sg_cpu, util, max); + return get_next_freq(sg_policy, util, max); } static void sugov_update_shared(struct update_util_data *hook, u64 time, From 7378c38a80fc434f8d6cf61167d843ca388468b5 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Thu, 25 May 2017 15:15:33 +0100 Subject: [PATCH 109/176] UPSTREAM: cpufreq: schedutil: Fix per-CPU structure initialization in sugov_start() sugov_start() only initializes struct sugov_cpu per-CPU structures for shared policies, but it should do that for single-CPU policies too. That in particular makes the IO-wait boost mechanism work in the cases when cpufreq policies correspond to individual CPUs. Fixes: 21ca6d2c52f8 (cpufreq: schedutil: Add iowait boosting) Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar Cc: 4.9+ # 4.9+ (cherry picked from commit 4296f23ed49a15d36949458adcc66ff993dee2a8) (we use SCHED_CPUFREQ_DL instead of SCHED_CPUFREQ_RT in cpu->flags) Signed-off-by: Chris Redpath Change-Id: I5b837a0ee4432115d85caa1a9808ea61e1e1b07f --- kernel/sched/cpufreq_schedutil.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 9bf525a960f3..3a9283640f84 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -691,20 +691,14 @@ static int sugov_start(struct cpufreq_policy *policy) for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); + memset(sg_cpu, 0, sizeof(*sg_cpu)); sg_cpu->sg_policy = sg_policy; - if (policy_is_shared(policy)) { - sg_cpu->util = 0; - sg_cpu->max = 0; - sg_cpu->flags = SCHED_CPUFREQ_DL; - sg_cpu->last_update = 0; - sg_cpu->iowait_boost = 0; - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_shared); - } else { - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_single); - } + sg_cpu->flags = SCHED_CPUFREQ_DL; + sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; + cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, + policy_is_shared(policy) ? + sugov_update_shared : + sugov_update_single); } return 0; } From a8a200d83b1ff334a6c607b8fda848551648689b Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Thu, 25 May 2017 15:22:59 +0100 Subject: [PATCH 110/176] UPSTREAM: cpufreq: schedutil: Refactor sugov_next_freq_shared() The loop in sugov_next_freq_shared() contains an if block to skip the loop for the current CPU. This turns out to be an unnecessary conditional in the scheduler's hot-path for every CPU in the policy. It would be better to drop the conditional and make the loop treat all the CPUs in the same way. That would eliminate the need of calling sugov_iowait_boost() at the top of the routine. To keep the code optimized to return early if the current CPU has RT/DL flags set, move the flags check to sugov_update_shared() instead in order to avoid the function call entirely. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki (cherry picked from commit cba1dfb57b94c234728b689d9b00d4267fa1a879) (modified for SCHED_CPUFREQ_DL vs SCHED_CPUFREQ_RT) Signed-off-by: Chris Redpath Change-Id: Ie046fdc8eda46821356750edd0fb6f7d077af363 --- kernel/sched/cpufreq_schedutil.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 3a9283640f84..e1a2ed98991f 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -272,30 +272,19 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, sugov_update_commit(sg_policy, time, next_f); } -static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, - unsigned long util, unsigned long max, - unsigned int flags) +static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - unsigned int max_f = policy->cpuinfo.max_freq; u64 last_freq_update_time = sg_policy->last_freq_update_time; + unsigned long util = 0, max = 1; unsigned int j; - if (flags & SCHED_CPUFREQ_DL) - return max_f; - - sugov_iowait_boost(sg_cpu, &util, &max); - for_each_cpu(j, policy->cpus) { - struct sugov_cpu *j_sg_cpu; + struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); unsigned long j_util, j_max; s64 delta_ns; - if (j == smp_processor_id()) - continue; - - j_sg_cpu = &per_cpu(sugov_cpu, j); /* * If the CPU utilization was last updated before the previous * frequency update and the time elapsed between the last update @@ -309,7 +298,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, continue; } if (j_sg_cpu->flags & SCHED_CPUFREQ_DL) - return max_f; + return policy->cpuinfo.max_freq; j_util = j_sg_cpu->util; j_max = j_sg_cpu->max; @@ -344,7 +333,11 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { - next_f = sugov_next_freq_shared(sg_cpu, util, max, flags); + if (flags & SCHED_CPUFREQ_DL) + next_f = sg_policy->policy->cpuinfo.max_freq; + else + next_f = sugov_next_freq_shared(sg_cpu); + sugov_update_commit(sg_policy, time, next_f); } From 537d19226a8c4d36cc376f6b177576dff989a97d Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Thu, 25 May 2017 15:24:58 +0100 Subject: [PATCH 111/176] UPSTREAM: cpufreq: schedutil: Avoid reducing frequency of busy CPUs prematurely The way the schedutil governor uses the PELT metric causes it to underestimate the CPU utilization in some cases. That can be easily demonstrated by running kernel compilation on a Sandy Bridge Intel processor, running turbostat in parallel with it and looking at the values written to the MSR_IA32_PERF_CTL register. Namely, the expected result would be that when all CPUs were 100% busy, all of them would be requested to run in the maximum P-state, but observation shows that this clearly isn't the case. The CPUs run in the maximum P-state for a while and then are requested to run slower and go back to the maximum P-state after a while again. That causes the actual frequency of the processor to visibly oscillate below the sustainable maximum in a jittery fashion which clearly is not desirable. That has been attributed to CPU utilization metric updates on task migration that cause the total utilization value for the CPU to be reduced by the utilization of the migrated task. 
If that happens, the schedutil governor may see a CPU utilization reduction and will attempt to reduce the CPU frequency accordingly right away. That may be premature, though, for example if the system is generally busy and there are other runnable tasks waiting to be run on that CPU already. This is unlikely to be an issue on systems where cpufreq policies are shared between multiple CPUs, because in those cases the policy utilization is computed as the maximum of the CPU utilization values over the whole policy and if that turns out to be low, reducing the frequency for the policy most likely is a good idea anyway. On systems with one CPU per policy, however, it may affect performance adversely and even lead to increased energy consumption in some cases. On those systems it may be addressed by taking another utilization metric into consideration, like whether or not the CPU whose frequency is about to be reduced has been idle recently, because if that's not the case, the CPU is likely to be busy in the near future and its frequency should not be reduced. To that end, use the counter of idle calls in the timekeeping code. Namely, make the schedutil governor look at that counter for the current CPU every time before its frequency is about to be reduced. If the counter has not changed since the previous iteration of the governor computations for that CPU, the CPU has been busy for all that time and its frequency should not be decreased, so if the new frequency would be lower than the one set previously, the governor will skip the frequency update. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar Reviewed-by: Joel Fernandes (cherry picked from commit b7eaf1aab9f8bd2e49fceed77ebc66c1b5800718) (simple CPUFREQ_RT_DL vs CPUFREQ_DL usage conflicts) Signed-off-by: Chris Redpath Change-Id: I531ec02c052944ee07a904dc2a25c59948ee762b --- include/linux/tick.h | 1 + kernel/sched/cpufreq_schedutil.c | 27 +++++++++++++++++++++++++++ kernel/time/tick-sched.c | 12 ++++++++++++ 3 files changed, 40 insertions(+) diff --git a/include/linux/tick.h b/include/linux/tick.h index e312219ff823..af5ac7f91a3b 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -103,6 +103,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); +extern unsigned long tick_nohz_get_idle_calls(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index e1a2ed98991f..49922187b57c 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -72,6 +72,11 @@ struct sugov_cpu { unsigned long util; unsigned long max; unsigned int flags; + + /* The field below is for single-CPU policies only. 
*/ +#ifdef CONFIG_NO_HZ_COMMON + unsigned long saved_idle_calls; +#endif }; static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); @@ -247,6 +252,19 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, sg_cpu->iowait_boost >>= 1; } +#ifdef CONFIG_NO_HZ_COMMON +static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) +{ + unsigned long idle_calls = tick_nohz_get_idle_calls(); + bool ret = idle_calls == sg_cpu->saved_idle_calls; + + sg_cpu->saved_idle_calls = idle_calls; + return ret; +} +#else +static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } +#endif /* CONFIG_NO_HZ_COMMON */ + static void sugov_update_single(struct update_util_data *hook, u64 time, unsigned int flags) { @@ -255,6 +273,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, struct cpufreq_policy *policy = sg_policy->policy; unsigned long util, max; unsigned int next_f; + bool busy; sugov_set_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; @@ -262,12 +281,20 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, if (!sugov_should_update_freq(sg_policy, time)) return; + busy = sugov_cpu_is_busy(sg_cpu); + if (flags & SCHED_CPUFREQ_DL) { next_f = policy->cpuinfo.max_freq; } else { sugov_get_util(&util, &max, time); sugov_iowait_boost(sg_cpu, &util, &max); next_f = get_next_freq(sg_policy, util, max); + /* + * Do not reduce the frequency if the CPU has not been idle + * recently, as the reduction is likely to be premature then. + */ + if (busy && next_f < sg_policy->next_freq) + next_f = sg_policy->next_freq; } sugov_update_commit(sg_policy, time, next_f); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 22c57e191a23..34f9a9c417d9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -870,6 +870,18 @@ ktime_t tick_nohz_get_sleep_length(void) return ts->sleep_length; } +/** + * tick_nohz_get_idle_calls - return the current idle calls counter value + * + * Called from the schedutil frequency scaling governor in scheduler context. + */ +unsigned long tick_nohz_get_idle_calls(void) +{ + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); + + return ts->idle_calls; +} + static void tick_nohz_account_idle_ticks(struct tick_sched *ts) { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE From 2ee9941b0bbcc6a9b047d78bcb9f18be98a15625 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Thu, 25 May 2017 15:27:07 +0100 Subject: [PATCH 112/176] UPSTREAM: cpufreq: schedutil: Trace frequency only if it has changed sugov_update_commit() calls trace_cpu_frequency() to record the current CPU frequency if it has not changed in the fast switch case to prevent utilities from getting confused (they may report that the CPU is idle if the frequency has not been recorded for too long, for example). However, that may cause the tracepoint to be triggered quite often for no real reason (if the frequency doesn't change, we will not modify the last update time stamp and governor computations may run again shortly when that happens), so don't do that (arguably, it is done to work around a utilities bug anyway). That allows code duplication in sugov_update_commit() to be reduced somewhat too. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar (cherry picked from commit 38d4ea229d25d30be6bf41bcd6cd663a587866ca) (conflicts with sugov_up_down_rate_limit resolved) Signed-off-by: Chris Redpath Change-Id: Ia019dda29b8c1c4cf3553da75c88d066eb5674e9 --- kernel/sched/cpufreq_schedutil.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 49922187b57c..e12309c1b07b 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -132,22 +132,20 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, if (sugov_up_down_rate_limit(sg_policy, time, next_freq)) return; + if (sg_policy->next_freq == next_freq) + return; + + sg_policy->next_freq = next_freq; + sg_policy->last_freq_update_time = time; + if (policy->fast_switch_enabled) { - if (sg_policy->next_freq == next_freq) { - trace_cpu_frequency(policy->cur, smp_processor_id()); - return; - } - sg_policy->next_freq = next_freq; - sg_policy->last_freq_update_time = time; next_freq = cpufreq_driver_fast_switch(policy, next_freq); if (next_freq == CPUFREQ_ENTRY_INVALID) return; policy->cur = next_freq; trace_cpu_frequency(next_freq, smp_processor_id()); - } else if (sg_policy->next_freq != next_freq) { - sg_policy->next_freq = next_freq; - sg_policy->last_freq_update_time = time; + } else { sg_policy->work_in_progress = true; irq_work_queue(&sg_policy->irq_work); } From 89ce9d97e661ee44741bd1ab471c4d9a8a27d077 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Tue, 20 Jun 2017 13:54:44 -0700 Subject: [PATCH 113/176] Add BINDER_GET_NODE_DEBUG_INFO ioctl The BINDER_GET_NODE_DEBUG_INFO ioctl will return debug info on a node. Each successive call reusing the previous return value will return the next node. The data will be used by libmemunreachable to mark the pointers with kernel references as reachable. 
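As a rough illustration of the iteration contract described above (not part of the patch itself; the binder fd setup and error handling are assumed, and the header names match the uapi change this patch introduces), a user-space caller such as libmemunreachable would walk the nodes like this:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/android/binder.h>

/* Hedged sketch: enumerate all binder nodes of a process.
 * binder_fd is an assumed, already-open /dev/binder descriptor. */
static void dump_binder_nodes(int binder_fd)
{
        struct binder_node_debug_info info;

        memset(&info, 0, sizeof(info));        /* ptr == 0 requests the first node */
        for (;;) {
                if (ioctl(binder_fd, BINDER_GET_NODE_DEBUG_INFO, &info) < 0)
                        break;                 /* error handling elided in this sketch */
                if (!info.ptr)
                        break;                 /* ptr == 0 means no more nodes */
                /* record info.ptr / info.cookie as reachable here; info.ptr
                 * already holds the cursor for the next iteration */
        }
}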
Bug: 28275695 Change-Id: Idbbafa648a33822dc023862cd92b51a595cf7c1c Signed-off-by: Colin Cross Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 42 +++++++++++++++++++++++++++++ include/uapi/linux/android/binder.h | 14 ++++++++++ 2 files changed, 56 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 6b24f70dbff8..4247d7d8b8e6 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4548,6 +4548,30 @@ out: return ret; } +static int binder_ioctl_get_node_debug_info(struct binder_proc *proc, + struct binder_node_debug_info *info) { + struct rb_node *n; + binder_uintptr_t ptr = info->ptr; + + memset(info, 0, sizeof(*info)); + + binder_inner_proc_lock(proc); + for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) { + struct binder_node *node = rb_entry(n, struct binder_node, + rb_node); + if (node->ptr > ptr) { + info->ptr = node->ptr; + info->cookie = node->cookie; + info->has_strong_ref = node->has_strong_ref; + info->has_weak_ref = node->has_weak_ref; + break; + } + } + binder_inner_proc_unlock(proc); + + return 0; +} + static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int ret; @@ -4615,6 +4639,24 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } break; } + case BINDER_GET_NODE_DEBUG_INFO: { + struct binder_node_debug_info info; + + if (copy_from_user(&info, ubuf, sizeof(info))) { + ret = -EFAULT; + goto err; + } + + ret = binder_ioctl_get_node_debug_info(proc, &info); + if (ret < 0) + goto err; + + if (copy_to_user(ubuf, &info, sizeof(info))) { + ret = -EFAULT; + goto err; + } + break; + } default: ret = -EINVAL; goto err; diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 70e252bf0be0..5539933b3491 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -233,6 +233,19 @@ struct binder_version { #define BINDER_CURRENT_PROTOCOL_VERSION 8 #endif +/* + * Use with BINDER_GET_NODE_DEBUG_INFO, driver reads ptr, writes to all fields. + * Set ptr to NULL for the first call to get the info for the first node, and + * then repeat the call passing the previously returned value to get the next + * nodes. ptr will be 0 when there are no more nodes. + */ +struct binder_node_debug_info { + binder_uintptr_t ptr; + binder_uintptr_t cookie; + __u32 has_strong_ref; + __u32 has_weak_ref; +}; + #define BINDER_WRITE_READ _IOWR('b', 1, struct binder_write_read) #define BINDER_SET_IDLE_TIMEOUT _IOW('b', 3, __s64) #define BINDER_SET_MAX_THREADS _IOW('b', 5, __u32) @@ -240,6 +253,7 @@ struct binder_version { #define BINDER_SET_CONTEXT_MGR _IOW('b', 7, __s32) #define BINDER_THREAD_EXIT _IOW('b', 8, __s32) #define BINDER_VERSION _IOWR('b', 9, struct binder_version) +#define BINDER_GET_NODE_DEBUG_INFO _IOWR('b', 11, struct binder_node_debug_info) /* * NOTE: Two special error codes you should check for when calling From 76b376eac7a2e9584afb58af87d5451f55b82e3d Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 26 May 2017 10:48:56 -0700 Subject: [PATCH 114/176] ANDROID: binder: don't check prio permissions on restore. 
Because we have disabled RT priority inheritance for the regular binder domain, the following can happen: 1) thread A (prio 98) calls into thread B 2) because RT prio inheritance is disabled, thread B runs at the lowest nice (prio 100) instead 3) thread B calls back into A; A will run at prio 100 for the duration of the transaction 4) When thread A is done with the call from B, we will try to restore the prio back to 98. But, we fail because the process doesn't hold CAP_SYS_NICE, neither is RLIMIT_RT_PRIO set. While the proper fix going forward will be to correctly apply CAP_SYS_NICE or RLIMIT_RT_PRIO, for now it seems reasonable to not check permissions on the restore path. Change-Id: Ibede5960c9b7bb786271c001e405de50be64d944 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 4247d7d8b8e6..29e3e3f21443 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1109,8 +1109,9 @@ static int to_kernel_prio(int policy, int user_priority) return MAX_USER_RT_PRIO - 1 - user_priority; } -static void binder_set_priority(struct task_struct *task, - struct binder_priority desired) +static void binder_do_set_priority(struct task_struct *task, + struct binder_priority desired, + bool verify) { int priority; /* user-space prio value */ bool has_cap_nice; @@ -1123,7 +1124,7 @@ static void binder_set_priority(struct task_struct *task, priority = to_userspace_prio(policy, desired.prio); - if (is_rt_policy(policy) && !has_cap_nice) { + if (verify && is_rt_policy(policy) && !has_cap_nice) { long max_rtprio = task_rlimit(task, RLIMIT_RTPRIO); if (max_rtprio == 0) { @@ -1134,7 +1135,7 @@ static void binder_set_priority(struct task_struct *task, } } - if (is_fair_policy(policy) && !has_cap_nice) { + if (verify && is_fair_policy(policy) && !has_cap_nice) { long min_nice = rlimit_to_nice(task_rlimit(task, RLIMIT_NICE)); if (min_nice > MAX_NICE) { @@ -1167,6 +1168,18 @@ static void binder_set_priority(struct task_struct *task, set_user_nice(task, priority); } +static void binder_set_priority(struct task_struct *task, + struct binder_priority desired) +{ + binder_do_set_priority(task, desired, /* verify = */ true); +} + +static void binder_restore_priority(struct task_struct *task, + struct binder_priority desired) +{ + binder_do_set_priority(task, desired, /* verify = */ false); +} + static void binder_transaction_priority(struct task_struct *task, struct binder_transaction *t, struct binder_priority node_prio, @@ -3204,7 +3217,7 @@ static void binder_transaction(struct binder_proc *proc, binder_enqueue_work_ilocked(&t->work, &target_thread->todo); binder_inner_proc_unlock(target_proc); wake_up_interruptible_sync(&target_thread->wait); - binder_set_priority(current, in_reply_to->saved_priority); + binder_restore_priority(current, in_reply_to->saved_priority); binder_free_transaction(in_reply_to); } else if (!(t->flags & TF_ONE_WAY)) { BUG_ON(t->buffer->async_transaction != 0); @@ -3293,7 +3306,7 @@ err_no_context_mgr_node: BUG_ON(thread->return_error.cmd != BR_OK); if (in_reply_to) { - binder_set_priority(current, in_reply_to->saved_priority); + binder_restore_priority(current, in_reply_to->saved_priority); thread->return_error.cmd = BR_TRANSACTION_COMPLETE; binder_enqueue_work(thread->proc, &thread->return_error.work, @@ -3893,7 +3906,7 @@ retry: wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); } - 
binder_set_priority(current, proc->default_priority); + binder_restore_priority(current, proc->default_priority); } if (non_block) { From d01a860b54f7fe60ae199d05c6e463be8e616bc1 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 19 Jul 2017 17:16:43 -0700 Subject: [PATCH 115/176] ANDROID: sdcardfs: Remove unnecessary lock The mmap_sem lock does not appear to be protecting anything, and has been removed in Samsung's more recent versions of sdcardfs. Signed-off-by: Daniel Rosenberg Change-Id: I76ff3e33002716b8384fc8be368028ed63dffe4e Bug: 63785372 --- fs/sdcardfs/inode.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 60fea424835f..103dc45a131f 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -766,13 +766,9 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct * afterwards in the other cases: we fsstack_copy_inode_size from * the lower level. */ - if (current->mm) - down_write(¤t->mm->mmap_sem); if (ia->ia_valid & ATTR_SIZE) { err = inode_newsize_ok(&tmp, ia->ia_size); if (err) { - if (current->mm) - up_write(¤t->mm->mmap_sem); goto out; } truncate_setsize(inode, ia->ia_size); @@ -795,8 +791,6 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct err = notify_change2(lower_mnt, lower_dentry, &lower_ia, /* note: lower_ia */ NULL); mutex_unlock(&d_inode(lower_dentry)->i_mutex); - if (current->mm) - up_write(¤t->mm->mmap_sem); if (err) goto out; From 492a6047e7b5a1383627189e8bff99ff181ef4a4 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 3 Jun 2016 13:16:59 -0700 Subject: [PATCH 116/176] ANDROID: android-verity: mark dev as rw for linear target Mark as rw when adding as linear target to allow changes to the underlying filesystem through adb disable verity and adb remount. (Cherry-picked from https://partner-android-review.googlesource.com/#/c/613573/ 79a3032bb62da65a5d724eb70c8bdc662945d475) BUG: 28845874 Signed-off-by: Badhri Jagan Sridharan Change-Id: If41e9cad8e0f054f4778c09a6e2f0cb8af6fddaf --- drivers/md/dm-android-verity.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index c3c9502baf18..c521df010ee3 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -645,6 +645,8 @@ static int add_as_linear_device(struct dm_target *ti, char *dev) android_verity_target.iterate_devices = dm_linear_iterate_devices, android_verity_target.io_hints = NULL; + set_disk_ro(dm_disk(dm_table_get_md(ti->table)), 0); + err = dm_linear_ctr(ti, DM_LINEAR_ARGS, linear_table_args); if (!err) { From 0be4c96e7c57a5359e35eba32fd3559824f8c751 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Feb 2017 10:31:49 -0800 Subject: [PATCH 117/176] net_sched: fix error recovery at qdisc creation commit 87b60cfacf9f17cf71933c6e33b66e68160af71d upstream. Dmitry reported uses after free in qdisc code [1] The problem here is that ops->init() can return an error. qdisc_create_dflt() then call ops->destroy(), while qdisc_create() does _not_ call it. Four qdisc chose to call their own ops->destroy(), assuming their caller would not. This patch makes sure qdisc_create() calls ops->destroy() and fixes the four qdisc to avoid double free. 
[1] BUG: KASAN: use-after-free in mq_destroy+0x242/0x290 net/sched/sch_mq.c:33 at addr ffff8801d415d440 Read of size 8 by task syz-executor2/5030 CPU: 0 PID: 5030 Comm: syz-executor2 Not tainted 4.3.5-smp-DEV #119 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 0000000000000046 ffff8801b435b870 ffffffff81bbbed4 ffff8801db000400 ffff8801d415d440 ffff8801d415dc40 ffff8801c4988510 ffff8801b435b898 ffffffff816682b1 ffff8801b435b928 ffff8801d415d440 ffff8801c49880c0 Call Trace: [] __dump_stack lib/dump_stack.c:15 [inline] [] dump_stack+0x6c/0x98 lib/dump_stack.c:51 [] kasan_object_err+0x21/0x70 mm/kasan/report.c:158 [] print_address_description mm/kasan/report.c:196 [inline] [] kasan_report_error+0x1b4/0x4b0 mm/kasan/report.c:285 [] kasan_report mm/kasan/report.c:305 [inline] [] __asan_report_load8_noabort+0x43/0x50 mm/kasan/report.c:326 [] mq_destroy+0x242/0x290 net/sched/sch_mq.c:33 [] qdisc_destroy+0x12d/0x290 net/sched/sch_generic.c:953 [] qdisc_create_dflt+0xf0/0x120 net/sched/sch_generic.c:848 [] attach_default_qdiscs net/sched/sch_generic.c:1029 [inline] [] dev_activate+0x6ad/0x880 net/sched/sch_generic.c:1064 [] __dev_open+0x221/0x320 net/core/dev.c:1403 [] __dev_change_flags+0x15e/0x3e0 net/core/dev.c:6858 [] dev_change_flags+0x8e/0x140 net/core/dev.c:6926 [] dev_ifsioc+0x446/0x890 net/core/dev_ioctl.c:260 [] dev_ioctl+0x1ba/0xb80 net/core/dev_ioctl.c:546 [] sock_do_ioctl+0x99/0xb0 net/socket.c:879 [] sock_ioctl+0x2a0/0x390 net/socket.c:958 [] vfs_ioctl fs/ioctl.c:44 [inline] [] do_vfs_ioctl+0x8a8/0xe50 fs/ioctl.c:611 [] SYSC_ioctl fs/ioctl.c:626 [inline] [] SyS_ioctl+0x94/0xc0 fs/ioctl.c:617 [] entry_SYSCALL_64_fastpath+0x12/0x17 Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 2 ++ net/sched/sch_hhf.c | 8 ++++++-- net/sched/sch_mq.c | 10 +++------- net/sched/sch_mqprio.c | 19 ++++++------------- net/sched/sch_sfq.c | 3 ++- 5 files changed, 19 insertions(+), 23 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 95b560f0b253..35ed0d8c53b0 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1004,6 +1004,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, return sch; } + /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ + ops->destroy(sch); err_out3: dev_put(dev); kfree((char *) sch - sch->padded); diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 13d6f83ec491..45d4b2f22f62 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -636,7 +636,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN * sizeof(u32)); if (!q->hhf_arrays[i]) { - hhf_destroy(sch); + /* Note: hhf_destroy() will be called + * by our caller. + */ return -ENOMEM; } } @@ -647,7 +649,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN / BITS_PER_BYTE); if (!q->hhf_valid_bits[i]) { - hhf_destroy(sch); + /* Note: hhf_destroy() will be called + * by our caller. 
+ */ return -ENOMEM; } } diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 3e82f047caaf..d9c84328e7eb 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -52,7 +52,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) /* pre-allocate qdiscs, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); - if (priv->qdiscs == NULL) + if (!priv->qdiscs) return -ENOMEM; for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { @@ -60,18 +60,14 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops, TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(ntx + 1))); - if (qdisc == NULL) - goto err; + if (!qdisc) + return -ENOMEM; priv->qdiscs[ntx] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->flags |= TCQ_F_MQROOT; return 0; - -err: - mq_destroy(sch); - return -ENOMEM; } static void mq_attach(struct Qdisc *sch) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index ad70ecf57ce7..66bccc5ff4ea 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -117,20 +117,17 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) /* pre-allocate qdisc, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); - if (priv->qdiscs == NULL) { - err = -ENOMEM; - goto err; - } + if (!priv->qdiscs) + return -ENOMEM; for (i = 0; i < dev->num_tx_queues; i++) { dev_queue = netdev_get_tx_queue(dev, i); qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops, TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(i + 1))); - if (qdisc == NULL) { - err = -ENOMEM; - goto err; - } + if (!qdisc) + return -ENOMEM; + priv->qdiscs[i] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } @@ -143,7 +140,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) priv->hw_owned = 1; err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); if (err) - goto err; + return err; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) @@ -157,10 +154,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) sch->flags |= TCQ_F_MQROOT; return 0; - -err: - mqprio_destroy(sch); - return err; } static void mqprio_attach(struct Qdisc *sch) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 498f0a2cb47f..4431e2833e45 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -742,9 +742,10 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor); q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows); if (!q->ht || !q->slots) { - sfq_destroy(sch); + /* Note: sfq_destroy() will be called by our caller */ return -ENOMEM; } + for (i = 0; i < q->divisor; i++) q->ht[i] = SFQ_EMPTY_SLOT; From c485792ed6491865037abc1e18bbd66240112c93 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Wed, 28 Jun 2017 12:53:54 +0800 Subject: [PATCH 118/176] net: sched: Fix one possible panic when no destroy callback commit c1a4872ebfb83b1af7144f7b29ac8c4b344a12a8 upstream. When a qdisc fails to init, qdisc_create invokes the destroy callback to clean up, but there is no check that the callback really exists, so the kernel panics for qdiscs that don't define one, such as codel, fq, and so on. Take codel as an example: when a malicious user constructs an invalid netlink msg, codel_init->codel_change->nla_parse_nested fails. The kernel then invokes the destroy callback directly, but qdisc codel doesn't define one.
It causes a panic as a result. Now add a check for ->destroy to avoid the possible panic. Fixes: 87b60cfacf9f ("net_sched: fix error recovery at qdisc creation") Signed-off-by: Gao Feng Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 35ed0d8c53b0..6d340cd6e2a7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1005,7 +1005,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, return sch; } /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ - ops->destroy(sch); + if (ops->destroy) + ops->destroy(sch); err_out3: dev_put(dev); kfree((char *) sch - sch->padded); From f71e5140242ae9268cb44ccaa4118e8cda69910c Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Tue, 20 Jun 2017 12:48:11 -0500 Subject: [PATCH 119/176] net/phy: micrel: configure interrupts after autoneg workaround commit b866203d872d5deeafcecd25ea429d6748b5bd56 upstream. The commit ("net/phy: micrel: Add workaround for bad autoneg") fixes an autoneg failure case by resetting the hardware. This turns off interrupts. Things will work themselves out if the phy polls, as it will figure out its state during a poll. However, if the phy uses only interrupts, the phy will stall, since interrupts are off. This patch fixes the issue by calling config_intr after resetting the phy. Fixes: d2fd719bcb0e ("net/phy: micrel: Add workaround for bad autoneg ") Signed-off-by: Zach Brown Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/micrel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index e13ad6cdcc22..c8b85f1069ff 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -539,6 +539,8 @@ static int ksz9031_read_status(struct phy_device *phydev) if ((regval & 0xFF) == 0xFF) { phy_init_hw(phydev); phydev->link = 0; + if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) + phydev->drv->config_intr(phydev); } return 0; From 6a87cca39f29aa9a3f2ea03ea4ed909656ce3980 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 21 Jun 2017 14:34:58 -0700 Subject: [PATCH 120/176] ipv6: avoid unregistering inet6_dev for loopback commit 60abc0be96e00ca71bac083215ac91ad2e575096 upstream. The per netns loopback_dev->ip6_ptr is unregistered and set to NULL when its mtu is set to smaller than IPV6_MIN_MTU; this means we could set rt->rt6i_idev to NULL after a rt6_uncached_list_flush_dev() and then crash after another call. In this case we should just bring its inet6_dev down, rather than unregistering it; at least prior to commit 176c39af29bc ("netns: fix addrconf_ifdown kernel panic") we always overrode the case for loopback. Thanks a lot to Andrey for finding a reliable reproducer. Fixes: 176c39af29bc ("netns: fix addrconf_ifdown kernel panic") Reported-by: Andrey Konovalov Cc: Andrey Konovalov Cc: Daniel Lezcano Cc: David Ahern Signed-off-by: Cong Wang Acked-by: David Ahern Tested-by: Andrey Konovalov Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 03dadbf6cc5e..56627f0f4088 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3143,6 +3143,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct inet6_dev *idev = __in6_dev_get(dev); + struct net *net = dev_net(dev); int run_pending = 0; int err; @@ -3158,7 +3159,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, case NETDEV_CHANGEMTU: /* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) { - addrconf_ifdown(dev, 1); + addrconf_ifdown(dev, dev != net->loopback_dev); break; } @@ -3271,7 +3272,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, * IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) - addrconf_ifdown(dev, 1); + addrconf_ifdown(dev, dev != net->loopback_dev); } break; From ccff2f4afbc183a163b2feb83132ead13d9283ed Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 23 Jun 2017 17:51:31 +0200 Subject: [PATCH 121/176] net: dp83640: Avoid NULL pointer dereference. commit db9d8b29d19d2801793e4419f4c6272bf8951c62 upstream. The function, skb_complete_tx_timestamp(), used to allow passing in a NULL pointer for the time stamps, but that was changed in commit 62bccb8cdb69051b95a55ab0c489e3cab261c8ef ("net-timestamp: Make the clone operation stand-alone from phy timestamping"), and the existing call sites, all of which are in the dp83640 driver, were fixed up. Even though the kernel-doc was subsequently updated in commit 7a76a021cd5a292be875fbc616daf03eab1e6996 ("net-timestamp: Update skb_complete_tx_timestamp comment"), still a bug fix from Manfred Rudigier came into the driver using the old semantics. Probably Manfred derived that patch from an older kernel version. This fix should be applied to the stable trees as well. Fixes: 81e8f2e930fe ("net: dp83640: Fix tx timestamp overflow handling.") Signed-off-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/dp83640.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 84b9cca152eb..e83acc608678 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -907,7 +907,7 @@ static void decode_txts(struct dp83640_private *dp83640, if (overflow) { pr_debug("tx timestamp queue overflow, count %d\n", overflow); while (skb) { - skb_complete_tx_timestamp(skb, NULL); + kfree_skb(skb); skb = skb_dequeue(&dp83640->tx_queue); } return; From 32a44f1ba7edf3d58afb1fdce852f800f476b622 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 24 Jun 2017 23:50:30 -0700 Subject: [PATCH 122/176] tcp: reset sk_rx_dst in tcp_disconnect() commit d747a7a51b00984127a88113cdbbc26f91e9d815 upstream. We have to reset the sk->sk_rx_dst when we disconnect a TCP connection, because otherwise when we re-connect it this dst reference is simply overridden in tcp_finish_connect(). This fixes a dst leak which leads to a loopback dev refcnt leak. It is a long-standing bug, Kevin reported a very similar (if not same) bug before. Thanks to Andrei for providing such a reliable reproducer which greatly narrows down the problem. 
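To make the triggering sequence concrete, here is a hedged user-space sketch (not from the patch; srv1 and srv2 are assumed, pre-filled destinations, and error handling is elided) of a disconnect-and-reconnect cycle that reaches tcp_disconnect() via an AF_UNSPEC connect():

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>

static void reconnect_once(struct sockaddr_in *srv1, struct sockaddr_in *srv2)
{
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        struct sockaddr unspec;

        memset(&unspec, 0, sizeof(unspec));
        unspec.sa_family = AF_UNSPEC;

        connect(fd, (struct sockaddr *)srv1, sizeof(*srv1)); /* sk_rx_dst set once traffic arrives */
        connect(fd, &unspec, sizeof(unspec));                /* AF_UNSPEC -> tcp_disconnect() */
        connect(fd, (struct sockaddr *)srv2, sizeof(*srv2)); /* pre-fix: old sk_rx_dst overridden and leaked */
}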
Fixes: 41063e9dd119 ("ipv4: Early TCP socket demux.") Reported-by: Andrei Vagin Reported-by: Kevin Xu Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4bd8678329d6..0870a86e9d96 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2263,6 +2263,8 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); + dst_release(sk->sk_rx_dst); + sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); From d598f7ff24db728c7417fb02871aa6578d97e224 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Jun 2017 07:02:20 -0700 Subject: [PATCH 123/176] net: prevent sign extension in dev_get_stats() commit 6f64ec74515925cced6df4571638b5a099a49aae upstream. Similar to the fix provided by Dominik Heidler in commit 9b3dc0a17d73 ("l2tp: cast l2tp traffic counter to unsigned") we need to take care of 32bit kernels in dev_get_stats(). When using atomic_long_read(), we add a 'long' to u64 and might misinterpret the high order bit, unless we cast to unsigned. Fixes: caf586e5f23ce ("net: add a core netdev->rx_dropped counter") Fixes: 015f0688f57ca ("net: net: add a core netdev->tx_dropped counter") Fixes: 6e7333d315a76 ("net: add rx_nohandler stat counter") Signed-off-by: Eric Dumazet Cc: Jarod Wilson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 524d8b28e690..c8f0385ea492 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7052,8 +7052,8 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, } else { netdev_stats_to_stats64(storage, &dev->stats); } - storage->rx_dropped += atomic_long_read(&dev->rx_dropped); - storage->tx_dropped += atomic_long_read(&dev->tx_dropped); + storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped); + storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped); return storage; } EXPORT_SYMBOL(dev_get_stats); From 1a4f13e0a99a85c455ff2f6dc117f6f049c039fa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 Jun 2017 03:04:59 +0200 Subject: [PATCH 124/176] bpf: prevent leaking pointer via xadd on unprivileged commit 6bdf6abc56b53103324dfd270a86580306e1a232 upstream. Leaking kernel addresses to unprivileged users is generally disallowed, for example, the verifier rejects the following: 0: (b7) r0 = 0 1: (18) r2 = 0xffff897e82304400 3: (7b) *(u64 *)(r1 +48) = r2 R2 leaks addr into ctx Doing pointer arithmetic on them is also forbidden, so that they don't turn into an unknown value and then get leaked out. However, there's xadd as a special case, where we don't check the src reg for being a pointer register, e.g. the following will pass: 0: (b7) r0 = 0 1: (7b) *(u64 *)(r1 +48) = r0 2: (18) r2 = 0xffff897e82304400 ; map 4: (db) lock *(u64 *)(r1 +48) += r2 5: (95) exit We could store the pointer into skb->cb, lose the type context, and then read it out from there again to leak it eventually out of a map value.
Or more easily in a different variant, too: 0: (bf) r6 = r1 1: (7a) *(u64 *)(r10 -8) = 0 2: (bf) r2 = r10 3: (07) r2 += -8 4: (18) r1 = 0x0 6: (85) call bpf_map_lookup_elem#1 7: (15) if r0 == 0x0 goto pc+3 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R6=ctx R10=fp 8: (b7) r3 = 0 9: (7b) *(u64 *)(r0 +0) = r3 10: (db) lock *(u64 *)(r0 +0) += r6 11: (b7) r0 = 0 12: (95) exit from 7 to 11: R0=inv,min_value=0,max_value=0 R6=ctx R10=fp 11: (b7) r0 = 0 12: (95) exit Prevent this by checking xadd src reg for pointer types. Also add a couple of test cases related to this. Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs") Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: Edward Cree Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2cbfba78d3db..863e24f1e62e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -754,6 +754,11 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) if (err) return err; + if (is_pointer_value(env, insn->src_reg)) { + verbose("R%d leaks addr into mem\n", insn->src_reg); + return -EACCES; + } + /* check whether atomic_add can read the memory */ err = check_mem_access(env, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); From 38ae32c9f13d3f0c642d373f35489752aa60d466 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Thu, 29 Jun 2017 11:13:36 +0200 Subject: [PATCH 125/176] net: handle NAPI_GRO_FREE_STOLEN_HEAD case also in napi_frags_finish() commit e44699d2c28067f69698ccb68dd3ddeacfebc434 upstream. Recently I started seeing warnings about pages with refcount -1. The problem was traced to packets being reused after their head was merged into a GRO packet by skb_gro_receive(). While bisecting the issue pointed to commit c21b48cc1bbf ("net: adjust skb->truesize in ___pskb_trim()") and I have never seen it on a kernel with it reverted, I believe the real problem appeared earlier when the option to merge head frag in GRO was implemented. Handling NAPI_GRO_FREE_STOLEN_HEAD state was only added to GRO_MERGED_FREE branch of napi_skb_finish() so that if the driver uses napi_gro_frags() and head is merged (which in my case happens after the skb_condense() call added by the commit mentioned above), the skb is reused including the head that has been merged. As a result, we release the page reference twice and eventually end up with negative page refcount. To fix the problem, handle NAPI_GRO_FREE_STOLEN_HEAD in napi_frags_finish() the same way it's done in napi_skb_finish(). Fixes: d7e8883cfcf4 ("net: make GRO aware of skb->head_frag") Signed-off-by: Michal Kubecek Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index c8f0385ea492..dc5d3d546150 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4375,6 +4375,12 @@ struct packet_offload *gro_find_complete_by_type(__be16 type) } EXPORT_SYMBOL(gro_find_complete_by_type); +static void napi_skb_free_stolen_head(struct sk_buff *skb) +{ + skb_dst_drop(skb); + kmem_cache_free(skbuff_head_cache, skb); +} + static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { switch (ret) { @@ -4388,12 +4394,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) break; case GRO_MERGED_FREE: - if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) { - skb_dst_drop(skb); - kmem_cache_free(skbuff_head_cache, skb); - } else { + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + napi_skb_free_stolen_head(skb); + else __kfree_skb(skb); - } break; case GRO_HELD: @@ -4459,10 +4463,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, break; case GRO_DROP: - case GRO_MERGED_FREE: napi_reuse_skb(napi, skb); break; + case GRO_MERGED_FREE: + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + napi_skb_free_stolen_head(skb); + else + napi_reuse_skb(napi, skb); + break; + case GRO_MERGED: break; } From 0c32b01ee49d4784ac85fb4a4ee903ac86e7d183 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 29 Jun 2017 16:56:54 +0200 Subject: [PATCH 126/176] ipv6: dad: don't remove dynamic addresses if link is down commit ec8add2a4c9df723c94a863b8fcd6d93c472deed upstream. Currently, when the link for $DEV is down, this command succeeds but the address is removed immediately by DAD (1): ip addr add 1111::12/64 dev $DEV valid_lft 3600 preferred_lft 1800 In the same situation, this will succeed and not remove the address (2): ip addr add 1111::12/64 dev $DEV ip addr change 1111::12/64 dev $DEV valid_lft 3600 preferred_lft 1800 The comment in addrconf_dad_begin() when !IF_READY makes it look like this is the intended behavior, but doesn't explain why: * If the device is not ready: * - keep it tentative if it is a permanent address. * - otherwise, kill it. We clearly cannot prevent userspace from doing (2), but we can make (1) work consistently with (2). addrconf_dad_stop() is only called in two cases: if DAD failed, or to skip DAD when the link is down. In that second case, the fix is to avoid deleting the address, like we already do for permanent addresses. Fixes: 3c21edbd1137 ("[IPV6]: Defer IPv6 device initialization until the link becomes ready.") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 56627f0f4088..735b22b1b4ea 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1772,17 +1772,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { - if (ifp->flags&IFA_F_PERMANENT) { - spin_lock_bh(&ifp->lock); - addrconf_del_dad_work(ifp); - ifp->flags |= IFA_F_TENTATIVE; - if (dad_failed) - ifp->flags |= IFA_F_DADFAILED; - spin_unlock_bh(&ifp->lock); - if (dad_failed) - ipv6_ifa_notify(0, ifp); - in6_ifa_put(ifp); - } else if (ifp->flags&IFA_F_TEMPORARY) { + if (ifp->flags&IFA_F_TEMPORARY) { struct inet6_ifaddr *ifpub; spin_lock_bh(&ifp->lock); ifpub = ifp->ifpub; @@ -1795,6 +1785,16 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); + } else if (ifp->flags&IFA_F_PERMANENT || !dad_failed) { + spin_lock_bh(&ifp->lock); + addrconf_del_dad_work(ifp); + ifp->flags |= IFA_F_TENTATIVE; + if (dad_failed) + ifp->flags |= IFA_F_DADFAILED; + spin_unlock_bh(&ifp->lock); + if (dad_failed) + ipv6_ifa_notify(0, ifp); + in6_ifa_put(ifp); } else { ipv6_del_addr(ifp); } From eb7bef1db6f31cdba61a0d8794839fab57da5ed8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 5 Jul 2017 14:41:46 -0600 Subject: [PATCH 127/176] net: ipv6: Compare lwstate in detecting duplicate nexthops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f06b7549b79e29a672336d4e134524373fb7a232 upstream. Lennert reported a failure to add different mpls encaps in a multipath route: $ ip -6 route add 1234::/16 \ nexthop encap mpls 10 via fe80::1 dev ens3 \ nexthop encap mpls 20 via fe80::1 dev ens3 RTNETLINK answers: File exists The problem is that the duplicate nexthop detection does not compare lwtunnel configuration. Add it. Fixes: 19e42e451506 ("ipv6: support for fib route lwtunnel encap attributes") Signed-off-by: David Ahern Reported-by: João Taveira Araújo Reported-by: Lennert Buytenhek Acked-by: Roopa Prabhu Tested-by: Lennert Buytenhek Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip6_route.h | 8 ++++++++ net/ipv6/ip6_fib.c | 5 +---- net/ipv6/route.c | 8 +------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 6275d651f76e..b8a8d4239e85 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -21,6 +21,7 @@ struct route_info { #include <net/flow.h> #include <net/ip6_fib.h> #include <net/sock.h> +#include <net/lwtunnel.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/route.h> @@ -208,4 +209,11 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, return daddr; } +static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b) +{ + return a->dst.dev == b->dst.dev && + a->rt6i_idev == b->rt6i_idev && + ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) && + !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate); +} #endif diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1ac06723f0d7..f60e8caea767 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -767,10 +767,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, goto next_iter; } - if (iter->dst.dev == rt->dst.dev && - iter->rt6i_idev == rt->rt6i_idev && - ipv6_addr_equal(&iter->rt6i_gateway, - &rt->rt6i_gateway)) { + if (rt6_duplicate_nexthop(iter, rt)) { if (rt->rt6i_nsiblings) rt->rt6i_nsiblings = 0; if (!(iter->rt6i_flags & RTF_EXPIRES)) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8f4177a1d4f5..ef335070e98a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2833,17 +2833,11 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list, struct rt6_info *rt, struct fib6_config *r_cfg) { struct rt6_nh *nh; - struct rt6_info *rtnh; int err = -EEXIST; list_for_each_entry(nh, rt6_nh_list, next) { /* check if rt6_info already exists */ - rtnh = nh->rt6_info; - - if (rtnh->dst.dev == rt->dst.dev && - rtnh->rt6i_idev == rt->rt6i_idev && - ipv6_addr_equal(&rtnh->rt6i_gateway, - &rt->rt6i_gateway)) + if (rt6_duplicate_nexthop(nh->rt6_info, rt)) return err; } From 89e7f17f8f64763394c164ffe218c599cda4ab70 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 6 Jul 2017 15:24:40 +0300 Subject: [PATCH 128/176] vrf: fix bug_on triggered by rx when destroying a vrf commit f630c38ef0d785101363a8992bbd4f302180f86f upstream. When destroying a VRF device we clean up the slaves in its ndo_uninit() function, but that causes packets to be switched (skb->dev == vrf being destroyed) even though we're past the point where the VRF should be receiving any packets while it is being dismantled. This causes a BUG_ON to trigger if we have raw sockets (trace below). The reason is that the inetdev of the VRF has been destroyed but we're still sending packets up the stack with it, so let's free the slaves in the dellink callback as David Ahern suggested. Note that this fix doesn't prevent packets from going up when the VRF device is admin down.
[ 35.631854] invalid opcode: 0000 [#1] SMP [ 35.631977] Modules linked in: [ 35.632081] CPU: 2 PID: 22 Comm: ksoftirqd/2 Not tainted 4.12.0-rc7+ #45 [ 35.632247] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 35.632477] task: ffff88005ad68000 task.stack: ffff88005ad64000 [ 35.632632] RIP: 0010:fib_compute_spec_dst+0xfc/0x1ee [ 35.632769] RSP: 0018:ffff88005ad67978 EFLAGS: 00010202 [ 35.632910] RAX: 0000000000000001 RBX: ffff880059a7f200 RCX: 0000000000000000 [ 35.633084] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffffffff82274af0 [ 35.633256] RBP: ffff88005ad679f8 R08: 000000000001ef70 R09: 0000000000000046 [ 35.633430] R10: ffff88005ad679f8 R11: ffff880037731cb0 R12: 0000000000000001 [ 35.633603] R13: ffff8800599e3000 R14: 0000000000000000 R15: ffff8800599cb852 [ 35.634114] FS: 0000000000000000(0000) GS:ffff88005d900000(0000) knlGS:0000000000000000 [ 35.634306] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 35.634456] CR2: 00007f3563227095 CR3: 000000000201d000 CR4: 00000000000406e0 [ 35.634632] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 35.634865] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 35.635055] Call Trace: [ 35.635271] ? __lock_acquire+0xf0d/0x1117 [ 35.635522] ipv4_pktinfo_prepare+0x82/0x151 [ 35.635831] raw_rcv_skb+0x17/0x3c [ 35.636062] raw_rcv+0xe5/0xf7 [ 35.636287] raw_local_deliver+0x169/0x1d9 [ 35.636534] ip_local_deliver_finish+0x87/0x1c4 [ 35.636820] ip_local_deliver+0x63/0x7f [ 35.637058] ip_rcv_finish+0x340/0x3a1 [ 35.637295] ip_rcv+0x314/0x34a [ 35.637525] __netif_receive_skb_core+0x49f/0x7c5 [ 35.637780] ? lock_acquire+0x13f/0x1d7 [ 35.638018] ? lock_acquire+0x15e/0x1d7 [ 35.638259] __netif_receive_skb+0x1e/0x94 [ 35.638502] ? __netif_receive_skb+0x1e/0x94 [ 35.638748] netif_receive_skb_internal+0x74/0x300 [ 35.639002] ? dev_gro_receive+0x2ed/0x411 [ 35.639246] ? lock_is_held_type+0xc4/0xd2 [ 35.639491] napi_gro_receive+0x105/0x1a0 [ 35.639736] receive_buf+0xc32/0xc74 [ 35.639965] ? detach_buf+0x67/0x153 [ 35.640201] ? virtqueue_get_buf_ctx+0x120/0x176 [ 35.640453] virtnet_poll+0x128/0x1c5 [ 35.640690] net_rx_action+0x103/0x343 [ 35.640932] __do_softirq+0x1c7/0x4b7 [ 35.641171] run_ksoftirqd+0x23/0x5c [ 35.641403] smpboot_thread_fn+0x24f/0x26d [ 35.641646] ? sort_range+0x22/0x22 [ 35.641878] kthread+0x129/0x131 [ 35.642104] ? __list_add+0x31/0x31 [ 35.642335] ? __list_add+0x31/0x31 [ 35.642568] ret_from_fork+0x2a/0x40 [ 35.642804] Code: 05 bd 87 a3 00 01 e8 1f ef 98 ff 4d 85 f6 48 c7 c7 f0 4a 27 82 41 0f 94 c4 31 c9 31 d2 41 0f b6 f4 e8 04 71 a1 ff 45 84 e4 74 02 <0f> 0b 0f b7 93 c4 00 00 00 4d 8b a5 80 05 00 00 48 03 93 d0 00 [ 35.644342] RIP: fib_compute_spec_dst+0xfc/0x1ee RSP: ffff88005ad67978 Fixes: 193125dbd8eb ("net: Introduce VRF device driver") Reported-by: Chris Cormier Signed-off-by: Nikolay Aleksandrov Acked-by: David Ahern Signed-off-by: David S. 
Miller [backport to 4.4 - gregkh] Signed-off-by: Greg Kroah-Hartman --- drivers/net/vrf.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 349aecbc210a..ac945f8781ac 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -733,15 +733,15 @@ static int vrf_del_slave(struct net_device *dev, struct net_device *port_dev) static void vrf_dev_uninit(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); - struct slave_queue *queue = &vrf->queue; - struct list_head *head = &queue->all_slaves; - struct slave *slave, *next; +// struct slave_queue *queue = &vrf->queue; +// struct list_head *head = &queue->all_slaves; +// struct slave *slave, *next; vrf_rtable_destroy(vrf); vrf_rt6_destroy(vrf); - list_for_each_entry_safe(slave, next, head, list) - vrf_del_slave(dev, slave->dev); +// list_for_each_entry_safe(slave, next, head, list) +// vrf_del_slave(dev, slave->dev); free_percpu(dev->dstats); dev->dstats = NULL; @@ -914,6 +914,14 @@ static int vrf_validate(struct nlattr *tb[], struct nlattr *data[]) static void vrf_dellink(struct net_device *dev, struct list_head *head) { + struct net_vrf *vrf = netdev_priv(dev); + struct slave_queue *queue = &vrf->queue; + struct list_head *all_slaves = &queue->all_slaves; + struct slave *slave, *next; + + list_for_each_entry_safe(slave, next, all_slaves, list) + vrf_del_slave(dev, slave->dev); + unregister_netdevice_queue(dev, head); } From 9618eb4af306e7f21330a6acf22cbec037d17f22 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 6 Jul 2017 08:15:06 -0700 Subject: [PATCH 129/176] rds: tcp: use sock_create_lite() to create the accept socket commit 0933a578cd55b02dc80f219dc8f2efb17ec61c9a upstream. There are two problems with calling sock_create_kern() from rds_tcp_accept_one() 1. it sets up a new_sock->sk that is wasteful, because this ->sk is going to get replaced by inet_accept() in the subsequent ->accept() 2. The new_sock->sk is a leaked reference in sock_graft() which expects to find a null parent->sk Avoid these problems by calling sock_create_lite(). Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/tcp_listen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 0936a4a32b47..e353e3255206 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -78,7 +78,7 @@ int rds_tcp_accept_one(struct socket *sock) struct inet_sock *inet; struct rds_tcp_connection *rs_tcp; - ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family, + ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type, sock->sk->sk_protocol, &new_sock); if (ret) From 4c7021c2fb74047649c03845ce6fd13626a5a418 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Fri, 7 Jul 2017 21:09:06 +0100 Subject: [PATCH 130/176] brcmfmac: fix possible buffer overflow in brcmf_cfg80211_mgmt_tx() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8f44c9a41386729fea410e688959ddaa9d51be7c upstream. The lower level nl80211 code in cfg80211 ensures that "len" is between 25 and NL80211_ATTR_FRAME (2304). We subtract DOT11_MGMT_HDR_LEN (24) from "len" so thats's max of 2280. However, the action_frame->data[] buffer is only BRCMF_FIL_ACTION_FRAME_SIZE (1800) bytes long so this memcpy() can overflow. 
memcpy(action_frame->data, &buf[DOT11_MGMT_HDR_LEN], le16_to_cpu(action_frame->len)); Fixes: 18e2f61db3b70 ("brcmfmac: P2P action frame tx.") Reported-by: "freenerguo(郭大兴)" Signed-off-by: Arend van Spriel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c index 70a6985334d5..da5826d788d6 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c @@ -4472,6 +4472,11 @@ brcmf_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, cfg80211_mgmt_tx_status(wdev, *cookie, buf, len, true, GFP_KERNEL); } else if (ieee80211_is_action(mgmt->frame_control)) { + if (len > BRCMF_FIL_ACTION_FRAME_SIZE + DOT11_MGMT_HDR_LEN) { + brcmf_err("invalid action frame length\n"); + err = -EINVAL; + goto exit; + } af_params = kzalloc(sizeof(*af_params), GFP_KERNEL); if (af_params == NULL) { brcmf_err("unable to allocate frame\n"); From 05bf0b6ef9ce7e8967c96fd419ad0ee5d7fe5418 Mon Sep 17 00:00:00 2001 From: Srinivas Dasari Date: Fri, 7 Jul 2017 01:43:41 +0300 Subject: [PATCH 131/176] cfg80211: Define nla_policy for NL80211_ATTR_LOCAL_MESH_POWER_MODE commit 8feb69c7bd89513be80eb19198d48f154b254021 upstream. Buffer overread may happen as nl80211_set_station() reads 4 bytes from the attribute NL80211_ATTR_LOCAL_MESH_POWER_MODE without validating the size of data received when userspace sends less than 4 bytes of data with NL80211_ATTR_LOCAL_MESH_POWER_MODE. Define nla_policy for NL80211_ATTR_LOCAL_MESH_POWER_MODE to avoid the buffer overread. Fixes: 3b1c5a5307f ("{cfg,nl}80211: mesh power mode primitives and userspace access") Signed-off-by: Srinivas Dasari Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9d0953e5734f..c1dec30a226b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -359,6 +359,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 }, + [NL80211_ATTR_LOCAL_MESH_POWER_MODE] = {. type = NLA_U32 }, [NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 }, [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, From 24d0410712b06e102d330c6ccb817e56b3c85ec9 Mon Sep 17 00:00:00 2001 From: Srinivas Dasari Date: Fri, 7 Jul 2017 01:43:42 +0300 Subject: [PATCH 132/176] cfg80211: Validate frequencies nested in NL80211_ATTR_SCAN_FREQUENCIES commit d7f13f7450369281a5d0ea463cc69890a15923ae upstream. validate_scan_freqs() retrieves frequencies from attributes nested in the attribute NL80211_ATTR_SCAN_FREQUENCIES with nla_get_u32(), which reads 4 bytes from each attribute without validating the size of data received. Attributes nested in NL80211_ATTR_SCAN_FREQUENCIES don't have an nla policy. Validate size of each attribute before parsing to avoid potential buffer overread. 
Fixes: 2a519311926 ("cfg80211/nl80211: scanning (and mac80211 update to use it)") Signed-off-by: Srinivas Dasari Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c1dec30a226b..793291cabb44 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5706,6 +5706,10 @@ static int validate_scan_freqs(struct nlattr *freqs) struct nlattr *attr1, *attr2; int n_channels = 0, tmp1, tmp2; + nla_for_each_nested(attr1, freqs, tmp1) + if (nla_len(attr1) != sizeof(u32)) + return 0; + nla_for_each_nested(attr1, freqs, tmp1) { n_channels++; /* From 2d3c10e215713f242e0e0d67a38a2deb00e92a1b Mon Sep 17 00:00:00 2001 From: Srinivas Dasari Date: Fri, 7 Jul 2017 01:43:39 +0300 Subject: [PATCH 133/176] cfg80211: Check if PMKID attribute is of expected size commit 9361df14d1cbf966409d5d6f48bb334384fbe138 upstream. nla policy checks for only maximum length of the attribute data when the attribute type is NLA_BINARY. If userspace sends less data than specified, the wireless drivers may access illegal memory. When type is NLA_UNSPEC, nla policy check ensures that userspace sends minimum specified length number of bytes. Remove type assignment to NLA_BINARY from nla_policy of NL80211_ATTR_PMKID to make this NLA_UNSPEC and to make sure minimum WLAN_PMKID_LEN bytes are received from userspace with NL80211_ATTR_PMKID. Fixes: 67fbb16be69d ("nl80211: PMKSA caching support") Signed-off-by: Srinivas Dasari Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 793291cabb44..de10e3c0e2a4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -302,8 +302,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, [NL80211_ATTR_PID] = { .type = NLA_U32 }, [NL80211_ATTR_4ADDR] = { .type = NLA_U8 }, - [NL80211_ATTR_PMKID] = { .type = NLA_BINARY, - .len = WLAN_PMKID_LEN }, + [NL80211_ATTR_PMKID] = { .len = WLAN_PMKID_LEN }, [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, [NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED }, From 970616464f43b33e309996dc58bbf49a8a29f5e4 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 30 Jun 2017 10:58:28 +0100 Subject: [PATCH 134/176] irqchip/gic-v3: Fix out-of-bound access in gic_set_affinity commit 866d7c1b0a3c70387646c4e455e727a58c5d465a upstream. The GICv3 driver doesn't check if the target CPU for gic_set_affinity is valid before going ahead and making the changes. 
This triggers the following splat with KASAN: [ 141.189434] BUG: KASAN: global-out-of-bounds in gic_set_affinity+0x8c/0x140 [ 141.189704] Read of size 8 at addr ffff200009741d20 by task swapper/1/0 [ 141.189958] [ 141.190158] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.12.0-rc7 [ 141.190458] Hardware name: Foundation-v8A (DT) [ 141.190658] Call trace: [ 141.190908] [] dump_backtrace+0x0/0x328 [ 141.191224] [] show_stack+0x14/0x20 [ 141.191507] [] dump_stack+0xa4/0xc8 [ 141.191858] [] print_address_description+0x13c/0x250 [ 141.192219] [] kasan_report+0x210/0x300 [ 141.192547] [] __asan_load8+0x84/0x98 [ 141.192874] [] gic_set_affinity+0x8c/0x140 [ 141.193158] [] irq_do_set_affinity+0x54/0xb8 [ 141.193473] [] irq_set_affinity_locked+0x64/0xf0 [ 141.193828] [] __irq_set_affinity+0x48/0x78 [ 141.194158] [] arm_perf_starting_cpu+0x104/0x150 [ 141.194513] [] cpuhp_invoke_callback+0x17c/0x1f8 [ 141.194783] [] notify_cpu_starting+0x8c/0xb8 [ 141.195130] [] secondary_start_kernel+0x15c/0x200 [ 141.195390] [<0000000080db81b4>] 0x80db81b4 [ 141.195603] [ 141.195685] The buggy address belongs to the variable: [ 141.196012] __cpu_logical_map+0x200/0x220 [ 141.196176] [ 141.196315] Memory state around the buggy address: [ 141.196586] ffff200009741c00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 141.196913] ffff200009741c80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 141.197158] >ffff200009741d00: 00 00 00 00 fa fa fa fa 00 00 00 00 00 00 00 00 [ 141.197487] ^ [ 141.197758] ffff200009741d80: 00 00 00 00 00 00 00 00 fa fa fa fa 00 00 00 00 [ 141.198060] ffff200009741e00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 141.198358] ================================================================== [ 141.198609] Disabling lock debugging due to kernel taint [ 141.198961] CPU1: Booted secondary processor [410fd051] This patch adds the check to make sure the cpu is valid. Fixes: commit 021f653791ad17e03f98 ("irqchip: gic-v3: Initial support for GICv3") Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index e33c729b9f48..5a1490b046ac 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -632,6 +632,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, int enabled; u64 val; + if (cpu >= nr_cpu_ids) + return -EINVAL; + if (gic_irq_in_rdist(d)) return -EINVAL; From e18ca17b9040bedd3768a257d46537471eff45a1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 2 Jul 2017 22:00:41 +0200 Subject: [PATCH 135/176] parisc: Report SIGSEGV instead of SIGBUS when running out of stack commit 247462316f85a9e0479445c1a4223950b68ffac1 upstream. When a process runs out of stack the parisc kernel wrongly faults with SIGBUS instead of the expected SIGSEGV signal. This example shows how the kernel faults: do_page_fault() command='a.out' type=15 address=0xfaac2000 in libc-2.24.so[f8308000+16c000] trap #15: Data TLB miss fault, vm_start = 0xfa2c2000, vm_end = 0xfaac2000 The vma->vm_end value is the first address which does not belong to the vma, so adjust the check to include vma->vm_end to the range for which to send the SIGSEGV signal. This patch unbreaks building the debian libsigsegv package. 
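To spell out the off-by-one with the addresses from the do_page_fault() example above (vm_end is exclusive, i.e. the first address past the mapping):

/* Worked example, derived from the fault log in this commit message:
 * address       = 0xfaac2000
 * vma->vm_start = 0xfa2c2000
 * vma->vm_end   = 0xfaac2000   (first byte NOT in the vma)
 *
 * old check: address >  vm_end  -> false -> treated as mapped -> SIGBUS
 * new check: address >= vm_end  -> true  -> SEGV_MAPERR       -> SIGSEGV
 */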
Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index f9064449908a..d8c2f3bcfc18 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -303,7 +303,7 @@ bad_area: case 15: /* Data TLB miss fault/Data page fault */ /* send SIGSEGV when outside of vma */ if (!vma || - address < vma->vm_start || address > vma->vm_end) { + address < vma->vm_start || address >= vma->vm_end) { si.si_signo = SIGSEGV; si.si_code = SEGV_MAPERR; break; From f265641dc874b01f816fadd70f175ee8de806266 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 12 Jun 2017 23:18:30 -0700 Subject: [PATCH 136/176] parisc: use compat_sys_keyctl() commit b0f94efd5aa8daa8a07d7601714c2573266cd4c9 upstream. Architectures with a compat syscall table must put compat_sys_keyctl() in it, not sys_keyctl(). The parisc architecture was not doing this; fix it. Signed-off-by: Eric Biggers Acked-by: Helge Deller Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/syscall_table.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index d4ffcfbc9885..041e1f9ec129 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -361,7 +361,7 @@ ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */ ENTRY_SAME(add_key) ENTRY_SAME(request_key) /* 265 */ - ENTRY_SAME(keyctl) + ENTRY_COMP(keyctl) ENTRY_SAME(ioprio_set) ENTRY_SAME(ioprio_get) ENTRY_SAME(inotify_init) From 635a58225c360e293154a05fc66843dc686989fe Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Mon, 3 Jul 2017 10:38:05 +0200 Subject: [PATCH 137/176] parisc: DMA API: return error instead of BUG_ON for dma ops on non dma devs commit 33f9e02495d15a061f0c94ef46f5103a2d0c20f3 upstream. Enabling the parport_pc driver on a B2600 (and probably other 64-bit PARISC systems) produced the following BUG: CPU: 0 PID: 1 Comm: swapper Not tainted 4.12.0-rc5-30198-g1132d5e #156 task: 000000009e050000 task.stack: 000000009e04c000 YZrvWESTHLNXBCVMcbcbcbcbOGFRQPDI PSW: 00001000000001101111111100001111 Not tainted r00-03 000000ff0806ff0f 000000009e04c990 0000000040871b78 000000009e04cac0 r04-07 0000000040c14de0 ffffffffffffffff 000000009e07f098 000000009d82d200 r08-11 000000009d82d210 0000000000000378 0000000000000000 0000000040c345e0 r12-15 0000000000000005 0000000040c345e0 0000000000000000 0000000040c9d5e0 r16-19 0000000040c345e0 00000000f00001c4 00000000f00001bc 0000000000000061 r20-23 000000009e04ce28 0000000000000010 0000000000000010 0000000040b89e40 r24-27 0000000000000003 0000000000ffffff 000000009d82d210 0000000040c14de0 r28-31 0000000000000000 000000009e04ca90 000000009e04cb40 0000000000000000 sr00-03 0000000000000000 0000000000000000 0000000000000000 0000000000000000 sr04-07 0000000000000000 0000000000000000 0000000000000000 0000000000000000 IASQ: 0000000000000000 0000000000000000 IAOQ: 00000000404aece0 00000000404aece4 IIR: 03ffe01f ISR: 0000000010340000 IOR: 000001781304cac8 CPU: 0 CR30: 000000009e04c000 CR31: 00000000e2976de2 ORIG_R28: 0000000000000200 IAOQ[0]: sba_dma_supported+0x80/0xd0 IAOQ[1]: sba_dma_supported+0x84/0xd0 RP(r2): parport_pc_probe_port+0x178/0x1200 Cause is a call to dma_coerce_mask_and_coherent() in parport_pc_probe_port, which the PARISC DMA API doesn't handle very nicely.
This commit gives back DMA_ERROR_CODE for DMA API calls, if device isn't capable of DMA transaction. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/dma-mapping.h | 11 +++++++---- drivers/parisc/ccio-dma.c | 12 ++++++++++++ drivers/parisc/dino.c | 5 ++++- drivers/parisc/lba_pci.c | 6 ++++-- drivers/parisc/sba_iommu.c | 14 ++++++++++++++ 5 files changed, 41 insertions(+), 7 deletions(-) diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h index d8d60a57183f..f53725202955 100644 --- a/arch/parisc/include/asm/dma-mapping.h +++ b/arch/parisc/include/asm/dma-mapping.h @@ -39,6 +39,8 @@ struct hppa_dma_ops { ** flush/purge and allocate "regular" cacheable pages for everything. */ +#define DMA_ERROR_CODE (~(dma_addr_t)0) + #ifdef CONFIG_PA11 extern struct hppa_dma_ops pcxl_dma_ops; extern struct hppa_dma_ops pcx_dma_ops; @@ -209,12 +211,13 @@ parisc_walk_tree(struct device *dev) break; } } - BUG_ON(!dev->platform_data); return dev->platform_data; } - -#define GET_IOC(dev) (HBA_DATA(parisc_walk_tree(dev))->iommu) - + +#define GET_IOC(dev) ({ \ + void *__pdata = parisc_walk_tree(dev); \ + __pdata ? HBA_DATA(__pdata)->iommu : NULL; \ +}) #ifdef CONFIG_IOMMU_CCIO struct parisc_device; diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 8e11fb2831cd..34f1d6b41fb9 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -741,6 +741,8 @@ ccio_map_single(struct device *dev, void *addr, size_t size, BUG_ON(!dev); ioc = GET_IOC(dev); + if (!ioc) + return DMA_ERROR_CODE; BUG_ON(size <= 0); @@ -805,6 +807,10 @@ ccio_unmap_single(struct device *dev, dma_addr_t iova, size_t size, BUG_ON(!dev); ioc = GET_IOC(dev); + if (!ioc) { + WARN_ON(!ioc); + return; + } DBG_RUN("%s() iovp 0x%lx/%x\n", __func__, (long)iova, size); @@ -908,6 +914,8 @@ ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents, BUG_ON(!dev); ioc = GET_IOC(dev); + if (!ioc) + return 0; DBG_RUN_SG("%s() START %d entries\n", __func__, nents); @@ -980,6 +988,10 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, BUG_ON(!dev); ioc = GET_IOC(dev); + if (!ioc) { + WARN_ON(!ioc); + return; + } DBG_RUN_SG("%s() START %d entries, %p,%x\n", __func__, nents, sg_virt(sglist), sglist->length); diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index a0580afe1713..7b0ca1551d7b 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -154,7 +154,10 @@ struct dino_device }; /* Looks nice and keeps the compiler happy */ -#define DINO_DEV(d) ((struct dino_device *) d) +#define DINO_DEV(d) ({ \ + void *__pdata = d; \ + BUG_ON(!__pdata); \ + (struct dino_device *)__pdata; }) /* diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index 42844c2bc065..d0c2759076a2 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c @@ -111,8 +111,10 @@ static u32 lba_t32; /* Looks nice and keeps the compiler happy */ -#define LBA_DEV(d) ((struct lba_device *) (d)) - +#define LBA_DEV(d) ({ \ + void *__pdata = d; \ + BUG_ON(!__pdata); \ + (struct lba_device *)__pdata; }) /* ** Only allow 8 subsidiary busses per LBA diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 225049b492e5..d6326144ce01 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -691,6 +691,8 @@ static int sba_dma_supported( struct device *dev, u64 mask) return 0; ioc = GET_IOC(dev); + if (!ioc) + return 0; /* * check if mask is >= than the 
current max IO Virt Address @@ -722,6 +724,8 @@ sba_map_single(struct device *dev, void *addr, size_t size, int pide; ioc = GET_IOC(dev); + if (!ioc) + return DMA_ERROR_CODE; /* save offset bits */ offset = ((dma_addr_t) (long) addr) & ~IOVP_MASK; @@ -803,6 +807,10 @@ sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, DBG_RUN("%s() iovp 0x%lx/%x\n", __func__, (long) iova, size); ioc = GET_IOC(dev); + if (!ioc) { + WARN_ON(!ioc); + return; + } offset = iova & ~IOVP_MASK; iova ^= offset; /* clear offset bits */ size += offset; @@ -942,6 +950,8 @@ sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, DBG_RUN_SG("%s() START %d entries\n", __func__, nents); ioc = GET_IOC(dev); + if (!ioc) + return 0; /* Fast path single entry scatterlists. */ if (nents == 1) { @@ -1027,6 +1037,10 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, __func__, nents, sg_virt(sglist), sglist->length); ioc = GET_IOC(dev); + if (!ioc) { + WARN_ON(!ioc); + return; + } #ifdef SBA_COLLECT_STATS ioc->usg_calls++; From b29145746576a638f141e16f9438f0e98a8e3ea0 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 29 May 2017 17:14:16 +0200 Subject: [PATCH 138/176] parisc/mm: Ensure IRQs are off in switch_mm() commit 649aa24254e85bf6bd7807dd372d083707852b1f upstream. This is because of commit f98db6013c55 ("sched/core: Add switch_mm_irqs_off() and use it in the scheduler") in which switch_mm_irqs_off() is called by the scheduler, vs switch_mm() which is used by use_mm(). This patch lets the parisc code mirror the x86 and powerpc code, ie. it disables interrupts in switch_mm(), and optimises the scheduler case by defining switch_mm_irqs_off(). Signed-off-by: Helge Deller Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/mmu_context.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h index 59be25764433..a81226257878 100644 --- a/arch/parisc/include/asm/mmu_context.h +++ b/arch/parisc/include/asm/mmu_context.h @@ -49,15 +49,26 @@ static inline void load_context(mm_context_t context) mtctl(__space_to_prot(context), 8); } -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) +static inline void switch_mm_irqs_off(struct mm_struct *prev, + struct mm_struct *next, struct task_struct *tsk) { - if (prev != next) { mtctl(__pa(next->pgd), 25); load_context(next->context); } } +static inline void switch_mm(struct mm_struct *prev, + struct mm_struct *next, struct task_struct *tsk) +{ + unsigned long flags; + + local_irq_save(flags); + switch_mm_irqs_off(prev, next, tsk); + local_irq_restore(flags); +} +#define switch_mm_irqs_off switch_mm_irqs_off + #define deactivate_mm(tsk,mm) do { } while (0) static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) From 0d6ee0bea866a40c537a0d2277e0bf12430e1334 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 25 May 2017 12:58:33 +0000 Subject: [PATCH 139/176] tools/lib/lockdep: Reduce MAX_LOCK_DEPTH to avoid overflowing lock_chain/: Depth commit 98dcea0cfd04e083ac74137ceb9a632604740e2d upstream. liblockdep has been broken since commit 75dd602a5198 ("lockdep: Fix lock_chain::base size"), as that adds a check that MAX_LOCK_DEPTH is within the range of lock_chain::depth and in liblockdep it is much too large. 
That should have resulted in a compiler error, but didn't because: - the check uses ARRAY_SIZE(), which isn't yet defined in liblockdep so is assumed to be an (undeclared) function - putting a function call inside a BUILD_BUG_ON() expression quietly turns it into some nonsense involving a variable-length array It did produce a compiler warning, but I didn't notice because liblockdep already produces too many warnings if -Wall is enabled (which I'll fix shortly). Even before that commit, which reduced lock_chain::depth from 8 bits to 6, MAX_LOCK_DEPTH was too large. Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a.p.zijlstra@chello.nl Link: http://lkml.kernel.org/r/20170525130005.5947-3-alexander.levin@verizon.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/lib/lockdep/uinclude/linux/lockdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/lockdep/uinclude/linux/lockdep.h b/tools/lib/lockdep/uinclude/linux/lockdep.h index c808c7d02d21..e69118b2077e 100644 --- a/tools/lib/lockdep/uinclude/linux/lockdep.h +++ b/tools/lib/lockdep/uinclude/linux/lockdep.h @@ -8,7 +8,7 @@ #include #include -#define MAX_LOCK_DEPTH 2000UL +#define MAX_LOCK_DEPTH 255UL #define asmlinkage #define __visible From 717ce69e47f0d77571b5a564784d16c1d920ee9b Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Thu, 6 Jul 2017 15:35:31 -0700 Subject: [PATCH 140/176] kernel/extable.c: mark core_kernel_text notrace commit c0d80ddab89916273cb97114889d3f337bc370ae upstream. core_kernel_text is used by MIPS in its function graph trace processing, so having this method traced leads to an infinite set of recursive calls such as: Call Trace: ftrace_return_to_handler+0x50/0x128 core_kernel_text+0x10/0x1b8 prepare_ftrace_return+0x6c/0x114 ftrace_graph_caller+0x20/0x44 return_to_handler+0x10/0x30 return_to_handler+0x0/0x30 return_to_handler+0x0/0x30 ftrace_ops_no_ops+0x114/0x1bc core_kernel_text+0x10/0x1b8 core_kernel_text+0x10/0x1b8 core_kernel_text+0x10/0x1b8 ftrace_ops_no_ops+0x114/0x1bc core_kernel_text+0x10/0x1b8 prepare_ftrace_return+0x6c/0x114 ftrace_graph_caller+0x20/0x44 (...) Mark the function notrace to avoid it being traced. Link: http://lkml.kernel.org/r/1498028607-6765-1-git-send-email-marcin.nowakowski@imgtec.com Signed-off-by: Marcin Nowakowski Reviewed-by: Masami Hiramatsu Cc: Peter Zijlstra Cc: Thomas Meyer Cc: Ingo Molnar Cc: Steven Rostedt Cc: Daniel Borkmann Cc: Paul Gortmaker Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/extable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/extable.c b/kernel/extable.c index e820ccee9846..4f06fc34313f 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -66,7 +66,7 @@ static inline int init_kernel_text(unsigned long addr) return 0; } -int core_kernel_text(unsigned long addr) +int notrace core_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_stext && addr < (unsigned long)_etext) From 2d0db02d2e8f45e215fd2e646820c669c6e70159 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Mon, 10 Jul 2017 15:49:57 -0700 Subject: [PATCH 141/176] mm/list_lru.c: fix list_lru_count_node() to be race free commit 2c80cd57c74339889a8752b20862a16c28929c3a upstream. 
list_lru_count_node() iterates over all memcgs to get the total number of entries on the node but it can race with memcg_drain_all_list_lrus(), which migrates the entries from a dead cgroup to another. This can return incorrect number of entries from list_lru_count_node(). Fix this by keeping track of entries per node and simply return it in list_lru_count_node(). Link: http://lkml.kernel.org/r/1498707555-30525-1-git-send-email-stummala@codeaurora.org Signed-off-by: Sahitya Tummala Acked-by: Vladimir Davydov Cc: Jan Kara Cc: Alexander Polakov Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/list_lru.h | 1 + mm/list_lru.c | 14 ++++++-------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 2a6b9947aaa3..743b34f56f2b 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -44,6 +44,7 @@ struct list_lru_node { /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ struct list_lru_memcg *memcg_lrus; #endif + long nr_items; } ____cacheline_aligned_in_smp; struct list_lru { diff --git a/mm/list_lru.c b/mm/list_lru.c index 5d8dffd5b57c..786176b1a0ee 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -117,6 +117,7 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item) l = list_lru_from_kmem(nlru, item); list_add_tail(item, &l->list); l->nr_items++; + nlru->nr_items++; spin_unlock(&nlru->lock); return true; } @@ -136,6 +137,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item) l = list_lru_from_kmem(nlru, item); list_del_init(item); l->nr_items--; + nlru->nr_items--; spin_unlock(&nlru->lock); return true; } @@ -183,15 +185,10 @@ EXPORT_SYMBOL_GPL(list_lru_count_one); unsigned long list_lru_count_node(struct list_lru *lru, int nid) { - long count = 0; - int memcg_idx; + struct list_lru_node *nlru; - count += __list_lru_count_one(lru, nid, -1); - if (list_lru_memcg_aware(lru)) { - for_each_memcg_cache_index(memcg_idx) - count += __list_lru_count_one(lru, nid, memcg_idx); - } - return count; + nlru = &lru->node[nid]; + return nlru->nr_items; } EXPORT_SYMBOL_GPL(list_lru_count_node); @@ -226,6 +223,7 @@ restart: assert_spin_locked(&nlru->lock); case LRU_REMOVED: isolated++; + nlru->nr_items--; /* * If the lru lock has been dropped, our list * traversal is now invalid and so we have to From 68b0f5d85b3a25ee452424e18fe06b93da8dd4d2 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Mon, 10 Jul 2017 15:50:00 -0700 Subject: [PATCH 142/176] fs/dcache.c: fix spin lockup issue on nlru->lock commit b17c070fb624cf10162cf92ea5e1ec25cd8ac176 upstream. __list_lru_walk_one() acquires nlru spin lock (nlru->lock) for longer duration if there are more number of items in the lru list. As per the current code, it can hold the spin lock for upto maximum UINT_MAX entries at a time. 
So if there are more number of items in the lru list, then "BUG: spinlock lockup suspected" is observed in the below path: spin_bug+0x90 do_raw_spin_lock+0xfc _raw_spin_lock+0x28 list_lru_add+0x28 dput+0x1c8 path_put+0x20 terminate_walk+0x3c path_lookupat+0x100 filename_lookup+0x6c user_path_at_empty+0x54 SyS_faccessat+0xd0 el0_svc_naked+0x24 This nlru->lock is acquired by another CPU in this path - d_lru_shrink_move+0x34 dentry_lru_isolate_shrink+0x48 __list_lru_walk_one.isra.10+0x94 list_lru_walk_node+0x40 shrink_dcache_sb+0x60 do_remount_sb+0xbc do_emergency_remount+0xb0 process_one_work+0x228 worker_thread+0x2e0 kthread+0xf4 ret_from_fork+0x10 Fix this lockup by reducing the number of entries to be shrinked from the lru list to 1024 at once. Also, add cond_resched() before processing the lru list again. Link: http://marc.info/?t=149722864900001&r=1&w=2 Link: http://lkml.kernel.org/r/1498707575-2472-1-git-send-email-stummala@codeaurora.org Signed-off-by: Sahitya Tummala Suggested-by: Jan Kara Suggested-by: Vladimir Davydov Acked-by: Vladimir Davydov Cc: Alexander Polakov Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 849c1c1e787b..3000cbb54949 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1128,11 +1128,12 @@ void shrink_dcache_sb(struct super_block *sb) LIST_HEAD(dispose); freed = list_lru_walk(&sb->s_dentry_lru, - dentry_lru_isolate_shrink, &dispose, UINT_MAX); + dentry_lru_isolate_shrink, &dispose, 1024); this_cpu_sub(nr_dentry_unused, freed); shrink_dentry_list(&dispose); - } while (freed > 0); + cond_resched(); + } while (list_lru_count(&sb->s_dentry_lru) > 0); } EXPORT_SYMBOL(shrink_dcache_sb); From 4544e9ebef4c2f2536d3c7ca6c6a22d0f9f13d2f Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 10 Jul 2017 15:52:21 -0700 Subject: [PATCH 143/176] checkpatch: silence perl 5.26.0 unescaped left brace warnings commit 8d81ae05d0176da1c54aeaed697fa34be5c5575e upstream. As of perl 5, version 26, subversion 0 (v5.26.0) some new warnings have occurred when running checkpatch. Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through in regex; marked by <-- HERE in m/^(.\s*){ <-- HERE \s*/ at scripts/checkpatch.pl line 3544. Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through in regex; marked by <-- HERE in m/^(.\s*){ <-- HERE \s*/ at scripts/checkpatch.pl line 3885. Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through in regex; marked by <-- HERE in m/^(\+.*(?:do|\))){ <-- HERE / at scripts/checkpatch.pl line 4374. It seems perfectly reasonable to do as the warning suggests and simply escape the left brace in these three locations. 
Link: http://lkml.kernel.org/r/20170607060135.17384-1-cyrilbur@gmail.com Signed-off-by: Cyril Bur Acked-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- scripts/checkpatch.pl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 2b3c22808c3b..6ac6550d751c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3252,7 +3252,7 @@ sub process { $fixedline =~ s/\s*=\s*$/ = {/; fix_insert_line($fixlinenr, $fixedline); $fixedline = $line; - $fixedline =~ s/^(.\s*){\s*/$1/; + $fixedline =~ s/^(.\s*)\{\s*/$1/; fix_insert_line($fixlinenr, $fixedline); } } @@ -3602,7 +3602,7 @@ sub process { my $fixedline = rtrim($prevrawline) . " {"; fix_insert_line($fixlinenr, $fixedline); $fixedline = $rawline; - $fixedline =~ s/^(.\s*){\s*/$1\t/; + $fixedline =~ s/^(.\s*)\{\s*/$1\t/; if ($fixedline !~ /^\+\s*$/) { fix_insert_line($fixlinenr, $fixedline); } @@ -4091,7 +4091,7 @@ sub process { if (ERROR("SPACING", "space required before the open brace '{'\n" . $herecurr) && $fix) { - $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/; + $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\)))\{/$1 {/; } } From 7eb968cd04d404e6c73cd82c1122f6e06ad2d1e8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Jul 2017 15:52:37 -0700 Subject: [PATCH 144/176] binfmt_elf: use ELF_ET_DYN_BASE only for PIE commit eab09532d40090698b05a07c1c87f39fdbc5fab5 upstream. The ELF_ET_DYN_BASE position was originally intended to keep loaders away from ET_EXEC binaries. (For example, running "/lib/ld-linux.so.2 /bin/cat" might cause the subsequent load of /bin/cat into where the loader had been loaded.) With the advent of PIE (ET_DYN binaries with an INTERP Program Header), ELF_ET_DYN_BASE continued to be used since the kernel was only looking at ET_DYN. However, since ELF_ET_DYN_BASE is traditionally set at the top 1/3rd of the TASK_SIZE, a substantial portion of the address space is unused. For 32-bit tasks when RLIMIT_STACK is set to RLIM_INFINITY, programs are loaded above the mmap region. This means they can be made to collide (CVE-2017-1000370) or nearly collide (CVE-2017-1000371) with pathological stack regions. Lowering ELF_ET_DYN_BASE solves both by moving programs below the mmap region in all cases, and will now additionally avoid programs falling back to the mmap region by enforcing MAP_FIXED for program loads (i.e. if it would have collided with the stack, now it will fail to load instead of falling back to the mmap region). To allow for a lower ELF_ET_DYN_BASE, loaders (ET_DYN without INTERP) are loaded into the mmap region, leaving space available for either an ET_EXEC binary with a fixed location or PIE being loaded into mmap by the loader. Only PIE programs are loaded offset from ELF_ET_DYN_BASE, which means architectures can now safely lower their values without risk of loaders colliding with their subsequently loaded programs. For 64-bit, ELF_ET_DYN_BASE is best set to 4GB to allow runtimes to use the entire 32-bit address space for 32-bit pointers. Thanks to PaX Team, Daniel Micay, and Rik van Riel for inspiration and suggestions on how to implement this solution. Fixes: d1fd836dcf00 ("mm: split ET_DYN ASLR from mmap ASLR") Link: http://lkml.kernel.org/r/20170621173201.GA114489@beast Signed-off-by: Kees Cook Acked-by: Rik van Riel Cc: Daniel Micay Cc: Qualys Security Advisory Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Alexander Viro Cc: Dmitry Safonov Cc: Andy Lutomirski Cc: Grzegorz Andrejczuk Cc: Masahiro Yamada Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Heiko Carstens Cc: James Hogan Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Pratyush Anand Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/elf.h | 13 +++++---- fs/binfmt_elf.c | 59 ++++++++++++++++++++++++++++++++------ 2 files changed, 58 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index d262f985bbc8..07cf288b692e 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -245,12 +245,13 @@ extern int force_personality32; #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +/* + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ +#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ + 0x100000000UL) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 29ef427c0652..f44e93d2650d 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -905,17 +905,60 @@ static int load_elf_binary(struct linux_binprm *bprm) elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE; vaddr = elf_ppnt->p_vaddr; + /* + * If we are loading ET_EXEC or we have already performed + * the ET_DYN load_addr calculations, proceed normally. + */ if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { elf_flags |= MAP_FIXED; } else if (loc->elf_ex.e_type == ET_DYN) { - /* Try and get dynamic programs out of the way of the - * default mmap base, as well as whatever program they - * might try to exec. This is because the brk will - * follow the loader, and is not movable. */ - load_bias = ELF_ET_DYN_BASE - vaddr; - if (current->flags & PF_RANDOMIZE) - load_bias += arch_mmap_rnd(); - load_bias = ELF_PAGESTART(load_bias); + /* + * This logic is run once for the first LOAD Program + * Header for ET_DYN binaries to calculate the + * randomization (load_bias) for all the LOAD + * Program Headers, and to calculate the entire + * size of the ELF mapping (total_size). (Note that + * load_addr_set is set to true later once the + * initial mapping is performed.) + * + * There are effectively two types of ET_DYN + * binaries: programs (i.e. PIE: ET_DYN with INTERP) + * and loaders (ET_DYN without INTERP, since they + * _are_ the ELF interpreter). The loaders must + * be loaded away from programs since the program + * may otherwise collide with the loader (especially + * for ET_EXEC which does not have a randomized + * position). For example to handle invocations of + * "./ld.so someprog" to test out a new version of + * the loader, the subsequent program that the + * loader loads must avoid the loader itself, so + * they cannot share the same load range. 
Sufficient + * room for the brk must be allocated with the + * loader as well, since brk must be available with + * the loader. + * + * Therefore, programs are loaded offset from + * ELF_ET_DYN_BASE and loaders are loaded into the + * independently randomized mmap region (0 load_bias + * without MAP_FIXED). + */ + if (elf_interpreter) { + load_bias = ELF_ET_DYN_BASE; + if (current->flags & PF_RANDOMIZE) + load_bias += arch_mmap_rnd(); + elf_flags |= MAP_FIXED; + } else + load_bias = 0; + + /* + * Since load_bias is used for all subsequent loading + * calculations, we must lower it by the first vaddr + * so that the remaining calculations based on the + * ELF vaddrs will be correctly offset. The result + * is then page aligned. + */ + load_bias = ELF_PAGESTART(load_bias - vaddr); + total_size = total_mapping_size(elf_phdata, loc->elf_ex.e_phnum); if (!total_size) { From d2471b5e84f32de4e09b58f5436a4ce3ee935e32 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Jul 2017 15:52:40 -0700 Subject: [PATCH 145/176] arm: move ELF_ET_DYN_BASE to 4MB commit 6a9af90a3bcde217a1c053e135f5f43e5d5fafbd upstream. Now that explicitly executed loaders are loaded in the mmap region, we have more freedom to decide where we position PIE binaries in the address space to avoid possible collisions with mmap or stack regions. 4MB is chosen here mainly to have parity with x86, where this is the traditional minimum load location, likely to avoid historically requiring a 4MB page table entry when only a portion of the first 4MB would be used (since the NULL address is avoided). For ARM the position could be 0x8000, the standard ET_EXEC load address, but that is needlessly close to the NULL address, and anyone running PIE on 32-bit ARM will have an MMU, so the tight mapping is not needed. Link: http://lkml.kernel.org/r/1498154792-49952-2-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: James Hogan Cc: Pratyush Anand Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Alexander Viro Cc: Andy Lutomirski Cc: Daniel Micay Cc: Dmitry Safonov Cc: Grzegorz Andrejczuk Cc: Kees Cook Cc: Masahiro Yamada Cc: Qualys Security Advisory Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/elf.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index d2315ffd8f12..f13ae153fb24 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -112,12 +112,8 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +/* This is the base location for PIE (ET_DYN with INTERP) loads. */ +#define ELF_ET_DYN_BASE 0x400000UL /* When the program starts, a1 contains a pointer to a function to be registered with atexit, as per the SVR4 ABI. 
A value of 0 means we From 43cf90f788aca3fc66d3cf5b03827bafecd2de24 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Jul 2017 15:52:44 -0700 Subject: [PATCH 146/176] arm64: move ELF_ET_DYN_BASE to 4GB / 4MB commit 02445990a96e60a67526510d8b00f7e3d14101c3 upstream. Now that explicitly executed loaders are loaded in the mmap region, we have more freedom to decide where we position PIE binaries in the address space to avoid possible collisions with mmap or stack regions. For 64-bit, align to 4GB to allow runtimes to use the entire 32-bit address space for 32-bit pointers. On 32-bit use 4MB, to match ARM. This could be 0x8000, the standard ET_EXEC load address, but that is needlessly close to the NULL address, and anyone running arm compat PIE will have an MMU, so the tight mapping is not needed. Link: http://lkml.kernel.org/r/1498251600-132458-4-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/elf.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 44dd892a4bbe..9e11dbe1cec3 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -120,12 +120,11 @@ typedef struct user_fpsimd_state elf_fpregset_t; #define ELF_EXEC_PAGESIZE PAGE_SIZE /* - * This is the location that an ET_DYN program is loaded if exec'ed. Typical - * use of this is to invoke "./ld.so someprog" to test out a new version of - * the loader. We need to make sure that it is out of the way of the program - * that it will "exec", and that there is sufficient room for the brk. + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. */ -#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) +#define ELF_ET_DYN_BASE 0x100000000UL /* * When the program starts, a1 contains a pointer to a function to be @@ -165,7 +164,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #ifdef CONFIG_COMPAT -#define COMPAT_ELF_ET_DYN_BASE (2 * TASK_SIZE_32 / 3) +/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */ +#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL /* AArch32 registers. */ #define COMPAT_ELF_NGREG 18 From 72a333a0468ed9c492c4da6b017aac9456695c7a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Jul 2017 15:52:47 -0700 Subject: [PATCH 147/176] powerpc: move ELF_ET_DYN_BASE to 4GB / 4MB commit 47ebb09d54856500c5a5e14824781902b3bb738e upstream. Now that explicitly executed loaders are loaded in the mmap region, we have more freedom to decide where we position PIE binaries in the address space to avoid possible collisions with mmap or stack regions. For 64-bit, align to 4GB to allow runtimes to use the entire 32-bit address space for 32-bit pointers. On 32-bit use 4MB, which is the traditional x86 minimum load location, likely to avoid historically requiring a 4MB page table entry when only a portion of the first 4MB would be used (since the NULL address is avoided). 
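Taken together, the per-architecture definitions in this series reduce to one selection rule; a minimal userspace sketch of it (assuming a 64-bit host, with is_32bit_task() modeled as a plain flag):

#include <stdio.h>

#define PIE_BASE_COMPAT	0x000400000ULL	/* 4MB for 32-bit tasks */
#define PIE_BASE_64BIT	0x100000000ULL	/* 4GB for 64-bit tasks */

/* Stand-in for the kernel's per-task width test (is_32bit_task() etc.). */
static unsigned long long elf_et_dyn_base(int is_32bit_task)
{
	return is_32bit_task ? PIE_BASE_COMPAT : PIE_BASE_64BIT;
}

int main(void)
{
	printf("32-bit PIE base: %#llx\n", elf_et_dyn_base(1));
	printf("64-bit PIE base: %#llx\n", elf_et_dyn_base(0));
	return 0;
}

Either base keeps PIE programs well away from the stack and mmap regions while leaving the low 32-bit address space available to runtimes that want 32-bit pointers.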
Link: http://lkml.kernel.org/r/1498154792-49952-4-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Tested-by: Michael Ellerman Acked-by: Michael Ellerman Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: James Hogan Cc: Pratyush Anand Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/elf.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index ee46ffef608e..743ad7a400d6 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -23,12 +23,13 @@ #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE PAGE_SIZE -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE 0x20000000 +/* + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ +#define ELF_ET_DYN_BASE (is_32bit_task() ? 0x000400000UL : \ + 0x100000000UL) #define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0) From 7888c0296c87a6c83bf2a0311c8e929f5f11832f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Jul 2017 15:52:51 -0700 Subject: [PATCH 148/176] s390: reduce ELF_ET_DYN_BASE commit a73dc5370e153ac63718d850bddf0c9aa9d871e6 upstream. Now that explicitly executed loaders are loaded in the mmap region, we have more freedom to decide where we position PIE binaries in the address space to avoid possible collisions with mmap or stack regions. For 64-bit, align to 4GB to allow runtimes to use the entire 32-bit address space for 32-bit pointers. On 32-bit use 4MB, which is the traditional x86 minimum load location, likely to avoid historically requiring a 4MB page table entry when only a portion of the first 4MB would be used (since the NULL address is avoided). For s390 the position could be 0x10000, but that is needlessly close to the NULL address. Link: http://lkml.kernel.org/r/1498154792-49952-5-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: James Hogan Cc: Pratyush Anand Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/elf.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index bab6739a1154..b9eb7b1a49d2 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -154,14 +154,13 @@ extern unsigned int vdso_enabled; #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. 64-bit - tasks are aligned to 4GB. 
*/ -#define ELF_ET_DYN_BASE (is_32bit_task() ? \ - (STACK_TOP / 3 * 2) : \ - (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) +/* + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ +#define ELF_ET_DYN_BASE (is_compat_task() ? 0x000400000UL : \ + 0x100000000UL) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. */ From 86949eb96466ec182303afc3c386bd70cc67b991 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 7 Jul 2017 11:57:29 -0700 Subject: [PATCH 149/176] exec: Limit arg stack to at most 75% of _STK_LIM commit da029c11e6b12f321f36dac8771e833b65cec962 upstream. To avoid pathological stack usage or the need to special-case setuid execs, just limit all arg stack usage to at most 75% of _STK_LIM (6MB). Signed-off-by: Kees Cook Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 02153068a694..9c5ee2a880aa 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -206,8 +206,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, if (write) { unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; - unsigned long ptr_size; - struct rlimit *rlim; + unsigned long ptr_size, limit; /* * Since the stack will hold pointers to the strings, we @@ -236,14 +235,16 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, return page; /* - * Limit to 1/4-th the stack size for the argv+env strings. + * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM + * (whichever is smaller) for the argv+env strings. * This ensures that: * - the remaining binfmt code will not run out of stack space, * - the program will have a reasonable amount of stack left * to work from. */ - rlim = current->signal->rlim; - if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) + limit = _STK_LIM / 4 * 3; + limit = min(limit, rlimit(RLIMIT_STACK) / 4); + if (size > limit) goto fail; } From 050b074e228523d239e47576606be3c0de5dfa00 Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Sat, 3 Jun 2017 09:35:06 +0200 Subject: [PATCH 150/176] vt: fix unchecked __put_user() in tioclinux ioctls commit 6987dc8a70976561d22450b5858fc9767788cc1c upstream. Only read access is checked before this call. Actually, at the moment this is not an issue, as every in-tree arch does the same manual checks for VERIFY_READ vs VERIFY_WRITE, relying on the MMU to tell them apart, but this wasn't the case in the past and may happen again on some odd arch in the future. If anyone cares about 3.7 and earlier, this is a security hole (untested) on real 80386 CPUs. Signed-off-by: Adam Borowski Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 5ab54ef4f304..e4f69bddcfb1 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2708,13 +2708,13 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) * related to the kernel should not use this. 
*/ data = vt_get_shift_state(); - ret = __put_user(data, p); + ret = put_user(data, p); break; case TIOCL_GETMOUSEREPORTING: console_lock(); /* May be overkill */ data = mouse_reporting(); console_unlock(); - ret = __put_user(data, p); + ret = put_user(data, p); break; case TIOCL_SETVESABLANK: console_lock(); @@ -2723,7 +2723,7 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) break; case TIOCL_GETKMSGREDIRECT: data = vt_get_kmsg_redirect(); - ret = __put_user(data, p); + ret = put_user(data, p); break; case TIOCL_SETKMSGREDIRECT: if (!capable(CAP_SYS_ADMIN)) { From 7cbc3955ef3b5bda9336654f1b10a803202e7ed0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 15 May 2017 14:42:07 -0500 Subject: [PATCH 151/176] mnt: In umount propagation reparent in a separate pass commit 570487d3faf2a1d8a220e6ee10f472163123d7da upstream. It was observed that in some pathlogical cases that the current code does not unmount everything it should. After investigation it was determined that the issue is that mnt_change_mntpoint can can change which mounts are available to be unmounted during mount propagation which is wrong. The trivial reproducer is: $ cat ./pathological.sh mount -t tmpfs test-base /mnt cd /mnt mkdir 1 2 1/1 mount --bind 1 1 mount --make-shared 1 mount --bind 1 2 mount --bind 1/1 1/1 mount --bind 1/1 1/1 echo grep test-base /proc/self/mountinfo umount 1/1 echo grep test-base /proc/self/mountinfo $ unshare -Urm ./pathological.sh The expected output looks like: 46 31 0:25 / /mnt rw,relatime - tmpfs test-base rw,uid=1000,gid=1000 47 46 0:25 /1 /mnt/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 48 46 0:25 /1 /mnt/2 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 49 54 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 50 53 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 51 49 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 54 47 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 53 48 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 52 50 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 46 31 0:25 / /mnt rw,relatime - tmpfs test-base rw,uid=1000,gid=1000 47 46 0:25 /1 /mnt/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 48 46 0:25 /1 /mnt/2 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 The output without the fix looks like: 46 31 0:25 / /mnt rw,relatime - tmpfs test-base rw,uid=1000,gid=1000 47 46 0:25 /1 /mnt/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 48 46 0:25 /1 /mnt/2 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 49 54 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 50 53 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 51 49 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 54 47 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 53 48 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 52 50 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 46 31 0:25 / /mnt rw,relatime - tmpfs test-base rw,uid=1000,gid=1000 47 46 0:25 /1 /mnt/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 48 46 0:25 /1 /mnt/2 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 52 48 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base 
rw,uid=1000,gid=1000 That last mount in the output was in the propgation tree to be unmounted but was missed because the mnt_change_mountpoint changed it's parent before the walk through the mount propagation tree observed it. Fixes: 1064f874abc0 ("mnt: Tuck mounts under others instead of creating shadow/side mounts.") Acked-by: Andrei Vagin Reviewed-by: Ram Pai Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/mount.h | 1 + fs/namespace.c | 1 + fs/pnode.c | 35 ++++++++++++++++++++++++++++++----- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/fs/mount.h b/fs/mount.h index 13a4ebbbaa74..2352231d4f0f 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -57,6 +57,7 @@ struct mount { struct mnt_namespace *mnt_ns; /* containing namespace */ struct mountpoint *mnt_mp; /* where is it mounted */ struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ + struct list_head mnt_reparent; /* reparent list entry */ #ifdef CONFIG_FSNOTIFY struct hlist_head mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; diff --git a/fs/namespace.c b/fs/namespace.c index f26d18d69712..4c7174d2041e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -237,6 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); INIT_HLIST_NODE(&mnt->mnt_mp_list); + INIT_LIST_HEAD(&mnt->mnt_reparent); #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif diff --git a/fs/pnode.c b/fs/pnode.c index b394ca5307ec..948e85ad0374 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -441,7 +441,7 @@ static void mark_umount_candidates(struct mount *mnt) * NOTE: unmounting 'mnt' naturally propagates to all other mounts its * parent propagates to. */ -static void __propagate_umount(struct mount *mnt) +static void __propagate_umount(struct mount *mnt, struct list_head *to_reparent) { struct mount *parent = mnt->mnt_parent; struct mount *m; @@ -466,17 +466,38 @@ static void __propagate_umount(struct mount *mnt) */ topper = find_topper(child); if (topper) - mnt_change_mountpoint(child->mnt_parent, child->mnt_mp, - topper); + list_add_tail(&topper->mnt_reparent, to_reparent); - if (list_empty(&child->mnt_mounts)) { + if (topper || list_empty(&child->mnt_mounts)) { list_del_init(&child->mnt_child); + list_del_init(&child->mnt_reparent); child->mnt.mnt_flags |= MNT_UMOUNT; list_move_tail(&child->mnt_list, &mnt->mnt_list); } } } +static void reparent_mounts(struct list_head *to_reparent) +{ + while (!list_empty(to_reparent)) { + struct mount *mnt, *parent; + struct mountpoint *mp; + + mnt = list_first_entry(to_reparent, struct mount, mnt_reparent); + list_del_init(&mnt->mnt_reparent); + + /* Where should this mount be reparented to? */ + mp = mnt->mnt_mp; + parent = mnt->mnt_parent; + while (parent->mnt.mnt_flags & MNT_UMOUNT) { + mp = parent->mnt_mp; + parent = parent->mnt_parent; + } + + mnt_change_mountpoint(parent, mp, mnt); + } +} + /* * collect all mounts that receive propagation from the mount in @list, * and return these additional mounts in the same list. 
@@ -487,11 +508,15 @@ static void __propagate_umount(struct mount *mnt) int propagate_umount(struct list_head *list) { struct mount *mnt; + LIST_HEAD(to_reparent); list_for_each_entry_reverse(mnt, list, mnt_list) mark_umount_candidates(mnt); list_for_each_entry(mnt, list, mnt_list) - __propagate_umount(mnt); + __propagate_umount(mnt, &to_reparent); + + reparent_mounts(&to_reparent); + return 0; } From fdb8f10499924d96b8eb60cc1354b278f6fcf0e9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 24 Oct 2016 16:16:13 -0500 Subject: [PATCH 152/176] mnt: In propgate_umount handle visiting mounts in any order commit 99b19d16471e9c3faa85cad38abc9cbbe04c6d55 upstream. While investigating some poor umount performance I realized that in the case of overlapping mount trees where some of the mounts are locked the code has been failing to unmount all of the mounts it should have been unmounting. This failure to unmount all of the necessary mounts can be reproduced with: $ cat locked_mounts_test.sh mount -t tmpfs test-base /mnt mount --make-shared /mnt mkdir -p /mnt/b mount -t tmpfs test1 /mnt/b mount --make-shared /mnt/b mkdir -p /mnt/b/10 mount -t tmpfs test2 /mnt/b/10 mount --make-shared /mnt/b/10 mkdir -p /mnt/b/10/20 mount --rbind /mnt/b /mnt/b/10/20 unshare -Urm --propagation unchaged /bin/sh -c 'sleep 5; if [ $(grep test /proc/self/mountinfo | wc -l) -eq 1 ] ; then echo SUCCESS ; else echo FAILURE ; fi' sleep 1 umount -l /mnt/b wait %% $ unshare -Urm ./locked_mounts_test.sh This failure is corrected by removing the prepass that marks mounts that may be umounted. A first pass is added that umounts mounts if possible and if not sets mount mark if they could be unmounted if they weren't locked and adds them to a list to umount possibilities. This first pass reconsiders the mounts parent if it is on the list of umount possibilities, ensuring that information of umoutability will pass from child to mount parent. A second pass then walks through all mounts that are umounted and processes their children unmounting them or marking them for reparenting. A last pass cleans up the state on the mounts that could not be umounted and if applicable reparents them to their first parent that remained mounted. While a bit longer than the old code this code is much more robust as it allows information to flow up from the leaves and down from the trunk making the order in which mounts are encountered in the umount propgation tree irrelevant. Fixes: 0c56fe31420c ("mnt: Don't propagate unmounts to locked mounts") Reviewed-by: Andrei Vagin Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/mount.h | 2 +- fs/namespace.c | 2 +- fs/pnode.c | 144 +++++++++++++++++++++++++++++-------------------- 3 files changed, 88 insertions(+), 60 deletions(-) diff --git a/fs/mount.h b/fs/mount.h index 2352231d4f0f..37c64bbe840c 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -57,7 +57,7 @@ struct mount { struct mnt_namespace *mnt_ns; /* containing namespace */ struct mountpoint *mnt_mp; /* where is it mounted */ struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ - struct list_head mnt_reparent; /* reparent list entry */ + struct list_head mnt_umounting; /* list entry for umount propagation */ #ifdef CONFIG_FSNOTIFY struct hlist_head mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; diff --git a/fs/namespace.c b/fs/namespace.c index 4c7174d2041e..ec4078d16eb7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -237,7 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); INIT_HLIST_NODE(&mnt->mnt_mp_list); - INIT_LIST_HEAD(&mnt->mnt_reparent); + INIT_LIST_HEAD(&mnt->mnt_umounting); #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif diff --git a/fs/pnode.c b/fs/pnode.c index 948e85ad0374..acc2eefbc4ff 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -415,86 +415,95 @@ void propagate_mount_unlock(struct mount *mnt) } } -/* - * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted. - */ -static void mark_umount_candidates(struct mount *mnt) +static void umount_one(struct mount *mnt, struct list_head *to_umount) { - struct mount *parent = mnt->mnt_parent; - struct mount *m; - - BUG_ON(parent == mnt); - - for (m = propagation_next(parent, parent); m; - m = propagation_next(m, parent)) { - struct mount *child = __lookup_mnt(&m->mnt, - mnt->mnt_mountpoint); - if (!child || (child->mnt.mnt_flags & MNT_UMOUNT)) - continue; - if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) { - SET_MNT_MARK(child); - } - } + CLEAR_MNT_MARK(mnt); + mnt->mnt.mnt_flags |= MNT_UMOUNT; + list_del_init(&mnt->mnt_child); + list_del_init(&mnt->mnt_umounting); + list_move_tail(&mnt->mnt_list, to_umount); } /* * NOTE: unmounting 'mnt' naturally propagates to all other mounts its * parent propagates to. */ -static void __propagate_umount(struct mount *mnt, struct list_head *to_reparent) +static bool __propagate_umount(struct mount *mnt, + struct list_head *to_umount, + struct list_head *to_restore) { - struct mount *parent = mnt->mnt_parent; - struct mount *m; + bool progress = false; + struct mount *child; - BUG_ON(parent == mnt); + /* + * The state of the parent won't change if this mount is + * already unmounted or marked as without children. + */ + if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED)) + goto out; - for (m = propagation_next(parent, parent); m; - m = propagation_next(m, parent)) { - struct mount *topper; - struct mount *child = __lookup_mnt(&m->mnt, - mnt->mnt_mountpoint); - /* - * umount the child only if the child has no children - * and the child is marked safe to unmount. - */ - if (!child || !IS_MNT_MARKED(child)) + /* Verify topper is the only grandchild that has not been + * speculatively unmounted. 
+ */ + list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { + if (child->mnt_mountpoint == mnt->mnt.mnt_root) continue; - CLEAR_MNT_MARK(child); + if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child)) + continue; + /* Found a mounted child */ + goto children; + } - /* If there is exactly one mount covering all of child - * replace child with that mount. - */ - topper = find_topper(child); - if (topper) - list_add_tail(&topper->mnt_reparent, to_reparent); + /* Mark mounts that can be unmounted if not locked */ + SET_MNT_MARK(mnt); + progress = true; - if (topper || list_empty(&child->mnt_mounts)) { - list_del_init(&child->mnt_child); - list_del_init(&child->mnt_reparent); - child->mnt.mnt_flags |= MNT_UMOUNT; - list_move_tail(&child->mnt_list, &mnt->mnt_list); + /* If a mount is without children and not locked umount it. */ + if (!IS_MNT_LOCKED(mnt)) { + umount_one(mnt, to_umount); + } else { +children: + list_move_tail(&mnt->mnt_umounting, to_restore); + } +out: + return progress; +} + +static void umount_list(struct list_head *to_umount, + struct list_head *to_restore) +{ + struct mount *mnt, *child, *tmp; + list_for_each_entry(mnt, to_umount, mnt_list) { + list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) { + /* topper? */ + if (child->mnt_mountpoint == mnt->mnt.mnt_root) + list_move_tail(&child->mnt_umounting, to_restore); + else + umount_one(child, to_umount); } } } -static void reparent_mounts(struct list_head *to_reparent) +static void restore_mounts(struct list_head *to_restore) { - while (!list_empty(to_reparent)) { + /* Restore mounts to a clean working state */ + while (!list_empty(to_restore)) { struct mount *mnt, *parent; struct mountpoint *mp; - mnt = list_first_entry(to_reparent, struct mount, mnt_reparent); - list_del_init(&mnt->mnt_reparent); + mnt = list_first_entry(to_restore, struct mount, mnt_umounting); + CLEAR_MNT_MARK(mnt); + list_del_init(&mnt->mnt_umounting); - /* Where should this mount be reparented to? */ + /* Should this mount be reparented? */ mp = mnt->mnt_mp; parent = mnt->mnt_parent; while (parent->mnt.mnt_flags & MNT_UMOUNT) { mp = parent->mnt_mp; parent = parent->mnt_parent; } - - mnt_change_mountpoint(parent, mp, mnt); + if (parent != mnt->mnt_parent) + mnt_change_mountpoint(parent, mp, mnt); } } @@ -508,15 +517,34 @@ static void reparent_mounts(struct list_head *to_reparent) int propagate_umount(struct list_head *list) { struct mount *mnt; - LIST_HEAD(to_reparent); + LIST_HEAD(to_restore); + LIST_HEAD(to_umount); - list_for_each_entry_reverse(mnt, list, mnt_list) - mark_umount_candidates(mnt); + list_for_each_entry(mnt, list, mnt_list) { + struct mount *parent = mnt->mnt_parent; + struct mount *m; - list_for_each_entry(mnt, list, mnt_list) - __propagate_umount(mnt, &to_reparent); + for (m = propagation_next(parent, parent); m; + m = propagation_next(m, parent)) { + struct mount *child = __lookup_mnt(&m->mnt, + mnt->mnt_mountpoint); + if (!child) + continue; - reparent_mounts(&to_reparent); + /* Check the child and parents while progress is made */ + while (__propagate_umount(child, + &to_umount, &to_restore)) { + /* Is the parent a umount candidate? */ + child = child->mnt_parent; + if (list_empty(&child->mnt_umounting)) + break; + } + } + } + + umount_list(&to_umount, &to_restore); + restore_mounts(&to_restore); + list_splice_tail(&to_umount, list); return 0; } From f07288cfb0f7091566fb8750672157eb10be53f4 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Mon, 24 Oct 2016 17:25:19 -0500 Subject: [PATCH 153/176] mnt: Make propagate_umount less slow for overlapping mount propagation trees commit 296990deb389c7da21c78030376ba244dc1badf5 upstream. Andrei Vagin pointed out that time to executue propagate_umount can go non-linear (and take a ludicrious amount of time) when the mount propogation trees of the mounts to be unmunted by a lazy unmount overlap. Make the walk of the mount propagation trees nearly linear by remembering which mounts have already been visited, allowing subsequent walks to detect when walking a mount propgation tree or a subtree of a mount propgation tree would be duplicate work and to skip them entirely. Walk the list of mounts whose propgatation trees need to be traversed from the mount highest in the mount tree to mounts lower in the mount tree so that odds are higher that the code will walk the largest trees first, allowing later tree walks to be skipped entirely. Add cleanup_umount_visitation to remover the code's memory of which mounts have been visited. Add the functions last_slave and skip_propagation_subtree to allow skipping appropriate parts of the mount propagation tree without needing to change the logic of the rest of the code. A script to generate overlapping mount propagation trees: $ cat runs.h set -e mount -t tmpfs zdtm /mnt mkdir -p /mnt/1 /mnt/2 mount -t tmpfs zdtm /mnt/1 mount --make-shared /mnt/1 mkdir /mnt/1/1 iteration=10 if [ -n "$1" ] ; then iteration=$1 fi for i in $(seq $iteration); do mount --bind /mnt/1/1 /mnt/1/1 done mount --rbind /mnt/1 /mnt/2 TIMEFORMAT='%Rs' nr=$(( ( 2 ** ( $iteration + 1 ) ) + 1 )) echo -n "umount -l /mnt/1 -> $nr " time umount -l /mnt/1 nr=$(cat /proc/self/mountinfo | grep zdtm | wc -l ) time umount -l /mnt/2 $ for i in $(seq 9 19); do echo $i; unshare -Urm bash ./run.sh $i; done Here are the performance numbers with and without the patch: mhash | 8192 | 8192 | 1048576 | 1048576 mounts | before | after | before | after ------------------------------------------------ 1025 | 0.040s | 0.016s | 0.038s | 0.019s 2049 | 0.094s | 0.017s | 0.080s | 0.018s 4097 | 0.243s | 0.019s | 0.206s | 0.023s 8193 | 1.202s | 0.028s | 1.562s | 0.032s 16385 | 9.635s | 0.036s | 9.952s | 0.041s 32769 | 60.928s | 0.063s | 44.321s | 0.064s 65537 | | 0.097s | | 0.097s 131073 | | 0.233s | | 0.176s 262145 | | 0.653s | | 0.344s 524289 | | 2.305s | | 0.735s 1048577 | | 7.107s | | 2.603s Andrei Vagin reports fixing the performance problem is part of the work to fix CVE-2016-6213. Fixes: a05964f3917c ("[PATCH] shared mounts handling: umount") Reported-by: Andrei Vagin Reviewed-by: Andrei Vagin Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/pnode.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/fs/pnode.c b/fs/pnode.c index acc2eefbc4ff..d15c63e97ef1 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -24,6 +24,11 @@ static inline struct mount *first_slave(struct mount *p) return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave); } +static inline struct mount *last_slave(struct mount *p) +{ + return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave); +} + static inline struct mount *next_slave(struct mount *p) { return list_entry(p->mnt_slave.next, struct mount, mnt_slave); @@ -164,6 +169,19 @@ static struct mount *propagation_next(struct mount *m, } } +static struct mount *skip_propagation_subtree(struct mount *m, + struct mount *origin) +{ + /* + * Advance m such that propagation_next will not return + * the slaves of m. + */ + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) + m = last_slave(m); + + return m; +} + static struct mount *next_group(struct mount *m, struct mount *origin) { while (1) { @@ -507,6 +525,15 @@ static void restore_mounts(struct list_head *to_restore) } } +static void cleanup_umount_visitations(struct list_head *visited) +{ + while (!list_empty(visited)) { + struct mount *mnt = + list_first_entry(visited, struct mount, mnt_umounting); + list_del_init(&mnt->mnt_umounting); + } +} + /* * collect all mounts that receive propagation from the mount in @list, * and return these additional mounts in the same list. @@ -519,11 +546,23 @@ int propagate_umount(struct list_head *list) struct mount *mnt; LIST_HEAD(to_restore); LIST_HEAD(to_umount); + LIST_HEAD(visited); - list_for_each_entry(mnt, list, mnt_list) { + /* Find candidates for unmounting */ + list_for_each_entry_reverse(mnt, list, mnt_list) { struct mount *parent = mnt->mnt_parent; struct mount *m; + /* + * If this mount has already been visited it is known that it's + * entire peer group and all of their slaves in the propagation + * tree for the mountpoint has already been visited and there is + * no need to visit them again. + */ + if (!list_empty(&mnt->mnt_umounting)) + continue; + + list_add_tail(&mnt->mnt_umounting, &visited); for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { struct mount *child = __lookup_mnt(&m->mnt, @@ -531,6 +570,27 @@ int propagate_umount(struct list_head *list) if (!child) continue; + if (!list_empty(&child->mnt_umounting)) { + /* + * If the child has already been visited it is + * know that it's entire peer group and all of + * their slaves in the propgation tree for the + * mountpoint has already been visited and there + * is no need to visit this subtree again. + */ + m = skip_propagation_subtree(m, parent); + continue; + } else if (child->mnt.mnt_flags & MNT_UMOUNT) { + /* + * We have come accross an partially unmounted + * mount in list that has not been visited yet. + * Remember it has been visited and continue + * about our merry way. 
+ */ + list_add_tail(&child->mnt_umounting, &visited); + continue; + } + /* Check the child and parents while progress is made */ while (__propagate_umount(child, &to_umount, &to_restore)) { @@ -544,6 +604,7 @@ umount_list(&to_umount, &to_restore); restore_mounts(&to_restore); + cleanup_umount_visitations(&visited); list_splice_tail(&to_umount, list); return 0; From a2e0b1c18c0904b4f1a2de620ad282b5d01710bb Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 29 Jun 2017 08:46:12 -0700 Subject: [PATCH 154/176] selftests/capabilities: Fix the test_execve test commit 796a3bae2fba6810427efdb314a1c126c9490fb3 upstream. test_execve does rather odd mount manipulations to safely create temporary setuid and setgid executables that aren't visible to the rest of the system. Those executables end up in the test's cwd, but that cwd is MNT_DETACHed. The core namespace code considers MNT_DETACHed trees to belong to no mount namespace at all and, in general, MNT_DETACHed trees are only barely functional. This interacted with commit 380cf5ba6b0a ("fs: Treat foreign mounts as nosuid") to cause all MNT_DETACHed trees to act as though they're nosuid, breaking the test. Fix it by just not detaching the tree. It's still in a private mount namespace and is therefore still invisible to the rest of the system (except via /proc, and the same nosuid logic will protect all other programs on the system from believing in test_execve's setuid bits). While we're at it, fix some blatant whitespace problems. Reported-by: Naresh Kamboju Fixes: 380cf5ba6b0a ("fs: Treat foreign mounts as nosuid") Cc: "Eric W. Biederman" Cc: Kees Cook Cc: Shuah Khan Cc: Greg KH Cc: linux-kselftest@vger.kernel.org Signed-off-by: Andy Lutomirski Acked-by: Greg Kroah-Hartman Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/capabilities/test_execve.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c index 10a21a958aaf..763f37fecfb8 100644 --- a/tools/testing/selftests/capabilities/test_execve.c +++ b/tools/testing/selftests/capabilities/test_execve.c @@ -138,9 +138,6 @@ static void chdir_to_tmpfs(void) if (chdir(cwd) != 0) err(1, "chdir to private tmpfs"); - - if (umount2(".", MNT_DETACH) != 0) - err(1, "detach private tmpfs"); } static void copy_fromat_to(int fromfd, const char *fromname, const char *toname) @@ -248,7 +245,7 @@ static int do_tests(int uid, const char *our_path) err(1, "chown"); if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0) err(1, "chmod"); -} + } capng_get_caps_process(); @@ -384,7 +381,7 @@ static int do_tests(int uid, const char *our_path) } else { printf("[RUN]\tNon-root +ia, sgidnonroot => i\n"); exec_other_validate_cap("./validate_cap_sgidnonroot", - false, false, true, false); + false, false, true, false); if (fork_wait()) { printf("[RUN]\tNon-root +ia, sgidroot => i\n"); From 5e07de5b5a4ea5c5b56ff73f359a248d97926629 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 29 Feb 2016 12:29:47 -0500 Subject: [PATCH 155/176] tpm: Get rid of chip->pdev commit 8cfffc9d4d3786d3b496a021d7224e06328bac7d upstream. This is a holdover from before the struct device conversion. - All prints should be using &chip->dev, which is the Linux standard. This changes prints to use tpm0 as the device name, not the PnP/etc ID. 
- The few places involving sysfs/modules that really do need the parent just use chip->dev.parent instead - We no longer need to get_device(pdev) in any places since it is no longer used by any of the code. The kref on the parent is held by the device core during device_add and dropped in device_del Signed-off-by: Jason Gunthorpe Signed-off-by: Stefan Berger Tested-by: Stefan Berger Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-chip.c | 15 ++++++--------- drivers/char/tpm/tpm-dev.c | 4 +--- drivers/char/tpm/tpm-interface.c | 30 +++++++++++++++-------------- drivers/char/tpm/tpm-sysfs.c | 6 +++--- drivers/char/tpm/tpm.h | 3 +-- drivers/char/tpm/tpm2-cmd.c | 8 ++++---- drivers/char/tpm/tpm_atmel.c | 14 +++++++------- drivers/char/tpm/tpm_i2c_atmel.c | 16 +++++++-------- drivers/char/tpm/tpm_i2c_infineon.c | 6 +++--- drivers/char/tpm/tpm_i2c_nuvoton.c | 22 ++++++++++----------- drivers/char/tpm/tpm_infineon.c | 22 ++++++++++----------- drivers/char/tpm/tpm_nsc.c | 20 +++++++++---------- drivers/char/tpm/tpm_tis.c | 16 +++++++-------- 13 files changed, 89 insertions(+), 93 deletions(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 252142524ff2..f55b4921c723 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -49,7 +49,7 @@ struct tpm_chip *tpm_chip_find_get(int chip_num) if (chip_num != TPM_ANY_NUM && chip_num != pos->dev_num) continue; - if (try_module_get(pos->pdev->driver->owner)) { + if (try_module_get(pos->dev.parent->driver->owner)) { chip = pos; break; } @@ -112,13 +112,11 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, scnprintf(chip->devname, sizeof(chip->devname), "tpm%d", chip->dev_num); - chip->pdev = dev; - dev_set_drvdata(dev, chip); chip->dev.class = tpm_class; chip->dev.release = tpm_dev_release; - chip->dev.parent = chip->pdev; + chip->dev.parent = dev; #ifdef CONFIG_ACPI chip->dev.groups = chip->groups; #endif @@ -133,7 +131,7 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, device_initialize(&chip->dev); cdev_init(&chip->cdev, &tpm_fops); - chip->cdev.owner = chip->pdev->driver->owner; + chip->cdev.owner = dev->driver->owner; chip->cdev.kobj.parent = &chip->dev.kobj; devm_add_action(dev, (void (*)(void *)) put_device, &chip->dev); @@ -236,9 +234,8 @@ int tpm_chip_register(struct tpm_chip *chip) chip->flags |= TPM_CHIP_FLAG_REGISTERED; if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) { - rc = __compat_only_sysfs_link_entry_to_kobj(&chip->pdev->kobj, - &chip->dev.kobj, - "ppi"); + rc = __compat_only_sysfs_link_entry_to_kobj( + &chip->dev.parent->kobj, &chip->dev.kobj, "ppi"); if (rc && rc != -ENOENT) { tpm_chip_unregister(chip); return rc; @@ -273,7 +270,7 @@ void tpm_chip_unregister(struct tpm_chip *chip) synchronize_rcu(); if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) - sysfs_remove_link(&chip->pdev->kobj, "ppi"); + sysfs_remove_link(&chip->dev.parent->kobj, "ppi"); tpm1_chip_unregister(chip); tpm_del_char_device(chip); diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c index 4f3137d9a35e..6ed0651cbe58 100644 --- a/drivers/char/tpm/tpm-dev.c +++ b/drivers/char/tpm/tpm-dev.c @@ -61,7 +61,7 @@ static int tpm_open(struct inode *inode, struct file *file) * by the check of is_open variable, which is protected * by driver_lock. 
*/ if (test_and_set_bit(0, &chip->is_open)) { - dev_dbg(chip->pdev, "Another process owns this TPM\n"); + dev_dbg(&chip->dev, "Another process owns this TPM\n"); return -EBUSY; } @@ -79,7 +79,6 @@ static int tpm_open(struct inode *inode, struct file *file) INIT_WORK(&priv->work, timeout_work); file->private_data = priv; - get_device(chip->pdev); return 0; } @@ -166,7 +165,6 @@ static int tpm_release(struct inode *inode, struct file *file) file->private_data = NULL; atomic_set(&priv->data_pending, 0); clear_bit(0, &priv->chip->is_open); - put_device(priv->chip->pdev); kfree(priv); return 0; } diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 17abe52e6365..4391953a7711 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -343,7 +343,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const u8 *buf, size_t bufsiz, if (count == 0) return -ENODATA; if (count > bufsiz) { - dev_err(chip->pdev, + dev_err(&chip->dev, "invalid count value %x %zx\n", count, bufsiz); return -E2BIG; } @@ -353,7 +353,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const u8 *buf, size_t bufsiz, rc = chip->ops->send(chip, (u8 *) buf, count); if (rc < 0) { - dev_err(chip->pdev, + dev_err(&chip->dev, "tpm_transmit: tpm_send: error %zd\n", rc); goto out; } @@ -372,7 +372,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const u8 *buf, size_t bufsiz, goto out_recv; if (chip->ops->req_canceled(chip, status)) { - dev_err(chip->pdev, "Operation Canceled\n"); + dev_err(&chip->dev, "Operation Canceled\n"); rc = -ECANCELED; goto out; } @@ -382,14 +382,14 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const u8 *buf, size_t bufsiz, } while (time_before(jiffies, stop)); chip->ops->cancel(chip); - dev_err(chip->pdev, "Operation Timed out\n"); + dev_err(&chip->dev, "Operation Timed out\n"); rc = -ETIME; goto out; out_recv: rc = chip->ops->recv(chip, (u8 *) buf, bufsiz); if (rc < 0) - dev_err(chip->pdev, + dev_err(&chip->dev, "tpm_transmit: tpm_recv: error %zd\n", rc); out: if (!(flags & TPM_TRANSMIT_UNLOCKED)) @@ -416,7 +416,7 @@ ssize_t tpm_transmit_cmd(struct tpm_chip *chip, const void *cmd, err = be32_to_cpu(header->return_code); if (err != 0 && desc) - dev_err(chip->pdev, "A TPM error (%d) occurred %s\n", err, + dev_err(&chip->dev, "A TPM error (%d) occurred %s\n", err, desc); return err; @@ -514,7 +514,7 @@ int tpm_get_timeouts(struct tpm_chip *chip) if (rc == TPM_ERR_INVALID_POSTINIT) { /* The TPM is not started, we are the first to talk to it. Execute a startup command. 
*/ - dev_info(chip->pdev, "Issuing TPM_STARTUP"); + dev_info(&chip->dev, "Issuing TPM_STARTUP"); if (tpm_startup(chip, TPM_ST_CLEAR)) return rc; @@ -526,7 +526,7 @@ int tpm_get_timeouts(struct tpm_chip *chip) 0, NULL); } if (rc) { - dev_err(chip->pdev, + dev_err(&chip->dev, "A TPM error (%zd) occurred attempting to determine the timeouts\n", rc); goto duration; @@ -565,7 +565,7 @@ int tpm_get_timeouts(struct tpm_chip *chip) /* Report adjusted timeouts */ if (chip->vendor.timeout_adjusted) { - dev_info(chip->pdev, + dev_info(&chip->dev, HW_ERR "Adjusting reported timeouts: A %lu->%luus B %lu->%luus C %lu->%luus D %lu->%luus\n", old_timeout[0], new_timeout[0], old_timeout[1], new_timeout[1], @@ -612,7 +612,7 @@ duration: chip->vendor.duration[TPM_MEDIUM] *= 1000; chip->vendor.duration[TPM_LONG] *= 1000; chip->vendor.duration_adjusted = true; - dev_info(chip->pdev, "Adjusting TPM timeout parameters."); + dev_info(&chip->dev, "Adjusting TPM timeout parameters."); } return 0; } @@ -802,7 +802,9 @@ int tpm_do_selftest(struct tpm_chip *chip) * around 300ms while the self test is ongoing, keep trying * until the self test duration expires. */ if (rc == -ETIME) { - dev_info(chip->pdev, HW_ERR "TPM command timed out during continue self test"); + dev_info( + &chip->dev, HW_ERR + "TPM command timed out during continue self test"); msleep(delay_msec); continue; } @@ -812,7 +814,7 @@ int tpm_do_selftest(struct tpm_chip *chip) rc = be32_to_cpu(cmd.header.out.return_code); if (rc == TPM_ERR_DISABLED || rc == TPM_ERR_DEACTIVATED) { - dev_info(chip->pdev, + dev_info(&chip->dev, "TPM is disabled/deactivated (0x%X)\n", rc); /* TPM is disabled and/or deactivated; driver can * proceed and TPM does handle commands for @@ -966,10 +968,10 @@ int tpm_pm_suspend(struct device *dev) } if (rc) - dev_err(chip->pdev, + dev_err(&chip->dev, "Error (%d) sending savestate before suspend\n", rc); else if (try > 0) - dev_warn(chip->pdev, "TPM savestate took %dms\n", + dev_warn(&chip->dev, "TPM savestate took %dms\n", try * TPM_TIMEOUT_RETRY); return rc; diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c index f880856aa75e..10370c22e98b 100644 --- a/drivers/char/tpm/tpm-sysfs.c +++ b/drivers/char/tpm/tpm-sysfs.c @@ -284,16 +284,16 @@ static const struct attribute_group tpm_dev_group = { int tpm_sysfs_add_device(struct tpm_chip *chip) { int err; - err = sysfs_create_group(&chip->pdev->kobj, + err = sysfs_create_group(&chip->dev.parent->kobj, &tpm_dev_group); if (err) - dev_err(chip->pdev, + dev_err(&chip->dev, "failed to create sysfs attributes, %d\n", err); return err; } void tpm_sysfs_del_device(struct tpm_chip *chip) { - sysfs_remove_group(&chip->pdev->kobj, &tpm_dev_group); + sysfs_remove_group(&chip->dev.parent->kobj, &tpm_dev_group); } diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 2216861f89f1..57c4c26c38ea 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -171,7 +171,6 @@ enum tpm_chip_flags { }; struct tpm_chip { - struct device *pdev; /* Device stuff */ struct device dev; struct cdev cdev; @@ -203,7 +202,7 @@ struct tpm_chip { static inline void tpm_chip_put(struct tpm_chip *chip) { - module_put(chip->pdev->driver->owner); + module_put(chip->dev.parent->driver->owner); } static inline int tpm_read_index(int base, int index) diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index cb7e4f6b70ba..286bd090a488 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -570,7 +570,7 @@ static void 
tpm2_flush_context_cmd(struct tpm_chip *chip, u32 handle, rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_FLUSH_CONTEXT); if (rc) { - dev_warn(chip->pdev, "0x%08x was not flushed, out of memory\n", + dev_warn(&chip->dev, "0x%08x was not flushed, out of memory\n", handle); return; } @@ -580,7 +580,7 @@ static void tpm2_flush_context_cmd(struct tpm_chip *chip, u32 handle, rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, flags, "flushing context"); if (rc) - dev_warn(chip->pdev, "0x%08x was not flushed, rc=%d\n", handle, + dev_warn(&chip->dev, "0x%08x was not flushed, rc=%d\n", handle, rc); tpm_buf_destroy(&buf); @@ -753,7 +753,7 @@ void tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type) * except print the error code on a system failure. */ if (rc < 0) - dev_warn(chip->pdev, "transmit returned %d while stopping the TPM", + dev_warn(&chip->dev, "transmit returned %d while stopping the TPM", rc); } EXPORT_SYMBOL_GPL(tpm2_shutdown); @@ -820,7 +820,7 @@ static int tpm2_start_selftest(struct tpm_chip *chip, bool full) * immediately. This is a workaround for that. */ if (rc == TPM2_RC_TESTING) { - dev_warn(chip->pdev, "Got RC_TESTING, ignoring\n"); + dev_warn(&chip->dev, "Got RC_TESTING, ignoring\n"); rc = 0; } diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c index dfadad0916a1..a48a878f791d 100644 --- a/drivers/char/tpm/tpm_atmel.c +++ b/drivers/char/tpm/tpm_atmel.c @@ -49,7 +49,7 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count) for (i = 0; i < 6; i++) { status = ioread8(chip->vendor.iobase + 1); if ((status & ATML_STATUS_DATA_AVAIL) == 0) { - dev_err(chip->pdev, "error reading header\n"); + dev_err(&chip->dev, "error reading header\n"); return -EIO; } *buf++ = ioread8(chip->vendor.iobase); @@ -60,12 +60,12 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count) size = be32_to_cpu(*native_size); if (count < size) { - dev_err(chip->pdev, + dev_err(&chip->dev, "Recv size(%d) less than available space\n", size); for (; i < size; i++) { /* clear the waiting data anyway */ status = ioread8(chip->vendor.iobase + 1); if ((status & ATML_STATUS_DATA_AVAIL) == 0) { - dev_err(chip->pdev, "error reading data\n"); + dev_err(&chip->dev, "error reading data\n"); return -EIO; } } @@ -76,7 +76,7 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count) for (; i < size; i++) { status = ioread8(chip->vendor.iobase + 1); if ((status & ATML_STATUS_DATA_AVAIL) == 0) { - dev_err(chip->pdev, "error reading data\n"); + dev_err(&chip->dev, "error reading data\n"); return -EIO; } *buf++ = ioread8(chip->vendor.iobase); @@ -86,7 +86,7 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count) status = ioread8(chip->vendor.iobase + 1); if (status & ATML_STATUS_DATA_AVAIL) { - dev_err(chip->pdev, "data available is stuck\n"); + dev_err(&chip->dev, "data available is stuck\n"); return -EIO; } @@ -97,9 +97,9 @@ static int tpm_atml_send(struct tpm_chip *chip, u8 *buf, size_t count) { int i; - dev_dbg(chip->pdev, "tpm_atml_send:\n"); + dev_dbg(&chip->dev, "tpm_atml_send:\n"); for (i = 0; i < count; i++) { - dev_dbg(chip->pdev, "%d 0x%x(%d)\n", i, buf[i], buf[i]); + dev_dbg(&chip->dev, "%d 0x%x(%d)\n", i, buf[i], buf[i]); iowrite8(buf[i], chip->vendor.iobase); } diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c index 8dfb88b9739c..dd8f0eb3170a 100644 --- a/drivers/char/tpm/tpm_i2c_atmel.c +++ b/drivers/char/tpm/tpm_i2c_atmel.c @@ -52,7 +52,7 @@ struct priv_data { static int i2c_atmel_send(struct 
tpm_chip *chip, u8 *buf, size_t len) { struct priv_data *priv = chip->vendor.priv; - struct i2c_client *client = to_i2c_client(chip->pdev); + struct i2c_client *client = to_i2c_client(chip->dev.parent); s32 status; priv->len = 0; @@ -62,7 +62,7 @@ static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) status = i2c_master_send(client, buf, len); - dev_dbg(chip->pdev, + dev_dbg(&chip->dev, "%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__, (int)min_t(size_t, 64, len), buf, len, status); return status; @@ -71,7 +71,7 @@ static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) { struct priv_data *priv = chip->vendor.priv; - struct i2c_client *client = to_i2c_client(chip->pdev); + struct i2c_client *client = to_i2c_client(chip->dev.parent); struct tpm_output_header *hdr = (struct tpm_output_header *)priv->buffer; u32 expected_len; @@ -88,7 +88,7 @@ static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) return -ENOMEM; if (priv->len >= expected_len) { - dev_dbg(chip->pdev, + dev_dbg(&chip->dev, "%s early(buf=%*ph count=%0zx) -> ret=%d\n", __func__, (int)min_t(size_t, 64, expected_len), buf, count, expected_len); @@ -97,7 +97,7 @@ static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) } rc = i2c_master_recv(client, buf, expected_len); - dev_dbg(chip->pdev, + dev_dbg(&chip->dev, "%s reread(buf=%*ph count=%0zx) -> ret=%d\n", __func__, (int)min_t(size_t, 64, expected_len), buf, count, expected_len); @@ -106,13 +106,13 @@ static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) static void i2c_atmel_cancel(struct tpm_chip *chip) { - dev_err(chip->pdev, "TPM operation cancellation was requested, but is not supported"); + dev_err(&chip->dev, "TPM operation cancellation was requested, but is not supported"); } static u8 i2c_atmel_read_status(struct tpm_chip *chip) { struct priv_data *priv = chip->vendor.priv; - struct i2c_client *client = to_i2c_client(chip->pdev); + struct i2c_client *client = to_i2c_client(chip->dev.parent); int rc; /* The TPM fails the I2C read until it is ready, so we do the entire @@ -125,7 +125,7 @@ static u8 i2c_atmel_read_status(struct tpm_chip *chip) /* Once the TPM has completed the command the command remains readable * until another command is issued. */ rc = i2c_master_recv(client, priv->buffer, sizeof(priv->buffer)); - dev_dbg(chip->pdev, + dev_dbg(&chip->dev, "%s: sts=%d", __func__, rc); if (rc <= 0) return 0; diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c index 63d5d22e9e60..f2aa99e34b4b 100644 --- a/drivers/char/tpm/tpm_i2c_infineon.c +++ b/drivers/char/tpm/tpm_i2c_infineon.c @@ -446,7 +446,7 @@ static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count) /* read first 10 bytes, including tag, paramsize, and result */ size = recv_data(chip, buf, TPM_HEADER_SIZE); if (size < TPM_HEADER_SIZE) { - dev_err(chip->pdev, "Unable to read header\n"); + dev_err(&chip->dev, "Unable to read header\n"); goto out; } @@ -459,14 +459,14 @@ static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count) size += recv_data(chip, &buf[TPM_HEADER_SIZE], expected - TPM_HEADER_SIZE); if (size < expected) { - dev_err(chip->pdev, "Unable to read remainder of result\n"); + dev_err(&chip->dev, "Unable to read remainder of result\n"); size = -ETIME; goto out; } wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c, &status); if (status & TPM_STS_DATA_AVAIL) { /* retry? 
*/ - dev_err(chip->pdev, "Error left over data\n"); + dev_err(&chip->dev, "Error left over data\n"); size = -EIO; goto out; } diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c index 847f1597fe9b..a1e1474dda30 100644 --- a/drivers/char/tpm/tpm_i2c_nuvoton.c +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -96,13 +96,13 @@ static s32 i2c_nuvoton_write_buf(struct i2c_client *client, u8 offset, u8 size, /* read TPM_STS register */ static u8 i2c_nuvoton_read_status(struct tpm_chip *chip) { - struct i2c_client *client = to_i2c_client(chip->pdev); + struct i2c_client *client = to_i2c_client(chip->dev.parent); s32 status; u8 data; status = i2c_nuvoton_read_buf(client, TPM_STS, 1, &data); if (status <= 0) { - dev_err(chip->pdev, "%s() error return %d\n", __func__, + dev_err(&chip->dev, "%s() error return %d\n", __func__, status); data = TPM_STS_ERR_VAL; } @@ -127,13 +127,13 @@ static s32 i2c_nuvoton_write_status(struct i2c_client *client, u8 data) /* write commandReady to TPM_STS register */ static void i2c_nuvoton_ready(struct tpm_chip *chip) { - struct i2c_client *client = to_i2c_client(chip->pdev); + struct i2c_client *client = to_i2c_client(chip->dev.parent); s32 status; /* this causes the current command to be aborted */ status = i2c_nuvoton_write_status(client, TPM_STS_COMMAND_READY); if (status < 0) - dev_err(chip->pdev, + dev_err(&chip->dev, "%s() fail to write TPM_STS.commandReady\n", __func__); } @@ -212,7 +212,7 @@ static int i2c_nuvoton_wait_for_stat(struct tpm_chip *chip, u8 mask, u8 value, return 0; } while (time_before(jiffies, stop)); } - dev_err(chip->pdev, "%s(%02x, %02x) -> timeout\n", __func__, mask, + dev_err(&chip->dev, "%s(%02x, %02x) -> timeout\n", __func__, mask, value); return -ETIMEDOUT; } @@ -240,7 +240,7 @@ static int i2c_nuvoton_recv_data(struct i2c_client *client, &chip->vendor.read_queue) == 0) { burst_count = i2c_nuvoton_get_burstcount(client, chip); if (burst_count < 0) { - dev_err(chip->pdev, + dev_err(&chip->dev, "%s() fail to read burstCount=%d\n", __func__, burst_count); return -EIO; @@ -249,12 +249,12 @@ static int i2c_nuvoton_recv_data(struct i2c_client *client, rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_R, bytes2read, &buf[size]); if (rc < 0) { - dev_err(chip->pdev, + dev_err(&chip->dev, "%s() fail on i2c_nuvoton_read_buf()=%d\n", __func__, rc); return -EIO; } - dev_dbg(chip->pdev, "%s(%d):", __func__, bytes2read); + dev_dbg(&chip->dev, "%s(%d):", __func__, bytes2read); size += bytes2read; } @@ -264,7 +264,7 @@ static int i2c_nuvoton_recv_data(struct i2c_client *client, /* Read TPM command results */ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) { - struct device *dev = chip->pdev; + struct device *dev = chip->dev.parent; struct i2c_client *client = to_i2c_client(dev); s32 rc; int expected, status, burst_count, retries, size = 0; @@ -334,7 +334,7 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) break; } i2c_nuvoton_ready(chip); - dev_dbg(chip->pdev, "%s() -> %d\n", __func__, size); + dev_dbg(&chip->dev, "%s() -> %d\n", __func__, size); return size; } @@ -347,7 +347,7 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) */ static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len) { - struct device *dev = chip->pdev; + struct device *dev = chip->dev.parent; struct i2c_client *client = to_i2c_client(dev); u32 ordinal; size_t count = 0; diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c index 
6c488e635fdd..e3cf9f3545c5 100644 --- a/drivers/char/tpm/tpm_infineon.c +++ b/drivers/char/tpm/tpm_infineon.c @@ -195,9 +195,9 @@ static int wait(struct tpm_chip *chip, int wait_for_bit) } if (i == TPM_MAX_TRIES) { /* timeout occurs */ if (wait_for_bit == STAT_XFE) - dev_err(chip->pdev, "Timeout in wait(STAT_XFE)\n"); + dev_err(&chip->dev, "Timeout in wait(STAT_XFE)\n"); if (wait_for_bit == STAT_RDA) - dev_err(chip->pdev, "Timeout in wait(STAT_RDA)\n"); + dev_err(&chip->dev, "Timeout in wait(STAT_RDA)\n"); return -EIO; } return 0; @@ -220,7 +220,7 @@ static void wait_and_send(struct tpm_chip *chip, u8 sendbyte) static void tpm_wtx(struct tpm_chip *chip) { number_of_wtx++; - dev_info(chip->pdev, "Granting WTX (%02d / %02d)\n", + dev_info(&chip->dev, "Granting WTX (%02d / %02d)\n", number_of_wtx, TPM_MAX_WTX_PACKAGES); wait_and_send(chip, TPM_VL_VER); wait_and_send(chip, TPM_CTRL_WTX); @@ -231,7 +231,7 @@ static void tpm_wtx(struct tpm_chip *chip) static void tpm_wtx_abort(struct tpm_chip *chip) { - dev_info(chip->pdev, "Aborting WTX\n"); + dev_info(&chip->dev, "Aborting WTX\n"); wait_and_send(chip, TPM_VL_VER); wait_and_send(chip, TPM_CTRL_WTX_ABORT); wait_and_send(chip, 0x00); @@ -257,7 +257,7 @@ recv_begin: } if (buf[0] != TPM_VL_VER) { - dev_err(chip->pdev, + dev_err(&chip->dev, "Wrong transport protocol implementation!\n"); return -EIO; } @@ -272,7 +272,7 @@ recv_begin: } if ((size == 0x6D00) && (buf[1] == 0x80)) { - dev_err(chip->pdev, "Error handling on vendor layer!\n"); + dev_err(&chip->dev, "Error handling on vendor layer!\n"); return -EIO; } @@ -284,7 +284,7 @@ recv_begin: } if (buf[1] == TPM_CTRL_WTX) { - dev_info(chip->pdev, "WTX-package received\n"); + dev_info(&chip->dev, "WTX-package received\n"); if (number_of_wtx < TPM_MAX_WTX_PACKAGES) { tpm_wtx(chip); goto recv_begin; @@ -295,14 +295,14 @@ recv_begin: } if (buf[1] == TPM_CTRL_WTX_ABORT_ACK) { - dev_info(chip->pdev, "WTX-abort acknowledged\n"); + dev_info(&chip->dev, "WTX-abort acknowledged\n"); return size; } if (buf[1] == TPM_CTRL_ERROR) { - dev_err(chip->pdev, "ERROR-package received:\n"); + dev_err(&chip->dev, "ERROR-package received:\n"); if (buf[4] == TPM_INF_NAK) - dev_err(chip->pdev, + dev_err(&chip->dev, "-> Negative acknowledgement" " - retransmit command!\n"); return -EIO; @@ -321,7 +321,7 @@ static int tpm_inf_send(struct tpm_chip *chip, u8 * buf, size_t count) ret = empty_fifo(chip, 1); if (ret) { - dev_err(chip->pdev, "Timeout while clearing FIFO\n"); + dev_err(&chip->dev, "Timeout while clearing FIFO\n"); return -EIO; } diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c index 289389ecef84..766370bed60c 100644 --- a/drivers/char/tpm/tpm_nsc.c +++ b/drivers/char/tpm/tpm_nsc.c @@ -113,7 +113,7 @@ static int nsc_wait_for_ready(struct tpm_chip *chip) } while (time_before(jiffies, stop)); - dev_info(chip->pdev, "wait for ready failed\n"); + dev_info(&chip->dev, "wait for ready failed\n"); return -EBUSY; } @@ -129,12 +129,12 @@ static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count) return -EIO; if (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0) { - dev_err(chip->pdev, "F0 timeout\n"); + dev_err(&chip->dev, "F0 timeout\n"); return -EIO; } if ((data = inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_NORMAL) { - dev_err(chip->pdev, "not in normal mode (0x%x)\n", + dev_err(&chip->dev, "not in normal mode (0x%x)\n", data); return -EIO; } @@ -143,7 +143,7 @@ static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count) for (p = buffer; p < &buffer[count]; p++) { 
if (wait_for_stat (chip, NSC_STATUS_OBF, NSC_STATUS_OBF, &data) < 0) { - dev_err(chip->pdev, + dev_err(&chip->dev, "OBF timeout (while reading data)\n"); return -EIO; } @@ -154,11 +154,11 @@ static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count) if ((data & NSC_STATUS_F0) == 0 && (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0)) { - dev_err(chip->pdev, "F0 not set\n"); + dev_err(&chip->dev, "F0 not set\n"); return -EIO; } if ((data = inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_EOC) { - dev_err(chip->pdev, + dev_err(&chip->dev, "expected end of command(0x%x)\n", data); return -EIO; } @@ -189,19 +189,19 @@ static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count) return -EIO; if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { - dev_err(chip->pdev, "IBF timeout\n"); + dev_err(&chip->dev, "IBF timeout\n"); return -EIO; } outb(NSC_COMMAND_NORMAL, chip->vendor.base + NSC_COMMAND); if (wait_for_stat(chip, NSC_STATUS_IBR, NSC_STATUS_IBR, &data) < 0) { - dev_err(chip->pdev, "IBR timeout\n"); + dev_err(&chip->dev, "IBR timeout\n"); return -EIO; } for (i = 0; i < count; i++) { if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { - dev_err(chip->pdev, + dev_err(&chip->dev, "IBF timeout (while writing data)\n"); return -EIO; } @@ -209,7 +209,7 @@ static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count) } if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { - dev_err(chip->pdev, "IBF timeout\n"); + dev_err(&chip->dev, "IBF timeout\n"); return -EIO; } outb(NSC_COMMAND_EOC, chip->vendor.base + NSC_COMMAND); diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index f10a107614b4..7f13221aeb30 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -293,7 +293,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) /* read first 10 bytes, including tag, paramsize, and result */ if ((size = recv_data(chip, buf, TPM_HEADER_SIZE)) < TPM_HEADER_SIZE) { - dev_err(chip->pdev, "Unable to read header\n"); + dev_err(&chip->dev, "Unable to read header\n"); goto out; } @@ -306,7 +306,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) if ((size += recv_data(chip, &buf[TPM_HEADER_SIZE], expected - TPM_HEADER_SIZE)) < expected) { - dev_err(chip->pdev, "Unable to read remainder of result\n"); + dev_err(&chip->dev, "Unable to read remainder of result\n"); size = -ETIME; goto out; } @@ -315,7 +315,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) &chip->vendor.int_queue, false); status = tpm_tis_status(chip); if (status & TPM_STS_DATA_AVAIL) { /* retry? 
*/ - dev_err(chip->pdev, "Error left over data\n"); + dev_err(&chip->dev, "Error left over data\n"); size = -EIO; goto out; } @@ -401,7 +401,7 @@ static void disable_interrupts(struct tpm_chip *chip) iowrite32(intmask, chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality)); - devm_free_irq(chip->pdev, chip->vendor.irq, chip); + devm_free_irq(&chip->dev, chip->vendor.irq, chip); chip->vendor.irq = 0; } @@ -463,7 +463,7 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) msleep(1); if (!priv->irq_tested) { disable_interrupts(chip); - dev_err(chip->pdev, + dev_err(&chip->dev, FW_BUG "TPM interrupt not working, polling instead\n"); } priv->irq_tested = true; @@ -533,7 +533,7 @@ static int probe_itpm(struct tpm_chip *chip) rc = tpm_tis_send_data(chip, cmd_getticks, len); if (rc == 0) { - dev_info(chip->pdev, "Detected an iTPM.\n"); + dev_info(&chip->dev, "Detected an iTPM.\n"); rc = 1; } else rc = -EFAULT; @@ -766,7 +766,7 @@ static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info, if (devm_request_irq (dev, i, tis_int_probe, IRQF_SHARED, chip->devname, chip) != 0) { - dev_info(chip->pdev, + dev_info(&chip->dev, "Unable to request irq: %d for probe\n", i); continue; @@ -818,7 +818,7 @@ static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info, if (devm_request_irq (dev, chip->vendor.irq, tis_int_handler, IRQF_SHARED, chip->devname, chip) != 0) { - dev_info(chip->pdev, + dev_info(&chip->dev, "Unable to request irq: %d for use\n", chip->vendor.irq); chip->vendor.irq = 0; From 5ec5771bcb2b4c45771f3f750701ab79dd4cb21a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 12 Feb 2016 20:29:53 -0700 Subject: [PATCH 156/176] tpm: Provide strong locking for device removal commit 4e26195f240d73150e8308ae42874702e3df8d2c upstream. Add a read/write semaphore around the ops function pointers so ops can be set to null when the driver un-registers. Previously the tpm core expected module locking to be enough to ensure that tpm_unregister could not be called during certain times, however that hasn't been sufficient for a long time. Introduce a read/write semaphore around 'ops' so the core can set it to null when unregistering. This provides a strong fence around the driver callbacks, guaranteeing to the driver that no callbacks are running or will run again. For now the ops_lock is placed very high in the call stack, it could be pushed down and made more granular in future if necessary. Signed-off-by: Jason Gunthorpe Reviewed-by: Stefan Berger Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-chip.c | 72 +++++++++++++++++++++++++++++--- drivers/char/tpm/tpm-dev.c | 11 ++++- drivers/char/tpm/tpm-interface.c | 19 +++++---- drivers/char/tpm/tpm-sysfs.c | 5 +++ drivers/char/tpm/tpm.h | 14 ++++--- 5 files changed, 100 insertions(+), 21 deletions(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index f55b4921c723..f3a887e4f692 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -36,10 +36,60 @@ static DEFINE_SPINLOCK(driver_lock); struct class *tpm_class; dev_t tpm_devt; -/* - * tpm_chip_find_get - return tpm_chip for a given chip number - * @chip_num the device number for the chip +/** + * tpm_try_get_ops() - Get a ref to the tpm_chip + * @chip: Chip to ref + * + * The caller must already have some kind of locking to ensure that chip is + * valid. This function will lock the chip so that the ops member can be + * accessed safely. 
The locking prevents tpm_chip_unregister from + * completing, so it should not be held for long periods. + * + * Returns -ERRNO if the chip could not be got. */ +int tpm_try_get_ops(struct tpm_chip *chip) +{ + int rc = -EIO; + + get_device(&chip->dev); + + down_read(&chip->ops_sem); + if (!chip->ops) + goto out_lock; + + if (!try_module_get(chip->dev.parent->driver->owner)) + goto out_lock; + + return 0; +out_lock: + up_read(&chip->ops_sem); + put_device(&chip->dev); + return rc; +} +EXPORT_SYMBOL_GPL(tpm_try_get_ops); + +/** + * tpm_put_ops() - Release a ref to the tpm_chip + * @chip: Chip to put + * + * This is the opposite pair to tpm_try_get_ops(). After this returns chip may + * be kfree'd. + */ +void tpm_put_ops(struct tpm_chip *chip) +{ + module_put(chip->dev.parent->driver->owner); + up_read(&chip->ops_sem); + put_device(&chip->dev); +} +EXPORT_SYMBOL_GPL(tpm_put_ops); + +/** + * tpm_chip_find_get() - return tpm_chip for a given chip number + * @chip_num: id to find + * + * The return'd chip has been tpm_try_get_ops'd and must be released via + * tpm_put_ops + */ struct tpm_chip *tpm_chip_find_get(int chip_num) { struct tpm_chip *pos, *chip = NULL; @@ -49,10 +99,10 @@ struct tpm_chip *tpm_chip_find_get(int chip_num) if (chip_num != TPM_ANY_NUM && chip_num != pos->dev_num) continue; - if (try_module_get(pos->dev.parent->driver->owner)) { + /* rcu prevents chip from being free'd */ + if (!tpm_try_get_ops(pos)) chip = pos; - break; - } + break; } rcu_read_unlock(); return chip; @@ -94,6 +144,7 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, return ERR_PTR(-ENOMEM); mutex_init(&chip->tpm_mutex); + init_rwsem(&chip->ops_sem); INIT_LIST_HEAD(&chip->list); chip->ops = ops; @@ -171,6 +222,12 @@ static int tpm_add_char_device(struct tpm_chip *chip) static void tpm_del_char_device(struct tpm_chip *chip) { cdev_del(&chip->cdev); + + /* Make the driver uncallable. */ + down_write(&chip->ops_sem); + chip->ops = NULL; + up_write(&chip->ops_sem); + device_del(&chip->dev); } @@ -256,6 +313,9 @@ EXPORT_SYMBOL_GPL(tpm_chip_register); * Takes the chip first away from the list of available TPM chips and then * cleans up all the resources reserved by tpm_chip_register(). * + * Once this function returns the driver call backs in 'op's will not be + * running and will no longer start. + * * NOTE: This function should be only called before deinitializing chip * resources. */ diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c index 6ed0651cbe58..912ad30be585 100644 --- a/drivers/char/tpm/tpm-dev.c +++ b/drivers/char/tpm/tpm-dev.c @@ -136,9 +136,18 @@ static ssize_t tpm_write(struct file *file, const char __user *buf, return -EFAULT; } - /* atomic tpm command send and result receive */ + /* atomic tpm command send and result receive. We only hold the ops + * lock during this period so that the tpm can be unregistered even if + * the char dev is held open. 
+ */ + if (tpm_try_get_ops(priv->chip)) { + mutex_unlock(&priv->buffer_mutex); + return -EPIPE; + } out_size = tpm_transmit(priv->chip, priv->data_buffer, sizeof(priv->data_buffer), 0); + + tpm_put_ops(priv->chip); if (out_size < 0) { mutex_unlock(&priv->buffer_mutex); return out_size; diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 4391953a7711..8588f2e4b9af 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -687,7 +687,7 @@ int tpm_is_tpm2(u32 chip_num) rc = (chip->flags & TPM_CHIP_FLAG_TPM2) != 0; - tpm_chip_put(chip); + tpm_put_ops(chip); return rc; } @@ -716,7 +716,7 @@ int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) rc = tpm2_pcr_read(chip, pcr_idx, res_buf); else rc = tpm_pcr_read_dev(chip, pcr_idx, res_buf); - tpm_chip_put(chip); + tpm_put_ops(chip); return rc; } EXPORT_SYMBOL_GPL(tpm_pcr_read); @@ -751,7 +751,7 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) if (chip->flags & TPM_CHIP_FLAG_TPM2) { rc = tpm2_pcr_extend(chip, pcr_idx, hash); - tpm_chip_put(chip); + tpm_put_ops(chip); return rc; } @@ -761,7 +761,7 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE, 0, "attempting extend a PCR value"); - tpm_chip_put(chip); + tpm_put_ops(chip); return rc; } EXPORT_SYMBOL_GPL(tpm_pcr_extend); @@ -842,7 +842,7 @@ int tpm_send(u32 chip_num, void *cmd, size_t buflen) rc = tpm_transmit_cmd(chip, cmd, buflen, 0, "attempting tpm_cmd"); - tpm_chip_put(chip); + tpm_put_ops(chip); return rc; } EXPORT_SYMBOL_GPL(tpm_send); @@ -1025,7 +1025,7 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max) if (chip->flags & TPM_CHIP_FLAG_TPM2) { err = tpm2_get_random(chip, out, max); - tpm_chip_put(chip); + tpm_put_ops(chip); return err; } @@ -1047,7 +1047,7 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max) num_bytes -= recd; } while (retries-- && total < max); - tpm_chip_put(chip); + tpm_put_ops(chip); return total ? total : -EIO; } EXPORT_SYMBOL_GPL(tpm_get_random); @@ -1073,7 +1073,7 @@ int tpm_seal_trusted(u32 chip_num, struct trusted_key_payload *payload, rc = tpm2_seal_trusted(chip, payload, options); - tpm_chip_put(chip); + tpm_put_ops(chip); return rc; } EXPORT_SYMBOL_GPL(tpm_seal_trusted); @@ -1099,7 +1099,8 @@ int tpm_unseal_trusted(u32 chip_num, struct trusted_key_payload *payload, rc = tpm2_unseal_trusted(chip, payload, options); - tpm_chip_put(chip); + tpm_put_ops(chip); + return rc; } EXPORT_SYMBOL_GPL(tpm_unseal_trusted); diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c index 10370c22e98b..8af4145d10c7 100644 --- a/drivers/char/tpm/tpm-sysfs.c +++ b/drivers/char/tpm/tpm-sysfs.c @@ -295,5 +295,10 @@ int tpm_sysfs_add_device(struct tpm_chip *chip) void tpm_sysfs_del_device(struct tpm_chip *chip) { + /* The sysfs routines rely on an implicit tpm_try_get_ops, this + * function is called before ops is null'd and the sysfs core + * synchronizes this removal so that no callbacks are running or can + * run again + */ sysfs_remove_group(&chip->dev.parent->kobj, &tpm_dev_group); } diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 57c4c26c38ea..e21e2c599e66 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -174,7 +174,13 @@ struct tpm_chip { struct device dev; struct cdev cdev; + /* A driver callback under ops cannot be run unless ops_sem is held + * (sometimes implicitly, eg for the sysfs code). 
ops becomes null + * when the driver is unregistered, see tpm_try_get_ops. + */ + struct rw_semaphore ops_sem; const struct tpm_class_ops *ops; + unsigned int flags; int dev_num; /* /dev/tpm# */ @@ -200,11 +206,6 @@ struct tpm_chip { #define to_tpm_chip(d) container_of(d, struct tpm_chip, dev) -static inline void tpm_chip_put(struct tpm_chip *chip) -{ - module_put(chip->dev.parent->driver->owner); -} - static inline int tpm_read_index(int base, int index) { outb(index, base); @@ -516,6 +517,9 @@ extern int wait_for_tpm_stat(struct tpm_chip *, u8, unsigned long, wait_queue_head_t *, bool); struct tpm_chip *tpm_chip_find_get(int chip_num); +__must_check int tpm_try_get_ops(struct tpm_chip *chip); +void tpm_put_ops(struct tpm_chip *chip); + extern struct tpm_chip *tpmm_chip_alloc(struct device *dev, const struct tpm_class_ops *ops); extern int tpm_chip_register(struct tpm_chip *chip); From 5c9a2972983fca37e73648d9a3aa62a9ad048c3c Mon Sep 17 00:00:00 2001 From: Josh Zimmerman Date: Sun, 25 Jun 2017 14:53:23 -0700 Subject: [PATCH 157/176] Add "shutdown" to "struct class". commit f77af15165847406b15d8f70c382c4cb15846b2a upstream. The TPM class has some common shutdown code that must be executed for all drivers. This adds some needed functionality for that. Signed-off-by: Josh Zimmerman Acked-by: Greg Kroah-Hartman Fixes: 74d6b3ceaa17 ("tpm: fix suspend/resume paths for TPM 2.0") Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 +++++- include/linux/device.h | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index f18856f5954b..afe045792796 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2094,7 +2094,11 @@ void device_shutdown(void) pm_runtime_get_noresume(dev); pm_runtime_barrier(dev); - if (dev->bus && dev->bus->shutdown) { + if (dev->class && dev->class->shutdown) { + if (initcall_debug) + dev_info(dev, "shutdown\n"); + dev->class->shutdown(dev); + } else if (dev->bus && dev->bus->shutdown) { if (initcall_debug) dev_info(dev, "shutdown\n"); dev->bus->shutdown(dev); diff --git a/include/linux/device.h b/include/linux/device.h index b8f411b57dcb..7075a2485ed3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -368,6 +368,7 @@ int subsys_virtual_register(struct bus_type *subsys, * @suspend: Used to put the device to sleep mode, usually to a low power * state. * @resume: Used to bring the device from the sleep mode. + * @shutdown: Called at shut-down time to quiesce the device. * @ns_type: Callbacks so sysfs can detemine namespaces. * @namespace: Namespace of the device belongs to this class. * @pm: The default device power management operations of this class. @@ -396,6 +397,7 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); + int (*shutdown)(struct device *dev); const struct kobj_ns_type_operations *ns_type; const void *(*namespace)(struct device *dev); From 0f4fa2138e83d33468baac218c4be652f4619cb7 Mon Sep 17 00:00:00 2001 From: Josh Zimmerman Date: Sun, 25 Jun 2017 14:53:24 -0700 Subject: [PATCH 158/176] tpm: Issue a TPM2_Shutdown for TPM2 devices. commit d1bd4a792d3961a04e6154118816b00167aad91a upstream. If a TPM2 loses power without a TPM2_Shutdown command being issued (a "disorderly reboot"), it may lose some state that has yet to be persisted to NVRam, and will increment the DA counter. 
After the DA counter gets sufficiently large, the TPM will lock the user out. NOTE: This only changes behavior on TPM2 devices. Since TPM1 uses sysfs, and sysfs relies on implicit locking on chip->ops, it is not safe to allow this code to run in TPM1, or to add sysfs support to TPM2, until that locking is made explicit. Signed-off-by: Josh Zimmerman Cc: stable@vger.kernel.org Fixes: 74d6b3ceaa17 ("tpm: fix suspend/resume paths for TPM 2.0") Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-chip.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/char/tpm/tpm-sysfs.c | 7 +++++++ 2 files changed, 43 insertions(+) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index f3a887e4f692..6d56877b2e0a 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -124,6 +124,41 @@ static void tpm_dev_release(struct device *dev) kfree(chip); } + +/** + * tpm_class_shutdown() - prepare the TPM device for loss of power. + * @dev: device to which the chip is associated. + * + * Issues a TPM2_Shutdown command prior to loss of power, as required by the + * TPM 2.0 spec. + * Then, calls bus- and device- specific shutdown code. + * + * XXX: This codepath relies on the fact that sysfs is not enabled for + * TPM2: sysfs uses an implicit lock on chip->ops, so this could race if TPM2 + * has sysfs support enabled before TPM sysfs's implicit locking is fixed. + */ +static int tpm_class_shutdown(struct device *dev) +{ + struct tpm_chip *chip = container_of(dev, struct tpm_chip, dev); + + if (chip->flags & TPM_CHIP_FLAG_TPM2) { + down_write(&chip->ops_sem); + tpm2_shutdown(chip, TPM2_SU_CLEAR); + chip->ops = NULL; + up_write(&chip->ops_sem); + } + /* Allow bus- and device-specific code to run. Note: since chip->ops + * is NULL, more-specific shutdown code will not be able to issue TPM + * commands. + */ + if (dev->bus && dev->bus->shutdown) + dev->bus->shutdown(dev); + else if (dev->driver && dev->driver->shutdown) + dev->driver->shutdown(dev); + return 0; +} + + /** * tpmm_chip_alloc() - allocate a new struct tpm_chip instance * @dev: device to which the chip is associated @@ -166,6 +201,7 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, dev_set_drvdata(dev, chip); chip->dev.class = tpm_class; + chip->dev.class->shutdown = tpm_class_shutdown; chip->dev.release = tpm_dev_release; chip->dev.parent = dev; #ifdef CONFIG_ACPI diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c index 8af4145d10c7..6a4056a3f7ee 100644 --- a/drivers/char/tpm/tpm-sysfs.c +++ b/drivers/char/tpm/tpm-sysfs.c @@ -284,6 +284,13 @@ static const struct attribute_group tpm_dev_group = { int tpm_sysfs_add_device(struct tpm_chip *chip) { int err; + + /* XXX: If you wish to remove this restriction, you must first update + * tpm_sysfs to explicitly lock chip->ops. + */ + if (chip->flags & TPM_CHIP_FLAG_TPM2) + return 0; + err = sysfs_create_group(&chip->dev.parent->kobj, &tpm_dev_group); From 8f93a9aa1d654c691602359d426ab66997947b1c Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 14 Jul 2017 14:49:38 -0700 Subject: [PATCH 159/176] mm: fix overflow check in expand_upwards() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 37511fb5c91db93d8bd6e3f52f86e5a7ff7cfcdf upstream. 
Jörn Engel noticed that the expand_upwards() function might not return -ENOMEM in case the requested address is (unsigned long)-PAGE_SIZE and if the architecture didn't define TASK_SIZE as a multiple of PAGE_SIZE. Affected architectures are arm, frv, m68k, blackfin, h8300 and xtensa, which all define TASK_SIZE as 0xffffffff, but since none of those have an upwards-growing stack we currently have no actual issue. Nevertheless let's fix this just in case any of the architectures with an upward-growing stack (currently parisc, metag and partly ia64) define TASK_SIZE similarly. Link: http://lkml.kernel.org/r/20170702192452.GA11868@p100.box Fixes: bd726c90b6b8 ("Allow stack to grow up to address space limit") Signed-off-by: Helge Deller Reported-by: Jörn Engel Cc: Hugh Dickins Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mmap.c b/mm/mmap.c index 0990f8bc0fbe..eaa460ddcaf9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2176,7 +2176,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) /* Guard against exceeding limits of the address space. */ address &= PAGE_MASK; - if (address >= TASK_SIZE) + if (address >= (TASK_SIZE & PAGE_MASK)) return -ENOMEM; address += PAGE_SIZE; From d05fbdbe6d1c2b00f1d2c3010a4a4c710fb4097a Mon Sep 17 00:00:00 2001 From: Martin Hicks Date: Tue, 2 May 2017 09:38:35 -0400 Subject: [PATCH 160/176] crypto: talitos - Extend max key length for SHA384/512-HMAC and AEAD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 03d2c5114c95797c0aa7d9f463348b171a274fd4 upstream. An updated patch that also handles the additional key length requirements for the AEAD algorithms. The max keysize is not 96. For SHA384/512 it's 128, and for the AEAD algorithms it's longer still. Extend the max keysize for the AEAD size for AES256 + HMAC(SHA512). Fixes: 357fb60502ede ("crypto: talitos - add sha224, sha384 and sha512 to existing AEAD algorithms") Signed-off-by: Martin Hicks Acked-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 9a8a18aafd5c..6a60936b46e0 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -804,7 +804,7 @@ static void talitos_unregister_rng(struct device *dev) * crypto alg */ #define TALITOS_CRA_PRIORITY 3000 -#define TALITOS_MAX_KEY_SIZE 96 +#define TALITOS_MAX_KEY_SIZE (AES_MAX_KEY_SIZE + SHA512_BLOCK_SIZE) #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ struct talitos_ctx { @@ -1388,6 +1388,11 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, { struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); + if (keylen > TALITOS_MAX_KEY_SIZE) { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + memcpy(&ctx->key, key, keylen); ctx->keylen = keylen; From fc43f0935035008a65e1a8ed50c99679dce65377 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 28 Jun 2017 10:22:03 +0300 Subject: [PATCH 161/176] crypto: atmel - only treat EBUSY as transient if backlog commit 1606043f214f912a52195293614935811a6e3e53 upstream. The Atmel SHA driver was treating -EBUSY as an indication of queueing to the backlog without checking that the backlog is enabled for the request. Fix it by checking the request flags. 
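A minimal sketch of the convention this fix enforces, seen from a caller's side; the helper name is illustrative and not part of the patch, but it uses only ahash_request_flags() and CRYPTO_TFM_REQ_MAY_BACKLOG as the diff below does:

#include <crypto/internal/hash.h>
#include <linux/errno.h>

/* Returns true if a completion callback is still pending for req. */
static bool sketch_completion_pending(struct ahash_request *req, int err)
{
	/* -EINPROGRESS always means the request was accepted and the
	 * completion callback will run later.
	 */
	if (err == -EINPROGRESS)
		return true;

	/* -EBUSY only means "queued to the backlog" when the caller set
	 * CRYPTO_TFM_REQ_MAY_BACKLOG; without that flag it is a plain
	 * failure and no callback will ever fire.
	 */
	return err == -EBUSY &&
	       (ahash_request_flags(req) & CRYPTO_TFM_REQ_MAY_BACKLOG);
}

Before the fix, atmel_sha_finup() propagated every -EBUSY as if it were transient, even for requests that could not have been backlogged.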
Signed-off-by: Gilad Ben-Yossef Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/atmel-sha.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 0dadb6332f0e..7abe908427df 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -963,7 +963,9 @@ static int atmel_sha_finup(struct ahash_request *req) ctx->flags |= SHA_FLAGS_FINUP; err1 = atmel_sha_update(req); - if (err1 == -EINPROGRESS || err1 == -EBUSY) + if (err1 == -EINPROGRESS || + (err1 == -EBUSY && (ahash_request_flags(req) & + CRYPTO_TFM_REQ_MAY_BACKLOG))) return err1; /* From f1bf5d83a9c17b739f2a944e3686d6b97693e67f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 4 Jul 2017 12:21:12 +0800 Subject: [PATCH 162/176] crypto: sha1-ssse3 - Disable avx2 commit b82ce24426a4071da9529d726057e4e642948667 upstream. It has been reported that sha1-avx2 can cause page faults by reading beyond the end of the input. This patch disables it until it can be fixed. Fixes: 7c1da8d0d046 ("crypto: sha - SHA1 transform x86_64 AVX2") Reported-by: Jan Stancek Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/sha1_ssse3_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index dd14616b7739..7de207a11014 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, static bool avx2_usable(void) { - if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) + if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI1) && boot_cpu_has(X86_FEATURE_BMI2)) return true; From 4e3c1188521a62ccaa1a7e41498be24a886c502b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 7 Jul 2017 16:57:06 +0300 Subject: [PATCH 163/176] crypto: caam - fix signals handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7459e1d25ffefa2b1be799477fcc1f6c62f6cec7 upstream. The driver does not properly handle the case when signals interrupt wait_for_completion_interruptible(): - it does not check the return value - the completion structure is allocated on the stack; in case a signal interrupts the sleep, it will go out of scope, causing the worker thread (caam_jr_dequeue) to fail when it accesses it. wait_for_completion_interruptible() is replaced with the uninterruptible wait_for_completion(). We choose to block all signals while waiting for I/O (device executing the split key generation job descriptor) since the alternative - in order to have a deterministic device state - would be to flush the job ring (aborting *all* in-progress jobs). 
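A minimal sketch of the stack-lifetime hazard being fixed, with illustrative names; sketch_result stands in for the driver's split_key_result, and the job-ring calls (caam_jr_enqueue(), split_key_done(), caam_jr_dequeue()) are the ones referenced in the diff that follows:

#include <linux/completion.h>
#include <linux/device.h>

struct sketch_result {
	struct completion completion;
	int err;
};

static int sketch_run_descriptor(struct device *jrdev, u32 *desc)
{
	struct sketch_result res;	/* lives in this stack frame */
	int ret;

	init_completion(&res.completion);
	ret = caam_jr_enqueue(jrdev, desc, split_key_done, &res);
	if (ret)
		return ret;

	/* Deliberately uninterruptible: if a signal could abort this
	 * wait, the function would return and `res` would go out of
	 * scope while caam_jr_dequeue() can still complete the job and
	 * write through the now-stale pointer.
	 */
	wait_for_completion(&res.completion);
	return res.err;
}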
Fixes: 045e36780f115 ("crypto: caam - ahash hmac support") Fixes: 4c1ec1f930154 ("crypto: caam - refactor key_gen, sg") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/caamhash.c | 2 +- drivers/crypto/caam/key_gen.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 99d5e11db194..e06cc5df30be 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -498,7 +498,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); if (!ret) { /* in progress */ - wait_for_completion_interruptible(&result.completion); + wait_for_completion(&result.completion); ret = result.err; #ifdef DEBUG print_hex_dump(KERN_ERR, diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c index e1eaf4ff9762..3ce1d5cdcbd2 100644 --- a/drivers/crypto/caam/key_gen.c +++ b/drivers/crypto/caam/key_gen.c @@ -103,7 +103,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); if (!ret) { /* in progress */ - wait_for_completion_interruptible(&result.completion); + wait_for_completion(&result.completion); ret = result.err; #ifdef DEBUG print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", From 5c34f49776b8d556bb83d31945cfdb5340792bf2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Apr 2017 14:00:49 +0200 Subject: [PATCH 164/176] sched/topology: Fix overlapping sched_group_mask commit 73bb059f9b8a00c5e1bf2f7ca83138c05d05e600 upstream. The point of sched_group_mask is to select those CPUs from sched_group_cpus that can actually arrive at this balance domain. The current code gets it wrong, as can be readily demonstrated with a topology like: node 0 1 2 3 0: 10 20 30 20 1: 20 10 20 30 2: 30 20 10 20 3: 20 30 20 10 Where (for example) domain 1 on CPU1 ends up with a mask that includes CPU0: [] CPU1 attaching sched-domain: [] domain 0: span 0-2 level NUMA [] groups: 1 (mask: 1), 2, 0 [] domain 1: span 0-3 level NUMA [] groups: 0-2 (mask: 0-2) (cpu_capacity: 3072), 0,2-3 (cpu_capacity: 3072) This causes sched_balance_cpu() to compute the wrong CPU and consequently should_we_balance() will terminate early resulting in missed load-balance opportunities. The fixed topology looks like: [] CPU1 attaching sched-domain: [] domain 0: span 0-2 level NUMA [] groups: 1 (mask: 1), 2, 0 [] domain 1: span 0-3 level NUMA [] groups: 0-2 (mask: 1) (cpu_capacity: 3072), 0,2-3 (cpu_capacity: 3072) (note: this relies on OVERLAP domains to always have children, this is true because the regular topology domains are still here -- this is before degenerate trimming) Debugged-by: Lauro Ramos Venancio Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: e3589f6c81e4 ("sched: Allow for overlapping sched_domain spans") Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 20253dbc8610..62030b63b671 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6123,6 +6123,9 @@ enum s_alloc { * Build an iteration mask that can exclude certain CPUs from the upwards * domain traversal. 
* + * Only CPUs that can arrive at this group should be considered to continue + * balancing. + * * Asymmetric node setups can result in situations where the domain tree is of * unequal depth, make sure to skip domains that already cover the entire * range. @@ -6141,11 +6144,24 @@ static void build_group_mask(struct sched_domain *sd, struct sched_group *sg) for_each_cpu(i, span) { sibling = *per_cpu_ptr(sdd->sd, i); - if (!cpumask_test_cpu(i, sched_domain_span(sibling))) + + /* + * Can happen in the asymmetric case, where these siblings are + * unused. The mask will not be empty because those CPUs that + * do have the top domain _should_ span the domain. + */ + if (!sibling->child) + continue; + + /* If we would not end up here, we can't continue from here */ + if (!cpumask_equal(sg_span, sched_domain_span(sibling->child))) continue; cpumask_set_cpu(i, sched_group_mask(sg)); } + + /* We must not have empty masks here */ + WARN_ON_ONCE(cpumask_empty(sched_group_mask(sg))); } /* From 988067ec9606e3d7bd7e1125118e77e06c2eda4a Mon Sep 17 00:00:00 2001 From: Lauro Ramos Venancio Date: Thu, 20 Apr 2017 16:51:40 -0300 Subject: [PATCH 165/176] sched/topology: Optimize build_group_mask() commit f32d782e31bf079f600dcec126ed117b0577e85c upstream. The group mask is always used in intersection with the group CPUs. So, when building the group mask, we don't have to care about CPUs that are not part of the group. Signed-off-by: Lauro Ramos Venancio Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: lwang@redhat.com Cc: riel@redhat.com Link: http://lkml.kernel.org/r/1492717903-5195-2-git-send-email-lvenanci@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 62030b63b671..c436426a80dd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6137,12 +6137,12 @@ enum s_alloc { */ static void build_group_mask(struct sched_domain *sd, struct sched_group *sg) { - const struct cpumask *span = sched_domain_span(sd); + const struct cpumask *sg_span = sched_group_cpus(sg); struct sd_data *sdd = sd->private; struct sched_domain *sibling; int i; - for_each_cpu(i, span) { + for_each_cpu(i, sg_span) { sibling = *per_cpu_ptr(sdd->sd, i); /* From 275d4be9d39f91ddbaa2f42b69535683bb4d0573 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Jun 2017 19:31:13 +0200 Subject: [PATCH 166/176] PM / wakeirq: Convert to SRCU commit ea0212f40c6bc0594c8eff79266759e3ecd4bacc upstream. The wakeirq infrastructure uses RCU to protect the list of wakeirqs. That breaks the irq bus locking infrastructure, which allows sleeping functions to be called so that interrupt controllers behind slow busses, e.g. i2c, can be handled. The wakeirq functions hold rcu_read_lock and call into irq functions, which, in the case of interrupts using the irq bus locking, will trigger a might_sleep() splat. Convert the wakeirq infrastructure to Sleepable RCU and unbreak it. Fixes: 4990d4fe327b (PM / Wakeirq: Add automated device wake IRQ handling) Reported-by: Brian Norris Suggested-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Reviewed-by: Paul E. McKenney Tested-by: Tony Lindgren Tested-by: Brian Norris Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/wakeup.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index a1e0b9ab847a..e613633ffe9c 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -60,6 +60,8 @@ static LIST_HEAD(wakeup_sources); static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue); +DEFINE_STATIC_SRCU(wakeup_srcu); + static struct wakeup_source deleted_ws = { .name = "deleted", .lock = __SPIN_LOCK_UNLOCKED(deleted_ws.lock), @@ -198,7 +200,7 @@ void wakeup_source_remove(struct wakeup_source *ws) spin_lock_irqsave(&events_lock, flags); list_del_rcu(&ws->entry); spin_unlock_irqrestore(&events_lock, flags); - synchronize_rcu(); + synchronize_srcu(&wakeup_srcu); } EXPORT_SYMBOL_GPL(wakeup_source_remove); @@ -330,13 +332,14 @@ void device_wakeup_detach_irq(struct device *dev) void device_wakeup_arm_wake_irqs(void) { struct wakeup_source *ws; + int srcuidx; - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { if (ws->wakeirq) dev_pm_arm_wake_irq(ws->wakeirq); } - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); } /** @@ -347,13 +350,14 @@ void device_wakeup_arm_wake_irqs(void) void device_wakeup_disarm_wake_irqs(void) { struct wakeup_source *ws; + int srcuidx; - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { if (ws->wakeirq) dev_pm_disarm_wake_irq(ws->wakeirq); } - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); } /** @@ -807,10 +811,10 @@ EXPORT_SYMBOL_GPL(pm_wakeup_event); void pm_print_active_wakeup_sources(void) { struct wakeup_source *ws; - int active = 0; + int srcuidx, active = 0; struct wakeup_source *last_activity_ws = NULL; - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { if (ws->active) { pr_info("active wakeup source: %s\n", ws->name); @@ -826,7 +830,7 @@ void pm_print_active_wakeup_sources(void) if (!active && last_activity_ws) pr_info("last active wakeup source: %s\n", last_activity_ws->name); - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); } EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources); @@ -953,8 +957,9 @@ void pm_wakep_autosleep_enabled(bool set) { struct wakeup_source *ws; ktime_t now = ktime_get(); + int srcuidx; - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { spin_lock_irq(&ws->lock); if (ws->autosleep_enabled != set) { @@ -968,7 +973,7 @@ void pm_wakep_autosleep_enabled(bool set) } spin_unlock_irq(&ws->lock); } - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); } #endif /* CONFIG_PM_AUTOSLEEP */ @@ -1029,15 +1034,16 @@ static int print_wakeup_source_stats(struct seq_file *m, static int wakeup_sources_stats_show(struct seq_file *m, void *unused) { struct wakeup_source *ws; + int srcuidx; seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" "expire_count\tactive_since\ttotal_time\tmax_time\t" "last_change\tprevent_suspend_time\n"); - rcu_read_lock(); + srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) print_wakeup_source_stats(m, ws); - rcu_read_unlock(); + srcu_read_unlock(&wakeup_srcu, srcuidx); print_wakeup_source_stats(m, &deleted_ws); From c17f6512463e0675fb066affc41bc0b1c6dfb60b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Jul 
2017 10:21:40 +0300 Subject: [PATCH 167/176] PM / QoS: return -EINVAL for bogus strings commit 2ca30331c156ca9e97643ad05dd8930b8fe78b01 upstream. In the current code, if the user accidentally writes a bogus command to this sysfs file, then we set the latency tolerance to an uninitialized variable. Fixes: 2d984ad132a8 (PM / QoS: Introcuce latency tolerance device PM QoS type) Signed-off-by: Dan Carpenter Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index a7b46798c81d..39efa7e6c0c0 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -268,6 +268,8 @@ static ssize_t pm_qos_latency_tolerance_store(struct device *dev, value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; else if (!strcmp(buf, "any") || !strcmp(buf, "any\n")) value = PM_QOS_LATENCY_ANY; + else + return -EINVAL; } ret = dev_pm_qos_update_user_latency_tolerance(dev, value); return ret < 0 ? ret : n; From 999b96b4de81a984bc94302b9ab7cad9f45eb6df Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Fri, 9 Dec 2016 21:50:17 +0530 Subject: [PATCH 168/176] tracing: Use SOFTIRQ_OFFSET for softirq detection for more accurate results commit c59f29cb144a6a0dfac16ede9dc8eafc02dc56ca upstream. The 's' flag is supposed to indicate that a softirq is running. This can be detected by testing the preempt_count with SOFTIRQ_OFFSET. The current code tests the preempt_count with SOFTIRQ_MASK, which would be true even when softirqs are disabled but not serving a softirq. Link: http://lkml.kernel.org/r/1481300417-3564-1-git-send-email-pkondeti@codeaurora.org Signed-off-by: Pavankumar Kondeti Signed-off-by: Steven Rostedt Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4c21c0b7dc91..c83d59913d78 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1660,7 +1660,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, TRACE_FLAG_IRQS_NOSUPPORT | #endif ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | - ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | + ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); } From c6f3576ed0f21058629d5461eb014c1e0e3f7c4d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 8 Mar 2016 09:52:13 +0100 Subject: [PATCH 169/176] KVM: x86: disable MPX if host did not enable MPX XSAVE features commit a87036add09283e6c4f4103a15c596c67b86ab86 upstream. When eager FPU is disabled, KVM will still see the MPX bit in CPUID and presumably the MPX vmentry and vmexit controls. However, it will not be able to expose the MPX XSAVE features to the guest, because the guest's accessible XSAVE features are always a subset of host_xcr0. In this case, we should disable the MPX CPUID bit, the BNDCFGS MSR, and the MPX vmentry and vmexit controls for nested virtualization. It is then unnecessary to enable guest eager FPU if the guest has the MPX CPUID bit set.
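To make the gating concrete: MPX state is usable only when both of its XSAVE components are enabled in the host's XCR0. A minimal standalone sketch of that check (the mask values mirror the kernel's XFEATURE_MASK_BNDREGS and XFEATURE_MASK_BNDCSR, bits 3 and 4; the helper name and the cpu_has_mpx_controls parameter are illustrative, not kernel API):

  #include <stdbool.h>
  #include <stdint.h>

  #define XFEATURE_MASK_BNDREGS (1ULL << 3) /* MPX bound registers BND0-BND3 */
  #define XFEATURE_MASK_BNDCSR  (1ULL << 4) /* MPX bounds config/status regs */

  /* MPX may be exposed to guests only if the host enabled both MPX states. */
  static bool host_allows_mpx(uint64_t host_xcr0, bool cpu_has_mpx_controls)
  {
      const uint64_t mpx_states = XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR;

      return (host_xcr0 & mpx_states) == mpx_states && cpu_has_mpx_controls;
  }

This is the shape of the kvm_mpx_supported() helper introduced in the diff below.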
Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 13 ++++++++++--- arch/x86/kvm/cpuid.h | 9 +-------- arch/x86/kvm/vmx.c | 13 ++++++------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9357b29de9bc..83d6369c45f5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -46,11 +46,18 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted) return ret; } +bool kvm_mpx_supported(void) +{ + return ((host_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) + && kvm_x86_ops->mpx_supported()); +} +EXPORT_SYMBOL_GPL(kvm_mpx_supported); + u64 kvm_supported_xcr0(void) { u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0; - if (!kvm_x86_ops->mpx_supported()) + if (!kvm_mpx_supported()) xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); return xcr0; @@ -97,7 +104,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu); + vcpu->arch.eager_fpu = use_eager_fpu(); if (vcpu->arch.eager_fpu) kvm_x86_ops->fpu_activate(vcpu); @@ -295,7 +302,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, #endif unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; - unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0; + unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0; unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; /* cpuid 1.edx */ diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 3f5c48ddba45..33d574fb2445 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -4,6 +4,7 @@ #include "x86.h" int kvm_update_cpuid(struct kvm_vcpu *vcpu); +bool kvm_mpx_supported(void); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, @@ -134,14 +135,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) return best && (best->ebx & bit(X86_FEATURE_RTM)); } -static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_MPX)); -} - static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bbaa11f4e74b..5579ae069dee 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -863,7 +863,6 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); static u64 construct_eptp(unsigned long root_hpa); static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); -static bool vmx_mpx_supported(void); static bool vmx_xsaves_supported(void); static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); @@ -2541,7 +2540,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; - if (vmx_mpx_supported()) + if (kvm_mpx_supported()) vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; /* We support free control of debug control saving. 
*/ @@ -2562,7 +2561,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) VM_ENTRY_LOAD_IA32_PAT; vmx->nested.nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); - if (vmx_mpx_supported()) + if (kvm_mpx_supported()) vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; /* We support free control of debug control loading. */ @@ -2813,7 +2812,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); break; case MSR_IA32_BNDCFGS: - if (!vmx_mpx_supported()) + if (!kvm_mpx_supported()) return 1; msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; @@ -2890,7 +2889,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_BNDCFGS: - if (!vmx_mpx_supported()) + if (!kvm_mpx_supported()) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; @@ -3363,7 +3362,7 @@ static void init_vmcs_shadow_fields(void) for (i = j = 0; i < max_shadow_read_write_fields; i++) { switch (shadow_read_write_fields[i]) { case GUEST_BNDCFGS: - if (!vmx_mpx_supported()) + if (!kvm_mpx_supported()) continue; break; default: @@ -10265,7 +10264,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); - if (vmx_mpx_supported()) + if (kvm_mpx_supported()) vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); if (nested_cpu_has_xsaves(vmcs12)) vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); From be6f33a61f335cb06b3c8c2cee90214f97c4bda3 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 23 May 2017 11:52:52 -0700 Subject: [PATCH 170/176] kvm: vmx: Do not disable intercepts for BNDCFGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a8b6fda38f80e75afa3b125c9e7f2550b579454b upstream. The MSR permission bitmaps are shared by all VMs. However, some VMs may not be configured to support MPX, even when the host does. If the host supports MPX and the guest does not, we should intercept accesses to the BNDCFGS MSR, so that we can synthesize a #GP fault. Furthermore, if the host does not support MPX and the "ignore_msrs" kvm kernel parameter is set, then we should intercept accesses to the BNDCFGS MSR, so that we can skip over the rdmsr/wrmsr without raising a #GP fault.
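Pass-through of BNDCFGS is therefore only safe when host and guest both support MPX; every other combination must keep the intercept. A sketch of that policy (helper names hypothetical, for illustration only):

  #include <stdbool.h>

  /*
   * Guest lacks MPX: keep intercepting so KVM can synthesize the #GP.
   * Host lacks MPX: keep intercepting so "ignore_msrs" can skip the access.
   */
  static bool must_intercept_bndcfgs(bool host_has_mpx, bool guest_has_mpx)
  {
      return !host_has_mpx || !guest_has_mpx;
  }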
Fixes: da8999d31818fdc8 ("KVM: x86: Intel MPX vmx and msr handle") Signed-off-by: Jim Mattson Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5579ae069dee..0e0c52e57e90 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6252,7 +6252,6 @@ static __init int hardware_setup(void) vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); memcpy(vmx_msr_bitmap_legacy_x2apic, vmx_msr_bitmap_legacy, PAGE_SIZE); From a40f0ccd4f1c2751d5df5d9a63b9cf93ff70d343 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 24 May 2017 10:49:25 -0700 Subject: [PATCH 171/176] kvm: x86: Guest BNDCFGS requires guest MPX support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4439af9f911ae0243ffe4e2dfc12bace49605d8b upstream. The BNDCFGS MSR should only be exposed to the guest if the guest supports MPX. (cf. the TSC_AUX MSR and RDTSCP.) Fixes: 0dd376e709975779 ("KVM: x86: add MSR_IA32_BNDCFGS to msrs_to_save") Change-Id: I3ad7c01bda616715137ceac878f3fa7e66b6b387 Signed-off-by: Jim Mattson Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.h | 8 ++++++++ arch/x86/kvm/vmx.c | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 33d574fb2445..d1534feefcfe 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -143,6 +143,14 @@ static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) return best && (best->ebx & bit(X86_FEATURE_PCOMMIT)); } +static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->ebx & bit(X86_FEATURE_MPX)); +} + static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0e0c52e57e90..3d776b625766 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2812,7 +2812,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); break; case MSR_IA32_BNDCFGS: - if (!kvm_mpx_supported()) + if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu)) return 1; msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; @@ -2889,7 +2889,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_BNDCFGS: - if (!kvm_mpx_supported()) + if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu)) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; From 6ffa92ffaed817cea7625ebf9c7f217e293923be Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 23 May 2017 11:52:54 -0700 Subject: [PATCH 172/176] kvm: vmx: Check value written to IA32_BNDCFGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4531662d1abf6c1f0e5c2b86ddb60e61509786c8 upstream. Bits 11:2 must be zero and the linear address in bits 63:12 must be canonical. Otherwise, WRMSR(BNDCFGS) should raise #GP.
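A standalone sketch of that validity check, assuming 48-bit linear addresses (the kernel itself uses is_noncanonical_address() and PAGE_MASK, as the diff below shows; the sign-extension idiom relies on arithmetic right shift, as kernel code commonly does):

  #include <stdbool.h>
  #include <stdint.h>

  #define BNDCFGS_RSVD 0x0000000000000ffcULL /* bits 11:2, must be zero */

  static bool bndcfgs_value_ok(uint64_t data)
  {
      /* Base address lives in bits 63:12; bits 1:0 are EN/BNDPRESERVE. */
      uint64_t base = data & ~0xfffULL;

      /* Canonical for 48-bit VAs: bits 63:47 are copies of bit 47. */
      bool canonical = (uint64_t)((int64_t)(base << 16) >> 16) == base;

      return canonical && !(data & BNDCFGS_RSVD);
  }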
Fixes: 0dd376e709975779 ("KVM: x86: add MSR_IA32_BNDCFGS to msrs_to_save") Signed-off-by: Jim Mattson Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/kvm/vmx.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 690b4027e17c..37db36fddc88 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -405,6 +405,8 @@ #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_BNDCFGS 0x00000d90 +#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc + #define MSR_IA32_XSS 0x00000da0 #define FEATURE_CONTROL_LOCKED (1<<0) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3d776b625766..a2dd5a5ee472 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2891,6 +2891,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu)) return 1; + if (is_noncanonical_address(data & PAGE_MASK) || + (data & MSR_IA32_BNDCFGS_RSVD)) + return 1; vmcs_write64(GUEST_BNDCFGS, data); break; case MSR_IA32_TSC: From 3ed43caedf69a6fd417bdda174eb8fa06b3b5dab Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Tue, 4 Jul 2017 10:27:41 +0800 Subject: [PATCH 173/176] kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS commit 691bd4340bef49cf7e5855d06cf24444b5bf2d85 upstream. It's easier for host applications, such as QEMU, if they can always access guest MSR_IA32_BNDCFGS in VMCS, even though MPX is disabled in guest cpuid. Signed-off-by: Haozhong Zhang Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a2dd5a5ee472..b12391119ce8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2812,7 +2812,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); break; case MSR_IA32_BNDCFGS: - if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu)) + if (!kvm_mpx_supported() || + (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) return 1; msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; @@ -2890,7 +2890,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_BNDCFGS: - if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu)) + if (!kvm_mpx_supported() || + (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) return 1; if (is_noncanonical_address(data & PAGE_MASK) || (data & MSR_IA32_BNDCFGS_RSVD)) From ece78cd779f4f306a869f217bc0236507b0fe5f5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Jul 2017 07:45:10 +0200 Subject: [PATCH 174/176] Linux 4.4.78 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bf49a61d02e2..ac77ae8ee0b1 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 77 +SUBLEVEL = 78 EXTRAVERSION = NAME = Blurry Fish Butt From 220bdae208dfb20abd040e8e87dc3a4dbca72283 Mon Sep 17 00:00:00 2001 From: Blagovest Kolenichev Date: Wed, 26 Jul 2017 05:01:18 -0700 Subject: [PATCH 175/176] android: binder: Revert fixes for preemption and locking issues These were hacks and are no longer needed, as the next binder from android-4.4@59ff2e1 (v4.4.78) has been reworked to use fine-grained locking.
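For context, the reverted helpers all followed one pattern: briefly re-enable preemption around a call that may sleep, then disable it again to restore the driver's preemption-off invariant. Schematically (a paraphrase of the wrappers visible in the removal hunks below, not new code):

  /* Sketch of the reverted wrapper pattern around a sleeping call. */
  static inline long copy_from_user_preempt_disabled(void *to,
                                                     const void __user *from,
                                                     long n)
  {
      long ret;

      preempt_enable_no_resched();   /* copy_from_user() may sleep */
      ret = copy_from_user(to, from, n);
      preempt_disable();             /* back to the locked invariant */
      return ret;
  }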
This is a preparation for a clean merge of binder changes from android-4.4@59ff2e1 (v4.4.78), which follows right after this commit. Reverted changes: ================= 773fc2f android: binder: use copy_from_user_preempt_disabled To keep the driver consistent, and until we have fine-grained locking in place. Change-Id: I90fe02cdedb8a5677b900a68528fb443b9204322 Signed-off-by: Riley Andrews Git-repo: https://source.codeaurora.org/quic/la/kernel/msm-4.4 Git-commit: 5c9ce54ca3a66a57e4ebfe3ae71c5733b6bcc579 Signed-off-by: Vikram Mulukutla 821e02f CHROMIUM: android: binder: Fix potential scheduling-while-atomic Commit f1e7f0a724f6 ("android: binder: Disable preemption while holding the global binder lock.") re-enabled preemption around most of the sites where calls to potentially sleeping functions were made, but missed __alloc_fd(), which can sleep if the fdtable needs to be resized. Re-enable preemption around __alloc_fd() as well as __fd_install() which can now sleep in upstream kernels as of commit 8a81252b774b ("fs/file.c: don't acquire files->file_lock in fd_install()"). BUG=chrome-os-partner:44012 TEST=Build and boot on Smaug. Change-Id: I9819c4b95876f697e75b1b84810b6c520d9c33ec Signed-off-by: Andrew Bresticker Reviewed-on: https://chromium-review.googlesource.com/308582 Reviewed-by: Stephen Barber Reviewed-by: Riley Andrews Git-repo: https://source.codeaurora.org/quic/la/kernel/msm-4.4 Git-commit: c267ff1d548ed1bdad6a08f1c70776c5e60d569e Signed-off-by: Vikram Mulukutla e4045d6 android: binder: Disable preemption while holding the global binder lock. Change-Id: I90fe02cdedb8a5677b900a68528fb443b9204322 Signed-off-by: Riley Andrews Git-repo: https://source.codeaurora.org/quic/la/kernel/msm-4.4 Git-commit: 5c9ce54ca3a66a57e4ebfe3ae71c5733b6bcc579 Signed-off-by: Vikram Mulukutla Change-Id: Id97e04f3694d76c2b6a1e7c0dc39da15d83c2867 Signed-off-by: Blagovest Kolenichev --- drivers/android/binder.c | 179 +++++++++++---------------------------- 1 file changed, 49 insertions(+), 130 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 0c3cf182e351..573e9795b236 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -417,7 +417,6 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) struct files_struct *files = proc->files; unsigned long rlim_cur; unsigned long irqs; - int ret; if (files == NULL) return -ESRCH; @@ -428,11 +427,7 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); unlock_task_sighand(proc->tsk, &irqs); - preempt_enable_no_resched(); - ret = __alloc_fd(files, 0, rlim_cur, flags); - preempt_disable(); - - return ret; + return __alloc_fd(files, 0, rlim_cur, flags); } /* @@ -441,11 +436,8 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) static void task_fd_install( struct binder_proc *proc, unsigned int fd, struct file *file) { - if (proc->files) { - preempt_enable_no_resched(); + if (proc->files) __fd_install(proc->files, fd, file); - preempt_disable(); - } } /* @@ -473,7 +465,6 @@ static inline void binder_lock(struct binder_context *context, const char *tag) { trace_binder_lock(tag); mutex_lock(&context->binder_main_lock); - preempt_disable(); trace_binder_locked(tag); } @@ -482,62 +473,8 @@ static inline void binder_unlock(struct binder_context *context, { trace_binder_unlock(tag); mutex_unlock(&context->binder_main_lock); - preempt_enable(); } -static inline void *kzalloc_preempt_disabled(size_t size) -{ - void
*ptr; - - ptr = kzalloc(size, GFP_NOWAIT); - if (ptr) - return ptr; - - preempt_enable_no_resched(); - ptr = kzalloc(size, GFP_KERNEL); - preempt_disable(); - - return ptr; -} - -static inline long copy_to_user_preempt_disabled(void __user *to, const void *from, long n) -{ - long ret; - - preempt_enable_no_resched(); - ret = copy_to_user(to, from, n); - preempt_disable(); - return ret; -} - -static inline long copy_from_user_preempt_disabled(void *to, const void __user *from, long n) -{ - long ret; - - preempt_enable_no_resched(); - ret = copy_from_user(to, from, n); - preempt_disable(); - return ret; -} - -#define get_user_preempt_disabled(x, ptr) \ -({ \ - int __ret; \ - preempt_enable_no_resched(); \ - __ret = get_user(x, ptr); \ - preempt_disable(); \ - __ret; \ -}) - -#define put_user_preempt_disabled(x, ptr) \ -({ \ - int __ret; \ - preempt_enable_no_resched(); \ - __ret = put_user(x, ptr); \ - preempt_disable(); \ - __ret; \ -}) - static void binder_set_nice(long nice) { long min_nice; @@ -670,8 +607,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, else mm = get_task_mm(proc->tsk); - preempt_enable_no_resched(); - if (mm) { down_write(&mm->mmap_sem); vma = proc->vma; @@ -726,9 +661,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, up_write(&mm->mmap_sem); mmput(mm); } - - preempt_disable(); - return 0; free_range: @@ -751,9 +683,6 @@ err_no_vma: up_write(&mm->mmap_sem); mmput(mm); } - - preempt_disable(); - return -ENOMEM; } @@ -1022,7 +951,7 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, return NULL; } - node = kzalloc_preempt_disabled(sizeof(*node)); + node = kzalloc(sizeof(*node), GFP_KERNEL); if (node == NULL) return NULL; binder_stats_created(BINDER_STAT_NODE); @@ -1166,7 +1095,7 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, else return ref; } - new_ref = kzalloc_preempt_disabled(sizeof(*ref)); + new_ref = kzalloc(sizeof(*ref), GFP_KERNEL); if (new_ref == NULL) return NULL; binder_stats_created(BINDER_STAT_REF); @@ -2039,14 +1968,14 @@ static void binder_transaction(struct binder_proc *proc, e->to_proc = target_proc->pid; /* TODO: reuse incoming transaction for reply */ - t = kzalloc_preempt_disabled(sizeof(*t)); + t = kzalloc(sizeof(*t), GFP_KERNEL); if (t == NULL) { return_error = BR_FAILED_REPLY; goto err_alloc_t_failed; } binder_stats_created(BINDER_STAT_TRANSACTION); - tcomplete = kzalloc_preempt_disabled(sizeof(*tcomplete)); + tcomplete = kzalloc(sizeof(*tcomplete), GFP_KERNEL); if (tcomplete == NULL) { return_error = BR_FAILED_REPLY; goto err_alloc_tcomplete_failed; @@ -2107,14 +2036,14 @@ static void binder_transaction(struct binder_proc *proc, ALIGN(tr->data_size, sizeof(void *))); offp = off_start; - if (copy_from_user_preempt_disabled(t->buffer->data, (const void __user *)(uintptr_t) + if (copy_from_user(t->buffer->data, (const void __user *)(uintptr_t) tr->data.ptr.buffer, tr->data_size)) { binder_user_error("%d:%d got transaction with invalid data ptr\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; goto err_copy_data_failed; } - if (copy_from_user_preempt_disabled(offp, (const void __user *)(uintptr_t) + if (copy_from_user(offp, (const void __user *)(uintptr_t) tr->data.ptr.offsets, tr->offsets_size)) { binder_user_error("%d:%d got transaction with invalid offsets ptr\n", proc->pid, thread->pid); @@ -2232,10 +2161,9 @@ static void binder_transaction(struct binder_proc *proc, return_error = BR_FAILED_REPLY; goto err_bad_offset; } - if 
(copy_from_user_preempt_disabled( - sg_bufp, - (const void __user *)(uintptr_t) - bp->buffer, bp->length)) { + if (copy_from_user(sg_bufp, + (const void __user *)(uintptr_t) + bp->buffer, bp->length)) { binder_user_error("%d:%d got transaction with invalid offsets ptr\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; @@ -2351,7 +2279,7 @@ static int binder_thread_write(struct binder_proc *proc, void __user *end = buffer + size; while (ptr < end && thread->return_error == BR_OK) { - if (get_user_preempt_disabled(cmd, (uint32_t __user *)ptr)) + if (get_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); trace_binder_command(cmd); @@ -2369,7 +2297,7 @@ static int binder_thread_write(struct binder_proc *proc, struct binder_ref *ref; const char *debug_string; - if (get_user_preempt_disabled(target, (uint32_t __user *)ptr)) + if (get_user(target, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); if (target == 0 && context->binder_context_mgr_node && @@ -2421,10 +2349,10 @@ static int binder_thread_write(struct binder_proc *proc, binder_uintptr_t cookie; struct binder_node *node; - if (get_user_preempt_disabled(node_ptr, (binder_uintptr_t __user *)ptr)) + if (get_user(node_ptr, (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(binder_uintptr_t); - if (get_user_preempt_disabled(cookie, (binder_uintptr_t __user *)ptr)) + if (get_user(cookie, (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(binder_uintptr_t); node = binder_get_node(proc, node_ptr); @@ -2482,7 +2410,7 @@ static int binder_thread_write(struct binder_proc *proc, binder_uintptr_t data_ptr; struct binder_buffer *buffer; - if (get_user_preempt_disabled(data_ptr, (binder_uintptr_t __user *)ptr)) + if (get_user(data_ptr, (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(binder_uintptr_t); @@ -2524,8 +2452,7 @@ static int binder_thread_write(struct binder_proc *proc, case BC_REPLY_SG: { struct binder_transaction_data_sg tr; - if (copy_from_user_preempt_disabled(&tr, ptr, - sizeof(tr))) + if (copy_from_user(&tr, ptr, sizeof(tr))) return -EFAULT; ptr += sizeof(tr); binder_transaction(proc, thread, &tr.transaction_data, @@ -2536,7 +2463,7 @@ static int binder_thread_write(struct binder_proc *proc, case BC_REPLY: { struct binder_transaction_data tr; - if (copy_from_user_preempt_disabled(&tr, ptr, sizeof(tr))) + if (copy_from_user(&tr, ptr, sizeof(tr))) return -EFAULT; ptr += sizeof(tr); binder_transaction(proc, thread, &tr, @@ -2587,10 +2514,10 @@ static int binder_thread_write(struct binder_proc *proc, struct binder_ref *ref; struct binder_ref_death *death; - if (get_user_preempt_disabled(target, (uint32_t __user *)ptr)) + if (get_user(target, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); - if (get_user_preempt_disabled(cookie, (binder_uintptr_t __user *)ptr)) + if (get_user(cookie, (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(binder_uintptr_t); ref = binder_get_ref(proc, target, false); @@ -2619,7 +2546,7 @@ static int binder_thread_write(struct binder_proc *proc, proc->pid, thread->pid); break; } - death = kzalloc_preempt_disabled(sizeof(*death)); + death = kzalloc(sizeof(*death), GFP_KERNEL); if (death == NULL) { thread->return_error = BR_ERROR; binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, @@ -2673,7 +2600,8 @@ static int binder_thread_write(struct binder_proc *proc, struct binder_work *w; binder_uintptr_t cookie; struct binder_ref_death *death = NULL; - if (get_user_preempt_disabled(cookie, (binder_uintptr_t 
__user *)ptr)) + + if (get_user(cookie, (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(cookie); @@ -2705,8 +2633,7 @@ static int binder_thread_write(struct binder_proc *proc, wake_up_interruptible(&proc->wait); } } - } - break; + } break; default: pr_err("%d:%d unknown command %d\n", @@ -2755,7 +2682,7 @@ static int binder_thread_read(struct binder_proc *proc, int wait_for_proc_work; if (*consumed == 0) { - if (put_user_preempt_disabled(BR_NOOP, (uint32_t __user *)ptr)) + if (put_user(BR_NOOP, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); } @@ -2766,7 +2693,7 @@ retry: if (thread->return_error != BR_OK && ptr < end) { if (thread->return_error2 != BR_OK) { - if (put_user_preempt_disabled(thread->return_error2, (uint32_t __user *)ptr)) + if (put_user(thread->return_error2, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); binder_stat_br(proc, thread, thread->return_error2); @@ -2774,7 +2701,7 @@ retry: goto done; thread->return_error2 = BR_OK; } - if (put_user_preempt_disabled(thread->return_error, (uint32_t __user *)ptr)) + if (put_user(thread->return_error, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); binder_stat_br(proc, thread, thread->return_error); @@ -2852,7 +2779,7 @@ retry: } break; case BINDER_WORK_TRANSACTION_COMPLETE: { cmd = BR_TRANSACTION_COMPLETE; - if (put_user_preempt_disabled(cmd, (uint32_t __user *) ptr)) + if (put_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); @@ -2894,14 +2821,14 @@ retry: node->has_weak_ref = 0; } if (cmd != BR_NOOP) { - if (put_user_preempt_disabled(cmd, (uint32_t __user *) ptr)) + if (put_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); - if (put_user_preempt_disabled(node->ptr, (binder_uintptr_t __user *) + if (put_user(node->ptr, (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(binder_uintptr_t); - if (put_user_preempt_disabled(node->cookie, (binder_uintptr_t __user *) + if (put_user(node->cookie, (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(binder_uintptr_t); @@ -2945,10 +2872,11 @@ retry: cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE; else cmd = BR_DEAD_BINDER; - if (put_user_preempt_disabled(cmd, (uint32_t __user *) ptr)) + if (put_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); - if (put_user_preempt_disabled(death->cookie, (binder_uintptr_t __user *) ptr)) + if (put_user(death->cookie, + (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(binder_uintptr_t); binder_stat_br(proc, thread, cmd); @@ -3015,10 +2943,10 @@ retry: ALIGN(t->buffer->data_size, sizeof(void *)); - if (put_user_preempt_disabled(cmd, (uint32_t __user *) ptr)) + if (put_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); - if (copy_to_user_preempt_disabled(ptr, &tr, sizeof(tr))) + if (copy_to_user(ptr, &tr, sizeof(tr))) return -EFAULT; ptr += sizeof(tr); @@ -3060,7 +2988,7 @@ done: binder_debug(BINDER_DEBUG_THREADS, "%d:%d BR_SPAWN_LOOPER\n", proc->pid, thread->pid); - if (put_user_preempt_disabled(BR_SPAWN_LOOPER, (uint32_t __user *) buffer)) + if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer)) return -EFAULT; binder_stat_br(proc, thread, BR_SPAWN_LOOPER); } @@ -3135,7 +3063,7 @@ static struct binder_thread *binder_get_thread(struct binder_proc *proc) break; } if (*p == NULL) { - thread = kzalloc_preempt_disabled(sizeof(*thread)); + thread = kzalloc(sizeof(*thread), GFP_KERNEL); if (thread == NULL) return NULL; binder_stats_created(BINDER_STAT_THREAD); @@ 
-3239,7 +3167,7 @@ static int binder_ioctl_write_read(struct file *filp, ret = -EINVAL; goto out; } - if (copy_from_user_preempt_disabled(&bwr, ubuf, sizeof(bwr))) { + if (copy_from_user(&bwr, ubuf, sizeof(bwr))) { ret = -EFAULT; goto out; } @@ -3257,7 +3185,7 @@ static int binder_ioctl_write_read(struct file *filp, trace_binder_write_done(ret); if (ret < 0) { bwr.read_consumed = 0; - if (copy_to_user_preempt_disabled(ubuf, &bwr, sizeof(bwr))) + if (copy_to_user(ubuf, &bwr, sizeof(bwr))) ret = -EFAULT; goto out; } @@ -3271,7 +3199,7 @@ static int binder_ioctl_write_read(struct file *filp, if (!list_empty(&proc->todo)) wake_up_interruptible(&proc->wait); if (ret < 0) { - if (copy_to_user_preempt_disabled(ubuf, &bwr, sizeof(bwr))) + if (copy_to_user(ubuf, &bwr, sizeof(bwr))) ret = -EFAULT; goto out; } @@ -3281,7 +3209,7 @@ static int binder_ioctl_write_read(struct file *filp, proc->pid, thread->pid, (u64)bwr.write_consumed, (u64)bwr.write_size, (u64)bwr.read_consumed, (u64)bwr.read_size); - if (copy_to_user_preempt_disabled(ubuf, &bwr, sizeof(bwr))) { + if (copy_to_user(ubuf, &bwr, sizeof(bwr))) { ret = -EFAULT; goto out; } @@ -3362,7 +3290,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) goto err; break; case BINDER_SET_MAX_THREADS: - if (copy_from_user_preempt_disabled(&proc->max_threads, ubuf, sizeof(proc->max_threads))) { + if (copy_from_user(&proc->max_threads, ubuf, sizeof(proc->max_threads))) { ret = -EINVAL; goto err; } @@ -3385,8 +3313,9 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ret = -EINVAL; goto err; } - if (put_user_preempt_disabled(BINDER_CURRENT_PROTOCOL_VERSION, &ver->protocol_version)) { - ret = -EINVAL; + if (put_user(BINDER_CURRENT_PROTOCOL_VERSION, + &ver->protocol_version)) { + ret = -EINVAL; goto err; } break; @@ -3447,7 +3376,6 @@ static const struct vm_operations_struct binder_vm_ops = { static int binder_mmap(struct file *filp, struct vm_area_struct *vma) { int ret; - struct vm_struct *area; struct binder_proc *proc = filp->private_data; const char *failure_string; @@ -3508,11 +3436,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_ops = &binder_vm_ops; vma->vm_private_data = proc; - /* binder_update_page_range assumes preemption is disabled */ - preempt_disable(); - ret = binder_update_page_range(proc, 1, proc->buffer, proc->buffer + PAGE_SIZE, vma); - preempt_enable_no_resched(); - if (ret) { + if (binder_update_page_range(proc, 1, proc->buffer, proc->buffer + PAGE_SIZE, vma)) { ret = -ENOMEM; failure_string = "alloc small buf"; goto err_alloc_small_buf_failed; @@ -3803,12 +3727,9 @@ static void binder_deferred_func(struct work_struct *work) int defer; do { - trace_binder_lock(__func__); - mutex_lock(&context->binder_main_lock); - trace_binder_locked(__func__); + binder_lock(context, __func__); mutex_lock(&context->binder_deferred_lock); - preempt_disable(); if (!hlist_empty(&context->binder_deferred_list)) { proc = hlist_entry(context->binder_deferred_list.first, struct binder_proc, deferred_work_node); @@ -3834,9 +3755,7 @@ static void binder_deferred_func(struct work_struct *work) if (defer & BINDER_DEFERRED_RELEASE) binder_deferred_release(proc); /* frees proc */ - trace_binder_unlock(__func__); - mutex_unlock(&context->binder_main_lock); - preempt_enable_no_resched(); + binder_unlock(context, __func__); if (files) put_files_struct(files); } while (proc); From 6ce32a69b7f19cb98df4b70f382d7b8bb3f142cd Mon Sep 17 00:00:00 2001 From: Blagovest Kolenichev 
Date: Wed, 26 Jul 2017 07:35:55 -0700 Subject: [PATCH 176/176] android: binder: Revert duplicated changes for next binder merge This is a preparation change for merging binder from android-4.4@59ff2e1 (v4.4.78). Code matching changes below will be re-added properly with upcoming change: 0cebb40 FROMLIST: binder: Use wake up hint for synchronous transactions. Reverted changes: ================= e3ba00c android: binder: Don't use sched_preempt_enable_no_resched. The correct function is preempt_enable_no_resched(). The other function is reserved for the scheduler core. Change-Id: Ib36697de003f6a59a608a0024d5351dc15ff8715 Signed-off-by: Todd Kjos Git-commit: 776e5bca6446b3aac03b4685b4f4f72446ddcba0 Git-repo: https://android.googlesource.com/kernel/msm [odhyade@codeaurora.org: resolve trivial merge conflicts] Signed-off-by: Omprakash Dhyade 3956eab android: binder: Use wake up hint for synchronous transactions. Use wake_up_interruptible_sync() to hint to the scheduler that binder transactions are synchronous wakeups. Disable preemption while waking to avoid ping-ponging on the binder lock. Change-Id: Ic406a232d0873662f80148e37acefe5243d912a0 Signed-off-by: Todd Kjos Git-commit: 443c026e90820170aa3db2c21d2933ae5922f900 Git-repo: https://android.googlesource.com/kernel/msm Signed-off-by: Omprakash Dhyade Change-Id: I51a11f4325ce22aaa9666efedec1ce51d1d7036a Signed-off-by: Blagovest Kolenichev --- drivers/android/binder.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 573e9795b236..f33332eb1d5f 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2213,16 +2213,8 @@ static void binder_transaction(struct binder_proc *proc, list_add_tail(&t->work.entry, target_list); tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; list_add_tail(&tcomplete->entry, &thread->todo); - if (target_wait) { - if (reply || !(t->flags & TF_ONE_WAY)) { - preempt_disable(); - wake_up_interruptible_sync(target_wait); - preempt_enable_no_resched(); - } - else { - wake_up_interruptible(target_wait); - } - } + if (target_wait) + wake_up_interruptible(target_wait); return; err_translate_failed: