From 258a8f519a47a2b3c0a42940182715fc0de690e7 Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan
Date: Mon, 29 Aug 2016 17:31:10 -0700
Subject: [PATCH 01/38] UPSTREAM: Input: powermate - fix oops with malicious
 USB descriptors

The powermate driver expects at least one valid USB endpoint in its
probe function. If given malicious descriptors that specify 0 for the
number of endpoints, it will crash. Validate the number of endpoints
on the interface before using them.

The full report for this issue can be found here:
http://seclists.org/bugtraq/2016/Mar/85

BUG: 28242610
Reported-by: Ralf Spenneberg
Signed-off-by: Josh Boyer
Signed-off-by: Dmitry Torokhov
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: Badhri Jagan Sridharan
Change-Id: I1cb956a35f3bba73324240d5bd0a029f49d3c456
---
 drivers/input/misc/powermate.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/input/misc/powermate.c b/drivers/input/misc/powermate.c
index 63b539d3daba..84909a12ff36 100644
--- a/drivers/input/misc/powermate.c
+++ b/drivers/input/misc/powermate.c
@@ -307,6 +307,9 @@ static int powermate_probe(struct usb_interface *intf, const struct usb_device_i
 	int error = -ENOMEM;
 
 	interface = intf->cur_altsetting;
+	if (interface->desc.bNumEndpoints < 1)
+		return -EINVAL;
+
 	endpoint = &interface->endpoint[0].desc;
 	if (!usb_endpoint_is_int_in(endpoint))
 		return -EIO;

From 125d90ca2fd862ecb119a085d95f813a6263d218 Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan
Date: Mon, 29 Aug 2016 17:33:52 -0700
Subject: [PATCH 02/38] UPSTREAM: USB: cypress_m8: add endpoint sanity check

commit c55aee1bf0e6b6feec8b2927b43f7a09a6d5f754 upstream.

An attack using missing endpoints exists.

CVE-2016-3137

BUG: 28242610
Signed-off-by: Oliver Neukum
Signed-off-by: Johan Hovold
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: Badhri Jagan Sridharan
Change-Id: I1cc7957a5924175d24f12fdc41162ece67c907e5
---
 drivers/usb/serial/cypress_m8.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index 01bf53392819..244acb1299a9 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -447,6 +447,11 @@ static int cypress_generic_port_probe(struct usb_serial_port *port)
 	struct usb_serial *serial = port->serial;
 	struct cypress_private *priv;
 
+	if (!port->interrupt_out_urb || !port->interrupt_in_urb) {
+		dev_err(&port->dev, "required endpoint is missing\n");
+		return -ENODEV;
+	}
+
 	priv = kzalloc(sizeof(struct cypress_private), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
@@ -606,12 +611,6 @@ static int cypress_open(struct tty_struct *tty, struct usb_serial_port *port)
 	cypress_set_termios(tty, port, &priv->tmp_termios);
 
 	/* setup the port and start reading from the device */
-	if (!port->interrupt_in_urb) {
-		dev_err(&port->dev, "%s - interrupt_in_urb is empty!\n",
-			__func__);
-		return -1;
-	}
-
 	usb_fill_int_urb(port->interrupt_in_urb, serial->dev,
 		usb_rcvintpipe(serial->dev, port->interrupt_in_endpointAddress),
 		port->interrupt_in_urb->transfer_buffer,
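Note: patches 01 and 02 are instances of the same defensive pattern:
endpoint data comes from device-supplied descriptors and must be
validated before use. A minimal illustrative sketch follows (the
example_probe() function is hypothetical, not code from either driver):

	/*
	 * Sketch only: a probe callback must treat bNumEndpoints as
	 * untrusted input, since a malicious device controls its own
	 * descriptors.
	 */
	static int example_probe(struct usb_interface *intf,
				 const struct usb_device_id *id)
	{
		struct usb_host_interface *alt = intf->cur_altsetting;
		struct usb_endpoint_descriptor *epd;

		/* Reject interfaces that advertise no endpoints at all. */
		if (alt->desc.bNumEndpoints < 1)
			return -EINVAL;

		/* Only now is endpoint[0] known to exist. */
		epd = &alt->endpoint[0].desc;
		if (!usb_endpoint_is_int_in(epd))
			return -EIO;

		return 0;
	}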
From ee5c7a9739c6ddd0d54385c313b5d6f87652cd63 Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan
Date: Tue, 30 Aug 2016 13:33:55 -0700
Subject: [PATCH 03/38] UPSTREAM: USB: mct_u232: add sanity checking in probe

commit 4e9a0b05257f29cf4b75f3209243ed71614d062e upstream.

An attack using the lack of sanity checking in probe is known. This
patch checks for the existence of a second port.

CVE-2016-3136

BUG: 28242610
Signed-off-by: Oliver Neukum
[johan: add error message]
Signed-off-by: Johan Hovold
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: Badhri Jagan Sridharan
Change-Id: I284ad648c2087c34a098d67e0cc6d948a568413c
---
 drivers/usb/serial/mct_u232.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index fd707d6a10e2..89726f702202 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -376,14 +376,21 @@ static void mct_u232_msr_to_state(struct usb_serial_port *port,
 
 static int mct_u232_port_probe(struct usb_serial_port *port)
 {
+	struct usb_serial *serial = port->serial;
 	struct mct_u232_private *priv;
 
+	/* check first to simplify error handling */
+	if (!serial->port[1] || !serial->port[1]->interrupt_in_urb) {
+		dev_err(&port->dev, "expected endpoint missing\n");
+		return -ENODEV;
+	}
+
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
 	/* Use second interrupt-in endpoint for reading. */
-	priv->read_urb = port->serial->port[1]->interrupt_in_urb;
+	priv->read_urb = serial->port[1]->interrupt_in_urb;
 	priv->read_urb->context = port;
 
 	spin_lock_init(&priv->lock);

From ae6790121ceb4608e1a1dec10e285c8d661463d5 Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan
Date: Tue, 30 Aug 2016 13:35:32 -0700
Subject: [PATCH 04/38] UPSTREAM: USB: usb_driver_claim_interface: add sanity
 checking

commit 0b818e3956fc1ad976bee791eadcbb3b5fec5bfd upstream.

Attacks that trick drivers into passing a NULL pointer to
usb_driver_claim_interface() using forged descriptors are known.
This thwarts them by sanity checking.

BUG: 28242610
Signed-off-by: Oliver Neukum
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: Badhri Jagan Sridharan
Change-Id: Ib43ec5edb156985a9db941785a313f6801df092a
---
 drivers/usb/core/driver.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 56593a9a8726..2057d91d8336 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -502,11 +502,15 @@ static int usb_unbind_interface(struct device *dev)
 int usb_driver_claim_interface(struct usb_driver *driver,
 				struct usb_interface *iface, void *priv)
 {
-	struct device *dev = &iface->dev;
+	struct device *dev;
 	struct usb_device *udev;
 	int retval = 0;
 	int lpm_disable_error;
 
+	if (!iface)
+		return -ENODEV;
+
+	dev = &iface->dev;
 	if (dev->driver)
 		return -EBUSY;
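Note: the guard added above matters because callers commonly pass the
result of usb_ifnum_to_if() straight into usb_driver_claim_interface().
A hypothetical caller sketch (example_claim_sibling() is illustrative,
not kernel code):

	static int example_claim_sibling(struct usb_driver *drv,
					 struct usb_device *udev, void *priv)
	{
		/* NULL when a forged configuration lacks interface 1: */
		struct usb_interface *sibling = usb_ifnum_to_if(udev, 1);

		/*
		 * Before the fix, a NULL here made
		 * usb_driver_claim_interface() dereference iface->dev.
		 */
		return usb_driver_claim_interface(drv, sibling, priv);
	}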
The full report of this issue can be found here:
http://seclists.org/bugtraq/2016/Mar/87

BUG: 28242610
Reported-by: Ralf Spenneberg
Signed-off-by: Josh Boyer
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: Badhri Jagan Sridharan
Change-Id: If5161c23928e9ef77cb3359cba9b36622b1908df
---
 drivers/usb/misc/iowarrior.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index c6bfd13f6c92..1950e87b4219 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -787,6 +787,12 @@ static int iowarrior_probe(struct usb_interface *interface,
 	iface_desc = interface->cur_altsetting;
 	dev->product_id = le16_to_cpu(udev->descriptor.idProduct);
 
+	if (iface_desc->desc.bNumEndpoints < 1) {
+		dev_err(&interface->dev, "Invalid number of endpoints\n");
+		retval = -EINVAL;
+		goto error;
+	}
+
 	/* set up the endpoint information */
 	for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
 		endpoint = &iface_desc->endpoint[i].desc;

From 6087158f06e3cc66897cb4571cb673ac9fffb12d Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan
Date: Tue, 30 Aug 2016 13:39:02 -0700
Subject: [PATCH 06/38] UPSTREAM: USB: cdc-acm: more sanity checking

commit 8835ba4a39cf53f705417b3b3a94eb067673f2c9 upstream.

An attack has become available in which a device pretends to be a
quirky device, circumventing the normal sanity checks, and crashes
the kernel by presenting an insufficient number of interfaces. This
patch adds a check to the code path for quirky devices.

BUG: 28242610
Signed-off-by: Oliver Neukum
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: Badhri Jagan Sridharan
Change-Id: I9a5f7f3c704b65e866335054f470451fcfae9d1c
---
 drivers/usb/class/cdc-acm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 26ca4f910cb0..a7732f80a912 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1113,6 +1113,9 @@ static int acm_probe(struct usb_interface *intf,
 	if (quirks == NO_UNION_NORMAL) {
 		data_interface = usb_ifnum_to_if(usb_dev, 1);
 		control_interface = usb_ifnum_to_if(usb_dev, 0);
+		/* we would crash */
+		if (!data_interface || !control_interface)
+			return -ENODEV;
 		goto skip_normal_probe;
 	}

From 4547c0f93518197834f7fc11c6558b566b7e14c8 Mon Sep 17 00:00:00 2001
From: Guenter Roeck
Date: Wed, 31 Aug 2016 09:52:16 -0700
Subject: [PATCH 07/38] ANDROID: rcu_sync: Export rcu_sync_lockdep_assert

x86_64:allmodconfig fails to build with the following error.

ERROR: "rcu_sync_lockdep_assert" [kernel/locking/locktorture.ko] undefined!

Introduced by commit 3228c5eb7af2 ("RFC: FROMLIST: locking/percpu-rwsem:
Optimize readers and reduce global impact"). The applied upstream version
exports the missing symbol, so let's do the same.
Change-Id: If4e516715c3415fe8c82090f287174857561550d
Fixes: 3228c5eb7af2 ("RFC: FROMLIST: locking/percpu-rwsem: Optimize ...")
Signed-off-by: Guenter Roeck
---
 kernel/rcu/sync.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index e358313a0d6c..b49cf3ac2d47 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -68,6 +68,7 @@ void rcu_sync_lockdep_assert(struct rcu_sync *rsp)
 	RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
 			 "suspicious rcu_sync_is_idle() usage");
 }
+EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert);
 #endif
 
 /**

From a49dcf2e745c25a67a76d8a1e9658c16827c9885 Mon Sep 17 00:00:00 2001
From: Yongqin Liu
Date: Thu, 1 Sep 2016 22:06:04 +0530
Subject: [PATCH 08/38] ANDROID: base-cfg: enable SECCOMP config

Enable the following seccomp configs:
    CONFIG_SECCOMP=y
    CONFIG_SECCOMP_FILTER=y

Otherwise we will get a media codec error like this on Android N:
    E /system/bin/mediaextractor: libminijail: prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER): Invalid argument

Change-Id: I2477b6a2cfdded5c0ebf6ffbb6150b0e5fe2ba12
Signed-off-by: Yongqin Liu
Signed-off-by: Amit Pundir
---
 android/configs/android-base.cfg | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg
index 6496bb3961a2..288bab2d858d 100644
--- a/android/configs/android-base.cfg
+++ b/android/configs/android-base.cfg
@@ -141,6 +141,8 @@ CONFIG_PROFILING=y
 CONFIG_QUOTA=y
 CONFIG_RTC_CLASS=y
 CONFIG_RT_GROUP_SCHED=y
+CONFIG_SECCOMP=y
+CONFIG_SECCOMP_FILTER=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y
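Note: for context on patch 08, the failing prctl() quoted in its commit
message comes from a minijail-style filter install. A minimal userspace
sketch of that sequence (an allow-all filter, purely illustrative):

	#include <stddef.h>
	#include <sys/prctl.h>
	#include <linux/filter.h>
	#include <linux/seccomp.h>

	static int install_allow_all_filter(void)
	{
		struct sock_filter insns[] = {
			/* Real filters inspect struct seccomp_data first. */
			BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
		};
		struct sock_fprog prog = {
			.len = sizeof(insns) / sizeof(insns[0]),
			.filter = insns,
		};

		if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
			return -1;
		/* Fails with EINVAL when CONFIG_SECCOMP_FILTER=n. */
		return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	}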
From af4104512cdb48d5eede620871d6a964a8680253 Mon Sep 17 00:00:00 2001
From: Jerome Marchand
Date: Wed, 6 Apr 2016 14:06:48 +0100
Subject: [PATCH 09/38] UPSTREAM: assoc_array: don't call compare_object() on
 a node

(cherry picked from commit 8d4a2ec1e0b41b0cf9a0c5cd4511da7f8e4f3de2)

Changes since V1: fixed the description and added KASan warning.

In assoc_array_insert_into_terminal_node(), we call the compare_object()
method on all non-empty slots, even when they're not leaves, passing a
pointer to an unexpected structure to compare_object(). Currently it
causes an out-of-bound read access in keyring_compare_object detected by
KASan (see below). The issue is easily reproduced with the keyutils
testsuite.
Only call compare_object() when the slot is a leaf.

KASan warning:
==================================================================
BUG: KASAN: slab-out-of-bounds in keyring_compare_object+0x213/0x240 at addr ffff880060a6f838
Read of size 8 by task keyctl/1655
=============================================================================
BUG kmalloc-192 (Not tainted): kasan: bad access detected
-----------------------------------------------------------------------------

Disabling lock debugging due to kernel taint
INFO: Allocated in assoc_array_insert+0xfd0/0x3a60 age=69 cpu=1 pid=1647
	___slab_alloc+0x563/0x5c0
	__slab_alloc+0x51/0x90
	kmem_cache_alloc_trace+0x263/0x300
	assoc_array_insert+0xfd0/0x3a60
	__key_link_begin+0xfc/0x270
	key_create_or_update+0x459/0xaf0
	SyS_add_key+0x1ba/0x350
	entry_SYSCALL_64_fastpath+0x12/0x76
INFO: Slab 0xffffea0001829b80 objects=16 used=8 fp=0xffff880060a6f550 flags=0x3fff8000004080
INFO: Object 0xffff880060a6f740 @offset=5952 fp=0xffff880060a6e5d1

Bytes b4 ffff880060a6f730: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f740: d1 e5 a6 60 00 88 ff ff 0e 00 00 00 00 00 00 00  ...`............
Object ffff880060a6f750: 02 cf 8e 60 00 88 ff ff 02 c0 8e 60 00 88 ff ff  ...`.......`....
Object ffff880060a6f760: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f770: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f780: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f790: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7d0: 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
CPU: 0 PID: 1655 Comm: keyctl Tainted: G B 4.5.0-rc4-kasan+ #291
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
 0000000000000000 000000001b2800b4 ffff880060a179e0 ffffffff81b60491
 ffff88006c802900 ffff880060a6f740 ffff880060a17a10 ffffffff815e2969
 ffff88006c802900 ffffea0001829b80 ffff880060a6f740 ffff880060a6e650
Call Trace:
 [] dump_stack+0x85/0xc4
 [] print_trailer+0xf9/0x150
 [] object_err+0x34/0x40
 [] kasan_report_error+0x230/0x550
 [] ? keyring_get_key_chunk+0x13e/0x210
 [] __asan_report_load_n_noabort+0x5d/0x70
 [] ? keyring_compare_object+0x213/0x240
 [] keyring_compare_object+0x213/0x240
 [] assoc_array_insert+0x86c/0x3a60
 [] ? assoc_array_cancel_edit+0x70/0x70
 [] ? __key_link_begin+0x20d/0x270
 [] __key_link_begin+0xfc/0x270
 [] key_create_or_update+0x459/0xaf0
 [] ? trace_hardirqs_on+0xd/0x10
 [] ? key_type_lookup+0xc0/0xc0
 [] ? lookup_user_key+0x13d/0xcd0
 [] ? memdup_user+0x53/0x80
 [] SyS_add_key+0x1ba/0x350
 [] ? key_get_type_from_user.constprop.6+0xa0/0xa0
 [] ? retint_user+0x18/0x23
 [] ? trace_hardirqs_on_caller+0x3fe/0x580
 [] ? trace_hardirqs_on_thunk+0x17/0x19
 [] entry_SYSCALL_64_fastpath+0x12/0x76
Memory state around the buggy address:
 ffff880060a6f700: fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00 00
 ffff880060a6f780: 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc fc
>ffff880060a6f800: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
                   ^
 ffff880060a6f880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff880060a6f900: fc fc fc fc fc fc 00 00 00 00 00 00 00 00 00 00
==================================================================

Signed-off-by: Jerome Marchand
Signed-off-by: David Howells
cc: stable@vger.kernel.org
Change-Id: I903935a221a5b9fb14cec14ef64bd2b6fa8eb222
Bug: 30513364
---
 lib/assoc_array.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/assoc_array.c b/lib/assoc_array.c
index 03dd576e6773..59fd7c0b119c 100644
--- a/lib/assoc_array.c
+++ b/lib/assoc_array.c
@@ -524,7 +524,9 @@ static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
 			free_slot = i;
 			continue;
 		}
-		if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
+		if (assoc_array_ptr_is_leaf(ptr) &&
+		    ops->compare_object(assoc_array_ptr_to_leaf(ptr),
+					index_key)) {
 			pr_devel("replace in slot %d\n", i);
 			edit->leaf_p = &node->slots[i];
 			edit->dead_leaf = node->slots[i];
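Note: the fix above relies on assoc_array's internal pointer tagging: a
slot may hold either a caller-supplied leaf or internal metadata (a node
or shortcut), distinguished by low pointer bits. A simplified
restatement of that invariant (hypothetical helper; the real macros live
in include/linux/assoc_array_priv.h):

	/* Illustrative only: metadata pointers carry a low tag bit,
	 * so only untagged, non-empty slots may be handed to
	 * ops->compare_object(). */
	static bool slot_holds_leaf(const void *slot)
	{
		return slot && !((unsigned long)slot & 1UL);
	}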
From 687549af9e5df34f8071c32aced7efae0b46fb58 Mon Sep 17 00:00:00 2001
From: Vegard Nossum
Date: Fri, 29 Jul 2016 10:40:31 +0200
Subject: [PATCH 10/38] UPSTREAM: block: fix use-after-free in seq file

(cherry picked from commit 77da160530dd1dc94f6ae15a981f24e5f0021e84)

I got a KASAN report of use-after-free:

==================================================================
BUG: KASAN: use-after-free in klist_iter_exit+0x61/0x70 at addr ffff8800b6581508
Read of size 8 by task trinity-c1/315
=============================================================================
BUG kmalloc-32 (Not tainted): kasan: bad access detected
-----------------------------------------------------------------------------

Disabling lock debugging due to kernel taint
INFO: Allocated in disk_seqf_start+0x66/0x110 age=144 cpu=1 pid=315
	___slab_alloc+0x4f1/0x520
	__slab_alloc.isra.58+0x56/0x80
	kmem_cache_alloc_trace+0x260/0x2a0
	disk_seqf_start+0x66/0x110
	traverse+0x176/0x860
	seq_read+0x7e3/0x11a0
	proc_reg_read+0xbc/0x180
	do_loop_readv_writev+0x134/0x210
	do_readv_writev+0x565/0x660
	vfs_readv+0x67/0xa0
	do_preadv+0x126/0x170
	SyS_preadv+0xc/0x10
	do_syscall_64+0x1a1/0x460
	return_from_SYSCALL_64+0x0/0x6a
INFO: Freed in disk_seqf_stop+0x42/0x50 age=160 cpu=1 pid=315
	__slab_free+0x17a/0x2c0
	kfree+0x20a/0x220
	disk_seqf_stop+0x42/0x50
	traverse+0x3b5/0x860
	seq_read+0x7e3/0x11a0
	proc_reg_read+0xbc/0x180
	do_loop_readv_writev+0x134/0x210
	do_readv_writev+0x565/0x660
	vfs_readv+0x67/0xa0
	do_preadv+0x126/0x170
	SyS_preadv+0xc/0x10
	do_syscall_64+0x1a1/0x460
	return_from_SYSCALL_64+0x0/0x6a
CPU: 1 PID: 315 Comm: trinity-c1 Tainted: G B 4.7.0+ #62
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
 ffffea0002d96000 ffff880119b9f918 ffffffff81d6ce81 ffff88011a804480
 ffff8800b6581500 ffff880119b9f948 ffffffff8146c7bd ffff88011a804480
 ffffea0002d96000 ffff8800b6581500 fffffffffffffff4 ffff880119b9f970
Call Trace:
 [] dump_stack+0x65/0x84
 [] print_trailer+0x10d/0x1a0
 [] object_err+0x2f/0x40
 [] kasan_report_error+0x221/0x520
 [] __asan_report_load8_noabort+0x3e/0x40
 [] klist_iter_exit+0x61/0x70
 [] class_dev_iter_exit+0x9/0x10
 [] disk_seqf_stop+0x3a/0x50
 [] seq_read+0x4b2/0x11a0
 [] proc_reg_read+0xbc/0x180
 [] do_loop_readv_writev+0x134/0x210
 [] do_readv_writev+0x565/0x660
 [] vfs_readv+0x67/0xa0
 [] do_preadv+0x126/0x170
 [] SyS_preadv+0xc/0x10

This problem can occur in the following situation:

open()
 - pread()
  - .seq_start()
   - iter = kmalloc() // succeeds
   - seqf->private = iter
  - .seq_stop()
   - kfree(seqf->private)
 - pread()
  - .seq_start()
   - iter = kmalloc() // fails
  - .seq_stop()
   - class_dev_iter_exit(seqf->private) // boom! old pointer

As the comment in disk_seqf_stop() says, stop is called even if start
failed, so we need to reinitialise the private pointer to NULL when seq
iteration stops.

An alternative would be to set the private pointer to NULL when the
kmalloc() in disk_seqf_start() fails.

Cc: stable@vger.kernel.org
Signed-off-by: Vegard Nossum
Acked-by: Tejun Heo
Signed-off-by: Jens Axboe
Change-Id: I07b33f4b38341f60a37806cdd45b0a0c3ab4d84d
Bug: 30942273
---
 block/genhd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/genhd.c b/block/genhd.c
index 817c67c9e45e..82bc52cad1c1 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -831,6 +831,7 @@ static void disk_seqf_stop(struct seq_file *seqf, void *v)
 	if (iter) {
 		class_dev_iter_exit(iter);
 		kfree(iter);
+		seqf->private = NULL;
 	}
 }
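Note: the one-line fix above encodes a general seq_file rule: ->stop()
runs even when a later ->start() fails, so any state freed in ->stop()
must also be cleared. A generic sketch (the example_* names are
illustrative, not from block/genhd.c):

	struct example_iter { loff_t pos; };

	static void *example_seq_start(struct seq_file *m, loff_t *pos)
	{
		struct example_iter *iter = kmalloc(sizeof(*iter), GFP_KERNEL);

		if (!iter)
			return NULL;	/* ->stop() will still be called */
		m->private = iter;
		return iter;
	}

	static void example_seq_stop(struct seq_file *m, void *v)
	{
		struct example_iter *iter = m->private;

		if (iter) {
			kfree(iter);
			m->private = NULL;	/* the crucial reset */
		}
	}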
From c7a407d41ed5d2c6aa0c1892e1d9ebee58d5a928 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab
Date: Thu, 28 Jan 2016 09:22:44 -0200
Subject: [PATCH 11/38] UPSTREAM: [media] xc2028: avoid use after free

(cherry picked from commit 8dfbcc4351a0b6d2f2d77f367552f48ffefafe18)

If struct xc2028_config is passed without a firmware name, the
following trouble may happen:

[11009.907205] xc2028 5-0061: type set to XCeive xc2028/xc3028 tuner
[11009.907491] ==================================================================
[11009.907750] BUG: KASAN: use-after-free in strcmp+0x96/0xb0 at addr ffff8803bd78ab40
[11009.907992] Read of size 1 by task modprobe/28992
[11009.907994] =============================================================================
[11009.907997] BUG kmalloc-16 (Tainted: G W ): kasan: bad access detected
[11009.907999] -----------------------------------------------------------------------------
[11009.908008] INFO: Allocated in xhci_urb_enqueue+0x214/0x14c0 [xhci_hcd] age=0 cpu=3 pid=28992
[11009.908012] ___slab_alloc+0x581/0x5b0
[11009.908014] __slab_alloc+0x51/0x90
[11009.908017] __kmalloc+0x27b/0x350
[11009.908022] xhci_urb_enqueue+0x214/0x14c0 [xhci_hcd]
[11009.908026] usb_hcd_submit_urb+0x1e8/0x1c60
[11009.908029] usb_submit_urb+0xb0e/0x1200
[11009.908032] usb_serial_generic_write_start+0xb6/0x4c0
[11009.908035] usb_serial_generic_write+0x92/0xc0
[11009.908039] usb_console_write+0x38a/0x560
[11009.908045] call_console_drivers.constprop.14+0x1ee/0x2c0
[11009.908051] console_unlock+0x40d/0x900
[11009.908056] vprintk_emit+0x4b4/0x830
[11009.908061] vprintk_default+0x1f/0x30
[11009.908064] printk+0x99/0xb5
[11009.908067] kasan_report_error+0x10a/0x550
[11009.908070] __asan_report_load1_noabort+0x43/0x50
[11009.908074] INFO: Freed in xc2028_set_config+0x90/0x630 [tuner_xc2028] age=1 cpu=3 pid=28992
[11009.908077] __slab_free+0x2ec/0x460
[11009.908080] kfree+0x266/0x280
[11009.908083] xc2028_set_config+0x90/0x630 [tuner_xc2028]
[11009.908086] xc2028_attach+0x310/0x8a0 [tuner_xc2028]
[11009.908090] em28xx_attach_xc3028.constprop.7+0x1f9/0x30d [em28xx_dvb]
[11009.908094] em28xx_dvb_init.part.3+0x8e4/0x5cf4 [em28xx_dvb]
[11009.908098] em28xx_dvb_init+0x81/0x8a [em28xx_dvb]
[11009.908101] em28xx_register_extension+0xd9/0x190 [em28xx]
[11009.908105] em28xx_dvb_register+0x10/0x1000 [em28xx_dvb]
[11009.908108] do_one_initcall+0x141/0x300
[11009.908111] do_init_module+0x1d0/0x5ad
[11009.908114] load_module+0x6666/0x9ba0
[11009.908117] SyS_finit_module+0x108/0x130
[11009.908120] entry_SYSCALL_64_fastpath+0x16/0x76
[11009.908123] INFO: Slab 0xffffea000ef5e280 objects=25 used=25 fp=0x (null) flags=0x2ffff8000004080
[11009.908126] INFO: Object 0xffff8803bd78ab40 @offset=2880 fp=0x0000000000000001
[11009.908130] Bytes b4 ffff8803bd78ab30: 01 00 00 00 2a 07 00 00 9d 28 00 00 01 00 00 00  ....*....(......
[11009.908133] Object ffff8803bd78ab40: 01 00 00 00 00 00 00 00 b0 1d c3 6a 00 88 ff ff  ...........j....
[11009.908137] CPU: 3 PID: 28992 Comm: modprobe Tainted: G B W 4.5.0-rc1+ #43
[11009.908140] Hardware name: /NUC5i7RYB, BIOS RYBDWi35.86A.0350.2015.0812.1722 08/12/2015
[11009.908142]  ffff8803bd78a000 ffff8802c273f1b8 ffffffff81932007 ffff8803c6407a80
[11009.908148]  ffff8802c273f1e8 ffffffff81556759 ffff8803c6407a80 ffffea000ef5e280
[11009.908153]  ffff8803bd78ab40 dffffc0000000000 ffff8802c273f210 ffffffff8155ccb4
[11009.908158] Call Trace:
[11009.908162] [] dump_stack+0x4b/0x64
[11009.908165] [] print_trailer+0xf9/0x150
[11009.908168] [] object_err+0x34/0x40
[11009.908171] [] kasan_report_error+0x230/0x550
[11009.908175] [] ? trace_hardirqs_off_caller+0x21/0x290
[11009.908179] [] ? kasan_unpoison_shadow+0x36/0x50
[11009.908182] [] __asan_report_load1_noabort+0x43/0x50
[11009.908185] [] ? __asan_register_globals+0x50/0xa0
[11009.908189] [] ? strcmp+0x96/0xb0
[11009.908192] [] strcmp+0x96/0xb0
[11009.908196] [] xc2028_set_config+0x15c/0x630 [tuner_xc2028]
[11009.908200] [] xc2028_attach+0x310/0x8a0 [tuner_xc2028]
[11009.908203] [] ? memset+0x28/0x30
[11009.908206] [] ? xc2028_set_config+0x630/0x630 [tuner_xc2028]
[11009.908211] [] em28xx_attach_xc3028.constprop.7+0x1f9/0x30d [em28xx_dvb]
[11009.908215] [] ? em28xx_dvb_init.part.3+0x37c/0x5cf4 [em28xx_dvb]
[11009.908219] [] ? hauppauge_hvr930c_init+0x487/0x487 [em28xx_dvb]
[11009.908222] [] ? lgdt330x_attach+0x1cc/0x370 [lgdt330x]
[11009.908226] [] ? i2c_read_demod_bytes.isra.2+0x210/0x210 [lgdt330x]
[11009.908230] [] ? ref_module.part.15+0x10/0x10
[11009.908233] [] ? module_assert_mutex_or_preempt+0x80/0x80
[11009.908238] [] em28xx_dvb_init.part.3+0x8e4/0x5cf4 [em28xx_dvb]
[11009.908242] [] ? em28xx_attach_xc3028.constprop.7+0x30d/0x30d [em28xx_dvb]
[11009.908245] [] ? string+0x14d/0x1f0
[11009.908249] [] ? symbol_string+0xff/0x1a0
[11009.908253] [] ? uuid_string+0x6f0/0x6f0
[11009.908257] [] ? __kernel_text_address+0x7e/0xa0
[11009.908260] [] ? print_context_stack+0x7f/0xf0
[11009.908264] [] ? __module_address+0xb6/0x360
[11009.908268] [] ? is_ftrace_trampoline+0x99/0xe0
[11009.908271] [] ? __kernel_text_address+0x7e/0xa0
[11009.908275] [] ? debug_check_no_locks_freed+0x290/0x290
[11009.908278] [] ? dump_trace+0x11b/0x300
[11009.908282] [] ? em28xx_register_extension+0x23/0x190 [em28xx]
[11009.908285] [] ? trace_hardirqs_off_caller+0x21/0x290
[11009.908289] [] ? trace_hardirqs_on_caller+0x16/0x590
[11009.908292] [] ? trace_hardirqs_on+0xd/0x10
[11009.908296] [] ? em28xx_register_extension+0x23/0x190 [em28xx]
[11009.908299] [] ? mutex_trylock+0x400/0x400
[11009.908302] [] ? do_one_initcall+0x131/0x300
[11009.908306] [] ? call_rcu_sched+0x17/0x20
[11009.908309] [] ? put_object+0x48/0x70
[11009.908314] [] em28xx_dvb_init+0x81/0x8a [em28xx_dvb]
[11009.908317] [] em28xx_register_extension+0xd9/0x190 [em28xx]
[11009.908320] [] ? 0xffffffffa0150000
[11009.908324] [] em28xx_dvb_register+0x10/0x1000 [em28xx_dvb]
[11009.908327] [] do_one_initcall+0x141/0x300
[11009.908330] [] ? try_to_run_init_process+0x40/0x40
[11009.908333] [] ? trace_hardirqs_on_caller+0x16/0x590
[11009.908337] [] ? kasan_unpoison_shadow+0x36/0x50
[11009.908340] [] ? kasan_unpoison_shadow+0x36/0x50
[11009.908343] [] ? kasan_unpoison_shadow+0x36/0x50
[11009.908346] [] ? __asan_register_globals+0x87/0xa0
[11009.908350] [] do_init_module+0x1d0/0x5ad
[11009.908353] [] load_module+0x6666/0x9ba0
[11009.908356] [] ? symbol_put_addr+0x50/0x50
[11009.908361] [] ? em28xx_dvb_init.part.3+0x5989/0x5cf4 [em28xx_dvb]
[11009.908366] [] ? module_frob_arch_sections+0x20/0x20
[11009.908369] [] ? open_exec+0x50/0x50
[11009.908374] [] ? ns_capable+0x5b/0xd0
[11009.908377] [] SyS_finit_module+0x108/0x130
[11009.908379] [] ? SyS_init_module+0x1f0/0x1f0
[11009.908383] [] ? lockdep_sys_exit_thunk+0x12/0x14
[11009.908394] [] entry_SYSCALL_64_fastpath+0x16/0x76
[11009.908396] Memory state around the buggy address:
[11009.908398]  ffff8803bd78aa00: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[11009.908401]  ffff8803bd78aa80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[11009.908403] >ffff8803bd78ab00: fc fc fc fc fc fc fc fc 00 00 fc fc fc fc fc fc
[11009.908405]                                            ^
[11009.908407]  ffff8803bd78ab80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[11009.908409]  ffff8803bd78ac00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[11009.908411] ==================================================================

In order to avoid it, let's set the cached value of the firmware name
to NULL after freeing it. While here, return an error if the memory
allocation fails.

Signed-off-by: Mauro Carvalho Chehab
Change-Id: I945c841dcfb45de2056267e4aa50bbe176b527cf
Bug: 30946097
---
 drivers/media/tuners/tuner-xc2028.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/media/tuners/tuner-xc2028.c b/drivers/media/tuners/tuner-xc2028.c
index 4e941f00b600..082ff5608455 100644
--- a/drivers/media/tuners/tuner-xc2028.c
+++ b/drivers/media/tuners/tuner-xc2028.c
@@ -1403,11 +1403,12 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg)
 	 * in order to avoid troubles during device release.
 	 */
 	kfree(priv->ctrl.fname);
+	priv->ctrl.fname = NULL;
 	memcpy(&priv->ctrl, p, sizeof(priv->ctrl));
 	if (p->fname) {
 		priv->ctrl.fname = kstrdup(p->fname, GFP_KERNEL);
 		if (priv->ctrl.fname == NULL)
-			rc = -ENOMEM;
+			return -ENOMEM;
 	}
 
 	/*
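Note: the first hunk above is the classic free-then-clear idiom for
pointers that outlive the current function. In miniature (example_cfg
is hypothetical, not a kernel structure):

	struct example_cfg { char *fname; };

	static int example_replace_name(struct example_cfg *cfg,
					const char *name)
	{
		kfree(cfg->fname);
		cfg->fname = NULL;	/* never leave a dangling copy behind */
		if (name) {
			cfg->fname = kstrdup(name, GFP_KERNEL);
			if (!cfg->fname)
				return -ENOMEM;
		}
		return 0;
	}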
From 30439e51fc6f9ab169f7cb9e8e88faf7fb27ae99 Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Wed, 3 Feb 2016 13:34:00 -0200
Subject: [PATCH 12/38] UPSTREAM: [media] xc2028: unlock on error in
 xc2028_set_config()

(cherry picked from commit 210bd104c6acd31c3c6b8b075b3f12d4a9f6b60d)

We have to unlock before returning -ENOMEM.

Fixes: 8dfbcc4351a0 ('[media] xc2028: avoid use after free')
Signed-off-by: Dan Carpenter
Signed-off-by: Mauro Carvalho Chehab
Change-Id: I7b6ba9fde5c6e29467e6de23d398af2fe56e2547
Bug: 30946097
---
 drivers/media/tuners/tuner-xc2028.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/media/tuners/tuner-xc2028.c b/drivers/media/tuners/tuner-xc2028.c
index 082ff5608455..317ef63ee789 100644
--- a/drivers/media/tuners/tuner-xc2028.c
+++ b/drivers/media/tuners/tuner-xc2028.c
@@ -1407,8 +1407,10 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg)
 	memcpy(&priv->ctrl, p, sizeof(priv->ctrl));
 	if (p->fname) {
 		priv->ctrl.fname = kstrdup(p->fname, GFP_KERNEL);
-		if (priv->ctrl.fname == NULL)
-			return -ENOMEM;
+		if (priv->ctrl.fname == NULL) {
+			rc = -ENOMEM;
+			goto unlock;
+		}
 	}
 
 	/*
@@ -1440,6 +1442,7 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg)
 		} else
 			priv->state = XC2028_WAITING_FIRMWARE;
 	}
+unlock:
 	mutex_unlock(&priv->lock);
 
 	return rc;

From a4e59955acf0ae25d805e0a0d251ac8f4b1808e7 Mon Sep 17 00:00:00 2001
From: Mathias Krause
Date: Thu, 5 May 2016 16:22:26 -0700
Subject: [PATCH 13/38] UPSTREAM: proc: prevent accessing /proc//environ
 until it's ready

(cherry picked from commit 8148a73c9901a8794a50f950083c00ccf97d43b3)

If /proc//environ gets read before the envp[] array is fully set
up in create_{aout,elf,elf_fdpic,flat}_tables(), we might end up trying
to read more bytes than are actually written, as env_start will already
be set but env_end will still be zero, making the range calculation
underflow, allowing to read beyond the end of what has been written.

Fix this as it is done for /proc//cmdline by testing env_end for
zero. It is, apparently, intentionally set last in create_*_tables().

This bug was found by the PaX size_overflow plugin that detected the
arithmetic underflow of 'this_len = env_end - (env_start + src)' when
env_end is still zero.

The expected consequence is that userland trying to access
/proc//environ of a not yet fully set up process may get
inconsistent data as we're in the middle of copying in the environment
variables.

Fixes: https://forums.grsecurity.net/viewtopic.php?f=3&t=4363
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=116461
Signed-off-by: Mathias Krause
Cc: Emese Revfy
Cc: Pax Team
Cc: Al Viro
Cc: Mateusz Guzik
Cc: Alexey Dobriyan
Cc: Cyrill Gorcunov
Cc: Jarod Wilson
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
Change-Id: Ia2f58d48c15478ed4b6e237b63e704c70ff21e96
Bug: 30951939
---
 fs/proc/base.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 49348349e156..df715a095328 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -954,7 +954,8 @@ static ssize_t environ_read(struct file *file, char __user *buf,
 	int ret = 0;
 	struct mm_struct *mm = file->private_data;
 
-	if (!mm)
+	/* Ensure the process spawned far enough to have an environment. */
+	if (!mm || !mm->env_end)
 		return 0;
 
 	page = (char *)__get_free_page(GFP_TEMPORARY);
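Note: the arithmetic behind patch 13's guard is easy to reproduce in
userspace. With env_start already set but env_end still zero, the
length computation wraps (the values below are illustrative):

	#include <stdio.h>

	int main(void)
	{
		unsigned long env_start = 0x7ffd1000UL;	/* already set up */
		unsigned long env_end = 0;		/* not yet written */
		unsigned long src = 0;

		/* Without the !env_end check this wraps to ~2^64: */
		unsigned long this_len = env_end - (env_start + src);

		printf("this_len = %lu\n", this_len);
		return 0;
	}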
From 8e975e71f5123f4d2567a98f01641f7c02f8d2a2 Mon Sep 17 00:00:00 2001
From: Amit Pundir
Date: Fri, 2 Sep 2016 10:13:21 +0530
Subject: [PATCH 14/38] ANDROID: base-cfg: drop SECCOMP_FILTER config

We don't need to set CONFIG_SECCOMP_FILTER explicitly, since
CONFIG_SECCOMP=y will select that config anyway.

Fixes: a49dcf2e745c ("ANDROID: base-cfg: enable SECCOMP config")
Change-Id: Iff18ed4d2db5a55b9f9480d5ecbeef7b818b3837
Signed-off-by: Amit Pundir
---
 android/configs/android-base.cfg | 1 -
 1 file changed, 1 deletion(-)

diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg
index 288bab2d858d..34fa64a669ac 100644
--- a/android/configs/android-base.cfg
+++ b/android/configs/android-base.cfg
@@ -142,7 +142,6 @@ CONFIG_QUOTA=y
 CONFIG_RTC_CLASS=y
 CONFIG_RT_GROUP_SCHED=y
 CONFIG_SECCOMP=y
-CONFIG_SECCOMP_FILTER=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y

From d25e3081baa6f22564c1d87b7650c07c8c0b12b4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 17 Aug 2016 05:56:26 -0700
Subject: [PATCH 15/38] UPSTREAM: tcp: fix use after free in
 tcp_xmit_retransmit_queue()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

(cherry picked from commit bb1fceca22492109be12640d49f5ea5a544c6bb4)

When tcp_sendmsg() allocates a fresh and empty skb, it puts it at the
tail of the write queue using tcp_add_write_queue_tail().

Then it attempts to copy user data into this fresh skb. If the copy
fails, we undo the work and remove the fresh skb.

Unfortunately, this undo lacks the change done to tp->highest_sack and
we can leave a dangling pointer (to a freed skb).

Later, tcp_xmit_retransmit_queue() can dereference this pointer and
access freed memory. For regular kernels where memory is not unmapped,
this might cause SACK bugs because tcp_highest_sack_seq() is buggy,
returning garbage instead of tp->snd_nxt, but with various debug
features like CONFIG_DEBUG_PAGEALLOC, this can crash the kernel.

This bug was found by Marco Grassi thanks to syzkaller.

Fixes: 6859d49475d4 ("[TCP]: Abstract tp->highest_sack accessing & point to next skb")
Reported-by: Marco Grassi
Signed-off-by: Eric Dumazet
Cc: Ilpo Järvinen
Cc: Yuchung Cheng
Cc: Neal Cardwell
Acked-by: Neal Cardwell
Reviewed-by: Cong Wang
Signed-off-by: David S. Miller
Change-Id: I58bb02d6e4e399612e8580b9e02d11e661df82f5
Bug: 31183296
---
 include/net/tcp.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index bcce99a951f8..30d1194ff1c5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1513,6 +1513,8 @@ static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unli
 {
 	if (sk->sk_send_head == skb_unlinked)
 		sk->sk_send_head = NULL;
+	if (tcp_sk(sk)->highest_sack == skb_unlinked)
+		tcp_sk(sk)->highest_sack = NULL;
 }
 
 static inline void tcp_init_send_head(struct sock *sk)
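Note: the event sequence that left tp->highest_sack dangling, as an
annotated pseudo-flow (simplified from the commit description above,
not literal kernel code):

	/*
	 * tcp_sendmsg()
	 *   skb = sk_stream_alloc_skb(...);
	 *   tcp_add_write_queue_tail(sk, skb);  // may cache skb in
	 *                                       // tp->highest_sack
	 *   if (copy from user fails)
	 *     tcp_unlink_write_queue(sk, skb);  // tcp_check_send_head()
	 *     sk_wmem_free_skb(sk, skb);        // skb is freed here...
	 *
	 * tcp_xmit_retransmit_queue()
	 *   tcp_highest_sack_seq(tp);           // ...but the cached pointer
	 *                                       // was never cleared
	 */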
From 87518a45ae570ada6a1e404331a6cc910014e1a9 Mon Sep 17 00:00:00 2001
From: Mohamad Ayyash
Date: Wed, 11 May 2016 13:18:35 -0700
Subject: [PATCH 16/38] BACKPORT: Don't show empty tag stats for unprivileged
 uids

BUG: 27577101
BUG: 27532522
Signed-off-by: Mohamad Ayyash
---
 net/netfilter/xt_qtaguid.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
index e2e7d54f9bb1..3bf0c59dab2f 100644
--- a/net/netfilter/xt_qtaguid.c
+++ b/net/netfilter/xt_qtaguid.c
@@ -1946,7 +1946,7 @@ static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
 			);
 		f_count = atomic_long_read(
 			&sock_tag_entry->socket->file->f_count);
-		seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u "
+		seq_printf(m, "sock=%pK tag=0x%llx (uid=%u) pid=%u "
 			   "f_count=%lu\n",
 			   sock_tag_entry->sk,
 			   sock_tag_entry->tag, uid,
@@ -2548,8 +2548,7 @@ static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
 	uid_t stat_uid = get_uid_from_tag(tag);
 	struct proc_print_info *ppi = m->private;
 	/* Detailed tags are not available to everybody */
-	if (get_atag_from_tag(tag) && !can_read_other_uid_stats(
-		make_kuid(&init_user_ns,stat_uid))) {
+	if (!can_read_other_uid_stats(make_kuid(&init_user_ns,stat_uid))) {
 		CT_DEBUG("qtaguid: stats line: "
 			 "%s 0x%llx %u: insufficient priv "
 			 "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
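Note: the %p to %pK change above is the standard kernel-pointer
hardening. Subject to the kptr_restrict sysctl, %pK prints a masked
value to readers without the required capability, while %p prints the
real address. In brief (example_show_sock() is illustrative):

	static void example_show_sock(struct seq_file *m, struct sock *sk)
	{
		seq_printf(m, "sock=%p\n", sk);	  /* leaks a kernel address */
		seq_printf(m, "sock=%pK\n", sk);  /* masked for unprivileged
						     readers */
	}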
From 8ac959cc06c8715cdc64aa88993b3a8b1ef61eaa Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Thu, 17 Dec 2015 09:57:27 -0800
Subject: [PATCH 17/38] UPSTREAM: Add 'unsafe' user access functions for
 batched accesses

The naming is meant to discourage random use: the helper functions are
not really any more "unsafe" than the traditional double-underscore
functions (which need the address range checking), but they do need
even more infrastructure around them, and should not be used
willy-nilly.

In addition to checking the access range, these user access functions
require that you wrap the user access with a "user_access_{begin,end}()"
around it.

That allows architectures that implement kernel user access control
(x86: SMAP, arm64: PAN) to do the user access control in the wrapping
user_access_begin/end part, and then batch up the actual user space
accesses using the new interfaces.

The main (and hopefully only) use for these are for core generic access
helpers, initially just the generic user string functions
(strnlen_user() and strncpy_from_user()).

Signed-off-by: Linus Torvalds
Change-Id: Ic64efea41f97171bdbdabe3e531489aebd9b6fac
(cherry picked from commit 5b24a7a2aa2040c8c50c3b71122901d01661ff78)
Signed-off-by: Sami Tolvanen
---
 arch/x86/include/asm/uaccess.h | 25 +++++++++++++++++++++++++
 include/linux/uaccess.h        |  7 +++++++
 2 files changed, 32 insertions(+)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 09b1b0ab94b7..6c4c39b137e5 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -745,5 +745,30 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 #undef __copy_from_user_overflow
 #undef __copy_to_user_overflow
 
+/*
+ * The "unsafe" user accesses aren't really "unsafe", but the naming
+ * is a big fat warning: you have to not only do the access_ok()
+ * checking before using them, but you have to surround them with the
+ * user_access_begin/end() pair.
+ */
+#define user_access_begin()	__uaccess_begin()
+#define user_access_end()	__uaccess_end()
+
+#define unsafe_put_user(x, ptr)						\
+({									\
+	int __pu_err;							\
+	__put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT);	\
+	__builtin_expect(__pu_err, 0);					\
+})
+
+#define unsafe_get_user(x, ptr)						\
+({									\
+	int __gu_err;							\
+	unsigned long __gu_val;						\
+	__get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
+	__builtin_expect(__gu_err, 0);					\
+})
+
 #endif /* _ASM_X86_UACCESS_H */
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 558129af828a..349557825428 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -111,4 +111,11 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
 #define probe_kernel_address(addr, retval)		\
 	probe_kernel_read(&retval, addr, sizeof(retval))
 
+#ifndef user_access_begin
+#define user_access_begin() do { } while (0)
+#define user_access_end() do { } while (0)
+#define unsafe_get_user(x, ptr) __get_user(x, ptr)
+#define unsafe_put_user(x, ptr) __put_user(x, ptr)
+#endif
+
 #endif		/* __LINUX_UACCESS_H__ */

From c1932b10476e4d60f314bc6111101d1d101599b3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Thu, 17 Dec 2015 10:05:19 -0800
Subject: [PATCH 18/38] UPSTREAM: Use the new batched user accesses in generic
 user string handling

This converts the generic user string functions to use the batched user
access functions.

It makes a big difference on Skylake, which is the first x86
microarchitecture to implement SMAP. The STAC/CLAC instructions are not
very fast, and doing them for each access inside the loop that copies
strings from user space (which is what the pathname handling does for
every pathname the kernel uses, for example) is very inefficient.
Signed-off-by: Linus Torvalds
Change-Id: Ic39a686b4bb1ad9cd16ad0887bb669beed6fe8aa
(cherry picked from commit 9fd4470ff4974c41b1db43c3b355b9085af9c12a)
Signed-off-by: Sami Tolvanen
---
 lib/strncpy_from_user.c | 11 ++++++++---
 lib/strnlen_user.c      | 18 ++++++++++++++----
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index e0af6ff73d14..33840324138c 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -39,7 +39,7 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long
 		unsigned long c, data;
 
 		/* Fall back to byte-at-a-time if we get a page fault */
-		if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
+		if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
 			break;
 		*(unsigned long *)(dst+res) = c;
 		if (has_zero(c, &data, &constants)) {
@@ -55,7 +55,7 @@ byte_at_a_time:
 	while (max) {
 		char c;
 
-		if (unlikely(__get_user(c,src+res)))
+		if (unlikely(unsafe_get_user(c,src+res)))
 			return -EFAULT;
 		dst[res] = c;
 		if (!c)
@@ -107,7 +107,12 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
 	src_addr = (unsigned long)src;
 	if (likely(src_addr < max_addr)) {
 		unsigned long max = max_addr - src_addr;
-		return do_strncpy_from_user(dst, src, count, max);
+		long retval;
+
+		user_access_begin();
+		retval = do_strncpy_from_user(dst, src, count, max);
+		user_access_end();
+		return retval;
 	}
 	return -EFAULT;
 }
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 3a5f2b366d84..2625943625d7 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -45,7 +45,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
 	src -= align;
 	max += align;
 
-	if (unlikely(__get_user(c,(unsigned long __user *)src)))
+	if (unlikely(unsafe_get_user(c,(unsigned long __user *)src)))
 		return 0;
 	c |= aligned_byte_mask(align);
 
@@ -61,7 +61,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
 		if (unlikely(max <= sizeof(unsigned long)))
 			break;
 		max -= sizeof(unsigned long);
-		if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
+		if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
 			return 0;
 	}
 	res -= align;
@@ -112,7 +112,12 @@ long strnlen_user(const char __user *str, long count)
 	src_addr = (unsigned long)str;
 	if (likely(src_addr < max_addr)) {
 		unsigned long max = max_addr - src_addr;
-		return do_strnlen_user(str, count, max);
+		long retval;
+
+		user_access_begin();
+		retval = do_strnlen_user(str, count, max);
+		user_access_end();
+		return retval;
 	}
 	return 0;
 }
@@ -141,7 +146,12 @@ long strlen_user(const char __user *str)
 	src_addr = (unsigned long)str;
 	if (likely(src_addr < max_addr)) {
 		unsigned long max = max_addr - src_addr;
-		return do_strnlen_user(str, ~0ul, max);
+		long retval;
+
+		user_access_begin();
+		retval = do_strnlen_user(str, ~0ul, max);
+		user_access_end();
+		return retval;
 	}
 	return 0;
 }
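Note: the win from patch 18, in miniature: one
user_access_begin()/user_access_end() pair (a single STAC/CLAC on x86
with SMAP) brackets the whole loop instead of each access. A
hypothetical helper in the style of the converted functions, using the
patch-17 interface in which unsafe_get_user() still returns an error
value (patch 21 below reworks this); a prior access_ok() check is
assumed:

	static long example_sum_user_words(const unsigned long __user *src,
					   unsigned long n)
	{
		unsigned long c, sum = 0, i;

		user_access_begin();		/* one STAC */
		for (i = 0; i < n; i++) {
			if (unlikely(unsafe_get_user(c, src + i)))
				break;
			sum += c;
		}
		user_access_end();		/* one CLAC */
		return sum;
	}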
From fed752db02277e34e92262daac24ad4216e79c56 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Thu, 23 Jun 2016 21:28:47 +0100
Subject: [PATCH 19/38] BACKPORT: ARM: 8583/1: mm: fix location of _etext

The _etext position is defined to be the end of the kernel text code,
and should not include any part of the data segments. This interferes
with things that might check memory ranges and expect executable code
up to _etext. Just to be conservative, leave the kernel resource as it
was, using __init_begin instead of _etext as the end mark.

Signed-off-by: Kees Cook
Signed-off-by: Russell King
Change-Id: Ida514d1359dbe6f782f562ce29b4ba09ae72bfc0
(cherry picked from commit 14c4a533e0996f95a0a64dfd0b6252d788cebc74)
Signed-off-by: Sami Tolvanen
---
 arch/arm/kernel/setup.c       | 2 +-
 arch/arm/kernel/vmlinux.lds.S | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 20edd349d379..bf63b4693457 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -772,7 +772,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
 	struct resource *res;
 
 	kernel_code.start   = virt_to_phys(_text);
-	kernel_code.end     = virt_to_phys(_etext - 1);
+	kernel_code.end     = virt_to_phys(__init_begin - 1);
 	kernel_data.start   = virt_to_phys(_sdata);
 	kernel_data.end     = virt_to_phys(_end - 1);
 
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 8b60fde5ce48..be2ab6d3b91f 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -120,6 +120,8 @@ SECTIONS
 #ifdef CONFIG_DEBUG_RODATA
 	. = ALIGN(1<<SECTION_SHIFT);

From: Ard Biesheuvel
Date: Thu, 23 Jun 2016 15:53:17 +0200
Subject: [PATCH 20/38] BACKPORT: arm64: mm: fix location of _etext

As Kees Cook notes in the ARM counterpart of this patch [0]:

  The _etext position is defined to be the end of the kernel text code,
  and should not include any part of the data segments. This interferes
  with things that might check memory ranges and expect executable code
  up to _etext.

In particular, Kees is referring to the HARDENED_USERCOPY patch set [1],
which rejects attempts to call copy_to_user() on kernel ranges containing
executable code, but does allow access to the .rodata segment. Regardless
of whether one may or may not agree with the distinction, it makes sense
for _etext to have the same meaning across architectures.

So let's put _etext where it belongs, between .text and .rodata, and fix
up existing references to use __init_begin instead, which unlike
_end_rodata includes the exception and notes sections as well.

The _etext references in kaslr.c are left untouched, since its references
to [_stext, _etext) are meant to capture potential jump instruction
targets, and so disregarding .rodata is actually an improvement here.
[0] http://article.gmane.org/gmane.linux.kernel/2245084
[1] http://thread.gmane.org/gmane.linux.kernel.hardened.devel/2502

Reported-by: Kees Cook
Reviewed-by: Mark Rutland
Signed-off-by: Ard Biesheuvel
Reviewed-by: Kees Cook
Signed-off-by: Catalin Marinas
Change-Id: I8f6582525217b9ca324f6a382ea52d30ce1d0dbd
(cherry picked from commit 9fdc14c55cd6579d619ccd9d40982e0805e62b6d)
Signed-off-by: Sami Tolvanen
---
 arch/arm64/kernel/setup.c       | 2 +-
 arch/arm64/kernel/vmlinux.lds.S | 3 ++-
 arch/arm64/mm/mmu.c             | 3 +--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 8119479147db..7f0e7226795e 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -201,7 +201,7 @@ static void __init request_standard_resources(void)
 	struct resource *res;
 
 	kernel_code.start   = virt_to_phys(_text);
-	kernel_code.end     = virt_to_phys(_etext - 1);
+	kernel_code.end     = virt_to_phys(__init_begin - 1);
 	kernel_data.start   = virt_to_phys(_sdata);
 	kernel_data.end     = virt_to_phys(_end - 1);
 
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 71426a78db12..f472809a7b45 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -114,11 +114,12 @@ SECTIONS
 	}
 
 	ALIGN_DEBUG_RO
+	_etext = .;			/* End of text section */
+
 	RO_DATA(PAGE_SIZE)
 	EXCEPTION_TABLE(8)
 	NOTES
 	ALIGN_DEBUG_RO
-	_etext = .;			/* End of text and rodata section */
 
 	ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
 	__init_begin = .;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 873e363048c6..9a8dc4a79d8b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -337,7 +337,6 @@ static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
 				end - kernel_x_end,
 				PAGE_KERNEL);
 	}
-
 }
 #else
 static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
@@ -425,7 +424,7 @@ static void __init fixup_executable(void)
 void mark_rodata_ro(void)
 {
 	create_mapping_late(__pa(_stext), (unsigned long)_stext,
-			    (unsigned long)_etext - (unsigned long)_stext,
+			    (unsigned long)__init_begin - (unsigned long)_stext,
 			    PAGE_KERNEL_ROX);
 }

From 5f1b3400f0ccc2368dc444b5d9a7c93be2e700da Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Mon, 8 Aug 2016 13:02:01 -0700
Subject: [PATCH 21/38] UPSTREAM: unsafe_[get|put]_user: change interface to
 use a error target label

When I initially added the unsafe_[get|put]_user() helpers in commit
5b24a7a2aa20 ("Add 'unsafe' user access functions for batched
accesses"), I made the mistake of modeling the interface on our
traditional __[get|put]_user() functions, which return zero on
success, or -EFAULT on failure.

That interface is fairly easy to use, but it's actually fairly nasty
for good code generation, since it essentially forces the caller to
check the error value for each access.

In particular, since the error handling is already internally
implemented with an exception handler, and we already use "asm goto"
for various other things, we could fairly easily make the error cases
just jump directly to an error label instead, and avoid the need for
explicit checking after each operation.

So switch the interface to pass in an error label, rather than checking
the error value in the caller. Best do it now before we start growing
more users (the signal handling code in particular would be a good
place to use the new interface).

So rather than

	if (unsafe_get_user(x, ptr))
		... handle error ..
the interface is now

	unsafe_get_user(x, ptr, label);

where an error during the user mode fetch will now just cause a jump to
'label' in the caller.

Right now the actual _implementation_ of this all still ends up being a
"if (err) goto label", and does not take advantage of any exception
label tricks, but for "unsafe_put_user()" in particular it should be
fairly straightforward to convert to using the exception table model.

Note that "unsafe_get_user()" is much harder to convert to a clever
exception table model, because current versions of gcc do not allow the
use of "asm goto" (for the exception) with output values (for the
actual value to be fetched). But that is hopefully not a limitation in
the long term.

[ Also note that it might be a good idea to switch unsafe_get_user() to
  actually _return_ the value it fetches from user space, but this
  commit only changes the error handling semantics ]

Signed-off-by: Linus Torvalds
Change-Id: Ib905a84a04d46984320f6fd1056da4d72f3d6b53
(cherry picked from commit 1bd4403d86a1c06cb6cc9ac87664a0c9d3413d51)
Signed-off-by: Sami Tolvanen
---
 arch/x86/include/asm/uaccess.h | 16 ++++++++--------
 include/linux/uaccess.h        |  4 ++--
 lib/strncpy_from_user.c        |  8 ++++----
 lib/strnlen_user.c             |  7 +++----
 4 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 6c4c39b137e5..5abdd0221fb4 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -754,21 +754,21 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 #define user_access_begin()	__uaccess_begin()
 #define user_access_end()	__uaccess_end()
 
-#define unsafe_put_user(x, ptr)						\
-({									\
+#define unsafe_put_user(x, ptr, err_label)				\
+do {									\
 	int __pu_err;							\
 	__put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT);	\
-	__builtin_expect(__pu_err, 0);					\
-})
+	if (unlikely(__pu_err)) goto err_label;				\
+} while (0)
 
-#define unsafe_get_user(x, ptr)						\
-({									\
+#define unsafe_get_user(x, ptr, err_label)				\
+do {									\
 	int __gu_err;							\
 	unsigned long __gu_val;						\
 	__get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \
 	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
-	__builtin_expect(__gu_err, 0);					\
-})
+	if (unlikely(__gu_err)) goto err_label;				\
+} while (0)
 
 #endif /* _ASM_X86_UACCESS_H */
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 349557825428..f30c187ed785 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -114,8 +114,8 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
 #ifndef user_access_begin
 #define user_access_begin() do { } while (0)
 #define user_access_end() do { } while (0)
-#define unsafe_get_user(x, ptr) __get_user(x, ptr)
-#define unsafe_put_user(x, ptr) __put_user(x, ptr)
+#define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0)
+#define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
 #endif
 
 #endif		/* __LINUX_UACCESS_H__ */
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 33840324138c..5a003a2ebd96 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -39,8 +39,8 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long
 		unsigned long c, data;
 
 		/* Fall back to byte-at-a-time if we get a page fault */
-		if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
-			break;
+		unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time);
+
 		*(unsigned long *)(dst+res) = c;
 		if (has_zero(c, &data, &constants)) {
 			data = prep_zero_mask(c, data, &constants);
@@ -55,8 +55,7 @@ byte_at_a_time:
 	while (max) {
 		char c;
 
-		if (unlikely(unsafe_get_user(c,src+res)))
-			return -EFAULT;
+		unsafe_get_user(c,src+res, efault);
 		dst[res] = c;
 		if (!c)
 			return res;
@@ -75,6 +74,7 @@ byte_at_a_time:
 	 * Nope: we hit the address space limit, and we still had more
 	 * characters the caller would have wanted. That's an EFAULT.
 	 */
+efault:
 	return -EFAULT;
 }
 
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 2625943625d7..8e105ed4df12 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -45,8 +45,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
 	src -= align;
 	max += align;
 
-	if (unlikely(unsafe_get_user(c,(unsigned long __user *)src)))
-		return 0;
+	unsafe_get_user(c, (unsigned long __user *)src, efault);
 	c |= aligned_byte_mask(align);
 
 	for (;;) {
@@ -61,8 +60,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
 		if (unlikely(max <= sizeof(unsigned long)))
 			break;
 		max -= sizeof(unsigned long);
-		if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
-			return 0;
+		unsafe_get_user(c, (unsigned long __user *)(src+res), efault);
 	}
 	res -= align;
 
@@ -77,6 +75,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
 	 * Nope: we hit the address space limit, and we still had more
 	 * characters the caller would have wanted. That's 0.
	 */
+efault:
 	return 0;
 }
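Note: a usage sketch of the reworked interface above; errors jump to a
label rather than being tested per call. Hypothetical helper, assuming
access_ok() has already been done on both pointers:

	static long example_copy_flag(int __user *dst, const int __user *src)
	{
		int v;

		user_access_begin();
		unsafe_get_user(v, src, efault);
		unsafe_put_user(v, dst, efault);
		user_access_end();
		return 0;

	efault:
		user_access_end();
		return -EFAULT;
	}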
From 1e701cdc5bbce1f5561ee25ff8709a18dc0b2282 Mon Sep 17 00:00:00 2001
From: Laura Abbott
Date: Tue, 19 Jul 2016 15:00:04 -0700
Subject: [PATCH 22/38] UPSTREAM: mm: Add is_migrate_cma_page

Code such as hardened user copy[1] needs a way to tell if a page is CMA
or not. Add is_migrate_cma_page in a similar way to
is_migrate_isolate_page.

[1] http://article.gmane.org/gmane.linux.kernel.mm/155238

Signed-off-by: Laura Abbott
Signed-off-by: Kees Cook
Change-Id: I1f9aa13d8d063038fa70b93282a836648fbb4f6d
(cherry picked from commit 7c15d9bb8231f998ae7dc0b72415f5215459f7fb)
Signed-off-by: Sami Tolvanen
---
 include/linux/mmzone.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e23a9e704536..bab4053fb795 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -65,8 +65,10 @@ enum {
 
 #ifdef CONFIG_CMA
 #  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
+#  define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
 #else
 #  define is_migrate_cma(migratetype) false
+#  define is_migrate_cma_page(_page) false
 #endif
 
 #define for_each_migratetype_order(order, type) \

From c30d7340ee0b167a45cd8d6d5c48add8f62db9a5 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Tue, 12 Jul 2016 16:19:48 -0700
Subject: [PATCH 23/38] BACKPORT: mm: Implement stack frame object validation

This creates a per-architecture function, arch_within_stack_frames(),
that should validate if a given object is contained by a kernel stack
frame. The initial implementation is on x86.

This is based on code from PaX.
Signed-off-by: Kees Cook
Change-Id: I1f3b299bb8991d65dcdac6af85d633d4b7776df1
(cherry picked from commit 0f60a8efe4005ab5e65ce000724b04d4ca04a199)
Signed-off-by: Sami Tolvanen
---
 arch/Kconfig                       |  9 ++++++
 arch/x86/Kconfig                   |  1 +
 arch/x86/include/asm/thread_info.h | 44 ++++++++++++++++++++++++++++++
 include/linux/thread_info.h        |  9 ++++++
 4 files changed, 63 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 31a318a56d98..98f64ad1caf1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -423,6 +423,15 @@ config CC_STACKPROTECTOR_STRONG
 
 endchoice
 
+config HAVE_ARCH_WITHIN_STACK_FRAMES
+	bool
+	help
+	  An architecture should select this if it can walk the kernel stack
+	  frames to determine if an object is part of either the arguments
+	  or local variables (i.e. that it excludes saved return addresses,
+	  and similar) by implementing an inline arch_within_stack_frames(),
+	  which is used by CONFIG_HARDENED_USERCOPY.
+
 config HAVE_CONTEXT_TRACKING
 	bool
 	help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ae3c83e8bde8..4786de7750b1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -88,6 +88,7 @@ config X86
 	select HAVE_ARCH_SOFT_DIRTY		if X86_64
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_ARCH_WITHIN_STACK_FRAMES
 	select HAVE_BPF_JIT			if X86_64
 	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CMPXCHG_DOUBLE
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c7b551028740..0c977fc124a7 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -177,6 +177,50 @@ static inline unsigned long current_stack_pointer(void)
 	return sp;
 }
 
+/*
+ * Walks up the stack frames to make sure that the specified object is
+ * entirely contained by a single stack frame.
+ *
+ * Returns:
+ *	 1 if within a frame
+ *	-1 if placed across a frame boundary (or outside stack)
+ *	 0 unable to determine (no frame pointers, etc)
+ */
+static inline int arch_within_stack_frames(const void * const stack,
+					   const void * const stackend,
+					   const void *obj, unsigned long len)
+{
+#if defined(CONFIG_FRAME_POINTER)
+	const void *frame = NULL;
+	const void *oldframe;
+
+	oldframe = __builtin_frame_address(1);
+	if (oldframe)
+		frame = __builtin_frame_address(2);
+	/*
+	 * low ----------------------------------------------> high
+	 * [saved bp][saved ip][args][local vars][saved bp][saved ip]
+	 *                     ^----------------^
+	 *               allow copies only within here
+	 */
+	while (stack <= frame && frame < stackend) {
+		/*
+		 * If obj + len extends past the last frame, this
+		 * check won't pass and the next frame will be 0,
+		 * causing us to bail out and correctly report
+		 * the copy as invalid.
+		 */
+		if (obj + len <= frame)
+			return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1;
+		oldframe = frame;
+		frame = *(const void * const *)frame;
+	}
+	return -1;
+#else
+	return 0;
+#endif
+}
+
 #else /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_64
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ff307b548ed3..5ecb68e86968 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -145,6 +145,15 @@ static inline bool test_and_clear_restore_sigmask(void)
 #error "no set_restore_sigmask() provided and default one won't work"
 #endif
 
+#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
+static inline int arch_within_stack_frames(const void * const stack,
+					   const void * const stackend,
+					   const void *obj, unsigned long len)
+{
+	return 0;
+}
+#endif
+
 #endif	/* __KERNEL__ */
 
 #endif /* _LINUX_THREAD_INFO_H */
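Note: the frame walk added above follows the same saved-frame-pointer
chain that any -fno-omit-frame-pointer build produces. A userspace
analogue (illustrative only; walking past your own frames is undefined
behaviour, so the loop is bounded):

	#include <stdio.h>

	static void __attribute__((noinline)) show_frames(void)
	{
		const void *frame = __builtin_frame_address(0);
		int depth = 0;

		/* Each frame stores the caller's frame pointer first,
		 * exactly the link arch_within_stack_frames() chases. */
		while (frame && depth < 4) {
			printf("frame %d at %p\n", depth++, frame);
			frame = *(const void * const *)frame;
		}
	}

	int main(void)
	{
		show_frames();
		return 0;
	}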
1 : -1; + oldframe = frame; + frame = *(const void * const *)frame; + } + return -1; +#else + return 0; +#endif +} + #else /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_64 diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index ff307b548ed3..5ecb68e86968 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -145,6 +145,15 @@ static inline bool test_and_clear_restore_sigmask(void) #error "no set_restore_sigmask() provided and default one won't work" #endif +#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES +static inline int arch_within_stack_frames(const void * const stack, + const void * const stackend, + const void *obj, unsigned long len) +{ + return 0; +} +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */ From d677b3104d3c8e927211f20083a6c2e97e003b87 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 7 Jun 2016 11:05:33 -0700 Subject: [PATCH 24/38] BACKPORT: mm: Hardened usercopy This is the start of porting PAX_USERCOPY into the mainline kernel. This is the first set of features, controlled by CONFIG_HARDENED_USERCOPY. The work is based on code by PaX Team and Brad Spengler, and an earlier port from Casey Schaufler. Additional non-slab page tests are from Rik van Riel. This patch contains the logic for validating several conditions when performing copy_to_user() and copy_from_user() on the kernel object being copied to/from: - address range doesn't wrap around - address range isn't NULL or zero-allocated (with a non-zero copy size) - if on the slab allocator: - object size must be less than or equal to copy size (when check is implemented in the allocator, which appear in subsequent patches) - otherwise, object must not span page allocations (excepting Reserved and CMA ranges) - if on the stack - object must not extend before/after the current process stack - object must be contained by a valid stack frame (when there is arch/build support for identifying stack frames) - object must not overlap with kernel text Signed-off-by: Kees Cook Tested-by: Valdis Kletnieks Tested-by: Michael Ellerman Change-Id: Iff3b5f1ddb04acd99ccf9a9046c7797363962b2a (cherry picked from commit f5509cc18daa7f82bcc553be70df2117c8eedc16) Signed-off-by: Sami Tolvanen --- include/linux/slab.h | 12 ++ include/linux/thread_info.h | 15 ++ mm/Makefile | 4 + mm/usercopy.c | 269 ++++++++++++++++++++++++++++++++++++ security/Kconfig | 28 ++++ 5 files changed, 328 insertions(+) create mode 100644 mm/usercopy.c diff --git a/include/linux/slab.h b/include/linux/slab.h index 2037a861e367..4ef384b172e0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -144,6 +144,18 @@ void kfree(const void *); void kzfree(const void *); size_t ksize(const void *); +#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR +const char *__check_heap_object(const void *ptr, unsigned long n, + struct page *page); +#else +static inline const char *__check_heap_object(const void *ptr, + unsigned long n, + struct page *page) +{ + return NULL; +} +#endif + /* * Some archs want to perform DMA into kmalloc caches and need a guaranteed * alignment larger than the alignment of a 64-bit integer. 
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 5ecb68e86968..0ae29ff9ccfd 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -154,6 +154,21 @@ static inline int arch_within_stack_frames(const void * const stack, } #endif +#ifdef CONFIG_HARDENED_USERCOPY +extern void __check_object_size(const void *ptr, unsigned long n, + bool to_user); + +static inline void check_object_size(const void *ptr, unsigned long n, + bool to_user) +{ + __check_object_size(ptr, n, to_user); +} +#else +static inline void check_object_size(const void *ptr, unsigned long n, + bool to_user) +{ } +#endif /* CONFIG_HARDENED_USERCOPY */ + #endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */ diff --git a/mm/Makefile b/mm/Makefile index 2ed43191fc3b..8b532c94008f 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -5,6 +5,9 @@ KASAN_SANITIZE_slab_common.o := n KASAN_SANITIZE_slub.o := n +# Since __builtin_frame_address does work as used, disable the warning. +CFLAGS_usercopy.o += $(call cc-disable-warning, frame-address) + mmu-y := nommu.o mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ @@ -81,3 +84,4 @@ obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o obj-$(CONFIG_USERFAULTFD) += userfaultfd.o obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o +obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o diff --git a/mm/usercopy.c b/mm/usercopy.c new file mode 100644 index 000000000000..816feccebeb0 --- /dev/null +++ b/mm/usercopy.c @@ -0,0 +1,269 @@ +/* + * This implements the various checks for CONFIG_HARDENED_USERCOPY*, + * which are designed to protect kernel memory from needless exposure + * and overwrite under many unintended conditions. This code is based + * on PAX_USERCOPY, which is: + * + * Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source + * Security Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <asm/sections.h> + +enum { + BAD_STACK = -1, + NOT_STACK = 0, + GOOD_FRAME, + GOOD_STACK, +}; + +/* + * Checks if a given pointer and length is contained by the current + * stack frame (if possible). + * + * Returns: + * NOT_STACK: not at all on the stack + * GOOD_FRAME: fully within a valid stack frame + * GOOD_STACK: fully on the stack (when can't do frame-checking) + * BAD_STACK: error condition (invalid stack position or bad stack frame) + */ +static noinline int check_stack_object(const void *obj, unsigned long len) +{ + const void * const stack = task_stack_page(current); + const void * const stackend = stack + THREAD_SIZE; + int ret; + + /* Object is not on the stack at all. */ + if (obj + len <= stack || stackend <= obj) + return NOT_STACK; + + /* + * Reject: object partially overlaps the stack (passing the + * check above means at least one end is within the stack, + * so if this check fails, the other end is outside the stack). + */ + if (obj < stack || stackend < obj + len) + return BAD_STACK; + + /* Check if object is safely within a valid frame.
*/ + ret = arch_within_stack_frames(stack, stackend, obj, len); + if (ret) + return ret; + + return GOOD_STACK; +} + +static void report_usercopy(const void *ptr, unsigned long len, + bool to_user, const char *type) +{ + pr_emerg("kernel memory %s attempt detected %s %p (%s) (%lu bytes)\n", + to_user ? "exposure" : "overwrite", + to_user ? "from" : "to", ptr, type ? : "unknown", len); + /* + * For greater effect, it would be nice to do do_group_exit(), + * but BUG() actually hooks all the lock-breaking and per-arch + * Oops code, so that is used here instead. + */ + BUG(); +} + +/* Returns true if any portion of [ptr,ptr+n) overlaps with [low,high). */ +static bool overlaps(const void *ptr, unsigned long n, unsigned long low, + unsigned long high) +{ + unsigned long check_low = (uintptr_t)ptr; + unsigned long check_high = check_low + n; + + /* Does not overlap if entirely above or entirely below. */ + if (check_low >= high || check_high < low) + return false; + + return true; +} + +/* Is this address range in the kernel text area? */ +static inline const char *check_kernel_text_object(const void *ptr, + unsigned long n) +{ + unsigned long textlow = (unsigned long)_stext; + unsigned long texthigh = (unsigned long)_etext; + unsigned long textlow_linear, texthigh_linear; + + if (overlaps(ptr, n, textlow, texthigh)) + return "<kernel text>"; + + /* + * Some architectures have virtual memory mappings with a secondary + * mapping of the kernel text, i.e. there is more than one virtual + * kernel address that points to the kernel image. It is usually + * when there is a separate linear physical memory mapping, in that + * __pa() is not just the reverse of __va(). This can be detected + * and checked: + */ + textlow_linear = (unsigned long)__va(__pa(textlow)); + /* No different mapping: we're done. */ + if (textlow_linear == textlow) + return NULL; + + /* Check the secondary mapping... */ + texthigh_linear = (unsigned long)__va(__pa(texthigh)); + if (overlaps(ptr, n, textlow_linear, texthigh_linear)) + return "<linear kernel text>"; + + return NULL; +} + +static inline const char *check_bogus_address(const void *ptr, unsigned long n) +{ + /* Reject if object wraps past end of memory. */ + if (ptr + n < ptr) + return "<wrapped address>"; + + /* Reject if NULL or ZERO-allocation. */ + if (ZERO_OR_NULL_PTR(ptr)) + return "<null>"; + + return NULL; +} + +static inline const char *check_heap_object(const void *ptr, unsigned long n, + bool to_user) +{ + struct page *page, *endpage; + const void *end = ptr + n - 1; + bool is_reserved, is_cma; + + /* + * Some architectures (arm64) return true for virt_addr_valid() on + * vmalloced addresses. Work around this by checking for vmalloc + * first. + */ + if (is_vmalloc_addr(ptr)) + return NULL; + + if (!virt_addr_valid(ptr)) + return NULL; + + page = virt_to_head_page(ptr); + + /* Check slab allocator for flags and size. */ + if (PageSlab(page)) + return __check_heap_object(ptr, n, page); + + /* + * Sometimes the kernel data regions are not marked Reserved (see + * check below). And sometimes [_sdata,_edata) does not cover + * rodata and/or bss, so check each range explicitly. + */ + + /* Allow reads of kernel rodata region (if not marked as Reserved). */ + if (ptr >= (const void *)__start_rodata && + end <= (const void *)__end_rodata) { + if (!to_user) + return "<rodata>"; + return NULL; + } + + /* Allow kernel data region (if not marked as Reserved). */ + if (ptr >= (const void *)_sdata && end <= (const void *)_edata) + return NULL; + + /* Allow kernel bss region (if not marked as Reserved.
*/ + if (ptr >= (const void *)__bss_start && + end <= (const void *)__bss_stop) + return NULL; + + /* Is the object wholly within one base page? */ + if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) == + ((unsigned long)end & (unsigned long)PAGE_MASK))) + return NULL; + + /* Allow if start and end are inside the same compound page. */ + endpage = virt_to_head_page(end); + if (likely(endpage == page)) + return NULL; + + /* + * Reject if range is entirely either Reserved (i.e. special or + * device memory), or CMA. Otherwise, reject since the object spans + * several independently allocated pages. + */ + is_reserved = PageReserved(page); + is_cma = is_migrate_cma_page(page); + if (!is_reserved && !is_cma) + goto reject; + + for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) { + page = virt_to_head_page(ptr); + if (is_reserved && !PageReserved(page)) + goto reject; + if (is_cma && !is_migrate_cma_page(page)) + goto reject; + } + + return NULL; + +reject: + return "<spans multiple pages>"; +} + +/* + * Validates that the given object is: + * - not bogus address + * - known-safe heap or stack object + * - not in kernel text + */ +void __check_object_size(const void *ptr, unsigned long n, bool to_user) +{ + const char *err; + + /* Skip all tests if size is zero. */ + if (!n) + return; + + /* Check for invalid addresses. */ + err = check_bogus_address(ptr, n); + if (err) + goto report; + + /* Check for bad heap object. */ + err = check_heap_object(ptr, n, to_user); + if (err) + goto report; + + /* Check for bad stack object. */ + switch (check_stack_object(ptr, n)) { + case NOT_STACK: + /* Object is not touching the current process stack. */ + break; + case GOOD_FRAME: + case GOOD_STACK: + /* + * Object is either in the correct frame (when it + * is possible to check) or just generally on the + * process stack (when frame checking not available). + */ + return; + default: + err = "<process stack>"; + goto report; + } + + /* Check for object in kernel to avoid text exposure. */ + err = check_kernel_text_object(ptr, n); + if (!err) + return; + +report: + report_usercopy(ptr, n, to_user, err); +} +EXPORT_SYMBOL(__check_object_size); diff --git a/security/Kconfig b/security/Kconfig index 30a2603e8c85..aba6a8d4d1f4 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -127,6 +127,34 @@ config LSM_MMAP_MIN_ADDR this low address space will need the permission specific to the systems running LSM. +config HAVE_HARDENED_USERCOPY_ALLOCATOR + bool + help + The heap allocator implements __check_heap_object() for + validating memory ranges against heap object sizes in + support of CONFIG_HARDENED_USERCOPY. + +config HAVE_ARCH_HARDENED_USERCOPY + bool + help + The architecture supports CONFIG_HARDENED_USERCOPY by + calling check_object_size() just before performing the + userspace copies in the low level implementation of + copy_to_user() and copy_from_user(). + +config HARDENED_USERCOPY + bool "Harden memory copies between kernel and userspace" + depends on HAVE_ARCH_HARDENED_USERCOPY + select BUG + help + This option checks for obviously wrong memory regions when + copying memory to/from the kernel (via copy_to_user() and + copy_from_user() functions) by rejecting memory ranges that + are larger than the specified heap object, span multiple + separately allocated pages, are not on the process stack, + or are part of the kernel text. This kills entire classes + of heap overflow exploits and similar kernel memory exposures.
+ source security/selinux/Kconfig source security/smack/Kconfig source security/tomoyo/Kconfig From 9c61fcc2b39760263436b13b97071ae6e8eeb5f2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 23 Jun 2016 15:04:01 -0700 Subject: [PATCH 25/38] BACKPORT: x86/uaccess: Enable hardened usercopy Enables CONFIG_HARDENED_USERCOPY checks on x86. This is done both in copy_*_user() and __copy_*_user() because copy_*_user() actually calls down to _copy_*_user() and not __copy_*_user(). Based on code from PaX and grsecurity. Signed-off-by: Kees Cook Tested-by: Valdis Kletnieks Change-Id: I260db1d4572bdd2f779200aca99d03a170658440 (cherry picked from commit 5b710f34e194c6b7710f69fdb5d798fdf35b98c1) Signed-off-by: Sami Tolvanen --- arch/x86/Kconfig | 1 + arch/x86/include/asm/uaccess.h | 10 ++++++---- arch/x86/include/asm/uaccess_32.h | 2 ++ arch/x86/include/asm/uaccess_64.h | 2 ++ 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4786de7750b1..9e3b1e57499a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -79,6 +79,7 @@ config X86 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_AOUT if X86_32 select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 5abdd0221fb4..32a047ce0806 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -714,9 +714,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) * case, and do only runtime checking for non-constant sizes. */ - if (likely(sz < 0 || sz >= n)) + if (likely(sz < 0 || sz >= n)) { + check_object_size(to, n, false); n = _copy_from_user(to, from, n); - else if(__builtin_constant_p(n)) + } else if (__builtin_constant_p(n)) copy_from_user_overflow(); else __copy_from_user_overflow(sz, n); @@ -732,9 +733,10 @@ copy_to_user(void __user *to, const void *from, unsigned long n) might_fault(); /* See the comment in copy_from_user() above. 
*/ - if (likely(sz < 0 || sz >= n)) + if (likely(sz < 0 || sz >= n)) { + check_object_size(from, n, true); n = _copy_to_user(to, from, n); - else if(__builtin_constant_p(n)) + } else if (__builtin_constant_p(n)) copy_to_user_overflow(); else __copy_to_user_overflow(sz, n); diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index f5dcb5204dcd..2ffc9188cc70 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -43,6 +43,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero static __always_inline unsigned long __must_check __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { + check_object_size(from, n, true); if (__builtin_constant_p(n)) { unsigned long ret; @@ -143,6 +144,7 @@ static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); + check_object_size(to, n, false); if (__builtin_constant_p(n)) { unsigned long ret; diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index f2f9b39b274a..512ffd621fc3 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -53,6 +53,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) { int ret = 0; + check_object_size(dst, size, false); if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { @@ -103,6 +104,7 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size) { int ret = 0; + check_object_size(src, size, true); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { From 073fcda55b357777cc3edaa2e67060cd8d78dbb3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 23 Jun 2016 15:06:53 -0700 Subject: [PATCH 26/38] BACKPORT: ARM: uaccess: Enable hardened usercopy Enables CONFIG_HARDENED_USERCOPY checks on arm. Based on code from PaX and grsecurity. 
Signed-off-by: Kees Cook Change-Id: I03a44ca7a8c56832f15a6a74ac32e9330df3ac3b (cherry picked from commit dfd45b6103c973bfcea2341d89e36faf947dbc33) Signed-off-by: Sami Tolvanen --- arch/arm/Kconfig | 1 + arch/arm/include/asm/uaccess.h | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d3159ff4de5b..586134e01928 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -33,6 +33,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 select HAVE_ARCH_MMAP_RND_BITS if MMU diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 35c9db857ebe..7fb59199c6bb 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -496,7 +496,10 @@ arm_copy_from_user(void *to, const void __user *from, unsigned long n); static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned int __ua_flags = uaccess_save_and_enable(); + unsigned int __ua_flags; + + check_object_size(to, n, false); + __ua_flags = uaccess_save_and_enable(); n = arm_copy_from_user(to, from, n); uaccess_restore(__ua_flags); return n; @@ -511,11 +514,15 @@ static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { #ifndef CONFIG_UACCESS_WITH_MEMCPY - unsigned int __ua_flags = uaccess_save_and_enable(); + unsigned int __ua_flags; + + check_object_size(from, n, true); + __ua_flags = uaccess_save_and_enable(); n = arm_copy_to_user(to, from, n); uaccess_restore(__ua_flags); return n; #else + check_object_size(from, n, true); return arm_copy_to_user(to, from, n); #endif } From 2f40fdd6bc0e1beca522657d3c9242e7b25917a2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 23 Jun 2016 15:59:42 -0700 Subject: [PATCH 27/38] BACKPORT: arm64/uaccess: Enable hardened usercopy Enables CONFIG_HARDENED_USERCOPY checks on arm64. As done by KASAN in -next, renames the low-level functions to __arch_copy_*_user() so a static inline can do additional work before the copy. 
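The point of the rename is visible in the diff below: the assembly routines keep the raw copy under a __arch_ prefix, and thin static inlines become the only entry points, so the hardening check runs on every path without touching the assembly. As a self-contained, hedged sketch of that wrapper pattern (all names here are invented for illustration, not the kernel's API):

#include <stddef.h>
#include <string.h>

/* raw_copy() stands in for the renamed __arch_copy_from_user():
 * it only moves bytes and knows nothing about policy. */
static size_t raw_copy(void *to, const void *from, size_t n)
{
	memcpy(to, from, n);
	return 0; /* bytes left uncopied */
}

/* audit_object() stands in for check_object_size(). */
static void audit_object(const void *ptr, size_t n)
{
	(void)ptr;
	(void)n; /* object bounds checks would run here */
}

/* The inline wrapper is the only name callers see: every copy is
 * audited first, and the fast path can still be inlined. */
static inline size_t copy_from_user_sketch(void *to, const void *from,
					   size_t n)
{
	audit_object(to, n);
	return raw_copy(to, from, n);
}

This split also explains the EXPORT_SYMBOL changes in arm64ksyms.c below: only the raw __arch_ entry points need exporting, since the checking wrapper is inlined into every caller.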
Signed-off-by: Kees Cook Change-Id: I1286cae8e6ffcf12ea54ddd62f1a6d2ce742c8d0 (cherry picked from commit faf5b63e294151d6ac24ca6906d6f221bd3496cd) Signed-off-by: Sami Tolvanen --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/uaccess.h | 29 ++++++++++++++++++++++------- arch/arm64/kernel/arm64ksyms.c | 4 ++-- arch/arm64/lib/copy_from_user.S | 4 ++-- arch/arm64/lib/copy_to_user.S | 4 ++-- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 579ab688312d..3ad9d9ea374b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -48,6 +48,7 @@ config ARM64 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48) select HAVE_ARCH_KGDB diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index b2ede967fe7d..2a44b9795532 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -247,24 +247,39 @@ do { \ -EFAULT; \ }) -extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); -extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); +extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); +static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) +{ + check_object_size(to, n, false); + return __arch_copy_from_user(to, from, n); +} + +static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) +{ + check_object_size(from, n, true); + return __arch_copy_to_user(to, from, n); +} + static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - if (access_ok(VERIFY_READ, from, n)) - n = __copy_from_user(to, from, n); - else /* security hole - plug it */ + if (access_ok(VERIFY_READ, from, n)) { + check_object_size(to, n, false); + n = __arch_copy_from_user(to, from, n); + } else /* security hole - plug it */ memset(to, 0, n); return n; } static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { - if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); + if (access_ok(VERIFY_WRITE, to, n)) { + check_object_size(from, n, true); + n = __arch_copy_to_user(to, from, n); + } return n; } diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 3b6d8cc9dfe0..c654df05b7d7 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -33,8 +33,8 @@ EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); /* user mem (segment) */ -EXPORT_SYMBOL(__copy_from_user); -EXPORT_SYMBOL(__copy_to_user); +EXPORT_SYMBOL(__arch_copy_from_user); +EXPORT_SYMBOL(__arch_copy_to_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_in_user); diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 4699cd74f87e..281e75db899a 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ 
-66,7 +66,7 @@ .endm end .req x5 -ENTRY(__copy_from_user) +ENTRY(__arch_copy_from_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) add end, x0, x2 @@ -75,7 +75,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) mov x0, #0 // Nothing to copy ret -ENDPROC(__copy_from_user) +ENDPROC(__arch_copy_from_user) .section .fixup,"ax" .align 2 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 7512bbbc07ac..db4d187de61f 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -65,7 +65,7 @@ .endm end .req x5 -ENTRY(__copy_to_user) +ENTRY(__arch_copy_to_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) add end, x0, x2 @@ -74,7 +74,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) mov x0, #0 ret -ENDPROC(__copy_to_user) +ENDPROC(__arch_copy_to_user) .section .fixup,"ax" .align 2 From 103aa7df683e7811e88e82f52c2c3f16e419a7b1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 23 Jun 2016 15:20:59 -0700 Subject: [PATCH 28/38] UPSTREAM: mm: SLAB hardened usercopy support Under CONFIG_HARDENED_USERCOPY, this adds object size checking to the SLAB allocator to catch any copies that may span objects. Based on code from PaX and grsecurity. Signed-off-by: Kees Cook Tested-by: Valdis Kletnieks Change-Id: Ib910a71fdc2ab808e1a45b6d33e9bae1681a1f4a (cherry picked from commit 04385fc5e8fffed84425d909a783c0f0c587d847) Signed-off-by: Sami Tolvanen --- init/Kconfig | 1 + mm/slab.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index 235c7a2c0d20..fa031a140397 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1719,6 +1719,7 @@ choice config SLAB bool "SLAB" + select HAVE_HARDENED_USERCOPY_ALLOCATOR help The regular slab allocator that is established and known to work well in all environments. It organizes cache hot objects in diff --git a/mm/slab.c b/mm/slab.c index 4765c97ce690..24a615d42d74 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4228,6 +4228,36 @@ static int __init slab_proc_init(void) module_init(slab_proc_init); #endif +#ifdef CONFIG_HARDENED_USERCOPY +/* + * Rejects objects that are incorrectly sized. + * + * Returns NULL if check passes, otherwise const char * to name of cache + * to indicate an error. + */ +const char *__check_heap_object(const void *ptr, unsigned long n, + struct page *page) +{ + struct kmem_cache *cachep; + unsigned int objnr; + unsigned long offset; + + /* Find and validate object. */ + cachep = page->slab_cache; + objnr = obj_to_index(cachep, page, (void *)ptr); + BUG_ON(objnr >= cachep->num); + + /* Find offset within object. */ + offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep); + + /* Allow address range falling entirely within object size. */ + if (offset <= cachep->object_size && n <= cachep->object_size - offset) + return NULL; + + return cachep->name; +} +#endif /* CONFIG_HARDENED_USERCOPY */ + /** * ksize - get the actual amount of memory allocated for a given object * @objp: Pointer to the object From 3a9c8260c66ce387754641134089ba6ea5793bd5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 23 Jun 2016 15:24:05 -0700 Subject: [PATCH 29/38] UPSTREAM: mm: SLUB hardened usercopy support Under CONFIG_HARDENED_USERCOPY, this adds object size checking to the SLUB allocator to catch any copies that may span objects. Includes a redzone handling fix discovered by Michael Ellerman. 
Based on code from PaX and grsecurity. Signed-off-by: Kees Cook Tested-by: Michael Ellerman Reviewed-by: Laura Abbott Change-Id: I52dc6fb3a3492b937d52b5cf9c046bf03dc40a3a (cherry picked from commit ed18adc1cdd00a5c55a20fbdaed4804660772281) Signed-off-by: Sami Tolvanen --- init/Kconfig | 1 + mm/slub.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index fa031a140397..e1d1d6936f92 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1727,6 +1727,7 @@ config SLAB config SLUB bool "SLUB (Unqueued Allocator)" + select HAVE_HARDENED_USERCOPY_ALLOCATOR help SLUB is a slab allocator that minimizes cache line usage instead of managing queues of cached objects (SLAB approach). diff --git a/mm/slub.c b/mm/slub.c index 46997517406e..470f5e561727 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3585,6 +3585,46 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) EXPORT_SYMBOL(__kmalloc_node); #endif +#ifdef CONFIG_HARDENED_USERCOPY +/* + * Rejects objects that are incorrectly sized. + * + * Returns NULL if check passes, otherwise const char * to name of cache + * to indicate an error. + */ +const char *__check_heap_object(const void *ptr, unsigned long n, + struct page *page) +{ + struct kmem_cache *s; + unsigned long offset; + size_t object_size; + + /* Find object and usable object size. */ + s = page->slab_cache; + object_size = slab_ksize(s); + + /* Reject impossible pointers. */ + if (ptr < page_address(page)) + return s->name; + + /* Find offset within object. */ + offset = (ptr - page_address(page)) % s->size; + + /* Adjust for redzone and reject if within the redzone. */ + if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) { + if (offset < s->red_left_pad) + return s->name; + offset -= s->red_left_pad; + } + + /* Allow address range falling entirely within object size. */ + if (offset <= object_size && n <= object_size - offset) + return NULL; + + return s->name; +} +#endif /* CONFIG_HARDENED_USERCOPY */ + static size_t __ksize(const void *object) { struct page *page; From 43243ce74a56007bf71e4fb3310f475fd4441ce4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 19 Aug 2016 12:15:22 -0700 Subject: [PATCH 30/38] UPSTREAM: usercopy: avoid potentially undefined behavior in pointer math check_bogus_address() checked for pointer overflow using this expression, where 'ptr' has type 'const void *': ptr + n < ptr Since pointer wraparound is undefined behavior, gcc at -O2 by default treats it like the following, which would not behave as intended: (long)n < 0 Fortunately, this doesn't currently happen for kernel code because kernel code is compiled with -fno-strict-overflow. But the expression should be fixed anyway to use well-defined integer arithmetic, since it could be treated differently by different compilers in the future or could be reported by tools checking for undefined behavior. Signed-off-by: Eric Biggers Signed-off-by: Kees Cook Change-Id: I73b13be651cf35c03482f2014bf2c3dd291518ab (cherry picked from commit 7329a655875a2f4bd6984fe8a7e00a6981e802f3) Signed-off-by: Sami Tolvanen --- mm/usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 816feccebeb0..f5c4c3a6f2df 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -125,7 +125,7 @@ static inline const char *check_bogus_address(const void *ptr, unsigned long n) { /* Reject if object wraps past end of memory.
*/ - if (ptr + n < ptr) + if ((unsigned long)ptr + n < (unsigned long)ptr) return ""; /* Reject if NULL or ZERO-allocation. */ From 19787e3f1cd696356b4ec02fa3a345dcb4aaf3dc Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 22 Aug 2016 11:53:59 -0500 Subject: [PATCH 31/38] UPSTREAM: usercopy: fix overlap check for kernel text When running with a local patch which moves the '_stext' symbol to the very beginning of the kernel text area, I got the following panic with CONFIG_HARDENED_USERCOPY: usercopy: kernel memory exposure attempt detected from ffff88103dfff000 () (4096 bytes) ------------[ cut here ]------------ kernel BUG at mm/usercopy.c:79! invalid opcode: 0000 [#1] SMP ... CPU: 0 PID: 4800 Comm: cp Not tainted 4.8.0-rc3.after+ #1 Hardware name: Dell Inc. PowerEdge R720/0X3D66, BIOS 2.5.4 01/22/2016 task: ffff880817444140 task.stack: ffff880816274000 RIP: 0010:[] __check_object_size+0x76/0x413 RSP: 0018:ffff880816277c40 EFLAGS: 00010246 RAX: 000000000000006b RBX: ffff88103dfff000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff88081f80dfa8 RDI: ffff88081f80dfa8 RBP: ffff880816277c90 R08: 000000000000054c R09: 0000000000000000 R10: 0000000000000005 R11: 0000000000000006 R12: 0000000000001000 R13: ffff88103e000000 R14: ffff88103dffffff R15: 0000000000000001 FS: 00007fb9d1750800(0000) GS:ffff88081f800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000021d2000 CR3: 000000081a08f000 CR4: 00000000001406f0 Stack: ffff880816277cc8 0000000000010000 000000043de07000 0000000000000000 0000000000001000 ffff880816277e60 0000000000001000 ffff880816277e28 000000000000c000 0000000000001000 ffff880816277ce8 ffffffff8136c3a6 Call Trace: [] copy_page_to_iter_iovec+0xa6/0x1c0 [] copy_page_to_iter+0x16/0x90 [] generic_file_read_iter+0x3e3/0x7c0 [] ? xfs_file_buffered_aio_write+0xad/0x260 [xfs] [] ? down_read+0x12/0x40 [] xfs_file_buffered_aio_read+0x51/0xc0 [xfs] [] xfs_file_read_iter+0x62/0xb0 [xfs] [] __vfs_read+0xdf/0x130 [] vfs_read+0x8e/0x140 [] SyS_read+0x55/0xc0 [] do_syscall_64+0x67/0x160 [] entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:[<00007fb9d0c33c00>] 0x7fb9d0c33c00 RSP: 002b:00007ffc9c262f28 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: fffffffffff8ffff RCX: 00007fb9d0c33c00 RDX: 0000000000010000 RSI: 00000000021c3000 RDI: 0000000000000004 RBP: 00000000021c3000 R08: 0000000000000000 R09: 00007ffc9c264d6c R10: 00007ffc9c262c50 R11: 0000000000000246 R12: 0000000000010000 R13: 00007ffc9c2630b0 R14: 0000000000000004 R15: 0000000000010000 Code: 81 48 0f 44 d0 48 c7 c6 90 4d a3 81 48 c7 c0 bb b3 a2 81 48 0f 44 f0 4d 89 e1 48 89 d9 48 c7 c7 68 16 a3 81 31 c0 e8 f4 57 f7 ff <0f> 0b 48 8d 90 00 40 00 00 48 39 d3 0f 83 22 01 00 00 48 39 c3 RIP [] __check_object_size+0x76/0x413 RSP The checked object's range [ffff88103dfff000, ffff88103e000000) is valid, so there shouldn't have been a BUG. The hardened usercopy code got confused because the range's ending address is the same as the kernel's text starting address at 0xffff88103e000000. The overlap check is slightly off. 
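The boundary case is easy to reproduce in isolation. Both ranges in overlaps() are half-open, so a copy that ends exactly where the kernel text begins, as in the trace above, must not count as an overlap. A standalone sketch of the corrected comparison (plain C for illustration, not kernel code):

#include <assert.h>
#include <stdio.h>

/* Corrected predicate: [check_low, check_high) vs [low, high),
 * both half-open; they overlap only if neither range is entirely
 * above or entirely below the other. */
static int overlaps(unsigned long check_low, unsigned long check_high,
		    unsigned long low, unsigned long high)
{
	return !(check_low >= high || check_high <= low);
}

int main(void)
{
	/* A range ending exactly where the other begins (like the copy
	 * ending at the kernel text start above) is adjacent, not
	 * overlapping. The pre-fix "check_high < low" test reported
	 * this case as an overlap and tripped the BUG. */
	assert(!overlaps(0x1000, 0x2000, 0x2000, 0x3000));

	/* A range straddling the boundary really does overlap. */
	assert(overlaps(0x1800, 0x2800, 0x2000, 0x3000));

	puts("interval checks pass");
	return 0;
}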
Fixes: f5509cc18daa ("mm: Hardened usercopy") Signed-off-by: Josh Poimboeuf Signed-off-by: Kees Cook Change-Id: I839dbf4ddbb4d9874026a42abed557eb9b3f8bef (cherry picked from commit 94cd97af690dd9537818dc9841d0ec68bb1dd877) Signed-off-by: Sami Tolvanen --- mm/usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index f5c4c3a6f2df..f78015e8b1e5 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -84,7 +84,7 @@ static bool overlaps(const void *ptr, unsigned long n, unsigned long low, unsigned long check_high = check_low + n; /* Does not overlap if entirely above or entirely below. */ - if (check_low >= high || check_high < low) + if (check_low >= high || check_high <= low) return false; return true; From 65733da1dd5c3643e310d371bdb2420a6f3d8e97 Mon Sep 17 00:00:00 2001 From: Mohan Srinivasan Date: Thu, 25 Aug 2016 18:31:01 -0700 Subject: [PATCH 32/38] Android: MMC/UFS IO Latency Histograms. This patch adds a new sysfs node (latency_hist) and reports IO (svc time) latency histograms. Disabled by default, can be enabled by echoing 1 into latency_hist, stats can be cleared by writing 2 into latency_hist. This commit fixes the 32 bit build breakage in the previous commit. Tested on both 32 bit and 64 bit arm devices. Bug: 30677035 Change-Id: I9a615a16616d80f87e75676ac4d078a5c429dcf9 Signed-off-by: Mohan Srinivasan --- block/blk-core.c | 82 +++++++++++++++++++++++++++++++++++++++ drivers/mmc/core/core.c | 66 ++++++++++++++++++++++++++++++- drivers/mmc/core/host.c | 6 ++- drivers/mmc/core/host.h | 5 +++ drivers/scsi/ufs/ufshcd.c | 81 ++++++++++++++++++++++++++++++++++++++ drivers/scsi/ufs/ufshcd.h | 3 ++ include/linux/blkdev.h | 76 ++++++++++++++++++++++++++++++++++++ include/linux/mmc/core.h | 2 + include/linux/mmc/host.h | 4 ++ 9 files changed, 322 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 33e2f62d5062..840713279726 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -40,6 +40,8 @@ #include "blk.h" #include "blk-mq.h" +#include + EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); @@ -3539,3 +3541,83 @@ int __init blk_dev_init(void) return 0; } + +/* + * Blk IO latency support. We want this to be as cheap as possible, so doing + * this lockless (and avoiding atomics), a few off-by-a-few errors in this + * code are not harmful, and we don't want to do anything that is + * perf-impactful. + * TODO: If necessary, we can make the histograms per-cpu and aggregate + * them when printing them out.
+ */ +void +blk_zero_latency_hist(struct io_latency_state *s) +{ + memset(s->latency_y_axis_read, 0, + sizeof(s->latency_y_axis_read)); + memset(s->latency_y_axis_write, 0, + sizeof(s->latency_y_axis_write)); + s->latency_reads_elems = 0; + s->latency_writes_elems = 0; +} + +ssize_t +blk_latency_hist_show(struct io_latency_state *s, char *buf) +{ + int i; + int bytes_written = 0; + u_int64_t num_elem, elem; + int pct; + + num_elem = s->latency_reads_elems; + if (num_elem > 0) { + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "IO svc_time Read Latency Histogram (n = %llu):\n", + num_elem); + for (i = 0; + i < ARRAY_SIZE(latency_x_axis_us); + i++) { + elem = s->latency_y_axis_read[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t< %5lluus%15llu%15d%%\n", + latency_x_axis_us[i], + elem, pct); + } + /* Last element in y-axis table is overflow */ + elem = s->latency_y_axis_read[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t> %5dms%15llu%15d%%\n", 10, + elem, pct); + } + num_elem = s->latency_writes_elems; + if (num_elem > 0) { + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "IO svc_time Write Latency Histogram (n = %llu):\n", + num_elem); + for (i = 0; + i < ARRAY_SIZE(latency_x_axis_us); + i++) { + elem = s->latency_y_axis_write[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t< %5lluus%15llu%15d%%\n", + latency_x_axis_us[i], + elem, pct); + } + /* Last element in y-axis table is overflow */ + elem = s->latency_y_axis_write[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t> %5dms%15llu%15d%%\n", 10, + elem, pct); + } + return bytes_written; +} diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 9fab52559a8c..b503249950df 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -183,6 +183,17 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq) pr_debug("%s: %d bytes transferred: %d\n", mmc_hostname(host), mrq->data->bytes_xfered, mrq->data->error); + if (mrq->lat_hist_enabled) { + ktime_t completion; + u_int64_t delta_us; + + completion = ktime_get(); + delta_us = ktime_us_delta(completion, + mrq->io_start); + blk_update_latency_hist(&host->io_lat_s, + (mrq->data->flags & MMC_DATA_READ), + delta_us); + } trace_mmc_blk_rw_end(cmd->opcode, cmd->arg, mrq->data); } @@ -627,6 +638,11 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host, } if (!err && areq) { + if (host->latency_hist_enabled) { + areq->mrq->io_start = ktime_get(); + areq->mrq->lat_hist_enabled = 1; + } else + areq->mrq->lat_hist_enabled = 0; trace_mmc_blk_rw_start(areq->mrq->cmd->opcode, areq->mrq->cmd->arg, areq->mrq->data); @@ -1964,7 +1980,7 @@ void mmc_init_erase(struct mmc_card *card) } static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, - unsigned int arg, unsigned int qty) + unsigned int arg, unsigned int qty) { unsigned int erase_timeout; @@ -2907,6 +2923,54 @@ static void __exit mmc_exit(void) destroy_workqueue(workqueue); } +static ssize_t +latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + + return blk_latency_hist_show(&host->io_lat_s, buf); +} + +/* + * Values permitted 0, 1, 2. 
+ * 0 -> Disable IO latency histograms (default) + * 1 -> Enable IO latency histograms + * 2 -> Zero out IO latency histograms + */ +static ssize_t +latency_hist_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + long value; + + if (kstrtol(buf, 0, &value)) + return -EINVAL; + if (value == BLK_IO_LAT_HIST_ZERO) + blk_zero_latency_hist(&host->io_lat_s); + else if (value == BLK_IO_LAT_HIST_ENABLE || + value == BLK_IO_LAT_HIST_DISABLE) + host->latency_hist_enabled = value; + return count; +} + +static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR, + latency_hist_show, latency_hist_store); + +void +mmc_latency_hist_sysfs_init(struct mmc_host *host) +{ + if (device_create_file(&host->class_dev, &dev_attr_latency_hist)) + dev_err(&host->class_dev, + "Failed to create latency_hist sysfs entry\n"); +} + +void +mmc_latency_hist_sysfs_exit(struct mmc_host *host) +{ + device_remove_file(&host->class_dev, &dev_attr_latency_hist); +} + subsys_initcall(mmc_init); module_exit(mmc_exit); diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index fcf7829c759e..17068839c74b 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -32,8 +32,6 @@ #include "slot-gpio.h" #include "pwrseq.h" -#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev) - static DEFINE_IDR(mmc_host_idr); static DEFINE_SPINLOCK(mmc_host_lock); @@ -394,6 +392,8 @@ int mmc_add_host(struct mmc_host *host) mmc_add_host_debugfs(host); #endif + mmc_latency_hist_sysfs_init(host); + mmc_start_host(host); if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY)) register_pm_notifier(&host->pm_notify); @@ -422,6 +422,8 @@ void mmc_remove_host(struct mmc_host *host) mmc_remove_host_debugfs(host); #endif + mmc_latency_hist_sysfs_exit(host); + device_del(&host->class_dev); led_trigger_unregister_simple(host->led); diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h index 992bf5397633..bf38533406fd 100644 --- a/drivers/mmc/core/host.h +++ b/drivers/mmc/core/host.h @@ -12,6 +12,8 @@ #define _MMC_CORE_HOST_H #include +#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev) + int mmc_register_host_class(void); void mmc_unregister_host_class(void); @@ -21,5 +23,8 @@ void mmc_retune_hold(struct mmc_host *host); void mmc_retune_release(struct mmc_host *host); int mmc_retune(struct mmc_host *host); +void mmc_latency_hist_sysfs_init(struct mmc_host *host); +void mmc_latency_hist_sysfs_exit(struct mmc_host *host); + #endif diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 85cd2564c157..4167bdbf0ecf 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -39,6 +39,7 @@ #include #include +#include #include "ufshcd.h" #include "unipro.h" @@ -1332,6 +1333,17 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) clear_bit_unlock(tag, &hba->lrb_in_use); goto out; } + + /* IO svc time latency histogram */ + if (hba != NULL && cmd->request != NULL) { + if (hba->latency_hist_enabled && + (cmd->request->cmd_type == REQ_TYPE_FS)) { + cmd->request->lat_hist_io_start = ktime_get(); + cmd->request->lat_hist_enabled = 1; + } else + cmd->request->lat_hist_enabled = 0; + } + WARN_ON(hba->clk_gating.state != CLKS_ON); lrbp = &hba->lrb[tag]; @@ -3160,6 +3172,7 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba) u32 tr_doorbell; int result; int index; + struct request *req; /* Resetting interrupt aggregation counters first and reading the * 
DOOR_BELL afterward allows us to handle all the completed requests. @@ -3184,6 +3197,22 @@ /* Mark completed command as NULL in LRB */ lrbp->cmd = NULL; clear_bit_unlock(index, &hba->lrb_in_use); + req = cmd->request; + if (req) { + /* Update IO svc time latency histogram */ + if (req->lat_hist_enabled) { + ktime_t completion; + u_int64_t delta_us; + + completion = ktime_get(); + delta_us = ktime_us_delta(completion, + req->lat_hist_io_start); + /* rq_data_dir() => true if WRITE */ + blk_update_latency_hist(&hba->io_lat_s, + (rq_data_dir(req) == READ), + delta_us); + } + } /* Do not touch lrbp after scsi done */ cmd->scsi_done(cmd); __ufshcd_release(hba); @@ -5327,6 +5356,54 @@ out: } EXPORT_SYMBOL(ufshcd_shutdown); +/* + * Values permitted 0, 1, 2. + * 0 -> Disable IO latency histograms (default) + * 1 -> Enable IO latency histograms + * 2 -> Zero out IO latency histograms + */ +static ssize_t +latency_hist_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + long value; + + if (kstrtol(buf, 0, &value)) + return -EINVAL; + if (value == BLK_IO_LAT_HIST_ZERO) + blk_zero_latency_hist(&hba->io_lat_s); + else if (value == BLK_IO_LAT_HIST_ENABLE || + value == BLK_IO_LAT_HIST_DISABLE) + hba->latency_hist_enabled = value; + return count; +} + +static ssize_t +latency_hist_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + return blk_latency_hist_show(&hba->io_lat_s, buf); +} + +static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR, + latency_hist_show, latency_hist_store); + +static void +ufshcd_init_latency_hist(struct ufs_hba *hba) +{ + if (device_create_file(hba->dev, &dev_attr_latency_hist)) + dev_err(hba->dev, "Failed to create latency_hist sysfs entry\n"); +} + +static void +ufshcd_exit_latency_hist(struct ufs_hba *hba) +{ + device_remove_file(hba->dev, &dev_attr_latency_hist); +} + /** * ufshcd_remove - de-allocate SCSI host and host memory space * data structure memory @@ -5342,6 +5419,7 @@ void ufshcd_remove(struct ufs_hba *hba) scsi_host_put(hba->host); ufshcd_exit_clk_gating(hba); + ufshcd_exit_latency_hist(hba); if (ufshcd_is_clkscaling_enabled(hba)) devfreq_remove_device(hba->devfreq); ufshcd_hba_exit(hba); @@ -5639,6 +5717,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) /* Hold auto suspend until async scan completes */ pm_runtime_get_sync(dev); + ufshcd_init_latency_hist(hba); + /* * The device-initialize-sequence hasn't been invoked yet. * Set the device to power-off state @@ -5653,6 +5733,7 @@ out_remove_scsi_host: scsi_remove_host(hba->host); exit_gating: ufshcd_exit_clk_gating(hba); + ufshcd_exit_latency_hist(hba); out_disable: hba->is_irq_enabled = false; scsi_host_put(host); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 2570d9477b37..f3780cf7d895 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -532,6 +532,9 @@ struct ufs_hba { struct devfreq *devfreq; struct ufs_clk_scaling clk_scaling; bool is_sys_suspended; + + int latency_hist_enabled; + struct io_latency_state io_lat_s; }; /* Returns true if clocks can be gated.
Otherwise false */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c70e3588a48c..faf2c0093527 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -197,6 +197,9 @@ struct request { /* for bidi */ struct request *next_rq; + + ktime_t lat_hist_io_start; + int lat_hist_enabled; }; static inline unsigned short req_get_ioprio(struct request *req) @@ -1645,6 +1648,79 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); extern long bdev_direct_access(struct block_device *, sector_t, void __pmem **addr, unsigned long *pfn, long size); + +/* + * X-axis for IO latency histogram support. + */ +static const u_int64_t latency_x_axis_us[] = { + 100, + 200, + 300, + 400, + 500, + 600, + 700, + 800, + 900, + 1000, + 1200, + 1400, + 1600, + 1800, + 2000, + 2500, + 3000, + 4000, + 5000, + 6000, + 7000, + 9000, + 10000 +}; + +#define BLK_IO_LAT_HIST_DISABLE 0 +#define BLK_IO_LAT_HIST_ENABLE 1 +#define BLK_IO_LAT_HIST_ZERO 2 + +struct io_latency_state { + u_int64_t latency_y_axis_read[ARRAY_SIZE(latency_x_axis_us) + 1]; + u_int64_t latency_reads_elems; + u_int64_t latency_y_axis_write[ARRAY_SIZE(latency_x_axis_us) + 1]; + u_int64_t latency_writes_elems; +}; + +static inline void +blk_update_latency_hist(struct io_latency_state *s, + int read, + u_int64_t delta_us) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++) { + if (delta_us < (u_int64_t)latency_x_axis_us[i]) { + if (read) + s->latency_y_axis_read[i]++; + else + s->latency_y_axis_write[i]++; + break; + } + } + if (i == ARRAY_SIZE(latency_x_axis_us)) { + /* Overflowed the histogram */ + if (read) + s->latency_y_axis_read[i]++; + else + s->latency_y_axis_write[i]++; + } + if (read) + s->latency_reads_elems++; + else + s->latency_writes_elems++; +} + +void blk_zero_latency_hist(struct io_latency_state *s); +ssize_t blk_latency_hist_show(struct io_latency_state *s, char *buf); + #else /* CONFIG_BLOCK */ struct block_device; diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 37967b6da03c..3349f0676acb 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -136,6 +136,8 @@ struct mmc_request { struct completion completion; void (*done)(struct mmc_request *);/* completion function */ struct mmc_host *host; + ktime_t io_start; + int lat_hist_enabled; }; struct mmc_card; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 40025b28c1fb..e4862f7cdede 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -379,6 +380,9 @@ struct mmc_host { } embedded_sdio_data; #endif + int latency_hist_enabled; + struct io_latency_state io_lat_s; + unsigned long private[0] ____cacheline_aligned; }; From ef1cf0cfecdb16c520e32628ef8e6d9fa86fc08f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 19 Aug 2016 12:47:01 -0700 Subject: [PATCH 33/38] UPSTREAM: Make the hardened user-copy code depend on having a hardened allocator The kernel test robot reported a usercopy failure in the new hardened sanity checks, due to a page-crossing copy of the FPU state into the task structure. This happened because the kernel test robot was testing with SLOB, which doesn't actually do the required book-keeping for slab allocations, and as a result the hardening code didn't realize that the task struct allocation was one single allocation - and the sanity checks fail. 
Since SLOB doesn't even claim to support hardening (and you really shouldn't use it), the straightforward solution is to just make the usercopy hardening code depend on the allocator supporting it. Reported-by: kernel test robot Cc: Kees Cook Signed-off-by: Linus Torvalds Change-Id: I37d51f866f873341bf7d5297249899b852e1c6ce (cherry picked from commit 6040e57658eee6eb1315a26119101ca832d1f854) Signed-off-by: Sami Tolvanen --- security/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig b/security/Kconfig index aba6a8d4d1f4..2b42c225de28 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -145,6 +145,7 @@ config HAVE_ARCH_HARDENED_USERCOPY config HARDENED_USERCOPY bool "Harden memory copies between kernel and userspace" depends on HAVE_ARCH_HARDENED_USERCOPY + depends on HAVE_HARDENED_USERCOPY_ALLOCATOR select BUG help This option checks for obviously wrong memory regions when From 0715ce3b882b9982b6acdbf9e46fccd1e6f180d7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 17 Dec 2015 09:45:09 -0800 Subject: [PATCH 34/38] UPSTREAM: x86: reorganize SMAP handling in user space accesses This reorganizes how we do the stac/clac instructions in the user access code. Instead of adding the instructions directly to the same inline asm that does the actual user level access and exception handling, add them at a higher level. This is mainly preparation for the next step, where we will expose an interface to allow users to mark several accesses together as being user space accesses, but it does already clean up some code: - the inlined trivial cases of copy_in_user() now do stac/clac just once over the accesses: they used to do one pair around the user space read, and another pair around the write-back. - the {get,put}_user_ex() macros that are used with the catch/try handling don't do any stac/clac at all, because that happens in the try/catch surrounding them. Other than those two cleanups that happened naturally from the re-organization, this should not make any difference. Yet. Signed-off-by: Linus Torvalds Change-Id: Iaad8756bc8e95876e2e2a7d7bbd333fc478ab441 (cherry picked from commit 11f1a4b9755f5dbc3e822a96502ebe9b044b14d8) Signed-off-by: Sami Tolvanen --- arch/x86/include/asm/uaccess.h | 53 +++++++++++------ arch/x86/include/asm/uaccess_64.h | 94 ++++++++++++++++++++++--------- 2 files changed, 101 insertions(+), 46 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 32a047ce0806..be439e246d91 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -134,6 +134,9 @@ extern int __get_user_4(void); extern int __get_user_8(void); extern int __get_user_bad(void); +#define __uaccess_begin() stac() +#define __uaccess_end() clac() + /* * This is a type: either unsigned long, if the argument fits into * that type, or otherwise unsigned long long. 
@@ -193,10 +196,10 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) #ifdef CONFIG_X86_32 #define __put_user_asm_u64(x, addr, err, errret) \ - asm volatile(ASM_STAC "\n" \ + asm volatile("\n" \ "1: movl %%eax,0(%2)\n" \ "2: movl %%edx,4(%2)\n" \ - "3: " ASM_CLAC "\n" \ + "3:" \ ".section .fixup,\"ax\"\n" \ "4: movl %3,%0\n" \ " jmp 3b\n" \ @@ -207,10 +210,10 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) : "A" (x), "r" (addr), "i" (errret), "0" (err)) #define __put_user_asm_ex_u64(x, addr) \ - asm volatile(ASM_STAC "\n" \ + asm volatile("\n" \ "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ - "3: " ASM_CLAC "\n" \ + "3:" \ _ASM_EXTABLE_EX(1b, 2b) \ _ASM_EXTABLE_EX(2b, 3b) \ : : "A" (x), "r" (addr)) @@ -304,6 +307,10 @@ do { \ } \ } while (0) +/* + * This doesn't do __uaccess_begin/end - the exception handling + * around it must do that. + */ #define __put_user_size_ex(x, ptr, size) \ do { \ __chk_user_ptr(ptr); \ @@ -358,9 +365,9 @@ do { \ } while (0) #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile(ASM_STAC "\n" \ + asm volatile("\n" \ "1: mov"itype" %2,%"rtype"1\n" \ - "2: " ASM_CLAC "\n" \ + "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ " xor"itype" %"rtype"1,%"rtype"1\n" \ @@ -370,6 +377,10 @@ do { \ : "=r" (err), ltype(x) \ : "m" (__m(addr)), "i" (errret), "0" (err)) +/* + * This doesn't do __uaccess_begin/end - the exception handling + * around it must do that. + */ #define __get_user_size_ex(x, ptr, size) \ do { \ __chk_user_ptr(ptr); \ @@ -400,7 +411,9 @@ do { \ #define __put_user_nocheck(x, ptr, size) \ ({ \ int __pu_err; \ + __uaccess_begin(); \ __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ + __uaccess_end(); \ __builtin_expect(__pu_err, 0); \ }) @@ -408,7 +421,9 @@ do { \ ({ \ int __gu_err; \ unsigned long __gu_val; \ + __uaccess_begin(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ + __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ __builtin_expect(__gu_err, 0); \ }) @@ -423,9 +438,9 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. */ #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile(ASM_STAC "\n" \ + asm volatile("\n" \ "1: mov"itype" %"rtype"1,%2\n" \ - "2: " ASM_CLAC "\n" \ + "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ " jmp 2b\n" \ @@ -445,11 +460,11 @@ struct __large_struct { unsigned long buf[100]; }; */ #define uaccess_try do { \ current_thread_info()->uaccess_err = 0; \ - stac(); \ + __uaccess_begin(); \ barrier(); #define uaccess_catch(err) \ - clac(); \ + __uaccess_end(); \ (err) |= (current_thread_info()->uaccess_err ? 
-EFAULT : 0); \ } while (0) @@ -547,12 +562,13 @@ extern void __cmpxchg_wrong_size(void) __typeof__(ptr) __uval = (uval); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ + __uaccess_begin(); \ switch (size) { \ case 1: \ { \ - asm volatile("\t" ASM_STAC "\n" \ + asm volatile("\n" \ "1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \ - "2:\t" ASM_CLAC "\n" \ + "2:\n" \ "\t.section .fixup, \"ax\"\n" \ "3:\tmov %3, %0\n" \ "\tjmp 2b\n" \ @@ -566,9 +582,9 @@ extern void __cmpxchg_wrong_size(void) } \ case 2: \ { \ - asm volatile("\t" ASM_STAC "\n" \ + asm volatile("\n" \ "1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n" \ - "2:\t" ASM_CLAC "\n" \ + "2:\n" \ "\t.section .fixup, \"ax\"\n" \ "3:\tmov %3, %0\n" \ "\tjmp 2b\n" \ @@ -582,9 +598,9 @@ extern void __cmpxchg_wrong_size(void) } \ case 4: \ { \ - asm volatile("\t" ASM_STAC "\n" \ + asm volatile("\n" \ "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" \ - "2:\t" ASM_CLAC "\n" \ + "2:\n" \ "\t.section .fixup, \"ax\"\n" \ "3:\tmov %3, %0\n" \ "\tjmp 2b\n" \ @@ -601,9 +617,9 @@ extern void __cmpxchg_wrong_size(void) if (!IS_ENABLED(CONFIG_X86_64)) \ __cmpxchg_wrong_size(); \ \ - asm volatile("\t" ASM_STAC "\n" \ + asm volatile("\n" \ "1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n" \ - "2:\t" ASM_CLAC "\n" \ + "2:\n" \ "\t.section .fixup, \"ax\"\n" \ "3:\tmov %3, %0\n" \ "\tjmp 2b\n" \ @@ -618,6 +634,7 @@ extern void __cmpxchg_wrong_size(void) default: \ __cmpxchg_wrong_size(); \ } \ + __uaccess_end(); \ *__uval = __old; \ __ret; \ }) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 512ffd621fc3..2957c8237c28 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -57,35 +57,49 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { - case 1:__get_user_asm(*(u8 *)dst, (u8 __user *)src, + case 1: + __uaccess_begin(); + __get_user_asm(*(u8 *)dst, (u8 __user *)src, ret, "b", "b", "=q", 1); + __uaccess_end(); return ret; - case 2:__get_user_asm(*(u16 *)dst, (u16 __user *)src, + case 2: + __uaccess_begin(); + __get_user_asm(*(u16 *)dst, (u16 __user *)src, ret, "w", "w", "=r", 2); + __uaccess_end(); return ret; - case 4:__get_user_asm(*(u32 *)dst, (u32 __user *)src, + case 4: + __uaccess_begin(); + __get_user_asm(*(u32 *)dst, (u32 __user *)src, ret, "l", "k", "=r", 4); + __uaccess_end(); return ret; - case 8:__get_user_asm(*(u64 *)dst, (u64 __user *)src, + case 8: + __uaccess_begin(); + __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 8); + __uaccess_end(); return ret; case 10: + __uaccess_begin(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 10); - if (unlikely(ret)) - return ret; - __get_user_asm(*(u16 *)(8 + (char *)dst), - (u16 __user *)(8 + (char __user *)src), - ret, "w", "w", "=r", 2); + if (likely(!ret)) + __get_user_asm(*(u16 *)(8 + (char *)dst), + (u16 __user *)(8 + (char __user *)src), + ret, "w", "w", "=r", 2); + __uaccess_end(); return ret; case 16: + __uaccess_begin(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 16); - if (unlikely(ret)) - return ret; - __get_user_asm(*(u64 *)(8 + (char *)dst), - (u64 __user *)(8 + (char __user *)src), - ret, "q", "", "=r", 8); + if (likely(!ret)) + __get_user_asm(*(u64 *)(8 + (char *)dst), + (u64 __user *)(8 + (char __user *)src), + ret, "q", "", "=r", 8); + __uaccess_end(); return ret; default: return copy_user_generic(dst, (__force void *)src, size); 
@@ -108,35 +122,51 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size) if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { - case 1:__put_user_asm(*(u8 *)src, (u8 __user *)dst, + case 1: + __uaccess_begin(); + __put_user_asm(*(u8 *)src, (u8 __user *)dst, ret, "b", "b", "iq", 1); + __uaccess_end(); return ret; - case 2:__put_user_asm(*(u16 *)src, (u16 __user *)dst, + case 2: + __uaccess_begin(); + __put_user_asm(*(u16 *)src, (u16 __user *)dst, ret, "w", "w", "ir", 2); + __uaccess_end(); return ret; - case 4:__put_user_asm(*(u32 *)src, (u32 __user *)dst, + case 4: + __uaccess_begin(); + __put_user_asm(*(u32 *)src, (u32 __user *)dst, ret, "l", "k", "ir", 4); + __uaccess_end(); return ret; - case 8:__put_user_asm(*(u64 *)src, (u64 __user *)dst, + case 8: + __uaccess_begin(); + __put_user_asm(*(u64 *)src, (u64 __user *)dst, ret, "q", "", "er", 8); + __uaccess_end(); return ret; case 10: + __uaccess_begin(); __put_user_asm(*(u64 *)src, (u64 __user *)dst, ret, "q", "", "er", 10); - if (unlikely(ret)) - return ret; - asm("":::"memory"); - __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst, - ret, "w", "w", "ir", 2); + if (likely(!ret)) { + asm("":::"memory"); + __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst, + ret, "w", "w", "ir", 2); + } + __uaccess_end(); return ret; case 16: + __uaccess_begin(); __put_user_asm(*(u64 *)src, (u64 __user *)dst, ret, "q", "", "er", 16); - if (unlikely(ret)) - return ret; - asm("":::"memory"); - __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, - ret, "q", "", "er", 8); + if (likely(!ret)) { + asm("":::"memory"); + __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, + ret, "q", "", "er", 8); + } + __uaccess_end(); return ret; default: return copy_user_generic((__force void *)dst, src, size);
@@ -162,39 +192,47 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) switch (size) { case 1: { u8 tmp; + __uaccess_begin(); __get_user_asm(tmp, (u8 __user *)src, ret, "b", "b", "=q", 1); if (likely(!ret)) __put_user_asm(tmp, (u8 __user *)dst, ret, "b", "b", "iq", 1); + __uaccess_end(); return ret; } case 2: { u16 tmp; + __uaccess_begin(); __get_user_asm(tmp, (u16 __user *)src, ret, "w", "w", "=r", 2); if (likely(!ret)) __put_user_asm(tmp, (u16 __user *)dst, ret, "w", "w", "ir", 2); + __uaccess_end(); return ret; } case 4: { u32 tmp; + __uaccess_begin(); __get_user_asm(tmp, (u32 __user *)src, ret, "l", "k", "=r", 4); if (likely(!ret)) __put_user_asm(tmp, (u32 __user *)dst, ret, "l", "k", "ir", 4); + __uaccess_end(); return ret; } case 8: { u64 tmp; + __uaccess_begin(); __get_user_asm(tmp, (u64 __user *)src, ret, "q", "", "=r", 8); if (likely(!ret)) __put_user_asm(tmp, (u64 __user *)dst, ret, "q", "", "er", 8); + __uaccess_end(); return ret; } default:

From a5eeb9b4123eee6aa88bb7a43dba919641abeeb6 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Tue, 15 Mar 2016 14:55:12 -0700
Subject: [PATCH 35/38] UPSTREAM: mm/slub: support left redzone

SLUB already has a redzone debugging feature, but the redzone is placed only at the end of the object (the right redzone), so it cannot catch a left out-of-bounds access. Although the current object's right redzone acts as the left redzone of the next object, the first object in a slab cannot take advantage of this effect. This patch explicitly adds a left redzone to each object to detect left OOB more precisely.
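With both redzones in place, each object slot in a debug slab is laid out roughly as follows; this is an illustrative sketch built from the names in the diff below, with widths not to scale:

/*
 *  slot base           p = base + red_left_pad           base + s->size
 *  |<- red_left_pad ->|<------ object_size ------>|<- redzone, fp, track ->|
 *  [ left redzone new ][ object payload           ][ right redzone + meta  ]
 *
 * fixup_red_left() turns a slot base into the object pointer that is
 * handed out; restore_red_left() reverses it so check_valid_pointer()
 * can validate addresses against the slot base again.
 */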
Background: someone complained to me that a left OOB is not caught even when KASAN, which does page allocation debugging, is enabled. The page to the left of the slab is out of our control, so it may already be allocated by the time the left OOB happens, in which case the OOB cannot be detected. Moreover, the SLUB debugging feature can be enabled without page allocator debugging, and then such an OOB would be missed entirely. Before trying to implement this, I expected the changes to be too complex, but they turned out not to be: almost all of them are confined to the debug-specific functions, so I feel okay about the result.

Signed-off-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Change-Id: Ib893a17ecabd692e6c402e864196bf89cd6781a5 (cherry picked from commit d86bd1bece6fc41d59253002db5441fe960a37f6) Signed-off-by: Sami Tolvanen
---
include/linux/slub_def.h | 1 + mm/slub.c | 100 +++++++++++++++++++++++++++------------ 2 files changed, 72 insertions(+), 29 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 33885118523c..f4e857e920cd 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h
@@ -81,6 +81,7 @@ struct kmem_cache { int reserved; /* Reserved bytes at the end of slabs */ const char *name; /* Name (only for display!) */ struct list_head list; /* List of slab caches */ + int red_left_pad; /* Left redzone padding size */ #ifdef CONFIG_SYSFS struct kobject kobj; /* For sysfs */ #endif

diff --git a/mm/slub.c b/mm/slub.c index 470f5e561727..cfd3579cbaeb 100644 --- a/mm/slub.c +++ b/mm/slub.c
@@ -124,6 +124,14 @@ static inline int kmem_cache_debug(struct kmem_cache *s) #endif } +static inline void *fixup_red_left(struct kmem_cache *s, void *p) +{ + if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) + p += s->red_left_pad; + + return p; +} + static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) { #ifdef CONFIG_SLUB_CPU_PARTIAL
@@ -224,24 +232,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) * Core slab cache functions *******************************************************************/ -/* Verify that a pointer has an address that is valid within a slab page */ -static inline int check_valid_pointer(struct kmem_cache *s, - struct page *page, const void *object) -{ - void *base; - - if (!object) - return 1; - - base = page_address(page); - if (object < base || object >= base + page->objects * s->size || - (object - base) % s->size) { - return 0; - } - - return 1; -} - static inline void *get_freepointer(struct kmem_cache *s, void *object) { return *(void **)(object + s->offset); }
@@ -271,12 +261,14 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) /* Loop over all objects in a slab */ #define for_each_object(__p, __s, __addr, __objects) \ - for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ - __p += (__s)->size) + for (__p = fixup_red_left(__s, __addr); \ + __p < (__addr) + (__objects) * (__s)->size; \ + __p += (__s)->size) #define for_each_object_idx(__p, __idx, __s, __addr, __objects) \ - for (__p = (__addr), __idx = 1; __idx <= __objects;\ - __p += (__s)->size, __idx++) + for (__p = fixup_red_left(__s, __addr), __idx = 1; \ + __idx <= __objects; \ + __p += (__s)->size, __idx++) /* Determine object index from a given position */ static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
@@ -456,6 +448,22 @@ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) set_bit(slab_index(p, s, addr), map); } +static inline int size_from_object(struct kmem_cache *s) +{ + if (s->flags & SLAB_RED_ZONE) + return s->size - s->red_left_pad; +
return s->size; +} + +static inline void *restore_red_left(struct kmem_cache *s, void *p) +{ + if (s->flags & SLAB_RED_ZONE) + p -= s->red_left_pad; + + return p; +} + /* * Debug settings: */ @@ -489,6 +497,26 @@ static inline void metadata_access_disable(void) /* * Object debugging */ + +/* Verify that a pointer has an address that is valid within a slab page */ +static inline int check_valid_pointer(struct kmem_cache *s, + struct page *page, void *object) +{ + void *base; + + if (!object) + return 1; + + base = page_address(page); + object = restore_red_left(s, object); + if (object < base || object >= base + page->objects * s->size || + (object - base) % s->size) { + return 0; + } + + return 1; +} + static void print_section(char *text, u8 *addr, unsigned int length) { metadata_access_enable(); @@ -628,7 +656,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", p, p - addr, get_freepointer(s, p)); - if (p > addr + 16) + if (s->flags & SLAB_RED_ZONE) + print_section("Redzone ", p - s->red_left_pad, s->red_left_pad); + else if (p > addr + 16) print_section("Bytes b4 ", p - 16, 16); print_section("Object ", p, min_t(unsigned long, s->object_size, @@ -645,9 +675,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) if (s->flags & SLAB_STORE_USER) off += 2 * sizeof(struct track); - if (off != s->size) + if (off != size_from_object(s)) /* Beginning of the filler is the free pointer */ - print_section("Padding ", p + off, s->size - off); + print_section("Padding ", p + off, size_from_object(s) - off); dump_stack(); } @@ -677,6 +707,9 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) { u8 *p = object; + if (s->flags & SLAB_RED_ZONE) + memset(p - s->red_left_pad, val, s->red_left_pad); + if (s->flags & __OBJECT_POISON) { memset(p, POISON_FREE, s->object_size - 1); p[s->object_size - 1] = POISON_END; @@ -769,11 +802,11 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) /* We also have user information there */ off += 2 * sizeof(struct track); - if (s->size == off) + if (size_from_object(s) == off) return 1; return check_bytes_and_report(s, page, p, "Object padding", - p + off, POISON_INUSE, s->size - off); + p + off, POISON_INUSE, size_from_object(s) - off); } /* Check the pad bytes at the end of a slab page */ @@ -817,6 +850,10 @@ static int check_object(struct kmem_cache *s, struct page *page, u8 *endobject = object + s->object_size; if (s->flags & SLAB_RED_ZONE) { + if (!check_bytes_and_report(s, page, object, "Redzone", + object - s->red_left_pad, val, s->red_left_pad)) + return 0; + if (!check_bytes_and_report(s, page, object, "Redzone", endobject, val, s->inuse - s->object_size)) return 0; @@ -1468,7 +1505,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) set_freepointer(s, p, NULL); } - page->freelist = start; + page->freelist = fixup_red_left(s, start); page->inuse = page->objects; page->frozen = 1; @@ -3283,7 +3320,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) */ size += 2 * sizeof(struct track); - if (flags & SLAB_RED_ZONE) + if (flags & SLAB_RED_ZONE) { /* * Add some empty padding so that we can catch * overwrites from earlier objects rather than let @@ -3292,6 +3329,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * of the object. 
*/ size += sizeof(void *); + + s->red_left_pad = sizeof(void *); + s->red_left_pad = ALIGN(s->red_left_pad, s->align); + size += s->red_left_pad; + } #endif /*

From cd55f3c2be7a55bb708ba0f5fb02ae1c08cc1d33 Mon Sep 17 00:00:00 2001
From: Riley Andrews
Date: Tue, 6 Sep 2016 15:16:25 -0700
Subject: [PATCH 36/38] cpuset: Make cpusets restore on hotplug

This deliberately changes the behavior of the per-cpuset cpus file so that it is not affected by hotplug. When a cpu is offlined, it is removed from the cpuset/cpus file. When a cpu is onlined, if the cpuset originally requested that cpu, it is restored to the cpuset. The cpus files still have to be hierarchical, but the requested ranges no longer have to be a subset of the currently online cpus, only of the physically present cpus.

Change-Id: I22cdf33e7d312117bcefba1aeb0125e1ada289a9 Signed-off-by: Dmitry Shmidt
---
kernel/cpuset.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 0ff01c2a7222..3b4f27981778 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c
@@ -99,6 +99,7 @@ struct cpuset { /* user-configured CPUs and Memory Nodes allow to tasks */ cpumask_var_t cpus_allowed; + cpumask_var_t cpus_requested; nodemask_t mems_allowed; /* effective CPUs and Memory Nodes allow to tasks */
@@ -385,7 +386,7 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs, static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) { - return cpumask_subset(p->cpus_allowed, q->cpus_allowed) && + return cpumask_subset(p->cpus_requested, q->cpus_requested) && nodes_subset(p->mems_allowed, q->mems_allowed) && is_cpu_exclusive(p) <= is_cpu_exclusive(q) && is_mem_exclusive(p) <= is_mem_exclusive(q);
@@ -485,7 +486,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) cpuset_for_each_child(c, css, par) { if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && c != cur && - cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) + cpumask_intersects(trial->cpus_requested, c->cpus_requested)) goto out; if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && c != cur &&
@@ -944,17 +945,18 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (!*buf) { cpumask_clear(trialcs->cpus_allowed); } else { - retval = cpulist_parse(buf, trialcs->cpus_allowed); + retval = cpulist_parse(buf, trialcs->cpus_requested); if (retval < 0) return retval; - if (!cpumask_subset(trialcs->cpus_allowed, - top_cpuset.cpus_allowed)) + if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask)) return -EINVAL; + + cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask); } /* Nothing to do if the cpus didn't change */ - if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) + if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested)) return 0; retval = validate_change(cs, trialcs);
@@ -963,6 +965,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, spin_lock_irq(&callback_lock); cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); + cpumask_copy(cs->cpus_requested, trialcs->cpus_requested); spin_unlock_irq(&callback_lock); /* use trialcs->cpus_allowed as a temp variable */
@@ -1731,7 +1734,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) switch (type) { case FILE_CPULIST: - seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed)); + seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_requested)); break; case FILE_MEMLIST: seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
@@ -1920,11 +1923,14 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css) return ERR_PTR(-ENOMEM); if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) goto free_cs; + if (!alloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL)) + goto free_allowed; if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL)) - goto free_cpus; + goto free_requested; set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); cpumask_clear(cs->cpus_allowed); + cpumask_clear(cs->cpus_requested); nodes_clear(cs->mems_allowed); cpumask_clear(cs->effective_cpus); nodes_clear(cs->effective_mems);
@@ -1933,7 +1939,9 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css) return &cs->css; -free_cpus: +free_requested: + free_cpumask_var(cs->cpus_requested); +free_allowed: free_cpumask_var(cs->cpus_allowed); free_cs: kfree(cs);
@@ -1996,6 +2004,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cs->mems_allowed = parent->mems_allowed; cs->effective_mems = parent->mems_allowed; cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); + cpumask_copy(cs->cpus_requested, parent->cpus_requested); cpumask_copy(cs->effective_cpus, parent->cpus_allowed); spin_unlock_irq(&callback_lock); out_unlock:
@@ -2030,6 +2039,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) free_cpumask_var(cs->effective_cpus); free_cpumask_var(cs->cpus_allowed); + free_cpumask_var(cs->cpus_requested); kfree(cs); }
@@ -2096,8 +2106,11 @@ int __init cpuset_init(void) BUG(); if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)) BUG(); + if (!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL)) + BUG(); cpumask_setall(top_cpuset.cpus_allowed); + cpumask_setall(top_cpuset.cpus_requested); nodes_setall(top_cpuset.mems_allowed); cpumask_setall(top_cpuset.effective_cpus); nodes_setall(top_cpuset.effective_mems);
@@ -2231,7 +2244,7 @@ retry: goto retry; } - cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus); + cpumask_and(&new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus); nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems); cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
From 96a970e08bdec910c7780e8f823b3595eef6ba8c Mon Sep 17 00:00:00 2001
From: Phil Turnbull
Date: Tue, 2 Feb 2016 13:36:45 -0500
Subject: [PATCH 37/38] UPSTREAM: netfilter: nfnetlink: correctly validate length of batch messages

(cherry picked from commit c58d6c93680f28ac58984af61d0a7ebf4319c241)

If nlh->nlmsg_len is zero then an infinite loop is triggered because 'skb_pull(skb, msglen);' pulls zero bytes. The calculation in nlmsg_len() underflows if 'nlh->nlmsg_len < NLMSG_HDRLEN', which bypasses the length validation and will later trigger an out-of-bounds read. If the length validation does fail then the malformed batch message is copied back to userspace. However, we cannot do this because nlh->nlmsg_len can be invalid. This leads to an out-of-bounds read in netlink_ack:

[ 41.455421] ==================================================================
[ 41.456431] BUG: KASAN: slab-out-of-bounds in memcpy+0x1d/0x40 at addr ffff880119e79340
[ 41.456431] Read of size 4294967280 by task a.out/987
[ 41.456431] =============================================================================
[ 41.456431] BUG kmalloc-512 (Not tainted): kasan: bad access detected
[ 41.456431] -----------------------------------------------------------------------------
...
[ 41.456431] Bytes b4 ffff880119e79310: 00 00 00 00 d5 03 00 00 b0 fb fe ff 00 00 00 00 ................
[ 41.456431] Object ffff880119e79320: 20 00 00 00 10 00 05 00 00 00 00 00 00 00 00 00 ...............
[ 41.456431] Object ffff880119e79330: 14 00 0a 00 01 03 fc 40 45 56 11 22 33 10 00 05 .......@EV."3...
[ 41.456431] Object ffff880119e79340: f0 ff ff ff 88 99 aa bb 00 14 00 0a 00 06 fe fb ................
^^ start of batch nlmsg with nlmsg_len=4294967280
...
[ 41.456431] Memory state around the buggy address:
[ 41.456431] ffff880119e79400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 41.456431] ffff880119e79480: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 41.456431] >ffff880119e79500: 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc fc
[ 41.456431] ^
[ 41.456431] ffff880119e79580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 41.456431] ffff880119e79600: fc fc fc fc fc fc fc fc fc fc fb fb fb fb fb fb
[ 41.456431] ==================================================================

Fix this with better validation of nlh->nlmsg_len and by setting NFNL_BATCH_FAILURE if any batch message fails length validation. CAP_NET_ADMIN is required to trigger the bugs.

Fixes: 9ea2aa8b7dba ("netfilter: nfnetlink: validate nfnetlink header from batch") Signed-off-by: Phil Turnbull Signed-off-by: Pablo Neira Ayuso Change-Id: Id3e15c40cb464bf2791af907c235d8a316b2449c Bug: 30947055
---
net/netfilter/nfnetlink.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 77afe913d03d..9adedba78eea 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c
@@ -326,10 +326,12 @@ replay: nlh = nlmsg_hdr(skb); err = 0; - if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) || - skb->len < nlh->nlmsg_len) { - err = -EINVAL; - goto ack; + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < nlh->nlmsg_len || + nlmsg_len(nlh) < sizeof(struct nfgenmsg)) { + nfnl_err_reset(&err_list); + status |= NFNL_BATCH_FAILURE; + goto done; } /* Only requests are handled by the kernel */
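The underflow at the heart of this bug is ordinary C integer promotion and can be reproduced in userspace. In the following sketch the constant mirrors the netlink header size, payload_len() plays the role of nlmsg_len(), and the struct is a stand-in for struct nfgenmsg; all names are illustrative:

#include <stdio.h>

#define NLMSG_HDRLEN 16	/* NLMSG_ALIGN(sizeof(struct nlmsghdr)) */

/* Same shape as struct nfgenmsg: 4 bytes of payload header. */
struct nfgenmsg_model { unsigned char family, version; unsigned short res_id; };

/* Mirrors nlmsg_len(): returns the payload length as a signed int. */
static int payload_len(unsigned int nlmsg_len_field)
{
	return nlmsg_len_field - NLMSG_HDRLEN;	/* -12 for a forged len of 4 */
}

int main(void)
{
	unsigned int forged = 4;	/* attacker-controlled nlh->nlmsg_len */

	/* Old check: comparing int against size_t promotes the negative
	 * length to a huge unsigned value, so the short message passes. */
	if (payload_len(forged) < sizeof(struct nfgenmsg_model))
		puts("old check: rejected");
	else
		puts("old check: bypassed by underflow");

	/* New first check: test the raw field before any subtraction. */
	if (forged < NLMSG_HDRLEN)
		puts("new check: rejected");
	return 0;
}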
From 6b93f8214eabf0f363eab5283c2ad18b5bc33135 Mon Sep 17 00:00:00 2001
From: Peter Hurley
Date: Fri, 27 Nov 2015 14:30:21 -0500
Subject: [PATCH 38/38] UPSTREAM: tty: Prevent ldisc drivers from re-using stale tty fields

(cherry picked from commit dd42bf1197144ede075a9d4793123f7689e164bc)

Line discipline drivers may mistakenly rely on stale ldisc-related fields when initializing. For example, a failure to initialize tty->receive_room in the N_GIGASET_M101 line discipline was recently found and fixed [1]. Now, the N_X25 line discipline has been discovered accessing the previous line discipline's already-freed private data [2]. Harden the ldisc interface against misuse by initializing the relevant tty fields before instantiating the new line discipline.

[1] commit fd98e9419d8d622a4de91f76b306af6aa627aa9c Author: Tilman Schmidt Date: Tue Jul 14 00:37:13 2015 +0200 isdn/gigaset: reset tty->receive_room when attaching ser_gigaset

[2] Report from Sasha Levin
[ 634.336761] ==================================================================
[ 634.338226] BUG: KASAN: use-after-free in x25_asy_open_tty+0x13d/0x490 at addr ffff8800a743efd0
[ 634.339558] Read of size 4 by task syzkaller_execu/8981
[ 634.340359] =============================================================================
[ 634.341598] BUG kmalloc-512 (Not tainted): kasan: bad access detected
...
[ 634.405018] Call Trace:
[ 634.405277] dump_stack (lib/dump_stack.c:52)
[ 634.405775] print_trailer (mm/slub.c:655)
[ 634.406361] object_err (mm/slub.c:662)
[ 634.406824] kasan_report_error (mm/kasan/report.c:138 mm/kasan/report.c:236)
[ 634.409581] __asan_report_load4_noabort (mm/kasan/report.c:279)
[ 634.411355] x25_asy_open_tty (drivers/net/wan/x25_asy.c:559 (discriminator 1))
[ 634.413997] tty_ldisc_open.isra.2 (drivers/tty/tty_ldisc.c:447)
[ 634.414549] tty_set_ldisc (drivers/tty/tty_ldisc.c:567)
[ 634.415057] tty_ioctl (drivers/tty/tty_io.c:2646 drivers/tty/tty_io.c:2879)
[ 634.423524] do_vfs_ioctl (fs/ioctl.c:43 fs/ioctl.c:607)
[ 634.427491] SyS_ioctl (fs/ioctl.c:622 fs/ioctl.c:613)
[ 634.427945] entry_SYSCALL_64_fastpath (arch/x86/entry/entry_64.S:188)

Cc: Tilman Schmidt Cc: Sasha Levin Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman Change-Id: Ibed6feadfb9706d478f93feec3b240aecfc64af3 Bug: 30951112
---
drivers/tty/tty_ldisc.c | 7 +++++++ 1 file changed, 7 insertions(+)

diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 629e3c865072..9bee25cfa0be 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c
@@ -417,6 +417,10 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush); * they are not on hot paths so a little discipline won't do * any harm. * + * The line discipline-related tty_struct fields are reset to + * prevent the ldisc driver from re-using stale information for + * the new ldisc instance. + * * Locking: takes termios_rwsem */
@@ -425,6 +429,9 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num) down_write(&tty->termios_rwsem); tty->termios.c_line = num; up_write(&tty->termios_rwsem); + + tty->disc_data = NULL; + tty->receive_room = 0; } /**
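Reduced to a userspace toy, the failure mode this last patch closes looks like the sketch below: the previous ldisc's close() frees its private data but leaves tty->disc_data pointing at it, and a new ldisc that trusts the field instead of initializing it would read freed memory (exactly the x25_asy report above). The two assignments in set_ldisc_fixed() correspond to the ones added to tty_set_termios_ldisc(); everything else is an illustrative stand-in:

#include <stdio.h>
#include <stdlib.h>

struct tty { void *disc_data; unsigned int receive_room; };

/* The outgoing ldisc frees its private data but, before this patch,
 * nothing cleared the pointer left behind in the tty. */
static void old_ldisc_close(struct tty *tty)
{
	free(tty->disc_data);		/* pointer now dangles */
}

/* A buggy incoming ldisc that reuses the field instead of setting it
 * up fresh would dereference freed memory here. */
static void new_ldisc_open(struct tty *tty)
{
	if (tty->disc_data)
		printf("reusing stale %p\n", tty->disc_data);
}

/* The fix: the core resets the fields between close and open. */
static void set_ldisc_fixed(struct tty *tty)
{
	old_ldisc_close(tty);
	tty->disc_data = NULL;		/* as added to tty_set_termios_ldisc() */
	tty->receive_room = 0;
	new_ldisc_open(tty);		/* sees clean state, prints nothing */
}

int main(void)
{
	struct tty t = { .disc_data = malloc(16), .receive_room = 64 };
	set_ldisc_fixed(&t);
	return 0;
}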