diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index 6496bb3961a2..34fa64a669ac 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -141,6 +141,7 @@ CONFIG_PROFILING=y CONFIG_QUOTA=y CONFIG_RTC_CLASS=y CONFIG_RT_GROUP_SCHED=y +CONFIG_SECCOMP=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y diff --git a/arch/Kconfig b/arch/Kconfig index 31a318a56d98..98f64ad1caf1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -423,6 +423,15 @@ config CC_STACKPROTECTOR_STRONG endchoice +config HAVE_ARCH_WITHIN_STACK_FRAMES + bool + help + An architecture should select this if it can walk the kernel stack + frames to determine if an object is part of either the arguments + or local variables (i.e. that it excludes saved return addresses, + and similar) by implementing an inline arch_within_stack_frames(), + which is used by CONFIG_HARDENED_USERCOPY. + config HAVE_CONTEXT_TRACKING bool help diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d3159ff4de5b..586134e01928 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -33,6 +33,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 select HAVE_ARCH_MMAP_RND_BITS if MMU diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 35c9db857ebe..7fb59199c6bb 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -496,7 +496,10 @@ arm_copy_from_user(void *to, const void __user *from, unsigned long n); static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned int __ua_flags = uaccess_save_and_enable(); + unsigned int __ua_flags; + + check_object_size(to, n, false); + __ua_flags = uaccess_save_and_enable(); n = arm_copy_from_user(to, from, n); uaccess_restore(__ua_flags); return n; @@ -511,11 +514,15 @@ static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { #ifndef CONFIG_UACCESS_WITH_MEMCPY - unsigned int __ua_flags = uaccess_save_and_enable(); + unsigned int __ua_flags; + + check_object_size(from, n, true); + __ua_flags = uaccess_save_and_enable(); n = arm_copy_to_user(to, from, n); uaccess_restore(__ua_flags); return n; #else + check_object_size(from, n, true); return arm_copy_to_user(to, from, n); #endif } diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 20edd349d379..bf63b4693457 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -772,7 +772,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) struct resource *res; kernel_code.start = virt_to_phys(_text); - kernel_code.end = virt_to_phys(_etext - 1); + kernel_code.end = virt_to_phys(__init_begin - 1); kernel_data.start = virt_to_phys(_sdata); kernel_data.end = virt_to_phys(_end - 1); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 8b60fde5ce48..be2ab6d3b91f 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -120,6 +120,8 @@ SECTIONS #ifdef CONFIG_DEBUG_RODATA . = ALIGN(1< high + * [saved bp][saved ip][args][local vars][saved bp][saved ip] + * ^----------------^ + * allow copies only within here + */ + while (stack <= frame && frame < stackend) { + /* + * If obj + len extends past the last frame, this + * check won't pass and the next frame will be 0, + * causing us to bail out and correctly report + * the copy as invalid. + */ + if (obj + len <= frame) + return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1; + oldframe = frame; + frame = *(const void * const *)frame; + } + return -1; +#else + return 0; +#endif +} + #else /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 09b1b0ab94b7..be439e246d91 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -134,6 +134,9 @@ extern int __get_user_4(void); extern int __get_user_8(void); extern int __get_user_bad(void); +#define __uaccess_begin() stac() +#define __uaccess_end() clac() + /* * This is a type: either unsigned long, if the argument fits into * that type, or otherwise unsigned long long. @@ -193,10 +196,10 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) #ifdef CONFIG_X86_32 #define __put_user_asm_u64(x, addr, err, errret) \ - asm volatile(ASM_STAC "\n" \ + asm volatile("\n" \ "1: movl %%eax,0(%2)\n" \ "2: movl %%edx,4(%2)\n" \ - "3: " ASM_CLAC "\n" \ + "3:" \ ".section .fixup,\"ax\"\n" \ "4: movl %3,%0\n" \ " jmp 3b\n" \ @@ -207,10 +210,10 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) : "A" (x), "r" (addr), "i" (errret), "0" (err)) #define __put_user_asm_ex_u64(x, addr) \ - asm volatile(ASM_STAC "\n" \ + asm volatile("\n" \ "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ - "3: " ASM_CLAC "\n" \ + "3:" \ _ASM_EXTABLE_EX(1b, 2b) \ _ASM_EXTABLE_EX(2b, 3b) \ : : "A" (x), "r" (addr)) @@ -304,6 +307,10 @@ do { \ } \ } while (0) +/* + * This doesn't do __uaccess_begin/end - the exception handling + * around it must do that. + */ #define __put_user_size_ex(x, ptr, size) \ do { \ __chk_user_ptr(ptr); \ @@ -358,9 +365,9 @@ do { \ } while (0) #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile(ASM_STAC "\n" \ + asm volatile("\n" \ "1: mov"itype" %2,%"rtype"1\n" \ - "2: " ASM_CLAC "\n" \ + "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ " xor"itype" %"rtype"1,%"rtype"1\n" \ @@ -370,6 +377,10 @@ do { \ : "=r" (err), ltype(x) \ : "m" (__m(addr)), "i" (errret), "0" (err)) +/* + * This doesn't do __uaccess_begin/end - the exception handling + * around it must do that. + */ #define __get_user_size_ex(x, ptr, size) \ do { \ __chk_user_ptr(ptr); \ @@ -400,7 +411,9 @@ do { \ #define __put_user_nocheck(x, ptr, size) \ ({ \ int __pu_err; \ + __uaccess_begin(); \ __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ + __uaccess_end(); \ __builtin_expect(__pu_err, 0); \ }) @@ -408,7 +421,9 @@ do { \ ({ \ int __gu_err; \ unsigned long __gu_val; \ + __uaccess_begin(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ + __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ __builtin_expect(__gu_err, 0); \ }) @@ -423,9 +438,9 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. */ #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile(ASM_STAC "\n" \ + asm volatile("\n" \ "1: mov"itype" %"rtype"1,%2\n" \ - "2: " ASM_CLAC "\n" \ + "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ " jmp 2b\n" \ @@ -445,11 +460,11 @@ struct __large_struct { unsigned long buf[100]; }; */ #define uaccess_try do { \ current_thread_info()->uaccess_err = 0; \ - stac(); \ + __uaccess_begin(); \ barrier(); #define uaccess_catch(err) \ - clac(); \ + __uaccess_end(); \ (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ } while (0) @@ -547,12 +562,13 @@ extern void __cmpxchg_wrong_size(void) __typeof__(ptr) __uval = (uval); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ + __uaccess_begin(); \ switch (size) { \ case 1: \ { \ - asm volatile("\t" ASM_STAC "\n" \ + asm volatile("\n" \ "1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \ - "2:\t" ASM_CLAC "\n" \ + "2:\n" \ "\t.section .fixup, \"ax\"\n" \ "3:\tmov %3, %0\n" \ "\tjmp 2b\n" \ @@ -566,9 +582,9 @@ extern void __cmpxchg_wrong_size(void) } \ case 2: \ { \ - asm volatile("\t" ASM_STAC "\n" \ + asm volatile("\n" \ "1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n" \ - "2:\t" ASM_CLAC "\n" \ + "2:\n" \ "\t.section .fixup, \"ax\"\n" \ "3:\tmov %3, %0\n" \ "\tjmp 2b\n" \ @@ -582,9 +598,9 @@ extern void __cmpxchg_wrong_size(void) } \ case 4: \ { \ - asm volatile("\t" ASM_STAC "\n" \ + asm volatile("\n" \ "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" \ - "2:\t" ASM_CLAC "\n" \ + "2:\n" \ "\t.section .fixup, \"ax\"\n" \ "3:\tmov %3, %0\n" \ "\tjmp 2b\n" \ @@ -601,9 +617,9 @@ extern void __cmpxchg_wrong_size(void) if (!IS_ENABLED(CONFIG_X86_64)) \ __cmpxchg_wrong_size(); \ \ - asm volatile("\t" ASM_STAC "\n" \ + asm volatile("\n" \ "1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n" \ - "2:\t" ASM_CLAC "\n" \ + "2:\n" \ "\t.section .fixup, \"ax\"\n" \ "3:\tmov %3, %0\n" \ "\tjmp 2b\n" \ @@ -618,6 +634,7 @@ extern void __cmpxchg_wrong_size(void) default: \ __cmpxchg_wrong_size(); \ } \ + __uaccess_end(); \ *__uval = __old; \ __ret; \ }) @@ -714,9 +731,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) * case, and do only runtime checking for non-constant sizes. */ - if (likely(sz < 0 || sz >= n)) + if (likely(sz < 0 || sz >= n)) { + check_object_size(to, n, false); n = _copy_from_user(to, from, n); - else if(__builtin_constant_p(n)) + } else if (__builtin_constant_p(n)) copy_from_user_overflow(); else __copy_from_user_overflow(sz, n); @@ -732,9 +750,10 @@ copy_to_user(void __user *to, const void *from, unsigned long n) might_fault(); /* See the comment in copy_from_user() above. */ - if (likely(sz < 0 || sz >= n)) + if (likely(sz < 0 || sz >= n)) { + check_object_size(from, n, true); n = _copy_to_user(to, from, n); - else if(__builtin_constant_p(n)) + } else if (__builtin_constant_p(n)) copy_to_user_overflow(); else __copy_to_user_overflow(sz, n); @@ -745,5 +764,30 @@ copy_to_user(void __user *to, const void *from, unsigned long n) #undef __copy_from_user_overflow #undef __copy_to_user_overflow +/* + * The "unsafe" user accesses aren't really "unsafe", but the naming + * is a big fat warning: you have to not only do the access_ok() + * checking before using them, but you have to surround them with the + * user_access_begin/end() pair. + */ +#define user_access_begin() __uaccess_begin() +#define user_access_end() __uaccess_end() + +#define unsafe_put_user(x, ptr, err_label) \ +do { \ + int __pu_err; \ + __put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \ + if (unlikely(__pu_err)) goto err_label; \ +} while (0) + +#define unsafe_get_user(x, ptr, err_label) \ +do { \ + int __gu_err; \ + unsigned long __gu_val; \ + __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ + if (unlikely(__gu_err)) goto err_label; \ +} while (0) + #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index f5dcb5204dcd..2ffc9188cc70 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -43,6 +43,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero static __always_inline unsigned long __must_check __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { + check_object_size(from, n, true); if (__builtin_constant_p(n)) { unsigned long ret; @@ -143,6 +144,7 @@ static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); + check_object_size(to, n, false); if (__builtin_constant_p(n)) { unsigned long ret; diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index f2f9b39b274a..2957c8237c28 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -53,38 +53,53 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) { int ret = 0; + check_object_size(dst, size, false); if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { - case 1:__get_user_asm(*(u8 *)dst, (u8 __user *)src, + case 1: + __uaccess_begin(); + __get_user_asm(*(u8 *)dst, (u8 __user *)src, ret, "b", "b", "=q", 1); + __uaccess_end(); return ret; - case 2:__get_user_asm(*(u16 *)dst, (u16 __user *)src, + case 2: + __uaccess_begin(); + __get_user_asm(*(u16 *)dst, (u16 __user *)src, ret, "w", "w", "=r", 2); + __uaccess_end(); return ret; - case 4:__get_user_asm(*(u32 *)dst, (u32 __user *)src, + case 4: + __uaccess_begin(); + __get_user_asm(*(u32 *)dst, (u32 __user *)src, ret, "l", "k", "=r", 4); + __uaccess_end(); return ret; - case 8:__get_user_asm(*(u64 *)dst, (u64 __user *)src, + case 8: + __uaccess_begin(); + __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 8); + __uaccess_end(); return ret; case 10: + __uaccess_begin(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 10); - if (unlikely(ret)) - return ret; - __get_user_asm(*(u16 *)(8 + (char *)dst), - (u16 __user *)(8 + (char __user *)src), - ret, "w", "w", "=r", 2); + if (likely(!ret)) + __get_user_asm(*(u16 *)(8 + (char *)dst), + (u16 __user *)(8 + (char __user *)src), + ret, "w", "w", "=r", 2); + __uaccess_end(); return ret; case 16: + __uaccess_begin(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 16); - if (unlikely(ret)) - return ret; - __get_user_asm(*(u64 *)(8 + (char *)dst), - (u64 __user *)(8 + (char __user *)src), - ret, "q", "", "=r", 8); + if (likely(!ret)) + __get_user_asm(*(u64 *)(8 + (char *)dst), + (u64 __user *)(8 + (char __user *)src), + ret, "q", "", "=r", 8); + __uaccess_end(); return ret; default: return copy_user_generic(dst, (__force void *)src, size); @@ -103,38 +118,55 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size) { int ret = 0; + check_object_size(src, size, true); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { - case 1:__put_user_asm(*(u8 *)src, (u8 __user *)dst, + case 1: + __uaccess_begin(); + __put_user_asm(*(u8 *)src, (u8 __user *)dst, ret, "b", "b", "iq", 1); + __uaccess_end(); return ret; - case 2:__put_user_asm(*(u16 *)src, (u16 __user *)dst, + case 2: + __uaccess_begin(); + __put_user_asm(*(u16 *)src, (u16 __user *)dst, ret, "w", "w", "ir", 2); + __uaccess_end(); return ret; - case 4:__put_user_asm(*(u32 *)src, (u32 __user *)dst, + case 4: + __uaccess_begin(); + __put_user_asm(*(u32 *)src, (u32 __user *)dst, ret, "l", "k", "ir", 4); + __uaccess_end(); return ret; - case 8:__put_user_asm(*(u64 *)src, (u64 __user *)dst, + case 8: + __uaccess_begin(); + __put_user_asm(*(u64 *)src, (u64 __user *)dst, ret, "q", "", "er", 8); + __uaccess_end(); return ret; case 10: + __uaccess_begin(); __put_user_asm(*(u64 *)src, (u64 __user *)dst, ret, "q", "", "er", 10); - if (unlikely(ret)) - return ret; - asm("":::"memory"); - __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst, - ret, "w", "w", "ir", 2); + if (likely(!ret)) { + asm("":::"memory"); + __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst, + ret, "w", "w", "ir", 2); + } + __uaccess_end(); return ret; case 16: + __uaccess_begin(); __put_user_asm(*(u64 *)src, (u64 __user *)dst, ret, "q", "", "er", 16); - if (unlikely(ret)) - return ret; - asm("":::"memory"); - __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, - ret, "q", "", "er", 8); + if (likely(!ret)) { + asm("":::"memory"); + __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, + ret, "q", "", "er", 8); + } + __uaccess_end(); return ret; default: return copy_user_generic((__force void *)dst, src, size); @@ -160,39 +192,47 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) switch (size) { case 1: { u8 tmp; + __uaccess_begin(); __get_user_asm(tmp, (u8 __user *)src, ret, "b", "b", "=q", 1); if (likely(!ret)) __put_user_asm(tmp, (u8 __user *)dst, ret, "b", "b", "iq", 1); + __uaccess_end(); return ret; } case 2: { u16 tmp; + __uaccess_begin(); __get_user_asm(tmp, (u16 __user *)src, ret, "w", "w", "=r", 2); if (likely(!ret)) __put_user_asm(tmp, (u16 __user *)dst, ret, "w", "w", "ir", 2); + __uaccess_end(); return ret; } case 4: { u32 tmp; + __uaccess_begin(); __get_user_asm(tmp, (u32 __user *)src, ret, "l", "k", "=r", 4); if (likely(!ret)) __put_user_asm(tmp, (u32 __user *)dst, ret, "l", "k", "ir", 4); + __uaccess_end(); return ret; } case 8: { u64 tmp; + __uaccess_begin(); __get_user_asm(tmp, (u64 __user *)src, ret, "q", "", "=r", 8); if (likely(!ret)) __put_user_asm(tmp, (u64 __user *)dst, ret, "q", "", "er", 8); + __uaccess_end(); return ret; } default: diff --git a/block/blk-core.c b/block/blk-core.c index f8e64cac981a..b20ada4ad68e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -40,6 +40,8 @@ #include "blk.h" #include "blk-mq.h" +#include + EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); @@ -3539,3 +3541,83 @@ int __init blk_dev_init(void) return 0; } + +/* + * Blk IO latency support. We want this to be as cheap as possible, so doing + * this lockless (and avoiding atomics), a few off by a few errors in this + * code is not harmful, and we don't want to do anything that is + * perf-impactful. + * TODO : If necessary, we can make the histograms per-cpu and aggregate + * them when printing them out. + */ +void +blk_zero_latency_hist(struct io_latency_state *s) +{ + memset(s->latency_y_axis_read, 0, + sizeof(s->latency_y_axis_read)); + memset(s->latency_y_axis_write, 0, + sizeof(s->latency_y_axis_write)); + s->latency_reads_elems = 0; + s->latency_writes_elems = 0; +} + +ssize_t +blk_latency_hist_show(struct io_latency_state *s, char *buf) +{ + int i; + int bytes_written = 0; + u_int64_t num_elem, elem; + int pct; + + num_elem = s->latency_reads_elems; + if (num_elem > 0) { + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "IO svc_time Read Latency Histogram (n = %llu):\n", + num_elem); + for (i = 0; + i < ARRAY_SIZE(latency_x_axis_us); + i++) { + elem = s->latency_y_axis_read[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t< %5lluus%15llu%15d%%\n", + latency_x_axis_us[i], + elem, pct); + } + /* Last element in y-axis table is overflow */ + elem = s->latency_y_axis_read[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t> %5dms%15llu%15d%%\n", 10, + elem, pct); + } + num_elem = s->latency_writes_elems; + if (num_elem > 0) { + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "IO svc_time Write Latency Histogram (n = %llu):\n", + num_elem); + for (i = 0; + i < ARRAY_SIZE(latency_x_axis_us); + i++) { + elem = s->latency_y_axis_write[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t< %5lluus%15llu%15d%%\n", + latency_x_axis_us[i], + elem, pct); + } + /* Last element in y-axis table is overflow */ + elem = s->latency_y_axis_write[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t> %5dms%15llu%15d%%\n", 10, + elem, pct); + } + return bytes_written; +} diff --git a/drivers/media/tuners/tuner-xc2028.c b/drivers/media/tuners/tuner-xc2028.c index 4e941f00b600..317ef63ee789 100644 --- a/drivers/media/tuners/tuner-xc2028.c +++ b/drivers/media/tuners/tuner-xc2028.c @@ -1403,11 +1403,14 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg) * in order to avoid troubles during device release. */ kfree(priv->ctrl.fname); + priv->ctrl.fname = NULL; memcpy(&priv->ctrl, p, sizeof(priv->ctrl)); if (p->fname) { priv->ctrl.fname = kstrdup(p->fname, GFP_KERNEL); - if (priv->ctrl.fname == NULL) + if (priv->ctrl.fname == NULL) { rc = -ENOMEM; + goto unlock; + } } /* @@ -1439,6 +1442,7 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg) } else priv->state = XC2028_WAITING_FIRMWARE; } +unlock: mutex_unlock(&priv->lock); return rc; diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 4df0c68e87e8..1689075e2229 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -183,6 +183,17 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq) pr_debug("%s: %d bytes transferred: %d\n", mmc_hostname(host), mrq->data->bytes_xfered, mrq->data->error); + if (mrq->lat_hist_enabled) { + ktime_t completion; + u_int64_t delta_us; + + completion = ktime_get(); + delta_us = ktime_us_delta(completion, + mrq->io_start); + blk_update_latency_hist(&host->io_lat_s, + (mrq->data->flags & MMC_DATA_READ), + delta_us); + } trace_mmc_blk_rw_end(cmd->opcode, cmd->arg, mrq->data); } @@ -627,6 +638,11 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host, } if (!err && areq) { + if (host->latency_hist_enabled) { + areq->mrq->io_start = ktime_get(); + areq->mrq->lat_hist_enabled = 1; + } else + areq->mrq->lat_hist_enabled = 0; trace_mmc_blk_rw_start(areq->mrq->cmd->opcode, areq->mrq->cmd->arg, areq->mrq->data); @@ -1964,7 +1980,7 @@ void mmc_init_erase(struct mmc_card *card) } static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, - unsigned int arg, unsigned int qty) + unsigned int arg, unsigned int qty) { unsigned int erase_timeout; @@ -2907,6 +2923,54 @@ static void __exit mmc_exit(void) destroy_workqueue(workqueue); } +static ssize_t +latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + + return blk_latency_hist_show(&host->io_lat_s, buf); +} + +/* + * Values permitted 0, 1, 2. + * 0 -> Disable IO latency histograms (default) + * 1 -> Enable IO latency histograms + * 2 -> Zero out IO latency histograms + */ +static ssize_t +latency_hist_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + long value; + + if (kstrtol(buf, 0, &value)) + return -EINVAL; + if (value == BLK_IO_LAT_HIST_ZERO) + blk_zero_latency_hist(&host->io_lat_s); + else if (value == BLK_IO_LAT_HIST_ENABLE || + value == BLK_IO_LAT_HIST_DISABLE) + host->latency_hist_enabled = value; + return count; +} + +static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR, + latency_hist_show, latency_hist_store); + +void +mmc_latency_hist_sysfs_init(struct mmc_host *host) +{ + if (device_create_file(&host->class_dev, &dev_attr_latency_hist)) + dev_err(&host->class_dev, + "Failed to create latency_hist sysfs entry\n"); +} + +void +mmc_latency_hist_sysfs_exit(struct mmc_host *host) +{ + device_remove_file(&host->class_dev, &dev_attr_latency_hist); +} + subsys_initcall(mmc_init); module_exit(mmc_exit); diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index fcf7829c759e..17068839c74b 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -32,8 +32,6 @@ #include "slot-gpio.h" #include "pwrseq.h" -#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev) - static DEFINE_IDR(mmc_host_idr); static DEFINE_SPINLOCK(mmc_host_lock); @@ -394,6 +392,8 @@ int mmc_add_host(struct mmc_host *host) mmc_add_host_debugfs(host); #endif + mmc_latency_hist_sysfs_init(host); + mmc_start_host(host); if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY)) register_pm_notifier(&host->pm_notify); @@ -422,6 +422,8 @@ void mmc_remove_host(struct mmc_host *host) mmc_remove_host_debugfs(host); #endif + mmc_latency_hist_sysfs_exit(host); + device_del(&host->class_dev); led_trigger_unregister_simple(host->led); diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h index 992bf5397633..bf38533406fd 100644 --- a/drivers/mmc/core/host.h +++ b/drivers/mmc/core/host.h @@ -12,6 +12,8 @@ #define _MMC_CORE_HOST_H #include +#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev) + int mmc_register_host_class(void); void mmc_unregister_host_class(void); @@ -21,5 +23,8 @@ void mmc_retune_hold(struct mmc_host *host); void mmc_retune_release(struct mmc_host *host); int mmc_retune(struct mmc_host *host); +void mmc_latency_hist_sysfs_init(struct mmc_host *host); +void mmc_latency_hist_sysfs_exit(struct mmc_host *host); + #endif diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 85cd2564c157..4167bdbf0ecf 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -39,6 +39,7 @@ #include #include +#include #include "ufshcd.h" #include "unipro.h" @@ -1332,6 +1333,17 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) clear_bit_unlock(tag, &hba->lrb_in_use); goto out; } + + /* IO svc time latency histogram */ + if (hba != NULL && cmd->request != NULL) { + if (hba->latency_hist_enabled && + (cmd->request->cmd_type == REQ_TYPE_FS)) { + cmd->request->lat_hist_io_start = ktime_get(); + cmd->request->lat_hist_enabled = 1; + } else + cmd->request->lat_hist_enabled = 0; + } + WARN_ON(hba->clk_gating.state != CLKS_ON); lrbp = &hba->lrb[tag]; @@ -3160,6 +3172,7 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba) u32 tr_doorbell; int result; int index; + struct request *req; /* Resetting interrupt aggregation counters first and reading the * DOOR_BELL afterward allows us to handle all the completed requests. @@ -3184,6 +3197,22 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba) /* Mark completed command as NULL in LRB */ lrbp->cmd = NULL; clear_bit_unlock(index, &hba->lrb_in_use); + req = cmd->request; + if (req) { + /* Update IO svc time latency histogram */ + if (req->lat_hist_enabled) { + ktime_t completion; + u_int64_t delta_us; + + completion = ktime_get(); + delta_us = ktime_us_delta(completion, + req->lat_hist_io_start); + /* rq_data_dir() => true if WRITE */ + blk_update_latency_hist(&hba->io_lat_s, + (rq_data_dir(req) == READ), + delta_us); + } + } /* Do not touch lrbp after scsi done */ cmd->scsi_done(cmd); __ufshcd_release(hba); @@ -5327,6 +5356,54 @@ out: } EXPORT_SYMBOL(ufshcd_shutdown); +/* + * Values permitted 0, 1, 2. + * 0 -> Disable IO latency histograms (default) + * 1 -> Enable IO latency histograms + * 2 -> Zero out IO latency histograms + */ +static ssize_t +latency_hist_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + long value; + + if (kstrtol(buf, 0, &value)) + return -EINVAL; + if (value == BLK_IO_LAT_HIST_ZERO) + blk_zero_latency_hist(&hba->io_lat_s); + else if (value == BLK_IO_LAT_HIST_ENABLE || + value == BLK_IO_LAT_HIST_DISABLE) + hba->latency_hist_enabled = value; + return count; +} + +ssize_t +latency_hist_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + return blk_latency_hist_show(&hba->io_lat_s, buf); +} + +static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR, + latency_hist_show, latency_hist_store); + +static void +ufshcd_init_latency_hist(struct ufs_hba *hba) +{ + if (device_create_file(hba->dev, &dev_attr_latency_hist)) + dev_err(hba->dev, "Failed to create latency_hist sysfs entry\n"); +} + +static void +ufshcd_exit_latency_hist(struct ufs_hba *hba) +{ + device_create_file(hba->dev, &dev_attr_latency_hist); +} + /** * ufshcd_remove - de-allocate SCSI host and host memory space * data structure memory @@ -5342,6 +5419,7 @@ void ufshcd_remove(struct ufs_hba *hba) scsi_host_put(hba->host); ufshcd_exit_clk_gating(hba); + ufshcd_exit_latency_hist(hba); if (ufshcd_is_clkscaling_enabled(hba)) devfreq_remove_device(hba->devfreq); ufshcd_hba_exit(hba); @@ -5639,6 +5717,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) /* Hold auto suspend until async scan completes */ pm_runtime_get_sync(dev); + ufshcd_init_latency_hist(hba); + /* * The device-initialize-sequence hasn't been invoked yet. * Set the device to power-off state @@ -5653,6 +5733,7 @@ out_remove_scsi_host: scsi_remove_host(hba->host); exit_gating: ufshcd_exit_clk_gating(hba); + ufshcd_exit_latency_hist(hba); out_disable: hba->is_irq_enabled = false; scsi_host_put(host); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 2570d9477b37..f3780cf7d895 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -532,6 +532,9 @@ struct ufs_hba { struct devfreq *devfreq; struct ufs_clk_scaling clk_scaling; bool is_sys_suspended; + + int latency_hist_enabled; + struct io_latency_state io_lat_s; }; /* Returns true if clocks can be gated. Otherwise false */ diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 629e3c865072..9bee25cfa0be 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -417,6 +417,10 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush); * they are not on hot paths so a little discipline won't do * any harm. * + * The line discipline-related tty_struct fields are reset to + * prevent the ldisc driver from re-using stale information for + * the new ldisc instance. + * * Locking: takes termios_rwsem */ @@ -425,6 +429,9 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num) down_write(&tty->termios_rwsem); tty->termios.c_line = num; up_write(&tty->termios_rwsem); + + tty->disc_data = NULL; + tty->receive_room = 0; } /** diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 168755791ec8..c98bae90624c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -197,6 +197,9 @@ struct request { /* for bidi */ struct request *next_rq; + + ktime_t lat_hist_io_start; + int lat_hist_enabled; }; static inline unsigned short req_get_ioprio(struct request *req) @@ -1656,6 +1659,79 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); extern long bdev_direct_access(struct block_device *, sector_t, void __pmem **addr, unsigned long *pfn, long size); + +/* + * X-axis for IO latency histogram support. + */ +static const u_int64_t latency_x_axis_us[] = { + 100, + 200, + 300, + 400, + 500, + 600, + 700, + 800, + 900, + 1000, + 1200, + 1400, + 1600, + 1800, + 2000, + 2500, + 3000, + 4000, + 5000, + 6000, + 7000, + 9000, + 10000 +}; + +#define BLK_IO_LAT_HIST_DISABLE 0 +#define BLK_IO_LAT_HIST_ENABLE 1 +#define BLK_IO_LAT_HIST_ZERO 2 + +struct io_latency_state { + u_int64_t latency_y_axis_read[ARRAY_SIZE(latency_x_axis_us) + 1]; + u_int64_t latency_reads_elems; + u_int64_t latency_y_axis_write[ARRAY_SIZE(latency_x_axis_us) + 1]; + u_int64_t latency_writes_elems; +}; + +static inline void +blk_update_latency_hist(struct io_latency_state *s, + int read, + u_int64_t delta_us) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++) { + if (delta_us < (u_int64_t)latency_x_axis_us[i]) { + if (read) + s->latency_y_axis_read[i]++; + else + s->latency_y_axis_write[i]++; + break; + } + } + if (i == ARRAY_SIZE(latency_x_axis_us)) { + /* Overflowed the histogram */ + if (read) + s->latency_y_axis_read[i]++; + else + s->latency_y_axis_write[i]++; + } + if (read) + s->latency_reads_elems++; + else + s->latency_writes_elems++; +} + +void blk_zero_latency_hist(struct io_latency_state *s); +ssize_t blk_latency_hist_show(struct io_latency_state *s, char *buf); + #else /* CONFIG_BLOCK */ struct block_device; diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 37967b6da03c..3349f0676acb 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -136,6 +136,8 @@ struct mmc_request { struct completion completion; void (*done)(struct mmc_request *);/* completion function */ struct mmc_host *host; + ktime_t io_start; + int lat_hist_enabled; }; struct mmc_card; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 40025b28c1fb..e4862f7cdede 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -379,6 +380,9 @@ struct mmc_host { } embedded_sdio_data; #endif + int latency_hist_enabled; + struct io_latency_state io_lat_s; + unsigned long private[0] ____cacheline_aligned; }; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e23a9e704536..bab4053fb795 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -65,8 +65,10 @@ enum { #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) +# define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA) #else # define is_migrate_cma(migratetype) false +# define is_migrate_cma_page(_page) false #endif #define for_each_migratetype_order(order, type) \ diff --git a/include/linux/slab.h b/include/linux/slab.h index 2037a861e367..4ef384b172e0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -144,6 +144,18 @@ void kfree(const void *); void kzfree(const void *); size_t ksize(const void *); +#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR +const char *__check_heap_object(const void *ptr, unsigned long n, + struct page *page); +#else +static inline const char *__check_heap_object(const void *ptr, + unsigned long n, + struct page *page) +{ + return NULL; +} +#endif + /* * Some archs want to perform DMA into kmalloc caches and need a guaranteed * alignment larger than the alignment of a 64-bit integer. diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 33885118523c..f4e857e920cd 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -81,6 +81,7 @@ struct kmem_cache { int reserved; /* Reserved bytes at the end of slabs */ const char *name; /* Name (only for display!) */ struct list_head list; /* List of slab caches */ + int red_left_pad; /* Left redzone padding size */ #ifdef CONFIG_SYSFS struct kobject kobj; /* For sysfs */ #endif diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index ff307b548ed3..0ae29ff9ccfd 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -145,6 +145,30 @@ static inline bool test_and_clear_restore_sigmask(void) #error "no set_restore_sigmask() provided and default one won't work" #endif +#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES +static inline int arch_within_stack_frames(const void * const stack, + const void * const stackend, + const void *obj, unsigned long len) +{ + return 0; +} +#endif + +#ifdef CONFIG_HARDENED_USERCOPY +extern void __check_object_size(const void *ptr, unsigned long n, + bool to_user); + +static inline void check_object_size(const void *ptr, unsigned long n, + bool to_user) +{ + __check_object_size(ptr, n, to_user); +} +#else +static inline void check_object_size(const void *ptr, unsigned long n, + bool to_user) +{ } +#endif /* CONFIG_HARDENED_USERCOPY */ + #endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 558129af828a..f30c187ed785 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -111,4 +111,11 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); #define probe_kernel_address(addr, retval) \ probe_kernel_read(&retval, addr, sizeof(retval)) +#ifndef user_access_begin +#define user_access_begin() do { } while (0) +#define user_access_end() do { } while (0) +#define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0) +#define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0) +#endif + #endif /* __LINUX_UACCESS_H__ */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b36cebad6b2f..6c480679423e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1513,6 +1513,8 @@ static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unli { if (sk->sk_send_head == skb_unlinked) sk->sk_send_head = NULL; + if (tcp_sk(sk)->highest_sack == skb_unlinked) + tcp_sk(sk)->highest_sack = NULL; } static inline void tcp_init_send_head(struct sock *sk) diff --git a/init/Kconfig b/init/Kconfig index 235c7a2c0d20..e1d1d6936f92 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1719,6 +1719,7 @@ choice config SLAB bool "SLAB" + select HAVE_HARDENED_USERCOPY_ALLOCATOR help The regular slab allocator that is established and known to work well in all environments. It organizes cache hot objects in @@ -1726,6 +1727,7 @@ config SLAB config SLUB bool "SLUB (Unqueued Allocator)" + select HAVE_HARDENED_USERCOPY_ALLOCATOR help SLUB is a slab allocator that minimizes cache line usage instead of managing queues of cached objects (SLAB approach). diff --git a/kernel/cpuset.c b/kernel/cpuset.c index a65d63463420..e2e294d997e0 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -98,6 +98,7 @@ struct cpuset { /* user-configured CPUs and Memory Nodes allow to tasks */ cpumask_var_t cpus_allowed; + cpumask_var_t cpus_requested; nodemask_t mems_allowed; /* effective CPUs and Memory Nodes allow to tasks */ @@ -386,7 +387,7 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs, static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) { - return cpumask_subset(p->cpus_allowed, q->cpus_allowed) && + return cpumask_subset(p->cpus_requested, q->cpus_requested) && nodes_subset(p->mems_allowed, q->mems_allowed) && is_cpu_exclusive(p) <= is_cpu_exclusive(q) && is_mem_exclusive(p) <= is_mem_exclusive(q); @@ -486,7 +487,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) cpuset_for_each_child(c, css, par) { if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && c != cur && - cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) + cpumask_intersects(trial->cpus_requested, c->cpus_requested)) goto out; if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && c != cur && @@ -945,17 +946,18 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (!*buf) { cpumask_clear(trialcs->cpus_allowed); } else { - retval = cpulist_parse(buf, trialcs->cpus_allowed); + retval = cpulist_parse(buf, trialcs->cpus_requested); if (retval < 0) return retval; - if (!cpumask_subset(trialcs->cpus_allowed, - top_cpuset.cpus_allowed)) + if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask)) return -EINVAL; + + cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask); } /* Nothing to do if the cpus didn't change */ - if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) + if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested)) return 0; retval = validate_change(cs, trialcs); @@ -964,6 +966,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, spin_lock_irq(&callback_lock); cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); + cpumask_copy(cs->cpus_requested, trialcs->cpus_requested); spin_unlock_irq(&callback_lock); /* use trialcs->cpus_allowed as a temp variable */ @@ -1754,7 +1757,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) switch (type) { case FILE_CPULIST: - seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed)); + seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_requested)); break; case FILE_MEMLIST: seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed)); @@ -1943,11 +1946,14 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css) return ERR_PTR(-ENOMEM); if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) goto free_cs; + if (!alloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL)) + goto free_allowed; if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL)) - goto free_cpus; + goto free_requested; set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); cpumask_clear(cs->cpus_allowed); + cpumask_clear(cs->cpus_requested); nodes_clear(cs->mems_allowed); cpumask_clear(cs->effective_cpus); nodes_clear(cs->effective_mems); @@ -1956,7 +1962,9 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css) return &cs->css; -free_cpus: +free_requested: + free_cpumask_var(cs->cpus_requested); +free_allowed: free_cpumask_var(cs->cpus_allowed); free_cs: kfree(cs); @@ -2019,6 +2027,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cs->mems_allowed = parent->mems_allowed; cs->effective_mems = parent->mems_allowed; cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); + cpumask_copy(cs->cpus_requested, parent->cpus_requested); cpumask_copy(cs->effective_cpus, parent->cpus_allowed); spin_unlock_irq(&callback_lock); out_unlock: @@ -2053,6 +2062,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) free_cpumask_var(cs->effective_cpus); free_cpumask_var(cs->cpus_allowed); + free_cpumask_var(cs->cpus_requested); kfree(cs); } @@ -2120,8 +2130,11 @@ int __init cpuset_init(void) BUG(); if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)) BUG(); + if (!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL)) + BUG(); cpumask_setall(top_cpuset.cpus_allowed); + cpumask_setall(top_cpuset.cpus_requested); nodes_setall(top_cpuset.mems_allowed); cpumask_setall(top_cpuset.effective_cpus); nodes_setall(top_cpuset.effective_mems); @@ -2255,7 +2268,7 @@ retry: goto retry; } - cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus); + cpumask_and(&new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus); nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems); cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index e358313a0d6c..b49cf3ac2d47 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -68,6 +68,7 @@ void rcu_sync_lockdep_assert(struct rcu_sync *rsp) RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(), "suspicious rcu_sync_is_idle() usage"); } +EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert); #endif /** diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index e0af6ff73d14..5a003a2ebd96 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -39,8 +39,8 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long unsigned long c, data; /* Fall back to byte-at-a-time if we get a page fault */ - if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) - break; + unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time); + *(unsigned long *)(dst+res) = c; if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); @@ -55,8 +55,7 @@ byte_at_a_time: while (max) { char c; - if (unlikely(__get_user(c,src+res))) - return -EFAULT; + unsafe_get_user(c,src+res, efault); dst[res] = c; if (!c) return res; @@ -75,6 +74,7 @@ byte_at_a_time: * Nope: we hit the address space limit, and we still had more * characters the caller would have wanted. That's an EFAULT. */ +efault: return -EFAULT; } @@ -107,7 +107,12 @@ long strncpy_from_user(char *dst, const char __user *src, long count) src_addr = (unsigned long)src; if (likely(src_addr < max_addr)) { unsigned long max = max_addr - src_addr; - return do_strncpy_from_user(dst, src, count, max); + long retval; + + user_access_begin(); + retval = do_strncpy_from_user(dst, src, count, max); + user_access_end(); + return retval; } return -EFAULT; } diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index 3a5f2b366d84..8e105ed4df12 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -45,8 +45,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, src -= align; max += align; - if (unlikely(__get_user(c,(unsigned long __user *)src))) - return 0; + unsafe_get_user(c, (unsigned long __user *)src, efault); c |= aligned_byte_mask(align); for (;;) { @@ -61,8 +60,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, if (unlikely(max <= sizeof(unsigned long))) break; max -= sizeof(unsigned long); - if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) - return 0; + unsafe_get_user(c, (unsigned long __user *)(src+res), efault); } res -= align; @@ -77,6 +75,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, * Nope: we hit the address space limit, and we still had more * characters the caller would have wanted. That's 0. */ +efault: return 0; } @@ -112,7 +111,12 @@ long strnlen_user(const char __user *str, long count) src_addr = (unsigned long)str; if (likely(src_addr < max_addr)) { unsigned long max = max_addr - src_addr; - return do_strnlen_user(str, count, max); + long retval; + + user_access_begin(); + retval = do_strnlen_user(str, count, max); + user_access_end(); + return retval; } return 0; } @@ -141,7 +145,12 @@ long strlen_user(const char __user *str) src_addr = (unsigned long)str; if (likely(src_addr < max_addr)) { unsigned long max = max_addr - src_addr; - return do_strnlen_user(str, ~0ul, max); + long retval; + + user_access_begin(); + retval = do_strnlen_user(str, ~0ul, max); + user_access_end(); + return retval; } return 0; } diff --git a/mm/Makefile b/mm/Makefile index 2ed43191fc3b..8b532c94008f 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -5,6 +5,9 @@ KASAN_SANITIZE_slab_common.o := n KASAN_SANITIZE_slub.o := n +# Since __builtin_frame_address does work as used, disable the warning. +CFLAGS_usercopy.o += $(call cc-disable-warning, frame-address) + mmu-y := nommu.o mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ @@ -81,3 +84,4 @@ obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o obj-$(CONFIG_USERFAULTFD) += userfaultfd.o obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o +obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o diff --git a/mm/slab.c b/mm/slab.c index 4765c97ce690..24a615d42d74 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4228,6 +4228,36 @@ static int __init slab_proc_init(void) module_init(slab_proc_init); #endif +#ifdef CONFIG_HARDENED_USERCOPY +/* + * Rejects objects that are incorrectly sized. + * + * Returns NULL if check passes, otherwise const char * to name of cache + * to indicate an error. + */ +const char *__check_heap_object(const void *ptr, unsigned long n, + struct page *page) +{ + struct kmem_cache *cachep; + unsigned int objnr; + unsigned long offset; + + /* Find and validate object. */ + cachep = page->slab_cache; + objnr = obj_to_index(cachep, page, (void *)ptr); + BUG_ON(objnr >= cachep->num); + + /* Find offset within object. */ + offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep); + + /* Allow address range falling entirely within object size. */ + if (offset <= cachep->object_size && n <= cachep->object_size - offset) + return NULL; + + return cachep->name; +} +#endif /* CONFIG_HARDENED_USERCOPY */ + /** * ksize - get the actual amount of memory allocated for a given object * @objp: Pointer to the object diff --git a/mm/slub.c b/mm/slub.c index 65d5f92d51d2..41f7cae64a49 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -124,6 +124,14 @@ static inline int kmem_cache_debug(struct kmem_cache *s) #endif } +static inline void *fixup_red_left(struct kmem_cache *s, void *p) +{ + if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) + p += s->red_left_pad; + + return p; +} + static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) { #ifdef CONFIG_SLUB_CPU_PARTIAL @@ -224,24 +232,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) * Core slab cache functions *******************************************************************/ -/* Verify that a pointer has an address that is valid within a slab page */ -static inline int check_valid_pointer(struct kmem_cache *s, - struct page *page, const void *object) -{ - void *base; - - if (!object) - return 1; - - base = page_address(page); - if (object < base || object >= base + page->objects * s->size || - (object - base) % s->size) { - return 0; - } - - return 1; -} - static inline void *get_freepointer(struct kmem_cache *s, void *object) { return *(void **)(object + s->offset); @@ -271,12 +261,14 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) /* Loop over all objects in a slab */ #define for_each_object(__p, __s, __addr, __objects) \ - for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ - __p += (__s)->size) + for (__p = fixup_red_left(__s, __addr); \ + __p < (__addr) + (__objects) * (__s)->size; \ + __p += (__s)->size) #define for_each_object_idx(__p, __idx, __s, __addr, __objects) \ - for (__p = (__addr), __idx = 1; __idx <= __objects;\ - __p += (__s)->size, __idx++) + for (__p = fixup_red_left(__s, __addr), __idx = 1; \ + __idx <= __objects; \ + __p += (__s)->size, __idx++) /* Determine object index from a given position */ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) @@ -456,6 +448,22 @@ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) set_bit(slab_index(p, s, addr), map); } +static inline int size_from_object(struct kmem_cache *s) +{ + if (s->flags & SLAB_RED_ZONE) + return s->size - s->red_left_pad; + + return s->size; +} + +static inline void *restore_red_left(struct kmem_cache *s, void *p) +{ + if (s->flags & SLAB_RED_ZONE) + p -= s->red_left_pad; + + return p; +} + /* * Debug settings: */ @@ -489,6 +497,26 @@ static inline void metadata_access_disable(void) /* * Object debugging */ + +/* Verify that a pointer has an address that is valid within a slab page */ +static inline int check_valid_pointer(struct kmem_cache *s, + struct page *page, void *object) +{ + void *base; + + if (!object) + return 1; + + base = page_address(page); + object = restore_red_left(s, object); + if (object < base || object >= base + page->objects * s->size || + (object - base) % s->size) { + return 0; + } + + return 1; +} + static void print_section(char *text, u8 *addr, unsigned int length) { metadata_access_enable(); @@ -628,7 +656,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", p, p - addr, get_freepointer(s, p)); - if (p > addr + 16) + if (s->flags & SLAB_RED_ZONE) + print_section("Redzone ", p - s->red_left_pad, s->red_left_pad); + else if (p > addr + 16) print_section("Bytes b4 ", p - 16, 16); print_section("Object ", p, min_t(unsigned long, s->object_size, @@ -645,9 +675,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) if (s->flags & SLAB_STORE_USER) off += 2 * sizeof(struct track); - if (off != s->size) + if (off != size_from_object(s)) /* Beginning of the filler is the free pointer */ - print_section("Padding ", p + off, s->size - off); + print_section("Padding ", p + off, size_from_object(s) - off); dump_stack(); } @@ -677,6 +707,9 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) { u8 *p = object; + if (s->flags & SLAB_RED_ZONE) + memset(p - s->red_left_pad, val, s->red_left_pad); + if (s->flags & __OBJECT_POISON) { memset(p, POISON_FREE, s->object_size - 1); p[s->object_size - 1] = POISON_END; @@ -769,11 +802,11 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) /* We also have user information there */ off += 2 * sizeof(struct track); - if (s->size == off) + if (size_from_object(s) == off) return 1; return check_bytes_and_report(s, page, p, "Object padding", - p + off, POISON_INUSE, s->size - off); + p + off, POISON_INUSE, size_from_object(s) - off); } /* Check the pad bytes at the end of a slab page */ @@ -817,6 +850,10 @@ static int check_object(struct kmem_cache *s, struct page *page, u8 *endobject = object + s->object_size; if (s->flags & SLAB_RED_ZONE) { + if (!check_bytes_and_report(s, page, object, "Redzone", + object - s->red_left_pad, val, s->red_left_pad)) + return 0; + if (!check_bytes_and_report(s, page, object, "Redzone", endobject, val, s->inuse - s->object_size)) return 0; @@ -1468,7 +1505,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) set_freepointer(s, p, NULL); } - page->freelist = start; + page->freelist = fixup_red_left(s, start); page->inuse = page->objects; page->frozen = 1; @@ -3283,7 +3320,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) */ size += 2 * sizeof(struct track); - if (flags & SLAB_RED_ZONE) + if (flags & SLAB_RED_ZONE) { /* * Add some empty padding so that we can catch * overwrites from earlier objects rather than let @@ -3292,6 +3329,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * of the object. */ size += sizeof(void *); + + s->red_left_pad = sizeof(void *); + s->red_left_pad = ALIGN(s->red_left_pad, s->align); + size += s->red_left_pad; + } #endif /* @@ -3585,6 +3627,46 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) EXPORT_SYMBOL(__kmalloc_node); #endif +#ifdef CONFIG_HARDENED_USERCOPY +/* + * Rejects objects that are incorrectly sized. + * + * Returns NULL if check passes, otherwise const char * to name of cache + * to indicate an error. + */ +const char *__check_heap_object(const void *ptr, unsigned long n, + struct page *page) +{ + struct kmem_cache *s; + unsigned long offset; + size_t object_size; + + /* Find object and usable object size. */ + s = page->slab_cache; + object_size = slab_ksize(s); + + /* Reject impossible pointers. */ + if (ptr < page_address(page)) + return s->name; + + /* Find offset within object. */ + offset = (ptr - page_address(page)) % s->size; + + /* Adjust for redzone and reject if within the redzone. */ + if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) { + if (offset < s->red_left_pad) + return s->name; + offset -= s->red_left_pad; + } + + /* Allow address range falling entirely within object size. */ + if (offset <= object_size && n <= object_size - offset) + return NULL; + + return s->name; +} +#endif /* CONFIG_HARDENED_USERCOPY */ + static size_t __ksize(const void *object) { struct page *page; diff --git a/mm/usercopy.c b/mm/usercopy.c new file mode 100644 index 000000000000..f78015e8b1e5 --- /dev/null +++ b/mm/usercopy.c @@ -0,0 +1,269 @@ +/* + * This implements the various checks for CONFIG_HARDENED_USERCOPY*, + * which are designed to protect kernel memory from needless exposure + * and overwrite under many unintended conditions. This code is based + * on PAX_USERCOPY, which is: + * + * Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source + * Security Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +enum { + BAD_STACK = -1, + NOT_STACK = 0, + GOOD_FRAME, + GOOD_STACK, +}; + +/* + * Checks if a given pointer and length is contained by the current + * stack frame (if possible). + * + * Returns: + * NOT_STACK: not at all on the stack + * GOOD_FRAME: fully within a valid stack frame + * GOOD_STACK: fully on the stack (when can't do frame-checking) + * BAD_STACK: error condition (invalid stack position or bad stack frame) + */ +static noinline int check_stack_object(const void *obj, unsigned long len) +{ + const void * const stack = task_stack_page(current); + const void * const stackend = stack + THREAD_SIZE; + int ret; + + /* Object is not on the stack at all. */ + if (obj + len <= stack || stackend <= obj) + return NOT_STACK; + + /* + * Reject: object partially overlaps the stack (passing the + * the check above means at least one end is within the stack, + * so if this check fails, the other end is outside the stack). + */ + if (obj < stack || stackend < obj + len) + return BAD_STACK; + + /* Check if object is safely within a valid frame. */ + ret = arch_within_stack_frames(stack, stackend, obj, len); + if (ret) + return ret; + + return GOOD_STACK; +} + +static void report_usercopy(const void *ptr, unsigned long len, + bool to_user, const char *type) +{ + pr_emerg("kernel memory %s attempt detected %s %p (%s) (%lu bytes)\n", + to_user ? "exposure" : "overwrite", + to_user ? "from" : "to", ptr, type ? : "unknown", len); + /* + * For greater effect, it would be nice to do do_group_exit(), + * but BUG() actually hooks all the lock-breaking and per-arch + * Oops code, so that is used here instead. + */ + BUG(); +} + +/* Returns true if any portion of [ptr,ptr+n) over laps with [low,high). */ +static bool overlaps(const void *ptr, unsigned long n, unsigned long low, + unsigned long high) +{ + unsigned long check_low = (uintptr_t)ptr; + unsigned long check_high = check_low + n; + + /* Does not overlap if entirely above or entirely below. */ + if (check_low >= high || check_high <= low) + return false; + + return true; +} + +/* Is this address range in the kernel text area? */ +static inline const char *check_kernel_text_object(const void *ptr, + unsigned long n) +{ + unsigned long textlow = (unsigned long)_stext; + unsigned long texthigh = (unsigned long)_etext; + unsigned long textlow_linear, texthigh_linear; + + if (overlaps(ptr, n, textlow, texthigh)) + return ""; + + /* + * Some architectures have virtual memory mappings with a secondary + * mapping of the kernel text, i.e. there is more than one virtual + * kernel address that points to the kernel image. It is usually + * when there is a separate linear physical memory mapping, in that + * __pa() is not just the reverse of __va(). This can be detected + * and checked: + */ + textlow_linear = (unsigned long)__va(__pa(textlow)); + /* No different mapping: we're done. */ + if (textlow_linear == textlow) + return NULL; + + /* Check the secondary mapping... */ + texthigh_linear = (unsigned long)__va(__pa(texthigh)); + if (overlaps(ptr, n, textlow_linear, texthigh_linear)) + return ""; + + return NULL; +} + +static inline const char *check_bogus_address(const void *ptr, unsigned long n) +{ + /* Reject if object wraps past end of memory. */ + if ((unsigned long)ptr + n < (unsigned long)ptr) + return ""; + + /* Reject if NULL or ZERO-allocation. */ + if (ZERO_OR_NULL_PTR(ptr)) + return ""; + + return NULL; +} + +static inline const char *check_heap_object(const void *ptr, unsigned long n, + bool to_user) +{ + struct page *page, *endpage; + const void *end = ptr + n - 1; + bool is_reserved, is_cma; + + /* + * Some architectures (arm64) return true for virt_addr_valid() on + * vmalloced addresses. Work around this by checking for vmalloc + * first. + */ + if (is_vmalloc_addr(ptr)) + return NULL; + + if (!virt_addr_valid(ptr)) + return NULL; + + page = virt_to_head_page(ptr); + + /* Check slab allocator for flags and size. */ + if (PageSlab(page)) + return __check_heap_object(ptr, n, page); + + /* + * Sometimes the kernel data regions are not marked Reserved (see + * check below). And sometimes [_sdata,_edata) does not cover + * rodata and/or bss, so check each range explicitly. + */ + + /* Allow reads of kernel rodata region (if not marked as Reserved). */ + if (ptr >= (const void *)__start_rodata && + end <= (const void *)__end_rodata) { + if (!to_user) + return ""; + return NULL; + } + + /* Allow kernel data region (if not marked as Reserved). */ + if (ptr >= (const void *)_sdata && end <= (const void *)_edata) + return NULL; + + /* Allow kernel bss region (if not marked as Reserved). */ + if (ptr >= (const void *)__bss_start && + end <= (const void *)__bss_stop) + return NULL; + + /* Is the object wholly within one base page? */ + if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) == + ((unsigned long)end & (unsigned long)PAGE_MASK))) + return NULL; + + /* Allow if start and end are inside the same compound page. */ + endpage = virt_to_head_page(end); + if (likely(endpage == page)) + return NULL; + + /* + * Reject if range is entirely either Reserved (i.e. special or + * device memory), or CMA. Otherwise, reject since the object spans + * several independently allocated pages. + */ + is_reserved = PageReserved(page); + is_cma = is_migrate_cma_page(page); + if (!is_reserved && !is_cma) + goto reject; + + for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) { + page = virt_to_head_page(ptr); + if (is_reserved && !PageReserved(page)) + goto reject; + if (is_cma && !is_migrate_cma_page(page)) + goto reject; + } + + return NULL; + +reject: + return ""; +} + +/* + * Validates that the given object is: + * - not bogus address + * - known-safe heap or stack object + * - not in kernel text + */ +void __check_object_size(const void *ptr, unsigned long n, bool to_user) +{ + const char *err; + + /* Skip all tests if size is zero. */ + if (!n) + return; + + /* Check for invalid addresses. */ + err = check_bogus_address(ptr, n); + if (err) + goto report; + + /* Check for bad heap object. */ + err = check_heap_object(ptr, n, to_user); + if (err) + goto report; + + /* Check for bad stack object. */ + switch (check_stack_object(ptr, n)) { + case NOT_STACK: + /* Object is not touching the current process stack. */ + break; + case GOOD_FRAME: + case GOOD_STACK: + /* + * Object is either in the correct frame (when it + * is possible to check) or just generally on the + * process stack (when frame checking not available). + */ + return; + default: + err = ""; + goto report; + } + + /* Check for object in kernel to avoid text exposure. */ + err = check_kernel_text_object(ptr, n); + if (!err) + return; + +report: + report_usercopy(ptr, n, to_user, err); +} +EXPORT_SYMBOL(__check_object_size); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 77afe913d03d..9adedba78eea 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -326,10 +326,12 @@ replay: nlh = nlmsg_hdr(skb); err = 0; - if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) || - skb->len < nlh->nlmsg_len) { - err = -EINVAL; - goto ack; + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < nlh->nlmsg_len || + nlmsg_len(nlh) < sizeof(struct nfgenmsg)) { + nfnl_err_reset(&err_list); + status |= NFNL_BATCH_FAILURE; + goto done; } /* Only requests are handled by the kernel */ diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index e2e7d54f9bb1..3bf0c59dab2f 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1946,7 +1946,7 @@ static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v) ); f_count = atomic_long_read( &sock_tag_entry->socket->file->f_count); - seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u " + seq_printf(m, "sock=%pK tag=0x%llx (uid=%u) pid=%u " "f_count=%lu\n", sock_tag_entry->sk, sock_tag_entry->tag, uid, @@ -2548,8 +2548,7 @@ static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry, uid_t stat_uid = get_uid_from_tag(tag); struct proc_print_info *ppi = m->private; /* Detailed tags are not available to everybody */ - if (get_atag_from_tag(tag) && !can_read_other_uid_stats( - make_kuid(&init_user_ns,stat_uid))) { + if (!can_read_other_uid_stats(make_kuid(&init_user_ns,stat_uid))) { CT_DEBUG("qtaguid: stats line: " "%s 0x%llx %u: insufficient priv " "from pid=%u tgid=%u uid=%u stats.gid=%u\n", diff --git a/security/Kconfig b/security/Kconfig index 30a2603e8c85..2b42c225de28 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -127,6 +127,35 @@ config LSM_MMAP_MIN_ADDR this low address space will need the permission specific to the systems running LSM. +config HAVE_HARDENED_USERCOPY_ALLOCATOR + bool + help + The heap allocator implements __check_heap_object() for + validating memory ranges against heap object sizes in + support of CONFIG_HARDENED_USERCOPY. + +config HAVE_ARCH_HARDENED_USERCOPY + bool + help + The architecture supports CONFIG_HARDENED_USERCOPY by + calling check_object_size() just before performing the + userspace copies in the low level implementation of + copy_to_user() and copy_from_user(). + +config HARDENED_USERCOPY + bool "Harden memory copies between kernel and userspace" + depends on HAVE_ARCH_HARDENED_USERCOPY + depends on HAVE_HARDENED_USERCOPY_ALLOCATOR + select BUG + help + This option checks for obviously wrong memory regions when + copying memory to/from the kernel (via copy_to_user() and + copy_from_user() functions) by rejecting memory ranges that + are larger than the specified heap object, span multiple + separately allocates pages, are not on the process stack, + or are part of the kernel text. This kills entire classes + of heap overflow exploits and similar kernel memory exposures. + source security/selinux/Kconfig source security/smack/Kconfig source security/tomoyo/Kconfig