Merge "Merge android-4.4.110 (5cc8c2e) into msm-4.4" · 54b0b96ecd - evie/android_kernel_oneplus_msm8998 - Gay Catgirls Forgejo: gay catgirls having sex

evie/android_kernel_oneplus_msm8998

Merge "Merge android-4.4.110 (`5cc8c2e`) into msm-4.4"

This commit is contained in:

Linux Build Service Account

2018-01-19 09:30:28 -08:00

• committed by

Gerrit - the friendly Code Review server

parent 188c08e1bf de3efc405c

commit 54b0b96ecd

486 changed files with 6447 additions and 2136 deletions

5

Documentation/kasan.txt

View file

 @ -12,8 +12,7 @@ KASAN uses compile-time instrumentation for checking every memory access,
 therefore you will need a GCC version 4.9.2 or later. GCC 5.0 or later is
 required for detection of out-of-bounds accesses to stack or global variables.
 Currently KASAN is supported only for x86_64 architecture and requires the
 kernel to be built with the SLUB allocator.
 Currently KASAN is supported only for x86_64 architecture.
 1. Usage
 ========
 @ -27,7 +26,7 @@ inline are compiler instrumentation types. The former produces smaller binary
 the latter is 1.1 - 2 times faster. Inline instrumentation requires a GCC
 version 5.0 or later.
 Currently KASAN works only with the SLUB memory allocator.
 KASAN works with both SLUB and SLAB memory allocators.
 For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
 To disable instrumentation for specific files or directories, add a line

111

Documentation/kcov.txt Normal file

View file

 @ -0,0 +1,111 @@
 kcov: code coverage for fuzzing
 ===============================
 kcov exposes kernel code coverage information in a form suitable for coverage-
 guided fuzzing (randomized testing). Coverage data of a running kernel is
 exported via the "kcov" debugfs file. Coverage collection is enabled on a task
 basis, and thus it can capture precise coverage of a single system call.
 Note that kcov does not aim to collect as much coverage as possible. It aims
 to collect more or less stable coverage that is a function of syscall inputs.
 To achieve this goal it does not collect coverage in soft/hard interrupts
 and instrumentation of some inherently non-deterministic parts of the kernel is
 disabled (e.g. scheduler, locking).
 Usage:
 ======
 Configure kernel with:
         CONFIG_KCOV=y
 CONFIG_KCOV requires gcc built on revision 231296 or later.
 Profiling data will only become accessible once debugfs has been mounted:
         mount -t debugfs none /sys/kernel/debug
 The following program demonstrates kcov usage from within a test program:
 #include <stdio.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <unistd.h>
 #include <fcntl.h>
 #define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
 #define KCOV_ENABLE			_IO('c', 100)
 #define KCOV_DISABLE			_IO('c', 101)
 #define COVER_SIZE			(64<<10)
 int main(int argc, char **argv)
 {
 	int fd;
 	unsigned long *cover, n, i;
 	/* A single file descriptor allows coverage collection on a single
 	 * thread.
 	 */
 	fd = open("/sys/kernel/debug/kcov", O_RDWR);
 	if (fd == -1)
 		perror("open"), exit(1);
 	/* Setup trace mode and trace size. */
 	if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE))
 		perror("ioctl"), exit(1);
 	/* Mmap buffer shared between kernel- and user-space. */
 	cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long),
 				     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 	if ((void*)cover == MAP_FAILED)
 		perror("mmap"), exit(1);
 	/* Enable coverage collection on the current thread. */
 	if (ioctl(fd, KCOV_ENABLE, 0))
 		perror("ioctl"), exit(1);
 	/* Reset coverage from the tail of the ioctl() call. */
 	__atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED);
 	/* That's the target syscall. */
 	read(-1, NULL, 0);
 	/* Read number of PCs collected. */
 	n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
 	for (i = 0; i < n; i++)
 		printf("0x%lx\n", cover[i + 1]);
 	/* Disable coverage collection for the current thread. After this call
 	 * coverage can be enabled for a different thread.
 	 */
 	if (ioctl(fd, KCOV_DISABLE, 0))
 		perror("ioctl"), exit(1);
 	/* Free resources. */
 	if (munmap(cover, COVER_SIZE * sizeof(unsigned long)))
 		perror("munmap"), exit(1);
 	if (close(fd))
 		perror("close"), exit(1);
 	return 0;
 }
 After piping through addr2line, the output of the program looks as follows:
 SyS_read
 fs/read_write.c:562
 __fdget_pos
 fs/file.c:774
 __fget_light
 fs/file.c:746
 __fget_light
 fs/file.c:750
 __fget_light
 fs/file.c:760
 __fdget_pos
 fs/file.c:784
 SyS_read
 fs/read_write.c:562
 If a program needs to collect coverage from several threads (independently),
 it needs to open /sys/kernel/debug/kcov in each thread separately.
 The interface is fine-grained to allow efficient forking of test processes.
 That is, a parent process opens /sys/kernel/debug/kcov, enables trace mode,
 mmaps coverage buffer and then forks child processes in a loop. Child processes
 only need to enable coverage (disable happens automatically on thread end).

12

Documentation/kernel-parameters.txt

View file

 @ -2535,8 +2535,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	nointroute	[IA-64]
 	noinvpcid	[X86] Disable the INVPCID cpu feature.
 	nojitter	[IA-64] Disables jitter checking for ITC timers.
 	nopti		[X86-64] Disable KAISER isolation of kernel from user.
 	no-kvmclock	[X86,KVM] Disable paravirtualized KVM clock driver
 	no-kvmapf	[X86,KVM] Disable paravirtualized asynchronous page
 @ -2569,6 +2573,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	nopat		[X86] Disable PAT (page attribute table extension of
 			pagetables) support.
 	nopcid		[X86-64] Disable the PCID cpu feature.
 	norandmaps	Don't use address space randomization.  Equivalent to
 			echo 0 > /proc/sys/kernel/randomize_va_space
 @ -3071,6 +3077,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	pt.		[PARIDE]
 			See Documentation/blockdev/paride.txt.
 	pti=		[X86_64]
 			Control KAISER user/kernel address space isolation:
 			on - enable
 			off - disable
 			auto - default setting
 	pty.legacy_count=
 			[KNL] Number of legacy pty's. Overwrites compiled-in
 			default number.

14

MAINTAINERS

View file

 @ -6002,6 +6002,20 @@ S:	Maintained
 F:	Documentation/hwmon/k8temp
 F:	drivers/hwmon/k8temp.c
 KASAN
 M:	Andrey Ryabinin <aryabinin@virtuozzo.com>
 R:	Alexander Potapenko <glider@google.com>
 R:	Dmitry Vyukov <dvyukov@google.com>
 L:	kasan-dev@googlegroups.com
 S:	Maintained
 F:	arch/*/include/asm/kasan.h
 F:	arch/*/mm/kasan_init*
 F:	Documentation/kasan.txt
 F:	include/linux/kasan*.h
 F:	lib/test_kasan.c
 F:	mm/kasan/
 F:	scripts/Makefile.kasan
 KCONFIG
 M:	"Yann E. MORIN" <yann.morin.1998@free.fr>
 L:	linux-kbuild@vger.kernel.org

									
										16

Makefile
									
										View file
										
				@ -1,6 +1,6 @@

				VERSION = 4

				PATCHLEVEL = 4

				SUBLEVEL = 105

				SUBLEVEL = 110

				EXTRAVERSION =

				NAME = Blurry Fish Butt

				@ -373,6 +373,7 @@ LDFLAGS_MODULE  =

				CFLAGS_KERNEL	=

				AFLAGS_KERNEL	=

				CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage -fno-tree-loop-im

				CFLAGS_KCOV	= -fsanitize-coverage=trace-pc

				# Use USERINCLUDE when you must reference the UAPI directories only.

				@ -420,7 +421,7 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE

				export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS

				export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS

				export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN CFLAGS_UBSAN

				export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KCOV CFLAGS_KASAN CFLAGS_UBSAN

				export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE

				export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE

				export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL

				@ -697,6 +698,14 @@ endif

				endif

				KBUILD_CFLAGS += $(stackp-flag)

				ifdef CONFIG_KCOV

				  ifeq ($(call cc-option, $(CFLAGS_KCOV)),)

				    $(warning Cannot use CONFIG_KCOV: \

				             -fsanitize-coverage=trace-pc is not supported by compiler)

				    CFLAGS_KCOV =

				  endif

				endif

				ifeq ($(cc-name),clang)

				ifneq ($(CROSS_COMPILE),)

				CLANG_TRIPLE    ?= $(CROSS_COMPILE)

				@ -799,6 +808,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)

				# disable invalid "can't wrap" optimizations for signed / pointers

				KBUILD_CFLAGS	+= $(call cc-option,-fno-strict-overflow)

				# Make sure -fstack-check isn't enabled (like gentoo apparently did)

				KBUILD_CFLAGS  += $(call cc-option,-fno-stack-check,)

				# conserve stack if available

				KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)

									
										1

arch/alpha/include/asm/mmu_context.h
									
										View file
										
				@ -7,6 +7,7 @@

				 * Copyright (C) 1996, Linus Torvalds

				 */

				#include <linux/sched.h>

				#include <asm/machvec.h>

				#include <asm/compiler.h>

				#include <asm-generic/mm_hooks.h>

1

arch/arm/boot/dts/am335x-evmsk.dts

View file

 @ -668,6 +668,7 @@
 	ti,non-removable;
 	bus-width = <4>;
 	cap-power-off-card;
 	keep-power-in-suspend;
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc2_pins>;

2

arch/arm/boot/dts/dra7.dtsi

View file

 @ -227,6 +227,7 @@
 				device_type = "pci";
 				ranges = <0x81000000 0 0          0x03000 0 0x00010000
 0x82000000 0 0x20013000 0x13000 0 0xffed000>;
 				bus-range = <0x00 0xff>;
 				#interrupt-cells = <1>;
 				num-lanes = <1>;
 				ti,hwmods = "pcie1";
 @ -262,6 +263,7 @@
 				device_type = "pci";
 				ranges = <0x81000000 0 0          0x03000 0 0x00010000
 0x82000000 0 0x30013000 0x13000 0 0xffed000>;
 				bus-range = <0x00 0xff>;
 				#interrupt-cells = <1>;
 				num-lanes = <1>;
 				ti,hwmods = "pcie2";

									
										18

arch/arm/include/asm/assembler.h
									
										View file
										
				@ -512,4 +512,22 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)

				#endif

					.endm

					.macro	bug, msg, line

				#ifdef CONFIG_THUMB2_KERNEL

				1:	.inst	0xde02

				#else

				1:	.inst	0xe7f001f2

				#endif

				#ifdef CONFIG_DEBUG_BUGVERBOSE

					.pushsection .rodata.str, "aMS", %progbits, 1

				2:	.asciz	"\msg"

					.popsection

					.pushsection __bug_table, "aw"

					.align	2

					.word	1b, 2b

					.hword	\line

					.popsection

				#endif

					.endm

				#endif /* __ASM_ASSEMBLER_H__ */

									
										2

arch/arm/include/asm/exception.h
									
										View file
										
				@ -7,7 +7,7 @@

				#ifndef __ASM_ARM_EXCEPTION_H

				#define __ASM_ARM_EXCEPTION_H

				#include <linux/ftrace.h>

				#include <linux/interrupt.h>

				#define __exception	__attribute__((section(".exception.text")))

				#ifdef CONFIG_FUNCTION_GRAPH_TRACER

									
										4

arch/arm/include/asm/kvm_arm.h
									
										View file
										
				@ -161,8 +161,7 @@

				#else

				#define VTTBR_X		(5 - KVM_T0SZ)

				#endif

				#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)

				#define VTTBR_BADDR_MASK  (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)

				#define VTTBR_BADDR_MASK  (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X)

				#define VTTBR_VMID_SHIFT  _AC(48, ULL)

				#define VTTBR_VMID_MASK(size)	(_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)

				@ -209,6 +208,7 @@

				#define HSR_EC_IABT_HYP	(0x21)

				#define HSR_EC_DABT	(0x24)

				#define HSR_EC_DABT_HYP	(0x25)

				#define HSR_EC_MAX	(0x3f)

				#define HSR_WFI_IS_WFE		(_AC(1, UL) << 0)

									
										2

arch/arm/include/asm/mmu_context.h
									
										View file
										
				@ -61,6 +61,7 @@ static inline void check_and_switch_context(struct mm_struct *mm,

						cpu_switch_mm(mm->pgd, mm);

				}

				#ifndef MODULE

				#define finish_arch_post_lock_switch \

					finish_arch_post_lock_switch

				static inline void finish_arch_post_lock_switch(void)

				@ -82,6 +83,7 @@ static inline void finish_arch_post_lock_switch(void)

						preempt_enable_no_resched();

					}

				}

				#endif /* !MODULE */

				#endif	/* CONFIG_MMU */

									
										7

arch/arm/include/asm/traps.h
									
										View file
										
				@ -18,7 +18,6 @@ struct undef_hook {

				void register_undef_hook(struct undef_hook *hook);

				void unregister_undef_hook(struct undef_hook *hook);

				#ifdef CONFIG_FUNCTION_GRAPH_TRACER

				static inline int __in_irqentry_text(unsigned long ptr)

				{

					extern char __irqentry_text_start[];

				@ -27,12 +26,6 @@ static inline int __in_irqentry_text(unsigned long ptr)

					return ptr >= (unsigned long)&__irqentry_text_start &&

					       ptr < (unsigned long)&__irqentry_text_end;

				}

				#else

				static inline int __in_irqentry_text(unsigned long ptr)

				{

					return 0;

				}

				#endif

				static inline int in_exception_text(unsigned long ptr)

				{

									
										6

arch/arm/kernel/entry-header.S
									
										View file
										
				@ -295,6 +295,8 @@

					mov	r2, sp

					ldr	r1, [r2, #\offset + S_PSR]	@ get calling cpsr

					ldr	lr, [r2, #\offset + S_PC]!	@ get pc

					tst	r1, #PSR_I_BIT | 0x0f

					bne	1f

					msr	spsr_cxsf, r1			@ save in spsr_svc

				#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)

					@ We must avoid clrex due to Cortex-A15 erratum #830321

				@ -309,6 +311,7 @@

										@ after ldm {}^

					add	sp, sp, #\offset + S_FRAME_SIZE

					movs	pc, lr				@ return & move spsr_svc into cpsr

				1:	bug	"Returning to usermode but unexpected PSR bits set?", \@

				#elif defined(CONFIG_CPU_V7M)

					@ V7M restore.

					@ Note that we don't need to do clrex here as clearing the local

				@ -324,6 +327,8 @@

					ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr

					ldr	lr, [sp, #\offset + S_PC]	@ get pc

					add	sp, sp, #\offset + S_SP

					tst	r1, #PSR_I_BIT | 0x0f

					bne	1f

					msr	spsr_cxsf, r1			@ save in spsr_svc

					@ We must avoid clrex due to Cortex-A15 erratum #830321

				@ -336,6 +341,7 @@

					.endif

					add	sp, sp, #S_FRAME_SIZE - S_SP

					movs	pc, lr				@ return & move spsr_svc into cpsr

				1:	bug	"Returning to usermode but unexpected PSR bits set?", \@

				#endif	/* !CONFIG_THUMB2_KERNEL */

					.endm

									
										1

arch/arm/kernel/vmlinux.lds.S
									
										View file
										
				@ -105,6 +105,7 @@ SECTIONS

							*(.exception.text)

							__exception_text_end = .;

							IRQENTRY_TEXT

							SOFTIRQENTRY_TEXT

							TEXT_TEXT

							SCHED_TEXT

							LOCK_TEXT

									
										19

arch/arm/kvm/handle_exit.c
									
										View file
										
				@ -100,7 +100,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)

					return 1;

				}

				static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)

				{

					u32 hsr = kvm_vcpu_get_hsr(vcpu);

					kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n",

						      hsr);

					kvm_inject_undefined(vcpu);

					return 1;

				}

				static exit_handle_fn arm_exit_handlers[] = {

					[0 ... HSR_EC_MAX]	= kvm_handle_unknown_ec,

					[HSR_EC_WFI]		= kvm_handle_wfx,

					[HSR_EC_CP15_32]	= kvm_handle_cp15_32,

					[HSR_EC_CP15_64]	= kvm_handle_cp15_64,

				@ -122,13 +134,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)

				{

					u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);

					if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||

					    !arm_exit_handlers[hsr_ec]) {

						kvm_err("Unknown exception class: hsr: %#08x\n",

							(unsigned int)kvm_vcpu_get_hsr(vcpu));

						BUG();

					}

					return arm_exit_handlers[hsr_ec];

				}

									
										10

arch/arm/mach-omap2/gpmc-onenand.c
									
										View file
										
				@ -367,7 +367,7 @@ static int gpmc_onenand_setup(void __iomem *onenand_base, int *freq_ptr)

					return ret;

				}

				void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)

				int gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)

				{

					int err;

					struct device *dev = &gpmc_onenand_device.dev;

				@ -393,15 +393,17 @@ void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)

					if (err < 0) {

						dev_err(dev, "Cannot request GPMC CS %d, error %d\n",

							gpmc_onenand_data->cs, err);

						return;

						return err;

					}

					gpmc_onenand_resource.end = gpmc_onenand_resource.start +

											ONENAND_IO_SIZE - 1;

					if (platform_device_register(&gpmc_onenand_device) < 0) {

					err = platform_device_register(&gpmc_onenand_device);

					if (err) {

						dev_err(dev, "Unable to register OneNAND device\n");

						gpmc_cs_free(gpmc_onenand_data->cs);

						return;

					}

					return err;

				}

									
										25

arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
									
										View file
										
				@ -3885,16 +3885,20 @@ static struct omap_hwmod_ocp_if *omap3xxx_dss_hwmod_ocp_ifs[] __initdata = {

				 * Return: 0 if device named @dev_name is not likely to be accessible,

				 * or 1 if it is likely to be accessible.

				 */

				static int __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus,

										       const char *dev_name)

				static bool __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus,

											const char *dev_name)

				{

					struct device_node *node;

					bool available;

					if (!bus)

						return (omap_type() == OMAP2_DEVICE_TYPE_GP) ? 1 : 0;

						return omap_type() == OMAP2_DEVICE_TYPE_GP;

					if (of_device_is_available(of_find_node_by_name(bus, dev_name)))

						return 1;

					node = of_get_child_by_name(bus, dev_name);

					available = of_device_is_available(node);

					of_node_put(node);

					return 0;

					return available;

				}

				int __init omap3xxx_hwmod_init(void)

				@ -3963,15 +3967,20 @@ int __init omap3xxx_hwmod_init(void)

					if (h_sham && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "sham")) {

						r = omap_hwmod_register_links(h_sham);

						if (r < 0)

						if (r < 0) {

							of_node_put(bus);

							return r;

						}

					}

					if (h_aes && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "aes")) {

						r = omap_hwmod_register_links(h_aes);

						if (r < 0)

						if (r < 0) {

							of_node_put(bus);

							return r;

						}

					}

					of_node_put(bus);

					/*

					 * Register hwmod links specific to certain ES levels of a

									
										20

arch/arm/mm/dma-mapping.c
									
										View file
										
				@ -918,13 +918,31 @@ static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_add

					__arm_dma_free(dev, size, cpu_addr, handle, attrs, true);

				}

				/*

				 * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems

				 * that the intention is to allow exporting memory allocated via the

				 * coherent DMA APIs through the dma_buf API, which only accepts a

				 * scattertable.  This presents a couple of problems:

				 * 1. Not all memory allocated via the coherent DMA APIs is backed by

				 *    a struct page

				 * 2. Passing coherent DMA memory into the streaming APIs is not allowed

				 *    as we will try to flush the memory through a different alias to that

				 *    actually being used (and the flushes are redundant.)

				 */

				int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,

						 void *cpu_addr, dma_addr_t handle, size_t size,

						 struct dma_attrs *attrs)

				{

					struct page *page = pfn_to_page(dma_to_pfn(dev, handle));

					unsigned long pfn = dma_to_pfn(dev, handle);

					struct page *page;

					int ret;

					/* If the PFN is not valid, we do not have a struct page */

					if (!pfn_valid(pfn))

						return -ENXIO;

					page = pfn_to_page(pfn);

					ret = sg_alloc_table(sgt, 1, GFP_KERNEL);

					if (unlikely(ret))

						return ret;

									
										36

arch/arm/probes/kprobes/core.c
									
										View file
										
				@ -433,6 +433,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)

					struct hlist_node *tmp;

					unsigned long flags, orig_ret_address = 0;

					unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;

					kprobe_opcode_t *correct_ret_addr = NULL;

					INIT_HLIST_HEAD(&empty_rp);

					kretprobe_hash_lock(current, &head, &flags);

				@ -455,15 +456,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)

							/* another task is sharing our hash bucket */

							continue;

						if (ri->rp && ri->rp->handler) {

							__this_cpu_write(current_kprobe, &ri->rp->kp);

							get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;

							ri->rp->handler(ri, regs);

							__this_cpu_write(current_kprobe, NULL);

						}

						orig_ret_address = (unsigned long)ri->ret_addr;

						recycle_rp_inst(ri, &empty_rp);

						if (orig_ret_address != trampoline_address)

							/*

				@ -475,6 +468,33 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)

					}

					kretprobe_assert(ri, orig_ret_address, trampoline_address);

					correct_ret_addr = ri->ret_addr;

					hlist_for_each_entry_safe(ri, tmp, head, hlist) {

						if (ri->task != current)

							/* another task is sharing our hash bucket */

							continue;

						orig_ret_address = (unsigned long)ri->ret_addr;

						if (ri->rp && ri->rp->handler) {

							__this_cpu_write(current_kprobe, &ri->rp->kp);

							get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;

							ri->ret_addr = correct_ret_addr;

							ri->rp->handler(ri, regs);

							__this_cpu_write(current_kprobe, NULL);

						}

						recycle_rp_inst(ri, &empty_rp);

						if (orig_ret_address != trampoline_address)

							/*

							 * This is the real return address. Any other

							 * instances associated with this task are for

							 * other calls deeper on the call stack

							 */

							break;

					}

					kretprobe_hash_unlock(current, &flags);

					hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {

									
										11

arch/arm/probes/kprobes/test-core.c
									
										View file
										
				@ -976,7 +976,10 @@ static void coverage_end(void)

				void __naked __kprobes_test_case_start(void)

				{

					__asm__ __volatile__ (

						"stmdb	sp!, {r4-r11}				\n\t"

						"mov	r2, sp					\n\t"

						"bic	r3, r2, #7				\n\t"

						"mov	sp, r3					\n\t"

						"stmdb	sp!, {r2-r11}				\n\t"

						"sub	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"

						"bic	r0, lr, #1  @ r0 = inline data		\n\t"

						"mov	r1, sp					\n\t"

				@ -996,7 +999,8 @@ void __naked __kprobes_test_case_end_32(void)

						"movne	pc, r0					\n\t"

						"mov	r0, r4					\n\t"

						"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"

						"ldmia	sp!, {r4-r11}				\n\t"

						"ldmia	sp!, {r2-r11}				\n\t"

						"mov	sp, r2					\n\t"

						"mov	pc, r0					\n\t"

					);

				}

				@ -1012,7 +1016,8 @@ void __naked __kprobes_test_case_end_16(void)

						"bxne	r0					\n\t"

						"mov	r0, r4					\n\t"

						"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"

						"ldmia	sp!, {r4-r11}				\n\t"

						"ldmia	sp!, {r2-r11}				\n\t"

						"mov	sp, r2					\n\t"

						"bx	r0					\n\t"

					);

				}

									
										2

arch/arm64/include/asm/exception.h
									
										View file
										
				@ -18,7 +18,7 @@

				#ifndef __ASM_EXCEPTION_H

				#define __ASM_EXCEPTION_H

				#include <linux/ftrace.h>

				#include <linux/interrupt.h>

				#define __exception	__attribute__((section(".exception.text")))

				#ifdef CONFIG_FUNCTION_GRAPH_TRACER

									
										3

arch/arm64/include/asm/kvm_arm.h
									
										View file
										
				@ -154,8 +154,7 @@

				#define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)

				#endif

				#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)

				#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)

				#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)

				#define VTTBR_VMID_SHIFT  (UL(48))

				#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)

									
										5

arch/arm64/include/asm/memory.h
									
										View file
										
				@ -148,6 +148,11 @@ extern u64			kimage_vaddr;

				/* the offset between the kernel virtual and physical mappings */

				extern u64			kimage_voffset;

				static inline unsigned long kaslr_offset(void)

				{

					return kimage_vaddr - KIMAGE_VADDR;

				}

				/*

				 * Allow all memory at the discovery stage. We will clip it later.

				 */

									
										7

arch/arm64/include/asm/traps.h
									
										View file
										
				@ -34,7 +34,6 @@ struct undef_hook {

				void register_undef_hook(struct undef_hook *hook);

				void unregister_undef_hook(struct undef_hook *hook);

				#ifdef CONFIG_FUNCTION_GRAPH_TRACER

				static inline int __in_irqentry_text(unsigned long ptr)

				{

					extern char __irqentry_text_start[];

				@ -43,12 +42,6 @@ static inline int __in_irqentry_text(unsigned long ptr)

					return ptr >= (unsigned long)&__irqentry_text_start &&

					       ptr < (unsigned long)&__irqentry_text_end;

				}

				#else

				static inline int __in_irqentry_text(unsigned long ptr)

				{

					return 0;

				}

				#endif

				static inline int in_exception_text(unsigned long ptr)

				{

									
										9

arch/arm64/kernel/process.c
									
										View file
										
				@ -326,6 +326,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,

					memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));

					/*

					 * In case p was allocated the same task_struct pointer as some

					 * other recently-exited task, make sure p is disassociated from

					 * any cpu that may have run that now-exited task recently.

					 * Otherwise we could erroneously skip reloading the FPSIMD

					 * registers for p.

					 */

					fpsimd_flush_task_state(p);

					if (likely(!(p->flags & PF_KTHREAD))) {

						*childregs = *current_pt_regs();

						childregs->regs[0] = 0;

									
										8

arch/arm64/kernel/setup.c
									
										View file
										
				@ -429,11 +429,11 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)

				static int dump_kernel_offset(struct notifier_block *self, unsigned long v,

							      void *p)

				{

					u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR;

					const unsigned long offset = kaslr_offset();

					if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) {

						pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n",

							 kaslr_offset, KIMAGE_VADDR);

					if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && offset > 0) {

						pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",

							 offset, KIMAGE_VADDR);

					} else {

						pr_emerg("Kernel Offset: disabled\n");

					}

									
										2

arch/arm64/kernel/sleep.S
									
										View file
										
				@ -131,7 +131,7 @@ ENTRY(_cpu_resume)

				#ifdef CONFIG_KASAN

					mov	x0, sp

					bl	kasan_unpoison_remaining_stack

					bl	kasan_unpoison_task_stack_below

				#endif

					ldp	x19, x20, [x29, #16]

									
										1

arch/arm64/kernel/vmlinux.lds.S
									
										View file
										
				@ -120,6 +120,7 @@ SECTIONS

							__exception_text_end = .;

							IRQENTRY_TEXT

							ENTRY_TEXT

							SOFTIRQENTRY_TEXT

							TEXT_TEXT

							SCHED_TEXT

							LOCK_TEXT

									
										19

arch/arm64/kvm/handle_exit.c
									
										View file
										
				@ -122,7 +122,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)

					return ret;

				}

				static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)

				{

					u32 hsr = kvm_vcpu_get_hsr(vcpu);

					kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",

						      hsr, esr_get_class_string(hsr));

					kvm_inject_undefined(vcpu);

					return 1;

				}

				static exit_handle_fn arm_exit_handlers[] = {

					[0 ... ESR_ELx_EC_MAX]	= kvm_handle_unknown_ec,

					[ESR_ELx_EC_WFx]	= kvm_handle_wfx,

					[ESR_ELx_EC_CP15_32]	= kvm_handle_cp15_32,

					[ESR_ELx_EC_CP15_64]	= kvm_handle_cp15_64,

				@ -148,13 +160,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)

					u32 hsr = kvm_vcpu_get_hsr(vcpu);

					u8 hsr_ec = hsr >> ESR_ELx_EC_SHIFT;

					if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||

					    !arm_exit_handlers[hsr_ec]) {

						kvm_err("Unknown exception class: hsr: %#08x -- %s\n",

							hsr, esr_get_class_string(hsr));

						BUG();

					}

					return arm_exit_handlers[hsr_ec];

				}

									
										2

arch/arm64/mm/init.c
									
										View file
										
				@ -255,6 +255,7 @@ void __init arm64_memblock_init(void)

						arm64_dma_phys_limit = max_zone_dma_phys();

					else

						arm64_dma_phys_limit = PHYS_MASK + 1;

					high_memory = __va(memblock_end_of_DRAM() - 1) + 1;

					dma_contiguous_reserve(arm64_dma_phys_limit);

					memblock_allow_resize();

				@ -279,7 +280,6 @@ void __init bootmem_init(void)

					sparse_init();

					zone_sizes_init(min, max);

					high_memory = __va((max << PAGE_SHIFT) - 1) + 1;

					max_pfn = max_low_pfn = max;

				}

7

arch/blackfin/Kconfig

View file

 @ -318,11 +318,14 @@ config BF53x
 config GPIO_ADI
 	def_bool y
 	depends on !PINCTRL
 	depends on (BF51x || BF52x || BF53x || BF538 || BF539 || BF561)
 config PINCTRL
 config PINCTRL_BLACKFIN_ADI2
 	def_bool y
 	depends on BF54x || BF60x
 	depends on (BF54x || BF60x)
 	select PINCTRL
 	select PINCTRL_ADI2
 config MEM_MT48LC64M4A2FB_7E
 	bool

1

arch/blackfin/Kconfig.debug

View file

 @ -17,6 +17,7 @@ config DEBUG_VERBOSE
 config DEBUG_MMRS
 	tristate "Generate Blackfin MMR tree"
 	depends on !PINCTRL
 	select DEBUG_FS
 	help
 	  Create a tree of Blackfin MMRs via the debugfs tree.  If

									
										1

arch/blackfin/kernel/vmlinux.lds.S
									
										View file
										
				@ -35,6 +35,7 @@ SECTIONS

				#endif

						LOCK_TEXT

						IRQENTRY_TEXT

						SOFTIRQENTRY_TEXT

						KPROBES_TEXT

				#ifdef CONFIG_ROMKERNEL

						__sinittext = .;

									
										1

arch/c6x/kernel/vmlinux.lds.S
									
										View file
										
				@ -72,6 +72,7 @@ SECTIONS

						SCHED_TEXT

						LOCK_TEXT

						IRQENTRY_TEXT

						SOFTIRQENTRY_TEXT

						KPROBES_TEXT

						*(.fixup)

						*(.gnu.warning)

									
										1

arch/metag/kernel/vmlinux.lds.S
									
										View file
										
				@ -24,6 +24,7 @@ SECTIONS

					LOCK_TEXT

					KPROBES_TEXT

					IRQENTRY_TEXT

					SOFTIRQENTRY_TEXT

					*(.text.*)

					*(.gnu.warning)

					}

									
										1

arch/microblaze/kernel/vmlinux.lds.S
									
										View file
										
				@ -36,6 +36,7 @@ SECTIONS {

						LOCK_TEXT

						KPROBES_TEXT

						IRQENTRY_TEXT

						SOFTIRQENTRY_TEXT

						. = ALIGN (4) ;

						_etext = . ;

					}

									
										1

arch/mips/kernel/vmlinux.lds.S
									
										View file
										
				@ -58,6 +58,7 @@ SECTIONS

						LOCK_TEXT

						KPROBES_TEXT

						IRQENTRY_TEXT

						SOFTIRQENTRY_TEXT

						*(.text.*)

						*(.fixup)

						*(.gnu.warning)

									
										28

arch/mips/math-emu/cp1emu.c
									
										View file
										
				@ -1777,7 +1777,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,

							SPFROMREG(fs, MIPSInst_FS(ir));

							SPFROMREG(fd, MIPSInst_FD(ir));

							rv.s = ieee754sp_maddf(fd, fs, ft);

							break;

							goto copcsr;

						}

						case fmsubf_op: {

				@ -1790,7 +1790,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,

							SPFROMREG(fs, MIPSInst_FS(ir));

							SPFROMREG(fd, MIPSInst_FD(ir));

							rv.s = ieee754sp_msubf(fd, fs, ft);

							break;

							goto copcsr;

						}

						case frint_op: {

				@ -1814,7 +1814,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,

							SPFROMREG(fs, MIPSInst_FS(ir));

							rv.w = ieee754sp_2008class(fs);

							rfmt = w_fmt;

							break;

							goto copcsr;

						}

						case fmin_op: {

				@ -1826,7 +1826,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,

							SPFROMREG(ft, MIPSInst_FT(ir));

							SPFROMREG(fs, MIPSInst_FS(ir));

							rv.s = ieee754sp_fmin(fs, ft);

							break;

							goto copcsr;

						}

						case fmina_op: {

				@ -1838,7 +1838,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,

							SPFROMREG(ft, MIPSInst_FT(ir));

							SPFROMREG(fs, MIPSInst_FS(ir));

							rv.s = ieee754sp_fmina(fs, ft);

							break;

							goto copcsr;

						}

						case fmax_op: {

				@ -1850,7 +1850,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,

							SPFROMREG(ft, MIPSInst_FT(ir));

							SPFROMREG(fs, MIPSInst_FS(ir));

							rv.s = ieee754sp_fmax(fs, ft);

							break;

							goto copcsr;

						}

						case fmaxa_op: {

				@ -1862,7 +1862,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,

							SPFROMREG(ft, MIPSInst_FT(ir));

							SPFROMREG(fs, MIPSInst_FS(ir));

							rv.s = ieee754sp_fmaxa(fs, ft);

							break;

							goto copcsr;

						}

						case fabs_op:

				@ -2095,7 +2095,7 @@ copcsr:

							DPFROMREG(fs, MIPSInst_FS(ir));

							DPFROMREG(fd, MIPSInst_FD(ir));

							rv.d = ieee754dp_maddf(fd, fs, ft);

							break;

							goto copcsr;

						}

						case fmsubf_op: {

				@ -2108,7 +2108,7 @@ copcsr:

							DPFROMREG(fs, MIPSInst_FS(ir));

							DPFROMREG(fd, MIPSInst_FD(ir));

							rv.d = ieee754dp_msubf(fd, fs, ft);

							break;

							goto copcsr;

						}

						case frint_op: {

				@ -2132,7 +2132,7 @@ copcsr:

							DPFROMREG(fs, MIPSInst_FS(ir));

							rv.w = ieee754dp_2008class(fs);

							rfmt = w_fmt;

							break;

							goto copcsr;

						}

						case fmin_op: {

				@ -2144,7 +2144,7 @@ copcsr:

							DPFROMREG(ft, MIPSInst_FT(ir));

							DPFROMREG(fs, MIPSInst_FS(ir));

							rv.d = ieee754dp_fmin(fs, ft);

							break;

							goto copcsr;

						}

						case fmina_op: {

				@ -2156,7 +2156,7 @@ copcsr:

							DPFROMREG(ft, MIPSInst_FT(ir));

							DPFROMREG(fs, MIPSInst_FS(ir));

							rv.d = ieee754dp_fmina(fs, ft);

							break;

							goto copcsr;

						}

						case fmax_op: {

				@ -2168,7 +2168,7 @@ copcsr:

							DPFROMREG(ft, MIPSInst_FT(ir));

							DPFROMREG(fs, MIPSInst_FS(ir));

							rv.d = ieee754dp_fmax(fs, ft);

							break;

							goto copcsr;

						}

						case fmaxa_op: {

				@ -2180,7 +2180,7 @@ copcsr:

							DPFROMREG(ft, MIPSInst_FT(ir));

							DPFROMREG(fs, MIPSInst_FS(ir));

							rv.d = ieee754dp_fmaxa(fs, ft);

							break;

							goto copcsr;

						}

						case fabs_op:

									
										1

arch/nios2/kernel/vmlinux.lds.S
									
										View file
										
				@ -39,6 +39,7 @@ SECTIONS

						SCHED_TEXT

						LOCK_TEXT

						IRQENTRY_TEXT

						SOFTIRQENTRY_TEXT

						KPROBES_TEXT

					} =0

					_etext = .;

									
										2

arch/openrisc/include/asm/uaccess.h
									
										View file
										
				@ -215,7 +215,7 @@ do {									\

					case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break;		\

					case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break;		\

					case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break;		\

					case 8: __get_user_asm2(x, ptr, retval);			\

					case 8: __get_user_asm2(x, ptr, retval); break;			\

					default: (x) = __get_user_bad();				\

					}								\

				} while (0)

									
										1

arch/openrisc/kernel/vmlinux.lds.S
									
										View file
										
				@ -52,6 +52,7 @@ SECTIONS

					  LOCK_TEXT

					  KPROBES_TEXT

					  IRQENTRY_TEXT

					  SOFTIRQENTRY_TEXT

					  *(.fixup)

					  *(.text.__*)

					  _etext = .;

									
										1

arch/parisc/kernel/vmlinux.lds.S
									
										View file
										
				@ -72,6 +72,7 @@ SECTIONS

						LOCK_TEXT

						KPROBES_TEXT

						IRQENTRY_TEXT

						SOFTIRQENTRY_TEXT

						*(.text.do_softirq)

						*(.text.sys_exit)

						*(.text.do_sigaltstack)

									
										1

arch/powerpc/kernel/vmlinux.lds.S
									
										View file
										
				@ -55,6 +55,7 @@ SECTIONS

						LOCK_TEXT

						KPROBES_TEXT

						IRQENTRY_TEXT

						SOFTIRQENTRY_TEXT

				#ifdef CONFIG_PPC32

						*(.got1)

									
										8

arch/powerpc/perf/core-book3s.c
									
										View file
										
				@ -401,8 +401,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)

					int ret;

					__u64 target;

					if (is_kernel_addr(addr))

						return branch_target((unsigned int *)addr);

					if (is_kernel_addr(addr)) {

						if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))

							return 0;

						return branch_target(&instr);

					}

					/* Userspace: need copy instruction here then translate it */

					pagefault_disable();

									
										2

arch/powerpc/perf/hv-24x7.c
									
										View file
										
				@ -514,7 +514,7 @@ static int memord(const void *d1, size_t s1, const void *d2, size_t s2)

				{

					if (s1 < s2)

						return 1;

					if (s2 > s1)

					if (s1 > s2)

						return -1;

					return memcmp(d1, d2, s1);

									
										6

arch/powerpc/platforms/powernv/opal-async.c
									
										View file
										
				@ -39,18 +39,18 @@ int __opal_async_get_token(void)

					int token;

					spin_lock_irqsave(&opal_async_comp_lock, flags);

					token = find_first_bit(opal_async_complete_map, opal_max_async_tokens);

					token = find_first_zero_bit(opal_async_token_map, opal_max_async_tokens);

					if (token >= opal_max_async_tokens) {

						token = -EBUSY;

						goto out;

					}

					if (__test_and_set_bit(token, opal_async_token_map)) {

					if (!__test_and_clear_bit(token, opal_async_complete_map)) {

						token = -EBUSY;

						goto out;

					}

					__clear_bit(token, opal_async_complete_map);

					__set_bit(token, opal_async_token_map);

				out:

					spin_unlock_irqrestore(&opal_async_comp_lock, flags);

									
										3

arch/powerpc/platforms/powernv/pci-ioda.c
									
										View file
										
				@ -2270,6 +2270,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,

					level_shift = entries_shift + 3;

					level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);

					if ((level_shift - 3) * levels + page_shift >= 60)

						return -EINVAL;

					/* Allocate TCE table */

					addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,

							levels, tce_table_size, &offset, &total_allocated);

									
										2

arch/powerpc/platforms/powernv/setup.c
									
										View file
										
				@ -295,7 +295,7 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)

				{

					unsigned long ret_freq;

					ret_freq = cpufreq_quick_get(cpu) * 1000ul;

					ret_freq = cpufreq_get(cpu) * 1000ul;

					/*

					 * If the backend cpufreq driver does not exist,

									
										5

arch/powerpc/sysdev/axonram.c
									
										View file
										
				@ -276,7 +276,9 @@ failed:

							if (bank->disk->major > 0)

								unregister_blkdev(bank->disk->major,

										bank->disk->disk_name);

							del_gendisk(bank->disk);

							if (bank->disk->flags & GENHD_FL_UP)

								del_gendisk(bank->disk);

							put_disk(bank->disk);

						}

						device->dev.platform_data = NULL;

						if (bank->io_addr != 0)

				@ -301,6 +303,7 @@ axon_ram_remove(struct platform_device *device)

					device_remove_file(&device->dev, &dev_attr_ecc);

					free_irq(bank->irq_id, device);

					del_gendisk(bank->disk);

					put_disk(bank->disk);

					iounmap((void __iomem *) bank->io_addr);

					kfree(bank);

									
										4

arch/powerpc/sysdev/ipic.c
									
										View file
										
				@ -845,12 +845,12 @@ void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq)

				u32 ipic_get_mcp_status(void)

				{

					return ipic_read(primary_ipic->regs, IPIC_SERMR);

					return ipic_read(primary_ipic->regs, IPIC_SERSR);

				}

				void ipic_clear_mcp_status(u32 mask)

				{

					ipic_write(primary_ipic->regs, IPIC_SERMR, mask);

					ipic_write(primary_ipic->regs, IPIC_SERSR, mask);

				}

				/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */

									
										8

arch/s390/include/asm/asm-prototypes.h
									
										View file
									
				@ -1,8 +0,0 @@

				#ifndef _ASM_S390_PROTOTYPES_H

				#include <linux/kvm_host.h>

				#include <linux/ftrace.h>

				#include <asm/fpu/api.h>

				#include <asm-generic/asm-prototypes.h>

				#endif /* _ASM_S390_PROTOTYPES_H */

									
										19

arch/s390/include/asm/switch_to.h
									
										View file
										
				@ -29,17 +29,16 @@ static inline void restore_access_regs(unsigned int *acrs)

				}

				#define switch_to(prev,next,last) do {					\

					if (prev->mm) {							\

						save_fpu_regs();					\

						save_access_regs(&prev->thread.acrs[0]);		\

						save_ri_cb(prev->thread.ri_cb);				\

					}								\

					/* save_fpu_regs() sets the CIF_FPU flag, which enforces	\

					 * a restore of the floating point / vector registers as	\

					 * soon as the next task returns to user space			\

					 */								\

					save_fpu_regs();						\

					save_access_regs(&prev->thread.acrs[0]);			\

					save_ri_cb(prev->thread.ri_cb);					\

					update_cr_regs(next);						\

					if (next->mm) {							\

						set_cpu_flag(CIF_FPU);					\

						restore_access_regs(&next->thread.acrs[0]);		\

						restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);	\

					}								\

					restore_access_regs(&next->thread.acrs[0]);			\

					restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);		\

					prev = __switch_to(prev,next);					\

				} while (0)

									
										6

arch/s390/kernel/syscalls.S
									
										View file
										
				@ -369,10 +369,10 @@ SYSCALL(sys_recvmmsg,compat_sys_recvmmsg)

				SYSCALL(sys_sendmmsg,compat_sys_sendmmsg)

				SYSCALL(sys_socket,sys_socket)

				SYSCALL(sys_socketpair,compat_sys_socketpair)		/* 360 */

				SYSCALL(sys_bind,sys_bind)

				SYSCALL(sys_connect,sys_connect)

				SYSCALL(sys_bind,compat_sys_bind)

				SYSCALL(sys_connect,compat_sys_connect)

				SYSCALL(sys_listen,sys_listen)

				SYSCALL(sys_accept4,sys_accept4)

				SYSCALL(sys_accept4,compat_sys_accept4)

				SYSCALL(sys_getsockopt,compat_sys_getsockopt)		/* 365 */

				SYSCALL(sys_setsockopt,compat_sys_setsockopt)

				SYSCALL(sys_getsockname,compat_sys_getsockname)

									
										1

arch/s390/kernel/vmlinux.lds.S
									
										View file
										
				@ -28,6 +28,7 @@ SECTIONS

						LOCK_TEXT

						KPROBES_TEXT

						IRQENTRY_TEXT

						SOFTIRQENTRY_TEXT

						*(.fixup)

						*(.gnu.warning)

					} :text = 0x0700

									
										1

arch/sh/kernel/vmlinux.lds.S
									
										View file
										
				@ -39,6 +39,7 @@ SECTIONS

						LOCK_TEXT

						KPROBES_TEXT

						IRQENTRY_TEXT

						SOFTIRQENTRY_TEXT

						*(.fixup)

						*(.gnu.warning)

						_etext = .;		/* End of text section */

									
										1

arch/sparc/kernel/vmlinux.lds.S
									
										View file
										
				@ -52,6 +52,7 @@ SECTIONS

						LOCK_TEXT

						KPROBES_TEXT

						IRQENTRY_TEXT

						SOFTIRQENTRY_TEXT

						*(.gnu.warning)

					} = 0

					_etext = .;

									
										9

arch/sparc/mm/init_64.c
									
										View file
										
				@ -2402,9 +2402,16 @@ void __init mem_init(void)

				{

					high_memory = __va(last_valid_pfn << PAGE_SHIFT);

					register_page_bootmem_info();

					free_all_bootmem();

					/*

					 * Must be done after boot memory is put on freelist, because here we

					 * might set fields in deferred struct pages that have not yet been

					 * initialized, and free_all_bootmem() initializes all the reserved

					 * deferred pages for us.

					 */

					register_page_bootmem_info();

					/*

					 * Set up the zero page, mark it reserved, so that page count

					 * is not manipulated when freeing the page from user ptes.

									
										1

arch/tile/kernel/vmlinux.lds.S
									
										View file
										
				@ -45,6 +45,7 @@ SECTIONS

				    LOCK_TEXT

				    KPROBES_TEXT

				    IRQENTRY_TEXT

				    SOFTIRQENTRY_TEXT

				    __fix_text_end = .;   /* tile-cpack won't rearrange before this */

				    ALIGN_FUNCTION();

				    *(.hottext*)

3

arch/x86/Kconfig

View file

 @ -27,6 +27,7 @@ config X86
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_PMEM_API		if X86_64
 	select ARCH_HAS_MMIO_FLUSH
 	select ARCH_HAS_SG_CHAIN
 @ -43,7 +44,7 @@ config X86
 	select ARCH_USE_CMPXCHG_LOCKREF		if X86_64
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_IPC_PARSE_VERSION	if X86_32

									
										7

arch/x86/boot/Makefile
									
										View file
										
				@ -11,6 +11,13 @@

				KASAN_SANITIZE := n

				# Kernel does not boot with kcov instrumentation here.

				# One of the problems observed was insertion of __sanitizer_cov_trace_pc()

				# callback into middle of per-cpu data enabling code. Thus the callback observed

				# inconsistent state and crashed. We are interested mostly in syscall coverage,

				# so boot code is not interesting anyway.

				KCOV_INSTRUMENT		:= n

				# If you want to preset the SVGA mode, uncomment the next line and

				# set SVGA_MODE to whatever number you want.

				# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.

									
										3

arch/x86/boot/compressed/Makefile
									
										View file
										
				@ -18,6 +18,9 @@

				KASAN_SANITIZE := n

				# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.

				KCOV_INSTRUMENT		:= n

				targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \

					vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4

									
										1

arch/x86/boot/compressed/misc.h
									
										View file
										
				@ -9,6 +9,7 @@

				 */

				#undef CONFIG_PARAVIRT

				#undef CONFIG_PARAVIRT_SPINLOCKS

				#undef CONFIG_PAGE_TABLE_ISOLATION

				#undef CONFIG_KASAN

				#include <linux/linkage.h>

									
										7

arch/x86/crypto/salsa20_glue.c
									
										View file
										
				@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc,

					salsa20_ivsetup(ctx, walk.iv);

					if (likely(walk.nbytes == nbytes))

					{

						salsa20_encrypt_bytes(ctx, walk.src.virt.addr,

								      walk.dst.virt.addr, nbytes);

						return blkcipher_walk_done(desc, &walk, 0);

					}

					while (walk.nbytes >= 64) {

						salsa20_encrypt_bytes(ctx, walk.src.virt.addr,

								      walk.dst.virt.addr,

									
										174

arch/x86/entry/entry_64.S
									
										View file
										
				@ -35,6 +35,7 @@

				#include <asm/asm.h>

				#include <asm/smap.h>

				#include <asm/pgtable_types.h>

				#include <asm/kaiser.h>

				#include <linux/err.h>

				/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */

				@ -135,6 +136,7 @@ ENTRY(entry_SYSCALL_64)

					 * it is too small to ever cause noticeable irq latency.

					 */

					SWAPGS_UNSAFE_STACK

					SWITCH_KERNEL_CR3_NO_STACK

					/*

					 * A hypervisor implementation might want to use a label

					 * after the swapgs, so that it can do the swapgs

				@ -207,9 +209,17 @@ entry_SYSCALL_64_fastpath:

					testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)

					jnz	int_ret_from_sys_call_irqs_off	/* Go to the slow path */

					RESTORE_C_REGS_EXCEPT_RCX_R11

					movq	RIP(%rsp), %rcx

					movq	EFLAGS(%rsp), %r11

					RESTORE_C_REGS_EXCEPT_RCX_R11

					/*

					 * This opens a window where we have a user CR3, but are

					 * running in the kernel.  This makes using the CS

					 * register useless for telling whether or not we need to

					 * switch CR3 in NMIs.  Normal interrupts are OK because

					 * they are off here.

					 */

					SWITCH_USER_CR3

					movq	RSP(%rsp), %rsp

					/*

					 * 64-bit SYSRET restores rip from rcx,

				@ -347,10 +357,26 @@ GLOBAL(int_ret_from_sys_call)

				syscall_return_via_sysret:

					/* rcx and r11 are already restored (see code above) */

					RESTORE_C_REGS_EXCEPT_RCX_R11

					/*

					 * This opens a window where we have a user CR3, but are

					 * running in the kernel.  This makes using the CS

					 * register useless for telling whether or not we need to

					 * switch CR3 in NMIs.  Normal interrupts are OK because

					 * they are off here.

					 */

					SWITCH_USER_CR3

					movq	RSP(%rsp), %rsp

					USERGS_SYSRET64

				opportunistic_sysret_failed:

					/*

					 * This opens a window where we have a user CR3, but are

					 * running in the kernel.  This makes using the CS

					 * register useless for telling whether or not we need to

					 * switch CR3 in NMIs.  Normal interrupts are OK because

					 * they are off here.

					 */

					SWITCH_USER_CR3

					SWAPGS

					jmp	restore_c_regs_and_iret

				END(entry_SYSCALL_64)

				@ -509,6 +535,7 @@ END(irq_entries_start)

					 * tracking that we're in kernel mode.

					 */

					SWAPGS

					SWITCH_KERNEL_CR3

					/*

					 * We need to tell lockdep that IRQs are off.  We can't do this until

				@ -568,6 +595,7 @@ GLOBAL(retint_user)

					mov	%rsp,%rdi

					call	prepare_exit_to_usermode

					TRACE_IRQS_IRETQ

					SWITCH_USER_CR3

					SWAPGS

					jmp	restore_regs_and_iret

				@ -625,6 +653,7 @@ native_irq_return_ldt:

					pushq	%rax

					pushq	%rdi

					SWAPGS

					SWITCH_KERNEL_CR3

					movq	PER_CPU_VAR(espfix_waddr), %rdi

					movq	%rax, (0*8)(%rdi)		/* RAX */

					movq	(2*8)(%rsp), %rax		/* RIP */

				@ -640,6 +669,7 @@ native_irq_return_ldt:

					andl	$0xffff0000, %eax

					popq	%rdi

					orq	PER_CPU_VAR(espfix_stack), %rax

					SWITCH_USER_CR3

					SWAPGS

					movq	%rax, %rsp

					popq	%rax

				@ -672,9 +702,15 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)

				.endm

				#endif

				/* Make sure APIC interrupt handlers end up in the irqentry section: */

				#define PUSH_SECTION_IRQENTRY	.pushsection .irqentry.text, "ax"

				#define POP_SECTION_IRQENTRY	.popsection

				.macro apicinterrupt num sym do_sym

				PUSH_SECTION_IRQENTRY

				apicinterrupt3 \num \sym \do_sym

				trace_apicinterrupt \num \sym

				POP_SECTION_IRQENTRY

				.endm

				#ifdef CONFIG_SMP

				@ -995,7 +1031,11 @@ idtentry machine_check					has_error_code=0	paranoid=1 do_sym=*machine_check_vec

				/*

				 * Save all registers in pt_regs, and switch gs if needed.

				 * Use slow, but surefire "are we in kernel?" check.

				 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise

				 *

				 * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit

				 *         ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit

				 *         ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit

				 *         ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit

				 */

				ENTRY(paranoid_entry)

					cld

				@ -1008,7 +1048,26 @@ ENTRY(paranoid_entry)

					js	1f				/* negative -> in kernel */

					SWAPGS

					xorl	%ebx, %ebx

				1:	ret

				1:

				#ifdef CONFIG_PAGE_TABLE_ISOLATION

					/*

					 * We might have come in between a swapgs and a SWITCH_KERNEL_CR3

					 * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.

					 * Do a conditional SWITCH_KERNEL_CR3: this could safely be done

					 * unconditionally, but we need to find out whether the reverse

					 * should be done on return (conveyed to paranoid_exit in %ebx).

					 */

					ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER

					testl	$KAISER_SHADOW_PGD_OFFSET, %eax

					jz	2f

					orl	$2, %ebx

					andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax

					/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */

					ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID

					movq	%rax, %cr3

				2:

				#endif

					ret

				END(paranoid_entry)

				/*

				@ -1021,19 +1080,26 @@ END(paranoid_entry)

				 * be complicated.  Fortunately, there's no good reason

				 * to try to handle preemption here.

				 *

				 * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)

				 * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3

				 *           ebx=1: needs neither swapgs nor SWITCH_USER_CR3

				 *           ebx=2: needs both swapgs and SWITCH_USER_CR3

				 *           ebx=3: needs SWITCH_USER_CR3 but not swapgs

				 */

				ENTRY(paranoid_exit)

					DISABLE_INTERRUPTS(CLBR_NONE)

					TRACE_IRQS_OFF_DEBUG

					testl	%ebx, %ebx			/* swapgs needed? */

					jnz	paranoid_exit_no_swapgs

					TRACE_IRQS_IRETQ

					SWAPGS_UNSAFE_STACK

					jmp	paranoid_exit_restore

				paranoid_exit_no_swapgs:

					TRACE_IRQS_IRETQ_DEBUG

				paranoid_exit_restore:

				#ifdef CONFIG_PAGE_TABLE_ISOLATION

					/* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */

					testl	$2, %ebx			/* SWITCH_USER_CR3 needed? */

					jz	paranoid_exit_no_switch

					SWITCH_USER_CR3

				paranoid_exit_no_switch:

				#endif

					testl	$1, %ebx			/* swapgs needed? */

					jnz	paranoid_exit_no_swapgs

					SWAPGS_UNSAFE_STACK

				paranoid_exit_no_swapgs:

					RESTORE_EXTRA_REGS

					RESTORE_C_REGS

					REMOVE_PT_GPREGS_FROM_STACK 8

				@ -1048,6 +1114,13 @@ ENTRY(error_entry)

					cld

					SAVE_C_REGS 8

					SAVE_EXTRA_REGS 8

					/*

					 * error_entry() always returns with a kernel gsbase and

					 * CR3.  We must also have a kernel CR3/gsbase before

					 * calling TRACE_IRQS_*.  Just unconditionally switch to

					 * the kernel CR3 here.

					 */

					SWITCH_KERNEL_CR3

					xorl	%ebx, %ebx

					testb	$3, CS+8(%rsp)

					jz	.Lerror_kernelspace

				@ -1210,6 +1283,10 @@ ENTRY(nmi)

					 */

					SWAPGS_UNSAFE_STACK

					/*

					 * percpu variables are mapped with user CR3, so no need

					 * to switch CR3 here.

					 */

					cld

					movq	%rsp, %rdx

					movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

				@ -1243,12 +1320,34 @@ ENTRY(nmi)

					movq	%rsp, %rdi

					movq	$-1, %rsi

				#ifdef CONFIG_PAGE_TABLE_ISOLATION

					/* Unconditionally use kernel CR3 for do_nmi() */

					/* %rax is saved above, so OK to clobber here */

					ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER

					/* If PCID enabled, NOFLUSH now and NOFLUSH on return */

					ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID

					pushq	%rax

					/* mask off "user" bit of pgd address and 12 PCID bits: */

					andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax

					movq	%rax, %cr3

				2:

				#endif

					call	do_nmi

				#ifdef CONFIG_PAGE_TABLE_ISOLATION

					/*

					 * Unconditionally restore CR3.  I know we return to

					 * kernel code that needs user CR3, but do we ever return

					 * to "user mode" where we need the kernel CR3?

					 */

					ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER

				#endif

					/*

					 * Return back to user mode.  We must *not* do the normal exit

					 * work, because we don't want to enable interrupts.  Fortunately,

					 * do_nmi doesn't modify pt_regs.

					 * work, because we don't want to enable interrupts.  Do not

					 * switch to user CR3: we might be going back to kernel code

					 * that had a user CR3 set.

					 */

					SWAPGS

					jmp	restore_c_regs_and_iret

				@ -1445,22 +1544,55 @@ end_repeat_nmi:

					ALLOC_PT_GPREGS_ON_STACK

					/*

					 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit

					 * as we should not be calling schedule in NMI context.

					 * Even with normal interrupts enabled. An NMI should not be

					 * setting NEED_RESCHED or anything that normal interrupts and

					 * exceptions might do.

					 * Use the same approach as paranoid_entry to handle SWAPGS, but

					 * without CR3 handling since we do that differently in NMIs.  No

					 * need to use paranoid_exit as we should not be calling schedule

					 * in NMI context.  Even with normal interrupts enabled. An NMI

					 * should not be setting NEED_RESCHED or anything that normal

					 * interrupts and exceptions might do.

					 */

					call	paranoid_entry

					/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */

					cld

					SAVE_C_REGS

					SAVE_EXTRA_REGS

					movl	$1, %ebx

					movl	$MSR_GS_BASE, %ecx

					rdmsr

					testl	%edx, %edx

					js	1f				/* negative -> in kernel */

					SWAPGS

					xorl	%ebx, %ebx

				1:

					movq	%rsp, %rdi

					movq	$-1, %rsi

				#ifdef CONFIG_PAGE_TABLE_ISOLATION

					/* Unconditionally use kernel CR3 for do_nmi() */

					/* %rax is saved above, so OK to clobber here */

					ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER

					/* If PCID enabled, NOFLUSH now and NOFLUSH on return */

					ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID

					pushq	%rax

					/* mask off "user" bit of pgd address and 12 PCID bits: */

					andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax

					movq	%rax, %cr3

				2:

				#endif

					/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */

					call	do_nmi

				#ifdef CONFIG_PAGE_TABLE_ISOLATION

					/*

					 * Unconditionally restore CR3.  We might be returning to

					 * kernel code that needs user CR3, like just before

					 * a sysret.

					 */

					ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER

				#endif

					testl	%ebx, %ebx			/* swapgs needed? */

					jnz	nmi_restore

				nmi_swapgs:

					/* We fixed up CR3 above, so no need to switch it here */

					SWAPGS_UNSAFE_STACK

				nmi_restore:

					RESTORE_EXTRA_REGS

									
										7

arch/x86/entry/entry_64_compat.S
									
										View file
										
				@ -13,6 +13,8 @@

				#include <asm/irqflags.h>

				#include <asm/asm.h>

				#include <asm/smap.h>

				#include <asm/pgtable_types.h>

				#include <asm/kaiser.h>

				#include <linux/linkage.h>

				#include <linux/err.h>

				@ -50,6 +52,7 @@ ENDPROC(native_usergs_sysret32)

				ENTRY(entry_SYSENTER_compat)

					/* Interrupts are off on entry. */

					SWAPGS_UNSAFE_STACK

					SWITCH_KERNEL_CR3_NO_STACK

					movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

					/*

				@ -161,6 +164,7 @@ ENDPROC(entry_SYSENTER_compat)

				ENTRY(entry_SYSCALL_compat)

					/* Interrupts are off on entry. */

					SWAPGS_UNSAFE_STACK

					SWITCH_KERNEL_CR3_NO_STACK

					/* Stash user ESP and switch to the kernel stack. */

					movl	%esp, %r8d

				@ -208,6 +212,7 @@ ENTRY(entry_SYSCALL_compat)

					/* Opportunistic SYSRET */

				sysret32_from_system_call:

					TRACE_IRQS_ON			/* User mode traces as IRQs on. */

					SWITCH_USER_CR3

					movq	RBX(%rsp), %rbx		/* pt_regs->rbx */

					movq	RBP(%rsp), %rbp		/* pt_regs->rbp */

					movq	EFLAGS(%rsp), %r11	/* pt_regs->flags (in r11) */

				@ -269,6 +274,7 @@ ENTRY(entry_INT80_compat)

					PARAVIRT_ADJUST_EXCEPTION_FRAME

					ASM_CLAC			/* Do this early to minimize exposure */

					SWAPGS

					SWITCH_KERNEL_CR3_NO_STACK

					/*

					 * User tracing code (ptrace or signal handlers) might assume that

				@ -311,6 +317,7 @@ ENTRY(entry_INT80_compat)

					/* Go back to user mode. */

					TRACE_IRQS_ON

					SWITCH_USER_CR3

					SWAPGS

					jmp	restore_regs_and_iret

				END(entry_INT80_compat)

									
										3

arch/x86/entry/vdso/Makefile
									
										View file
										
				@ -5,6 +5,9 @@

				KBUILD_CFLAGS += $(DISABLE_LTO)

				KASAN_SANITIZE := n

				# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.

				KCOV_INSTRUMENT		:= n

				VDSO64-$(CONFIG_X86_64)		:= y

				VDSOX32-$(CONFIG_X86_X32_ABI)	:= y

				VDSO32-$(CONFIG_X86_32)		:= y

									
										97

arch/x86/entry/vdso/vclock_gettime.c
									
										View file
										
				@ -36,6 +36,11 @@ static notrace cycle_t vread_hpet(void)

				}

				#endif

				#ifdef CONFIG_PARAVIRT_CLOCK

				extern u8 pvclock_page

					__attribute__((visibility("hidden")));

				#endif

				#ifndef BUILD_VDSO32

				#include <linux/kernel.h>

				@ -62,63 +67,65 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)

				#ifdef CONFIG_PARAVIRT_CLOCK

				static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)

				static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)

				{

					const struct pvclock_vsyscall_time_info *pvti_base;

					int idx = cpu / (PAGE_SIZE/PVTI_SIZE);

					int offset = cpu % (PAGE_SIZE/PVTI_SIZE);

					BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);

					pvti_base = (struct pvclock_vsyscall_time_info *)

						    __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);

					return &pvti_base[offset];

					return (const struct pvclock_vsyscall_time_info *)&pvclock_page;

				}

				static notrace cycle_t vread_pvclock(int *mode)

				{

					const struct pvclock_vsyscall_time_info *pvti;

					const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;

					cycle_t ret;

					u64 last;

					u32 version;

					u8 flags;

					unsigned cpu, cpu1;

					u64 tsc, pvti_tsc;

					u64 last, delta, pvti_system_time;

					u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;

					/*

					 * Note: hypervisor must guarantee that:

					 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.

					 * 2. that per-CPU pvclock time info is updated if the

					 *    underlying CPU changes.

					 * 3. that version is increased whenever underlying CPU

					 *    changes.

					 * Note: The kernel and hypervisor must guarantee that cpu ID

					 * number maps 1:1 to per-CPU pvclock time info.

					 *

					 * Because the hypervisor is entirely unaware of guest userspace

					 * preemption, it cannot guarantee that per-CPU pvclock time

					 * info is updated if the underlying CPU changes or that the

					 * version is increased whenever underlying CPU changes.

					 *

					 * On KVM, we are guaranteed that pvti updates for any vCPU are

					 * atomic as seen by *all* vCPUs.  This is an even stronger

					 * guarantee than we get with a normal seqlock.

					 *

					 * On Xen, we don't appear to have that guarantee, but Xen still

					 * supplies a valid seqlock using the version field.

					 * We only do pvclock vdso timing at all if

					 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to

					 * mean that all vCPUs have matching pvti and that the TSC is

					 * synced, so we can just look at vCPU 0's pvti.

					 */

					do {

						cpu = __getcpu() & VGETCPU_CPU_MASK;

						/* TODO: We can put vcpu id into higher bits of pvti.version.

						 * This will save a couple of cycles by getting rid of

						 * __getcpu() calls (Gleb).

						 */

						pvti = get_pvti(cpu);

						version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);

						/*

						 * Test we're still on the cpu as well as the version.

						 * We could have been migrated just after the first

						 * vgetcpu but before fetching the version, so we

						 * wouldn't notice a version change.

						 */

						cpu1 = __getcpu() & VGETCPU_CPU_MASK;

					} while (unlikely(cpu != cpu1 ||

							  (pvti->pvti.version & 1) ||

							  pvti->pvti.version != version));

					if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))

					if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {

						*mode = VCLOCK_NONE;

						return 0;

					}

					do {

						version = pvti->version;

						/* This is also a read barrier, so we'll read version first. */

						tsc = rdtsc_ordered();

						pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;

						pvti_tsc_shift = pvti->tsc_shift;

						pvti_system_time = pvti->system_time;

						pvti_tsc = pvti->tsc_timestamp;

						/* Make sure that the version double-check is last. */

						smp_rmb();

					} while (unlikely((version & 1) || version != pvti->version));

					delta = tsc - pvti_tsc;

					ret = pvti_system_time +

						pvclock_scale_delta(delta, pvti_tsc_to_system_mul,

								    pvti_tsc_shift);

					/* refer to tsc.c read_tsc() comment for rationale */

					last = gtod->cycle_last;

									
										3

arch/x86/entry/vdso/vdso-layout.lds.S
									
										View file
										
				@ -25,7 +25,7 @@ SECTIONS

					 * segment.

					 */

					vvar_start = . - 2 * PAGE_SIZE;

					vvar_start = . - 3 * PAGE_SIZE;

					vvar_page = vvar_start;

					/* Place all vvars at the offsets in asm/vvar.h. */

				@ -36,6 +36,7 @@ SECTIONS

				#undef EMIT_VVAR

					hpet_page = vvar_start + PAGE_SIZE;

					pvclock_page = vvar_start + 2 * PAGE_SIZE;

					. = SIZEOF_HEADERS;

									
										3

arch/x86/entry/vdso/vdso2c.c
									
										View file
										
				@ -73,6 +73,7 @@ enum {

					sym_vvar_start,

					sym_vvar_page,

					sym_hpet_page,

					sym_pvclock_page,

					sym_VDSO_FAKE_SECTION_TABLE_START,

					sym_VDSO_FAKE_SECTION_TABLE_END,

				};

				@ -80,6 +81,7 @@ enum {

				const int special_pages[] = {

					sym_vvar_page,

					sym_hpet_page,

					sym_pvclock_page,

				};

				struct vdso_sym {

				@ -91,6 +93,7 @@ struct vdso_sym required_syms[] = {

					[sym_vvar_start] = {"vvar_start", true},

					[sym_vvar_page] = {"vvar_page", true},

					[sym_hpet_page] = {"hpet_page", true},

					[sym_pvclock_page] = {"pvclock_page", true},

					[sym_VDSO_FAKE_SECTION_TABLE_START] = {

						"VDSO_FAKE_SECTION_TABLE_START", false

					},

									
										13

arch/x86/entry/vdso/vma.c
									
										View file
										
				@ -100,6 +100,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)

						.name = "[vvar]",

						.pages = no_pages,

					};

					struct pvclock_vsyscall_time_info *pvti;

					if (calculate_addr) {

						addr = vdso_addr(current->mm->start_stack,

				@ -169,6 +170,18 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)

					}

				#endif

					pvti = pvclock_pvti_cpu0_va();

					if (pvti && image->sym_pvclock_page) {

						ret = remap_pfn_range(vma,

								      text_start + image->sym_pvclock_page,

								      __pa(pvti) >> PAGE_SHIFT,

								      PAGE_SIZE,

								      PAGE_READONLY);

						if (ret)

							goto up_fail;

					}

				up_fail:

					if (ret)

						current->mm->context.vdso = NULL;

									
										2

arch/x86/include/asm/cmdline.h
									
										View file
										
				@ -2,5 +2,7 @@

				#define _ASM_X86_CMDLINE_H

				int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);

				int cmdline_find_option(const char *cmdline_ptr, const char *option,

							char *buffer, int bufsize);

				#endif /* _ASM_X86_CMDLINE_H */

									
										4

arch/x86/include/asm/cpufeature.h
									
										View file
										
				@ -187,6 +187,7 @@

				#define X86_FEATURE_ARAT	( 7*32+ 1) /* Always Running APIC Timer */

				#define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */

				#define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */

				#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */

				#define X86_FEATURE_PLN		( 7*32+ 5) /* Intel Power Limit Notification */

				#define X86_FEATURE_PTS		( 7*32+ 6) /* Intel Package Thermal Status */

				#define X86_FEATURE_DTHERM	( 7*32+ 7) /* Digital Thermal Sensor */

				@ -199,6 +200,9 @@

				#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */

				#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */

				/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */

				#define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */

				/* Virtualization flags: Linux defined, word 8 */

				#define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */

				#define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */

									
										2

arch/x86/include/asm/desc.h
									
										View file
										
				@ -43,7 +43,7 @@ struct gdt_page {

					struct desc_struct gdt[GDT_ENTRIES];

				} __attribute__((aligned(PAGE_SIZE)));

				DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);

				DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page);

				static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)

				{

									
										4

arch/x86/include/asm/disabled-features.h
									
										View file
										
				@ -21,11 +21,13 @@

				# define DISABLE_K6_MTRR	(1<<(X86_FEATURE_K6_MTRR & 31))

				# define DISABLE_CYRIX_ARR	(1<<(X86_FEATURE_CYRIX_ARR & 31))

				# define DISABLE_CENTAUR_MCR	(1<<(X86_FEATURE_CENTAUR_MCR & 31))

				# define DISABLE_PCID		0

				#else

				# define DISABLE_VME		0

				# define DISABLE_K6_MTRR	0

				# define DISABLE_CYRIX_ARR	0

				# define DISABLE_CENTAUR_MCR	0

				# define DISABLE_PCID		(1<<(X86_FEATURE_PCID & 31))

				#endif /* CONFIG_X86_64 */

				/*

				@ -35,7 +37,7 @@

				#define DISABLED_MASK1	0

				#define DISABLED_MASK2	0

				#define DISABLED_MASK3	(DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)

				#define DISABLED_MASK4	0

				#define DISABLED_MASK4	(DISABLE_PCID)

				#define DISABLED_MASK5	0

				#define DISABLED_MASK6	0

				#define DISABLED_MASK7	0

									
										26

arch/x86/include/asm/efi.h
									
										View file
										
				@ -3,7 +3,6 @@

				#include <asm/fpu/api.h>

				#include <asm/pgtable.h>

				#include <asm/tlb.h>

				/*

				 * We map the EFI regions needed for runtime services non-contiguously,

				@ -67,17 +66,6 @@ extern u64 asmlinkage efi_call(void *fp, ...);

				#define efi_call_phys(f, args...)		efi_call((f), args)

				/*

				 * Scratch space used for switching the pagetable in the EFI stub

				 */

				struct efi_scratch {

					u64	r15;

					u64	prev_cr3;

					pgd_t	*efi_pgt;

					bool	use_pgd;

					u64	phys_stack;

				} __packed;

				#define efi_call_virt(f, ...)						\

				({									\

					efi_status_t __s;						\

				@ -85,20 +73,7 @@ struct efi_scratch {

					efi_sync_low_kernel_mappings();					\

					preempt_disable();						\

					__kernel_fpu_begin();						\

													\

					if (efi_scratch.use_pgd) {					\

						efi_scratch.prev_cr3 = read_cr3();			\

						write_cr3((unsigned long)efi_scratch.efi_pgt);		\

						__flush_tlb_all();					\

					}								\

													\

					__s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__);	\

													\

					if (efi_scratch.use_pgd) {					\

						write_cr3(efi_scratch.prev_cr3);			\

						__flush_tlb_all();					\

					}								\

													\

					__kernel_fpu_end();						\

					preempt_enable();						\

					__s;								\

				@ -138,7 +113,6 @@ extern void __init efi_memory_uc(u64 addr, unsigned long size);

				extern void __init efi_map_region(efi_memory_desc_t *md);

				extern void __init efi_map_region_fixed(efi_memory_desc_t *md);

				extern void efi_sync_low_kernel_mappings(void);

				extern int __init efi_alloc_page_tables(void);

				extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);

				extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);

				extern void __init old_map_region(efi_memory_desc_t *md);

									
										6

arch/x86/include/asm/hardirq.h
									
										View file
										
				@ -22,12 +22,8 @@ typedef struct {

				#ifdef CONFIG_SMP

					unsigned int irq_resched_count;

					unsigned int irq_call_count;

					/*

					 * irq_tlb_count is double-counted in irq_call_count, so it must be

					 * subtracted from irq_call_count when displaying irq_call_count

					 */

					unsigned int irq_tlb_count;

				#endif

					unsigned int irq_tlb_count;

				#ifdef CONFIG_X86_THERMAL_VECTOR

					unsigned int irq_thermal_count;

				#endif

									
										2

arch/x86/include/asm/hw_irq.h
									
										View file
										
				@ -187,7 +187,7 @@ extern char irq_entries_start[];

				#define VECTOR_RETRIGGERED	((void *)~0UL)

				typedef struct irq_desc* vector_irq_t[NR_VECTORS];

				DECLARE_PER_CPU(vector_irq_t, vector_irq);

				DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq);

				#endif /* !ASSEMBLY_ */

									
										141

arch/x86/include/asm/kaiser.h
									
										Normal file
									
										View file
										
				@ -0,0 +1,141 @@

				#ifndef _ASM_X86_KAISER_H

				#define _ASM_X86_KAISER_H

				#include <uapi/asm/processor-flags.h> /* For PCID constants */

				/*

				 * This file includes the definitions for the KAISER feature.

				 * KAISER is a countermeasure against x86_64 side-channel attacks on

				 * the kernel virtual memory.  It has a shadow pgd for every process: the

				 * shadow pgd has a minimalistic kernel-set mapped, but includes the whole

				 * user memory. Within a kernel context switch, or when an interrupt is handled,

				 * the pgd is switched to the normal one. When the system switches to user mode,

				 * the shadow pgd is enabled. By this, the virtual memory caches are freed,

				 * and the user may not attack the whole kernel memory.

				 *

				 * A minimalistic kernel mapping holds the parts needed to be mapped in user

				 * mode, such as the entry/exit functions of the user space, or the stacks.

				 */

				#define KAISER_SHADOW_PGD_OFFSET 0x1000

				#ifdef __ASSEMBLY__

				#ifdef CONFIG_PAGE_TABLE_ISOLATION

				.macro _SWITCH_TO_KERNEL_CR3 reg

				movq %cr3, \reg

				andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg

				/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */

				ALTERNATIVE "", "bts $63, \reg", X86_FEATURE_PCID

				movq \reg, %cr3

				.endm

				.macro _SWITCH_TO_USER_CR3 reg regb

				/*

				 * regb must be the low byte portion of reg: because we have arranged

				 * for the low byte of the user PCID to serve as the high byte of NOFLUSH

				 * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are

				 * not enabled): so that the one register can update both memory and cr3.

				 */

				movq %cr3, \reg

				orq  PER_CPU_VAR(x86_cr3_pcid_user), \reg

				js   9f

				/* If PCID enabled, FLUSH this time, reset to NOFLUSH for next time */

				movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7)

				9:

				movq \reg, %cr3

				.endm

				.macro SWITCH_KERNEL_CR3

				ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER

				_SWITCH_TO_KERNEL_CR3 %rax

				popq %rax

				8:

				.endm

				.macro SWITCH_USER_CR3

				ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER

				_SWITCH_TO_USER_CR3 %rax %al

				popq %rax

				8:

				.endm

				.macro SWITCH_KERNEL_CR3_NO_STACK

				ALTERNATIVE "jmp 8f", \

					__stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \

					X86_FEATURE_KAISER

				_SWITCH_TO_KERNEL_CR3 %rax

				movq PER_CPU_VAR(unsafe_stack_register_backup), %rax

				8:

				.endm

				#else /* CONFIG_PAGE_TABLE_ISOLATION */

				.macro SWITCH_KERNEL_CR3

				.endm

				.macro SWITCH_USER_CR3

				.endm

				.macro SWITCH_KERNEL_CR3_NO_STACK

				.endm

				#endif /* CONFIG_PAGE_TABLE_ISOLATION */

				#else /* __ASSEMBLY__ */

				#ifdef CONFIG_PAGE_TABLE_ISOLATION

				/*

				 * Upon kernel/user mode switch, it may happen that the address

				 * space has to be switched before the registers have been

				 * stored.  To change the address space, another register is

				 * needed.  A register therefore has to be stored/restored.

				*/

				DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);

				DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);

				extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];

				extern int kaiser_enabled;

				extern void __init kaiser_check_boottime_disable(void);

				#else

				#define kaiser_enabled	0

				static inline void __init kaiser_check_boottime_disable(void) {}

				#endif /* CONFIG_PAGE_TABLE_ISOLATION */

				/*

				 * Kaiser function prototypes are needed even when CONFIG_PAGE_TABLE_ISOLATION is not set,

				 * so as to build with tests on kaiser_enabled instead of #ifdefs.

				 */

				/**

				 *  kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping

				 *  @addr: the start address of the range

				 *  @size: the size of the range

				 *  @flags: The mapping flags of the pages

				 *

				 *  The mapping is done on a global scope, so no bigger

				 *  synchronization has to be done.  The pages have to be

				 *  manually unmapped again when they are not needed any longer.

				 */

				extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);

				/**

				 *  kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping

				 *  @start: the start address of the range

				 *  @size: the size of the range

				 */

				extern void kaiser_remove_mapping(unsigned long start, unsigned long size);

				/**

				 *  kaiser_init - Initialize the shadow mapping

				 *

				 *  Most parts of the shadow mapping can be mapped upon boot

				 *  time.  Only per-process things like the thread stacks

				 *  or a new LDT have to be mapped at runtime.  These boot-

				 *  time mappings are permanent and never unmapped.

				 */

				extern void kaiser_init(void);

				#endif /* __ASSEMBLY__ */

				#endif /* _ASM_X86_KAISER_H */

									
										6

arch/x86/include/asm/mmu.h
									
										View file
										
				@ -24,12 +24,6 @@ typedef struct {

					atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */

				} mm_context_t;

				#ifdef CONFIG_SMP

				void leave_mm(int cpu);

				#else

				static inline void leave_mm(int cpu)

				{

				}

				#endif

				#endif /* _ASM_X86_MMU_H */

									
										103

arch/x86/include/asm/mmu_context.h
									
										View file
										
				@ -98,109 +98,16 @@ static inline void load_mm_ldt(struct mm_struct *mm)

				static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)

				{

				#ifdef CONFIG_SMP

					if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)

						this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);

				#endif

				}

				static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,

							     struct task_struct *tsk)

				{

					unsigned cpu = smp_processor_id();

				extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,

						      struct task_struct *tsk);

					if (likely(prev != next)) {

				#ifdef CONFIG_SMP

						this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);

						this_cpu_write(cpu_tlbstate.active_mm, next);

				#endif

						cpumask_set_cpu(cpu, mm_cpumask(next));

						/*

						 * Re-load page tables.

						 *

						 * This logic has an ordering constraint:

						 *

						 *  CPU 0: Write to a PTE for 'next'

						 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.

						 *  CPU 1: set bit 1 in next's mm_cpumask

						 *  CPU 1: load from the PTE that CPU 0 writes (implicit)

						 *

						 * We need to prevent an outcome in which CPU 1 observes

						 * the new PTE value and CPU 0 observes bit 1 clear in

						 * mm_cpumask.  (If that occurs, then the IPI will never

						 * be sent, and CPU 0's TLB will contain a stale entry.)

						 *

						 * The bad outcome can occur if either CPU's load is

						 * reordered before that CPU's store, so both CPUs must

						 * execute full barriers to prevent this from happening.

						 *

						 * Thus, switch_mm needs a full barrier between the

						 * store to mm_cpumask and any operation that could load

						 * from next->pgd.  TLB fills are special and can happen

						 * due to instruction fetches or for no reason at all,

						 * and neither LOCK nor MFENCE orders them.

						 * Fortunately, load_cr3() is serializing and gives the

						 * ordering guarantee we need.

						 *

						 */

						load_cr3(next->pgd);

						trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);

						/* Stop flush ipis for the previous mm */

						cpumask_clear_cpu(cpu, mm_cpumask(prev));

						/* Load per-mm CR4 state */

						load_mm_cr4(next);

				#ifdef CONFIG_MODIFY_LDT_SYSCALL

						/*

						 * Load the LDT, if the LDT is different.

						 *

						 * It's possible that prev->context.ldt doesn't match

						 * the LDT register.  This can happen if leave_mm(prev)

						 * was called and then modify_ldt changed

						 * prev->context.ldt but suppressed an IPI to this CPU.

						 * In this case, prev->context.ldt != NULL, because we

						 * never set context.ldt to NULL while the mm still

						 * exists.  That means that next->context.ldt !=

						 * prev->context.ldt, because mms never share an LDT.

						 */

						if (unlikely(prev->context.ldt != next->context.ldt))

							load_mm_ldt(next);

				#endif

					}

				#ifdef CONFIG_SMP

					  else {

						this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);

						BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);

						if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {

							/*

							 * On established mms, the mm_cpumask is only changed

							 * from irq context, from ptep_clear_flush() while in

							 * lazy tlb mode, and here. Irqs are blocked during

							 * schedule, protecting us from simultaneous changes.

							 */

							cpumask_set_cpu(cpu, mm_cpumask(next));

							/*

							 * We were in lazy tlb mode and leave_mm disabled

							 * tlb flush IPI delivery. We must reload CR3

							 * to make sure to use no freed page tables.

							 *

							 * As above, load_cr3() is serializing and orders TLB

							 * fills with respect to the mm_cpumask write.

							 */

							load_cr3(next->pgd);

							trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);

							load_mm_cr4(next);

							load_mm_ldt(next);

						}

					}

				#endif

				}

				extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,

							       struct task_struct *tsk);

				#define switch_mm_irqs_off switch_mm_irqs_off

				#define activate_mm(prev, next)			\

				do {						\

									
										28

arch/x86/include/asm/pgtable.h
									
										View file
										
				@ -18,6 +18,12 @@

				#ifndef __ASSEMBLY__

				#include <asm/x86_init.h>

				#ifdef CONFIG_PAGE_TABLE_ISOLATION

				extern int kaiser_enabled;

				#else

				#define kaiser_enabled 0

				#endif

				void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);

				void ptdump_walk_pgd_level_checkwx(void);

				@ -653,7 +659,17 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)

				static inline int pgd_bad(pgd_t pgd)

				{

					return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;

					pgdval_t ignore_flags = _PAGE_USER;

					/*

					 * We set NX on KAISER pgds that map userspace memory so

					 * that userspace can not meaningfully use the kernel

					 * page table by accident; it will fault on the first

					 * instruction it tries to run.  See native_set_pgd().

					 */

					if (kaiser_enabled)

						ignore_flags |= _PAGE_NX;

					return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;

				}

				static inline int pgd_none(pgd_t pgd)

				@ -855,7 +871,15 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,

				 */

				static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)

				{

				       memcpy(dst, src, count * sizeof(pgd_t));

					memcpy(dst, src, count * sizeof(pgd_t));

				#ifdef CONFIG_PAGE_TABLE_ISOLATION

					if (kaiser_enabled) {

						/* Clone the shadow pgd part as well */

						memcpy(native_get_shadow_pgd(dst),

							native_get_shadow_pgd(src),

							count * sizeof(pgd_t));

					}

				#endif

				}

				#define PTE_SHIFT ilog2(PTRS_PER_PTE)

									
										25

arch/x86/include/asm/pgtable_64.h
									
										View file
										
				@ -106,9 +106,32 @@ static inline void native_pud_clear(pud_t *pud)

					native_set_pud(pud, native_make_pud(0));

				}

				#ifdef CONFIG_PAGE_TABLE_ISOLATION

				extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);

				static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)

				{

				#ifdef CONFIG_DEBUG_VM

					/* linux/mmdebug.h may not have been included at this point */

					BUG_ON(!kaiser_enabled);

				#endif

					return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);

				}

				#else

				static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)

				{

					return pgd;

				}

				static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)

				{

					BUILD_BUG_ON(1);

					return NULL;

				}

				#endif /* CONFIG_PAGE_TABLE_ISOLATION */

				static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)

				{

					*pgdp = pgd;

					*pgdp = kaiser_set_shadow_pgd(pgdp, pgd);

				}

				static inline void native_pgd_clear(pgd_t *pgd)

									
										29

arch/x86/include/asm/pgtable_types.h
									
										View file
										
				@ -89,7 +89,7 @@

				#define _PAGE_NX	(_AT(pteval_t, 0))

				#endif

				#define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)

				#define _PAGE_PROTNONE  (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)

				#define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\

							 _PAGE_ACCESSED | _PAGE_DIRTY)

				@ -102,6 +102,33 @@

							 _PAGE_SOFT_DIRTY)

				#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)

				/* The ASID is the lower 12 bits of CR3 */

				#define X86_CR3_PCID_ASID_MASK  (_AC((1<<12)-1,UL))

				/* Mask for all the PCID-related bits in CR3: */

				#define X86_CR3_PCID_MASK       (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)

				#define X86_CR3_PCID_ASID_KERN  (_AC(0x0,UL))

				#if defined(CONFIG_PAGE_TABLE_ISOLATION) && defined(CONFIG_X86_64)

				/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */

				#define X86_CR3_PCID_ASID_USER	(_AC(0x80,UL))

				#define X86_CR3_PCID_KERN_FLUSH		(X86_CR3_PCID_ASID_KERN)

				#define X86_CR3_PCID_USER_FLUSH		(X86_CR3_PCID_ASID_USER)

				#define X86_CR3_PCID_KERN_NOFLUSH	(X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)

				#define X86_CR3_PCID_USER_NOFLUSH	(X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)

				#else

				#define X86_CR3_PCID_ASID_USER  (_AC(0x0,UL))

				/*

				 * PCIDs are unsupported on 32-bit and none of these bits can be

				 * set in CR3:

				 */

				#define X86_CR3_PCID_KERN_FLUSH		(0)

				#define X86_CR3_PCID_USER_FLUSH		(0)

				#define X86_CR3_PCID_KERN_NOFLUSH	(0)

				#define X86_CR3_PCID_USER_NOFLUSH	(0)

				#endif

				/*

				 * The cache modes defined here are used to translate between pure SW usage

				 * and the HW defined cache mode bits and/or PAT entries.

									
										2

arch/x86/include/asm/processor.h
									
										View file
										
				@ -305,7 +305,7 @@ struct tss_struct {

				} ____cacheline_aligned;

				DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);

				DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);

				#ifdef CONFIG_X86_32

				DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);

									
										9

arch/x86/include/asm/pvclock.h
									
										View file
										
				@ -4,6 +4,15 @@

				#include <linux/clocksource.h>

				#include <asm/pvclock-abi.h>

				#ifdef CONFIG_PARAVIRT_CLOCK

				extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void);

				#else

				static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)

				{

					return NULL;

				}

				#endif

				/* some helper functions for xen and kvm pv clock sources */

				cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);

				u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);

									
										232

arch/x86/include/asm/tlbflush.h
									
										View file
										
				@ -6,6 +6,55 @@

				#include <asm/processor.h>

				#include <asm/special_insns.h>

				#include <asm/smp.h>

				static inline void __invpcid(unsigned long pcid, unsigned long addr,

							     unsigned long type)

				{

					struct { u64 d[2]; } desc = { { pcid, addr } };

					/*

					 * The memory clobber is because the whole point is to invalidate

					 * stale TLB entries and, especially if we're flushing global

					 * mappings, we don't want the compiler to reorder any subsequent

					 * memory accesses before the TLB flush.

					 *

					 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and

					 * invpcid (%rcx), %rax in long mode.

					 */

					asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"

						      : : "m" (desc), "a" (type), "c" (&desc) : "memory");

				}

				#define INVPCID_TYPE_INDIV_ADDR		0

				#define INVPCID_TYPE_SINGLE_CTXT	1

				#define INVPCID_TYPE_ALL_INCL_GLOBAL	2

				#define INVPCID_TYPE_ALL_NON_GLOBAL	3

				/* Flush all mappings for a given pcid and addr, not including globals. */

				static inline void invpcid_flush_one(unsigned long pcid,

								     unsigned long addr)

				{

					__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);

				}

				/* Flush all mappings for a given PCID, not including globals. */

				static inline void invpcid_flush_single_context(unsigned long pcid)

				{

					__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);

				}

				/* Flush all mappings, including globals, for all PCIDs. */

				static inline void invpcid_flush_all(void)

				{

					__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);

				}

				/* Flush all mappings for all PCIDs except globals. */

				static inline void invpcid_flush_all_nonglobals(void)

				{

					__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);

				}

				#ifdef CONFIG_PARAVIRT

				#include <asm/paravirt.h>

				@ -16,10 +65,8 @@

				#endif

				struct tlb_state {

				#ifdef CONFIG_SMP

					struct mm_struct *active_mm;

					int state;

				#endif

					/*

					 * Access to this CR4 shadow and to H/W CR4 is protected by

				@ -84,6 +131,24 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)

					cr4_set_bits(mask);

				}

				/*

				 * Declare a couple of kaiser interfaces here for convenience,

				 * to avoid the need for asm/kaiser.h in unexpected places.

				 */

				#ifdef CONFIG_PAGE_TABLE_ISOLATION

				extern int kaiser_enabled;

				extern void kaiser_setup_pcid(void);

				extern void kaiser_flush_tlb_on_return_to_user(void);

				#else

				#define kaiser_enabled 0

				static inline void kaiser_setup_pcid(void)

				{

				}

				static inline void kaiser_flush_tlb_on_return_to_user(void)

				{

				}

				#endif

				static inline void __native_flush_tlb(void)

				{

					/*

				@ -92,6 +157,8 @@ static inline void __native_flush_tlb(void)

					 * back:

					 */

					preempt_disable();

					if (kaiser_enabled)

						kaiser_flush_tlb_on_return_to_user();

					native_write_cr3(native_read_cr3());

					preempt_enable();

				}

				@ -101,39 +168,84 @@ static inline void __native_flush_tlb_global_irq_disabled(void)

					unsigned long cr4;

					cr4 = this_cpu_read(cpu_tlbstate.cr4);

					/* clear PGE */

					native_write_cr4(cr4 & ~X86_CR4_PGE);

					/* write old PGE again and flush TLBs */

					native_write_cr4(cr4);

					if (cr4 & X86_CR4_PGE) {

						/* clear PGE and flush TLB of all entries */

						native_write_cr4(cr4 & ~X86_CR4_PGE);

						/* restore PGE as it was before */

						native_write_cr4(cr4);

					} else {

						/* do it with cr3, letting kaiser flush user PCID */

						__native_flush_tlb();

					}

				}

				static inline void __native_flush_tlb_global(void)

				{

					unsigned long flags;

					if (this_cpu_has(X86_FEATURE_INVPCID)) {

						/*

						 * Using INVPCID is considerably faster than a pair of writes

						 * to CR4 sandwiched inside an IRQ flag save/restore.

						 *

					 	 * Note, this works with CR4.PCIDE=0 or 1.

						 */

						invpcid_flush_all();

						return;

					}

					/*

					 * Read-modify-write to CR4 - protect it from preemption and

					 * from interrupts. (Use the raw variant because this code can

					 * be called from deep inside debugging code.)

					 */

					raw_local_irq_save(flags);

					__native_flush_tlb_global_irq_disabled();

					raw_local_irq_restore(flags);

				}

				static inline void __native_flush_tlb_single(unsigned long addr)

				{

					asm volatile("invlpg (%0)" ::"r" (addr) : "memory");

					/*

					 * SIMICS #GP's if you run INVPCID with type 2/3

					 * and X86_CR4_PCIDE clear.  Shame!

					 *

					 * The ASIDs used below are hard-coded.  But, we must not

					 * call invpcid(type=1/2) before CR4.PCIDE=1.  Just call

					 * invlpg in the case we are called early.

					 */

					if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {

						if (kaiser_enabled)

							kaiser_flush_tlb_on_return_to_user();

						asm volatile("invlpg (%0)" ::"r" (addr) : "memory");

						return;

					}

					/* Flush the address out of both PCIDs. */

					/*

					 * An optimization here might be to determine addresses

					 * that are only kernel-mapped and only flush the kernel

					 * ASID.  But, userspace flushes are probably much more

					 * important performance-wise.

					 *

					 * Make sure to do only a single invpcid when KAISER is

					 * disabled and we have only a single ASID.

					 */

					if (kaiser_enabled)

						invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);

					invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);

				}

				static inline void __flush_tlb_all(void)

				{

					if (cpu_has_pge)

						__flush_tlb_global();

					else

						__flush_tlb();

					__flush_tlb_global();

					/*

					 * Note: if we somehow had PCID but not PGE, then this wouldn't work --

					 * we'd end up flushing kernel translations for the current ASID but

					 * we might fail to flush kernel translations for other cached ASIDs.

					 *

					 * To avoid this issue, we force PCID off if PGE is off.

					 */

				}

				static inline void __flush_tlb_one(unsigned long addr)

				@ -147,7 +259,6 @@ static inline void __flush_tlb_one(unsigned long addr)

				/*

				 * TLB flushing:

				 *

				 *  - flush_tlb() flushes the current mm struct TLBs

				 *  - flush_tlb_all() flushes all processes' TLBs

				 *  - flush_tlb_mm(mm) flushes the specified mm context TLBs

				 *  - flush_tlb_page(vma, vmaddr) flushes one page

				@ -159,84 +270,6 @@ static inline void __flush_tlb_one(unsigned long addr)

				 * and page-granular flushes are available only on i486 and up.

				 */

				#ifndef CONFIG_SMP

				/* "_up" is for UniProcessor.

				 *

				 * This is a helper for other header functions.  *Not* intended to be called

				 * directly.  All global TLB flushes need to either call this, or to bump the

				 * vm statistics themselves.

				 */

				static inline void __flush_tlb_up(void)

				{

					count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);

					__flush_tlb();

				}

				static inline void flush_tlb_all(void)

				{

					count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);

					__flush_tlb_all();

				}

				static inline void flush_tlb(void)

				{

					__flush_tlb_up();

				}

				static inline void local_flush_tlb(void)

				{

					__flush_tlb_up();

				}

				static inline void flush_tlb_mm(struct mm_struct *mm)

				{

					if (mm == current->active_mm)

						__flush_tlb_up();

				}

				static inline void flush_tlb_page(struct vm_area_struct *vma,

								  unsigned long addr)

				{

					if (vma->vm_mm == current->active_mm)

						__flush_tlb_one(addr);

				}

				static inline void flush_tlb_range(struct vm_area_struct *vma,

								   unsigned long start, unsigned long end)

				{

					if (vma->vm_mm == current->active_mm)

						__flush_tlb_up();

				}

				static inline void flush_tlb_mm_range(struct mm_struct *mm,

					   unsigned long start, unsigned long end, unsigned long vmflag)

				{

					if (mm == current->active_mm)

						__flush_tlb_up();

				}

				static inline void native_flush_tlb_others(const struct cpumask *cpumask,

									   struct mm_struct *mm,

									   unsigned long start,

									   unsigned long end)

				{

				}

				static inline void reset_lazy_tlbstate(void)

				{

				}

				static inline void flush_tlb_kernel_range(unsigned long start,

									  unsigned long end)

				{

					flush_tlb_all();

				}

				#else  /* SMP */

				#include <asm/smp.h>

				#define local_flush_tlb() __flush_tlb()

				#define flush_tlb_mm(mm)	flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)

				@ -245,13 +278,14 @@ static inline void flush_tlb_kernel_range(unsigned long start,

						flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)

				extern void flush_tlb_all(void);

				extern void flush_tlb_current_task(void);

				extern void flush_tlb_page(struct vm_area_struct *, unsigned long);

				extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,

								unsigned long end, unsigned long vmflag);

				extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);

				#define flush_tlb()	flush_tlb_current_task()

				/*
				 * Flush a single page of @vma's address space.
				 *
				 * Implemented as a ranged flush on vma->vm_mm covering @a up to
				 * @a + PAGE_SIZE, passing VM_NONE as the vmflag argument.
				 */
				static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)

				{

					flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);

				}

				void native_flush_tlb_others(const struct cpumask *cpumask,

								struct mm_struct *mm,

				@ -266,14 +300,6 @@ static inline void reset_lazy_tlbstate(void)

					this_cpu_write(cpu_tlbstate.active_mm, &init_mm);

				}

				#endif	/* SMP */

				/* Not inlined due to inc_irq_stat not being defined yet */

				#define flush_tlb_local() {		\

					inc_irq_stat(irq_tlb_count);	\

					local_flush_tlb();		\

				}

				#ifndef CONFIG_PARAVIRT

				#define flush_tlb_others(mask, mm, start, end)	\

					native_flush_tlb_others(mask, mm, start, end)

									
										1

arch/x86/include/asm/vdso.h
									
										View file
										
				@ -22,6 +22,7 @@ struct vdso_image {

					long sym_vvar_page;

					long sym_hpet_page;

					long sym_pvclock_page;

					long sym_VDSO32_NOTE_MASK;

					long sym___kernel_sigreturn;

					long sym___kernel_rt_sigreturn;

									
										3

arch/x86/include/uapi/asm/processor-flags.h
									
										View file
										
				@ -77,7 +77,8 @@

				#define X86_CR3_PWT		_BITUL(X86_CR3_PWT_BIT)

				#define X86_CR3_PCD_BIT		4 /* Page Cache Disable */

				#define X86_CR3_PCD		_BITUL(X86_CR3_PCD_BIT)

				#define X86_CR3_PCID_MASK	_AC(0x00000fff,UL) /* PCID Mask */

				#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */

				#define X86_CR3_PCID_NOFLUSH    _BITULL(X86_CR3_PCID_NOFLUSH_BIT)

				/*

				 * Intel CPU features in CR4

									
										18

arch/x86/kernel/Makefile
									
										View file
										
				@ -16,9 +16,21 @@ CFLAGS_REMOVE_ftrace.o = -pg

				CFLAGS_REMOVE_early_printk.o = -pg

				endif

				KASAN_SANITIZE_head$(BITS).o := n

				KASAN_SANITIZE_dumpstack.o := n

				KASAN_SANITIZE_dumpstack_$(BITS).o := n

				KASAN_SANITIZE_head$(BITS).o				:= n

				KASAN_SANITIZE_dumpstack.o				:= n

				KASAN_SANITIZE_dumpstack_$(BITS).o			:= n

				KASAN_SANITIZE_stacktrace.o := n

				OBJECT_FILES_NON_STANDARD_head_$(BITS).o		:= y

				OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o	:= y

				OBJECT_FILES_NON_STANDARD_mcount_$(BITS).o		:= y

				OBJECT_FILES_NON_STANDARD_test_nx.o			:= y

				# If instrumentation of this dir is enabled, boot hangs during first second.

				# Probably could be more selective here, but note that files related to irqs,

				# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to

				# non-deterministic coverage.

				KCOV_INSTRUMENT		:= n

				CFLAGS_irq.o := -I$(src)/../include/asm/trace

									
										9

arch/x86/kernel/acpi/wakeup_64.S
									
										View file
										
				@ -107,6 +107,15 @@ ENTRY(do_suspend_lowlevel)

					movq	pt_regs_r14(%rax), %r14

					movq	pt_regs_r15(%rax), %r15

				#ifdef CONFIG_KASAN

					/*

					 * The suspend path may have poisoned some areas deeper in the stack,

					 * which we now need to unpoison.

					 */

					movq	%rsp, %rdi

					call	kasan_unpoison_task_stack_below

				#endif

					xorl	%eax, %eax

					addq	$8, %rsp

					jmp	restore_processor_state

									
										4

arch/x86/kernel/apic/Makefile
									
										View file
										
				@ -2,6 +2,10 @@

				# Makefile for local APIC drivers and for the IO-APIC code

				#

				# Leads to non-deterministic coverage that is not a function of syscall inputs.

				# In particular, smp_apic_timer_interrupt() is called in random places.

				KCOV_INSTRUMENT		:= n

				obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o vector.o

				obj-y				+= hw_nmi.o

									
										4

arch/x86/kernel/cpu/Makefile
									
										View file
										
				@ -8,6 +8,10 @@ CFLAGS_REMOVE_common.o = -pg

				CFLAGS_REMOVE_perf_event.o = -pg

				endif

				# If these files are instrumented, boot hangs during the first second.

				KCOV_INSTRUMENT_common.o := n

				KCOV_INSTRUMENT_perf_event.o := n

				# Make sure load_percpu_segment has no stackprotector

				nostackp := $(call cc-option, -fno-stack-protector)

				CFLAGS_common.o		:= $(nostackp)

									
										8

arch/x86/kernel/cpu/bugs.c
									
										View file
										
				@ -19,6 +19,14 @@

				void __init check_bugs(void)

				{

				#ifdef CONFIG_X86_32

					/*

					 * Regardless of whether PCID is enumerated, the SDM says

					 * that it can't be enabled in 32-bit mode.

					 */

					setup_clear_cpu_cap(X86_FEATURE_PCID);

				#endif

					identify_boot_cpu();

				#ifndef CONFIG_SMP

					pr_info("CPU: ");

									
										82

arch/x86/kernel/cpu/common.c
									
										View file
										
				@ -92,7 +92,7 @@ static const struct cpu_dev default_cpu = {

				static const struct cpu_dev *this_cpu = &default_cpu;

				DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {

				DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = {

				#ifdef CONFIG_X86_64

					/*

					 * We need valid kernel segments for data and code in long mode too

				@ -162,6 +162,40 @@ static int __init x86_mpx_setup(char *s)

				}

				__setup("nompx", x86_mpx_setup);

				#ifdef CONFIG_X86_64

				/*
				 * Handler for the "nopcid" option (registered via __setup()).
				 *
				 * Returns 0 when characters follow the keyword (only an exact match is
				 * accepted), 1 otherwise.  On an exact match the PCID capability is
				 * cleared and a message is logged, but only if the boot CPU reports
				 * PCID in the first place.
				 */
				static int __init x86_pcid_setup(char *s)
				{
					/* Trailing characters after the keyword: not an exact match. */
					if (*s != '\0')
						return 0;

					/* Stay silent, and change nothing, when PCID is not present. */
					if (boot_cpu_has(X86_FEATURE_PCID)) {
						setup_clear_cpu_cap(X86_FEATURE_PCID);
						pr_info("nopcid: PCID feature disabled\n");
					}

					return 1;
				}

				__setup("nopcid", x86_pcid_setup);

				#endif

				/*
				 * Handler for the "noinvpcid" option (registered via early_param()).
				 *
				 * Returns -EINVAL if a parameter string was supplied, 0 otherwise.
				 * When the boot CPU reports INVPCID, the capability is cleared and a
				 * message is logged; when it does not, nothing is done or printed.
				 */
				static int __init x86_noinvpcid_setup(char *s)
				{
					/* The option takes no argument at all. */
					if (s)
						return -EINVAL;

					/* Only touch (and only report) a feature that actually exists. */
					if (boot_cpu_has(X86_FEATURE_INVPCID)) {
						setup_clear_cpu_cap(X86_FEATURE_INVPCID);
						pr_info("noinvpcid: INVPCID feature disabled\n");
					}

					return 0;
				}

				early_param("noinvpcid", x86_noinvpcid_setup);

				#ifdef CONFIG_X86_32

				static int cachesize_override = -1;

				static int disable_x86_serial_nr = 1;

				@ -287,6 +321,39 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)

					}

				}

				/*
				 * Decide whether PCID can be used on the CPU described by @c.
				 *
				 * With PCID present and either PGE present or kaiser_enabled set,
				 * CR4.PCIDE is turned on (and INVPCID_SINGLE is advertised when
				 * INVPCID is available).  With PCID present but neither of those,
				 * the PCID capability is cleared instead.  kaiser_setup_pcid() is
				 * called at the end on every path.
				 */
				static void setup_pcid(struct cpuinfo_x86 *c)
				{
					if (!cpu_has(c, X86_FEATURE_PCID))
						goto out;

					if (!cpu_has(c, X86_FEATURE_PGE) && !kaiser_enabled) {
						/*
						 * flush_tlb_all(), as currently implemented, won't
						 * work if PCID is on but PGE is not.  That combination
						 * doesn't exist on real hardware, so it isn't worth
						 * fully supporting; dropping PCID simply avoids
						 * corrupting data on an improperly configured VM.
						 */
						clear_cpu_cap(c, X86_FEATURE_PCID);
						goto out;
					}

					cr4_set_bits(X86_CR4_PCIDE);

					/*
					 * INVPCID has two "groups" of types:
					 * 1/2: Invalidate an individual address
					 * 3/4: Invalidate all contexts
					 *
					 * 1/2 take a PCID, but 3/4 do not, so 3/4 ignore the PCID
					 * argument in the descriptor.  Types 1/2 must not be issued
					 * with a non-zero PCID before the cr4_set_bits() above has
					 * run, which is why the capability is only set here.
					 */
					if (cpu_has(c, X86_FEATURE_INVPCID))
						set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE);

				out:
					/* Reached whether or not PCID ended up enabled. */
					kaiser_setup_pcid();
				}

				/*

				 * Some CPU features depend on higher CPUID levels, which may not always

				 * be available due to CPUID level capping or broken virtualization

				@ -918,6 +985,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)

					setup_smep(c);

					setup_smap(c);

					/* Set up PCID */

					setup_pcid(c);

					/*

					 * The vendor-specific functions might have changed features.

					 * Now we do "generic changes."

				@ -1173,7 +1243,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {

					  [DEBUG_STACK - 1]			= DEBUG_STKSZ

				};

				static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks

				DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks

					[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);

				/* May not be marked __init: used by software suspend */

				@ -1336,6 +1406,14 @@ void cpu_init(void)

					 * try to read it.

					 */

					cr4_init_shadow();

					if (!kaiser_enabled) {

						/*

						 * secondary_startup_64() deferred setting PGE in cr4:

						 * probe_page_size_mask() sets it on the boot cpu,

						 * but it needs to be set on each secondary cpu.

						 */

						cr4_set_bits(X86_CR4_PGE);

					}

					/*

					 * Load microcode on this cpu if a valid microcode is available.

									
										57

arch/x86/kernel/cpu/perf_event_intel_ds.c
									
										View file
										
				@ -2,11 +2,15 @@

				#include <linux/types.h>

				#include <linux/slab.h>

				#include <asm/kaiser.h>

				#include <asm/perf_event.h>

				#include <asm/insn.h>

				#include "perf_event.h"

				static

				DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store);

				/* The size of a BTS record in bytes: */

				#define BTS_RECORD_SIZE		24

				@ -268,6 +272,39 @@ void fini_debug_store_on_cpu(int cpu)

				static DEFINE_PER_CPU(void *, insn_buffer);

				/*
				 * Allocate a zeroed buffer of @size bytes on @node.
				 *
				 * With CONFIG_PAGE_TABLE_ISOLATION the buffer comes from the page
				 * allocator and is additionally registered through
				 * kaiser_add_mapping() with __PAGE_KERNEL; if that registration
				 * fails the pages are released again.  Without it, this is a plain
				 * zeroing kmalloc_node().
				 *
				 * Returns the buffer, or NULL on failure.
				 */
				static void *dsalloc(size_t size, gfp_t flags, int node)
				{
				#ifdef CONFIG_PAGE_TABLE_ISOLATION
					unsigned int order = get_order(size);
					unsigned long vaddr;
					struct page *pg;

					pg = __alloc_pages_node(node, flags | __GFP_ZERO, order);
					if (!pg)
						return NULL;

					vaddr = (unsigned long)page_address(pg);
					if (kaiser_add_mapping(vaddr, size, __PAGE_KERNEL) >= 0)
						return (void *)vaddr;

					/* Mapping failed: give the pages back and report failure. */
					__free_pages(pg, order);
					return NULL;
				#else
					return kmalloc_node(size, flags | __GFP_ZERO, node);
				#endif
				}

				/*
				 * Release a buffer obtained from dsalloc().
				 *
				 * @size must be passed again because the
				 * CONFIG_PAGE_TABLE_ISOLATION path needs it both to undo the
				 * kaiser mapping and to compute the page order to free.  A NULL
				 * @buffer is ignored on that path; otherwise the buffer goes
				 * straight to kfree().
				 */
				static void dsfree(const void *buffer, size_t size)
				{
				#ifdef CONFIG_PAGE_TABLE_ISOLATION
					const unsigned long vaddr = (unsigned long)buffer;

					if (buffer) {
						kaiser_remove_mapping(vaddr, size);
						free_pages(vaddr, get_order(size));
					}
				#else
					kfree(buffer);
				#endif
				}

				static int alloc_pebs_buffer(int cpu)

				{

					struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

				@ -278,7 +315,7 @@ static int alloc_pebs_buffer(int cpu)

					if (!x86_pmu.pebs)

						return 0;

					buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);

					buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);

					if (unlikely(!buffer))

						return -ENOMEM;

				@ -289,7 +326,7 @@ static int alloc_pebs_buffer(int cpu)

					if (x86_pmu.intel_cap.pebs_format < 2) {

						ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);

						if (!ibuffer) {

							kfree(buffer);

							dsfree(buffer, x86_pmu.pebs_buffer_size);

							return -ENOMEM;

						}

						per_cpu(insn_buffer, cpu) = ibuffer;

				@ -315,7 +352,8 @@ static void release_pebs_buffer(int cpu)

					kfree(per_cpu(insn_buffer, cpu));

					per_cpu(insn_buffer, cpu) = NULL;

					kfree((void *)(unsigned long)ds->pebs_buffer_base);

					dsfree((void *)(unsigned long)ds->pebs_buffer_base,

							x86_pmu.pebs_buffer_size);

					ds->pebs_buffer_base = 0;

				}

				@ -329,7 +367,7 @@ static int alloc_bts_buffer(int cpu)

					if (!x86_pmu.bts)

						return 0;

					buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);

					buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);

					if (unlikely(!buffer)) {

						WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);

						return -ENOMEM;

				@ -355,19 +393,15 @@ static void release_bts_buffer(int cpu)

					if (!ds || !x86_pmu.bts)

						return;

					kfree((void *)(unsigned long)ds->bts_buffer_base);

					dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE);

					ds->bts_buffer_base = 0;

				}

				static int alloc_ds_buffer(int cpu)

				{

					int node = cpu_to_node(cpu);

					struct debug_store *ds;

					ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);

					if (unlikely(!ds))

						return -ENOMEM;

					struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu);

					memset(ds, 0, sizeof(*ds));

					per_cpu(cpu_hw_events, cpu).ds = ds;

					return 0;

				@ -381,7 +415,6 @@ static void release_ds_buffer(int cpu)

						return;

					per_cpu(cpu_hw_events, cpu).ds = NULL;

					kfree(ds);

				}

				void release_ds_buffers(void)

									
										10

arch/x86/kernel/espfix_64.c
									
										View file
										
				@ -41,6 +41,7 @@

				#include <asm/pgalloc.h>

				#include <asm/setup.h>

				#include <asm/espfix.h>

				#include <asm/kaiser.h>

				/*

				 * Note: we only need 6*8 = 48 bytes for the espfix stack, but round

				@ -126,6 +127,15 @@ void __init init_espfix_bsp(void)

					/* Install the espfix pud into the kernel page directory */

					pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];

					pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);

					/*

					 * Just copy the top-level PGD that is mapping the espfix

					 * area to ensure it is mapped into the shadow user page

					 * tables.

					 */

					if (kaiser_enabled) {

						set_pgd(native_get_shadow_pgd(pgd_p),

							__pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));

					}

					/* Randomize the locations */

					init_espfix_random();

									
										35

arch/x86/kernel/head_64.S
									
										View file
										
				@ -183,8 +183,8 @@ ENTRY(secondary_startup_64)

					movq	$(init_level4_pgt - __START_KERNEL_map), %rax

				1:

					/* Enable PAE mode and PGE */

					movl	$(X86_CR4_PAE | X86_CR4_PGE), %ecx

					/* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */

					movl	$(X86_CR4_PAE | X86_CR4_PSE), %ecx

					movq	%rcx, %cr4

					/* Setup early boot stage 4 level pagetables. */

				@ -441,6 +441,27 @@ early_idt_ripmsg:

					.balign	PAGE_SIZE; \

				GLOBAL(name)

				#ifdef CONFIG_PAGE_TABLE_ISOLATION

				/*

				 * Each PGD needs to be 8k long and 8k aligned.  We do not

				 * ever go out to userspace with these, so we do not

				 * strictly *need* the second page, but this allows us to

				 * have a single set_pgd() implementation that does not

				 * need to worry about whether it has 4k or 8k to work

				 * with.

				 *

				 * This ensures PGDs are 8k long:

				 */

				#define KAISER_USER_PGD_FILL	512

				/* This ensures they are 8k-aligned: */

				#define NEXT_PGD_PAGE(name) \

					.balign 2 * PAGE_SIZE; \

				GLOBAL(name)

				#else

				#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)

				#define KAISER_USER_PGD_FILL	0

				#endif

				/* Automate the creation of 1 to 1 mapping pmd entries */

				#define PMDS(START, PERM, COUNT)			\

					i = 0 ;						\

				@ -450,9 +471,10 @@ GLOBAL(name)

					.endr

					__INITDATA

				NEXT_PAGE(early_level4_pgt)

				NEXT_PGD_PAGE(early_level4_pgt)

					.fill	511,8,0

					.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE

					.fill	KAISER_USER_PGD_FILL,8,0

				NEXT_PAGE(early_dynamic_pgts)

					.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0

				@ -460,16 +482,18 @@ NEXT_PAGE(early_dynamic_pgts)

					.data

				#ifndef CONFIG_XEN

				NEXT_PAGE(init_level4_pgt)

				NEXT_PGD_PAGE(init_level4_pgt)

					.fill	512,8,0

					.fill	KAISER_USER_PGD_FILL,8,0

				#else

				NEXT_PAGE(init_level4_pgt)

				NEXT_PGD_PAGE(init_level4_pgt)

					.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE

					.org    init_level4_pgt + L4_PAGE_OFFSET*8, 0

					.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE

					.org    init_level4_pgt + L4_START_KERNEL*8, 0

					/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */

					.quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE

					.fill	KAISER_USER_PGD_FILL,8,0

				NEXT_PAGE(level3_ident_pgt)

					.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE

				@ -480,6 +504,7 @@ NEXT_PAGE(level2_ident_pgt)

					 */

					PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)

				#endif

					.fill	KAISER_USER_PGD_FILL,8,0

				NEXT_PAGE(level3_kernel_pgt)

					.fill	L3_START_KERNEL,8,0

									
										2

arch/x86/kernel/hpet.c
									
										View file
										
				@ -353,7 +353,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer)

						irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));

						irq_domain_activate_irq(irq_get_irq_data(hdev->irq));

						disable_irq(hdev->irq);

						disable_hardirq(hdev->irq);

						irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));

						enable_irq(hdev->irq);

					}

									
										3

arch/x86/kernel/irq.c
									
										View file
										
				@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)

					seq_puts(p, "  Rescheduling interrupts\n");

					seq_printf(p, "%*s: ", prec, "CAL");

					for_each_online_cpu(j)

						seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -

									irq_stats(j)->irq_tlb_count);

						seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);

					seq_puts(p, "  Function call interrupts\n");

					seq_printf(p, "%*s: ", prec, "TLB");

					for_each_online_cpu(j)