Merge "Merge android-4.4.110 (5cc8c2e
) into msm-4.4"
This commit is contained in:
commit
54b0b96ecd
486 changed files with 6447 additions and 2136 deletions
|
@@ -12,8 +12,7 @@ KASAN uses compile-time instrumentation for checking every memory access,
 therefore you will need a GCC version 4.9.2 or later. GCC 5.0 or later is
 required for detection of out-of-bounds accesses to stack or global variables.
 
-Currently KASAN is supported only for x86_64 architecture and requires the
-kernel to be built with the SLUB allocator.
+Currently KASAN is supported only for x86_64 architecture.
 
 1. Usage
 ========
@@ -27,7 +26,7 @@ inline are compiler instrumentation types. The former produces smaller binary
 the latter is 1.1 - 2 times faster. Inline instrumentation requires a GCC
 version 5.0 or later.
 
-Currently KASAN works only with the SLUB memory allocator.
+KASAN works with both SLUB and SLAB memory allocators.
 For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
 
 To disable instrumentation for specific files or directories, add a line
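
As a quick usage sketch of the options discussed in the hunk above (Kconfig symbol names assumed from this kernel's KASAN support; outline vs. inline selects the instrumentation type described there):

    CONFIG_KASAN=y
    CONFIG_KASAN_OUTLINE=y     # or CONFIG_KASAN_INLINE=y with GCC 5.0 or later
    CONFIG_STACKTRACE=y        # nicer reports
    CONFIG_SLUB=y              # SLAB also works after this change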

Documentation/kcov.txt | 111 (new file)
@@ -0,0 +1,111 @@
kcov: code coverage for fuzzing
===============================

kcov exposes kernel code coverage information in a form suitable for coverage-
guided fuzzing (randomized testing). Coverage data of a running kernel is
exported via the "kcov" debugfs file. Coverage collection is enabled on a task
basis, and thus it can capture precise coverage of a single system call.

Note that kcov does not aim to collect as much coverage as possible. It aims
to collect more or less stable coverage that is a function of syscall inputs.
To achieve this goal it does not collect coverage in soft/hard interrupts,
and instrumentation of some inherently non-deterministic parts of the kernel
is disabled (e.g. scheduler, locking).

Usage:
======

Configure the kernel with:

        CONFIG_KCOV=y

CONFIG_KCOV requires gcc built on revision 231296 or later.
Profiling data will only become accessible once debugfs has been mounted:

        mount -t debugfs none /sys/kernel/debug

The following program demonstrates kcov usage from within a test program:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>

#define KCOV_INIT_TRACE         _IOR('c', 1, unsigned long)
#define KCOV_ENABLE             _IO('c', 100)
#define KCOV_DISABLE            _IO('c', 101)
#define COVER_SIZE              (64<<10)

int main(int argc, char **argv)
{
        int fd;
        unsigned long *cover, n, i;

        /* A single file descriptor allows coverage collection on a single
         * thread.
         */
        fd = open("/sys/kernel/debug/kcov", O_RDWR);
        if (fd == -1)
                perror("open"), exit(1);
        /* Setup trace mode and trace size. */
        if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE))
                perror("ioctl"), exit(1);
        /* Mmap buffer shared between kernel- and user-space. */
        cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long),
                                     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if ((void*)cover == MAP_FAILED)
                perror("mmap"), exit(1);
        /* Enable coverage collection on the current thread. */
        if (ioctl(fd, KCOV_ENABLE, 0))
                perror("ioctl"), exit(1);
        /* Reset coverage from the tail of the ioctl() call. */
        __atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED);
        /* That's the target syscall. */
        read(-1, NULL, 0);
        /* Read number of PCs collected. */
        n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
        for (i = 0; i < n; i++)
                printf("0x%lx\n", cover[i + 1]);
        /* Disable coverage collection for the current thread. After this call
         * coverage can be enabled for a different thread.
         */
        if (ioctl(fd, KCOV_DISABLE, 0))
                perror("ioctl"), exit(1);
        /* Free resources. */
        if (munmap(cover, COVER_SIZE * sizeof(unsigned long)))
                perror("munmap"), exit(1);
        if (close(fd))
                perror("close"), exit(1);
        return 0;
}

After piping through addr2line the output of the program looks as follows:

SyS_read
fs/read_write.c:562
__fdget_pos
fs/file.c:774
__fget_light
fs/file.c:746
__fget_light
fs/file.c:750
__fget_light
fs/file.c:760
__fdget_pos
fs/file.c:784
SyS_read
fs/read_write.c:562
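
One way to produce the symbolized listing above is to pipe the raw PCs printed by the test program into addr2line against the matching vmlinux (an illustrative pipeline; the binary names are assumptions):

        ./kcov_test | addr2line -i -f -e vmlinux

Here -f prints function names, -i expands inlined frames (which is why __fget_light shows up under __fdget_pos), and -e points at the uncompressed kernel image the coverage was collected on.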

If a program needs to collect coverage from several threads (independently),
it needs to open /sys/kernel/debug/kcov in each thread separately.

The interface is fine-grained to allow efficient forking of test processes.
That is, a parent process opens /sys/kernel/debug/kcov, enables trace mode,
mmaps coverage buffer and then forks child processes in a loop. Child processes
only need to enable coverage (disable happens automatically on thread end).
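
That forking pattern might look like the following sketch, reusing fd and cover from the program above (error handling omitted; run_one_test_input() is a hypothetical test body):

        /* Parent has already done open(), KCOV_INIT_TRACE and mmap(). */
        for (;;) {
                pid_t pid = fork();
                if (pid == 0) {
                        /* Child only enables coverage for itself. */
                        ioctl(fd, KCOV_ENABLE, 0);
                        __atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED);
                        run_one_test_input();
                        _exit(0);       /* disable happens automatically */
                }
                waitpid(pid, NULL, 0);
        }
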
@@ -2535,8 +2535,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	nointroute	[IA-64]
 
+	noinvpcid	[X86] Disable the INVPCID cpu feature.
+
 	nojitter	[IA-64] Disables jitter checking for ITC timers.
 
+	nopti		[X86-64] Disable KAISER isolation of kernel from user.
+
 	no-kvmclock	[X86,KVM] Disable paravirtualized KVM clock driver
 
 	no-kvmapf	[X86,KVM] Disable paravirtualized asynchronous page
@@ -2569,6 +2573,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	nopat		[X86] Disable PAT (page attribute table extension of
 			pagetables) support.
 
+	nopcid		[X86-64] Disable the PCID cpu feature.
+
 	norandmaps	Don't use address space randomization. Equivalent to
 			echo 0 > /proc/sys/kernel/randomize_va_space
@@ -3071,6 +3077,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	pt.		[PARIDE]
 			See Documentation/blockdev/paride.txt.
 
+	pti=		[X86_64]
+			Control KAISER user/kernel address space isolation:
+			on - enable
+			off - disable
+			auto - default setting
+
 	pty.legacy_count=
 			[KNL] Number of legacy pty's. Overwrites compiled-in
 			default number.
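
As a usage sketch grounded in the two entries above, either of the following appended to the kernel command line disables the KAISER isolation (bootloader syntax for appending parameters varies):

    pti=off
    nopti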

MAINTAINERS | 14
@@ -6002,6 +6002,20 @@ S:	Maintained
 F:	Documentation/hwmon/k8temp
 F:	drivers/hwmon/k8temp.c
 
+KASAN
+M:	Andrey Ryabinin <aryabinin@virtuozzo.com>
+R:	Alexander Potapenko <glider@google.com>
+R:	Dmitry Vyukov <dvyukov@google.com>
+L:	kasan-dev@googlegroups.com
+S:	Maintained
+F:	arch/*/include/asm/kasan.h
+F:	arch/*/mm/kasan_init*
+F:	Documentation/kasan.txt
+F:	include/linux/kasan*.h
+F:	lib/test_kasan.c
+F:	mm/kasan/
+F:	scripts/Makefile.kasan
+
 KCONFIG
 M:	"Yann E. MORIN" <yann.morin.1998@free.fr>
 L:	linux-kbuild@vger.kernel.org

Makefile | 16
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 105
+SUBLEVEL = 110
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 
@@ -373,6 +373,7 @@ LDFLAGS_MODULE  =
 CFLAGS_KERNEL	=
 AFLAGS_KERNEL	=
 CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage -fno-tree-loop-im
+CFLAGS_KCOV	= -fsanitize-coverage=trace-pc
 
 
 # Use USERINCLUDE when you must reference the UAPI directories only.
@@ -420,7 +421,7 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE
 export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
 
 export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
-export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN CFLAGS_UBSAN
+export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KCOV CFLAGS_KASAN CFLAGS_UBSAN
 export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
 export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
 export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
@@ -697,6 +698,14 @@ endif
 endif
 KBUILD_CFLAGS += $(stackp-flag)
 
+ifdef CONFIG_KCOV
+  ifeq ($(call cc-option, $(CFLAGS_KCOV)),)
+    $(warning Cannot use CONFIG_KCOV: \
+             -fsanitize-coverage=trace-pc is not supported by compiler)
+    CFLAGS_KCOV =
+  endif
+endif
+
 ifeq ($(cc-name),clang)
 ifneq ($(CROSS_COMPILE),)
 CLANG_TRIPLE	?= $(CROSS_COMPILE)
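
The $(call cc-option, ...) guard above test-compiles with the flag and empties CFLAGS_KCOV when the compiler rejects it; conceptually the probe is equivalent to something like this shell check (a sketch of the idea, not Kbuild's exact macro):

    gcc -fsanitize-coverage=trace-pc -c -x c /dev/null -o /dev/null \
            && echo "trace-pc supported"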
@@ -799,6 +808,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS	+= $(call cc-option,-fno-strict-overflow)
 
+# Make sure -fstack-check isn't enabled (like gentoo apparently did)
+KBUILD_CFLAGS  += $(call cc-option,-fno-stack-check,)
+
 # conserve stack if available
 KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)
@@ -7,6 +7,7 @@
  * Copyright (C) 1996, Linus Torvalds
  */
 
+#include <linux/sched.h>
 #include <asm/machvec.h>
 #include <asm/compiler.h>
 #include <asm-generic/mm_hooks.h>
@@ -668,6 +668,7 @@
 	ti,non-removable;
 	bus-width = <4>;
 	cap-power-off-card;
+	keep-power-in-suspend;
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc2_pins>;
@@ -227,6 +227,7 @@
 	device_type = "pci";
 	ranges = <0x81000000 0 0          0x03000 0 0x00010000
 		  0x82000000 0 0x20013000 0x13000 0 0xffed000>;
+	bus-range = <0x00 0xff>;
 	#interrupt-cells = <1>;
 	num-lanes = <1>;
 	ti,hwmods = "pcie1";
@@ -262,6 +263,7 @@
 	device_type = "pci";
 	ranges = <0x81000000 0 0          0x03000 0 0x00010000
 		  0x82000000 0 0x30013000 0x13000 0 0xffed000>;
+	bus-range = <0x00 0xff>;
 	#interrupt-cells = <1>;
 	num-lanes = <1>;
 	ti,hwmods = "pcie2";
@@ -512,4 +512,22 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 #endif
 	.endm
 
+	.macro	bug, msg, line
+#ifdef CONFIG_THUMB2_KERNEL
+1:	.inst	0xde02
+#else
+1:	.inst	0xe7f001f2
+#endif
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+	.pushsection .rodata.str, "aMS", %progbits, 1
+2:	.asciz	"\msg"
+	.popsection
+	.pushsection __bug_table, "aw"
+	.align	2
+	.word	1b, 2b
+	.hword	\line
+	.popsection
+#endif
+	.endm
+
 #endif /* __ASM_ASSEMBLER_H__ */
@@ -7,7 +7,7 @@
 #ifndef __ASM_ARM_EXCEPTION_H
 #define __ASM_ARM_EXCEPTION_H
 
-#include <linux/ftrace.h>
+#include <linux/interrupt.h>
 
 #define __exception	__attribute__((section(".exception.text")))
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -161,8 +161,7 @@
 #else
 #define VTTBR_X		(5 - KVM_T0SZ)
 #endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK  (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X)
 #define VTTBR_VMID_SHIFT  _AC(48, ULL)
 #define VTTBR_VMID_MASK(size)	(_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
@@ -209,6 +208,7 @@
 #define HSR_EC_IABT_HYP	(0x21)
 #define HSR_EC_DABT	(0x24)
 #define HSR_EC_DABT_HYP	(0x25)
+#define HSR_EC_MAX	(0x3f)
 
 #define HSR_WFI_IS_WFE		(_AC(1, UL) << 0)
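
A quick numeric check of the VTTBR mask fix above (illustrative C, not kernel code): the stage-2 base address is 2^VTTBR_X-aligned, so its mask must start at bit VTTBR_X rather than VTTBR_X - 1.

    /* With VTTBR_X = 5 the base address occupies bits [39:5]. */
    unsigned long long old_mask = ((1ULL << 35) - 1) << 4;  /* bits [38:4]: off by one */
    unsigned long long new_mask = ((1ULL << 35) - 1) << 5;  /* bits [39:5]: correct */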
@@ -61,6 +61,7 @@ static inline void check_and_switch_context(struct mm_struct *mm,
 		cpu_switch_mm(mm->pgd, mm);
 }
 
+#ifndef MODULE
 #define finish_arch_post_lock_switch \
 	finish_arch_post_lock_switch
 static inline void finish_arch_post_lock_switch(void)
@@ -82,6 +83,7 @@ static inline void finish_arch_post_lock_switch(void)
 		preempt_enable_no_resched();
 	}
 }
+#endif /* !MODULE */
 
 #endif	/* CONFIG_MMU */
@@ -18,7 +18,6 @@ struct undef_hook {
 void register_undef_hook(struct undef_hook *hook);
 void unregister_undef_hook(struct undef_hook *hook);
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static inline int __in_irqentry_text(unsigned long ptr)
 {
 	extern char __irqentry_text_start[];
@@ -27,12 +26,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
 	return ptr >= (unsigned long)&__irqentry_text_start &&
 	       ptr < (unsigned long)&__irqentry_text_end;
 }
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
-	return 0;
-}
-#endif
 
 static inline int in_exception_text(unsigned long ptr)
 {
@@ -295,6 +295,8 @@
 	mov	r2, sp
 	ldr	r1, [r2, #\offset + S_PSR]	@ get calling cpsr
 	ldr	lr, [r2, #\offset + S_PC]!	@ get pc
+	tst	r1, #PSR_I_BIT | 0x0f
+	bne	1f
 	msr	spsr_cxsf, r1			@ save in spsr_svc
 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
 	@ We must avoid clrex due to Cortex-A15 erratum #830321
@@ -309,6 +311,7 @@
 	@ after ldm {}^
 	add	sp, sp, #\offset + S_FRAME_SIZE
 	movs	pc, lr				@ return & move spsr_svc into cpsr
+1:	bug	"Returning to usermode but unexpected PSR bits set?", \@
 #elif defined(CONFIG_CPU_V7M)
 	@ V7M restore.
 	@ Note that we don't need to do clrex here as clearing the local
@@ -324,6 +327,8 @@
 	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
 	ldr	lr, [sp, #\offset + S_PC]	@ get pc
 	add	sp, sp, #\offset + S_SP
+	tst	r1, #PSR_I_BIT | 0x0f
+	bne	1f
 	msr	spsr_cxsf, r1			@ save in spsr_svc
 
 	@ We must avoid clrex due to Cortex-A15 erratum #830321
@@ -336,6 +341,7 @@
 	.endif
 	add	sp, sp, #S_FRAME_SIZE - S_SP
 	movs	pc, lr				@ return & move spsr_svc into cpsr
+1:	bug	"Returning to usermode but unexpected PSR bits set?", \@
 #endif	/* !CONFIG_THUMB2_KERNEL */
 	.endm
@@ -105,6 +105,7 @@ SECTIONS
 			*(.exception.text)
 			__exception_text_end = .;
 			IRQENTRY_TEXT
+			SOFTIRQENTRY_TEXT
 			TEXT_TEXT
 			SCHED_TEXT
 			LOCK_TEXT
@@ -100,7 +100,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+	kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n",
+		      hsr);
+
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
+	[0 ... HSR_EC_MAX]	= kvm_handle_unknown_ec,
 	[HSR_EC_WFI]		= kvm_handle_wfx,
 	[HSR_EC_CP15_32]	= kvm_handle_cp15_32,
 	[HSR_EC_CP15_64]	= kvm_handle_cp15_64,
@@ -122,13 +134,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 {
 	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
 
-	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
-	    !arm_exit_handlers[hsr_ec]) {
-		kvm_err("Unknown exception class: hsr: %#08x\n",
-			(unsigned int)kvm_vcpu_get_hsr(vcpu));
-		BUG();
-	}
-
 	return arm_exit_handlers[hsr_ec];
 }
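
The [0 ... HSR_EC_MAX] entry above relies on GCC's designated range initializer, so every exception class defaults to the unknown-EC handler and the specific entries that follow override it. A standalone illustration of the idiom (not kernel code):

    int tbl[4] = { [0 ... 3] = -1, [2] = 7 };   /* yields {-1, -1, 7, -1} */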
@@ -367,7 +367,7 @@ static int gpmc_onenand_setup(void __iomem *onenand_base, int *freq_ptr)
 	return ret;
 }
 
-void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
+int gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
 {
 	int err;
 	struct device *dev = &gpmc_onenand_device.dev;
@@ -393,15 +393,17 @@ void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
 	if (err < 0) {
 		dev_err(dev, "Cannot request GPMC CS %d, error %d\n",
 			gpmc_onenand_data->cs, err);
-		return;
+		return err;
 	}
 
 	gpmc_onenand_resource.end = gpmc_onenand_resource.start +
 							ONENAND_IO_SIZE - 1;
 
-	if (platform_device_register(&gpmc_onenand_device) < 0) {
+	err = platform_device_register(&gpmc_onenand_device);
+	if (err) {
 		dev_err(dev, "Unable to register OneNAND device\n");
 		gpmc_cs_free(gpmc_onenand_data->cs);
-		return;
 	}
+
+	return err;
 }
@@ -3885,16 +3885,20 @@ static struct omap_hwmod_ocp_if *omap3xxx_dss_hwmod_ocp_ifs[] __initdata = {
  * Return: 0 if device named @dev_name is not likely to be accessible,
  * or 1 if it is likely to be accessible.
  */
-static int __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus,
-						       const char *dev_name)
+static bool __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus,
+							const char *dev_name)
 {
+	struct device_node *node;
+	bool available;
+
 	if (!bus)
-		return (omap_type() == OMAP2_DEVICE_TYPE_GP) ? 1 : 0;
+		return omap_type() == OMAP2_DEVICE_TYPE_GP;
 
-	if (of_device_is_available(of_find_node_by_name(bus, dev_name)))
-		return 1;
+	node = of_get_child_by_name(bus, dev_name);
+	available = of_device_is_available(node);
+	of_node_put(node);
 
-	return 0;
+	return available;
 }
 
 int __init omap3xxx_hwmod_init(void)
@@ -3963,15 +3967,20 @@
 
 	if (h_sham && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "sham")) {
 		r = omap_hwmod_register_links(h_sham);
-		if (r < 0)
+		if (r < 0) {
+			of_node_put(bus);
 			return r;
+		}
 	}
 
 	if (h_aes && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "aes")) {
 		r = omap_hwmod_register_links(h_aes);
-		if (r < 0)
+		if (r < 0) {
+			of_node_put(bus);
 			return r;
+		}
 	}
+	of_node_put(bus);
 
 	/*
 	 * Register hwmod links specific to certain ES levels of a
@@ -918,13 +918,31 @@ static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_add
 	__arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
 }
 
+/*
+ * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
+ * that the intention is to allow exporting memory allocated via the
+ * coherent DMA APIs through the dma_buf API, which only accepts a
+ * scattertable.  This presents a couple of problems:
+ * 1. Not all memory allocated via the coherent DMA APIs is backed by
+ *    a struct page
+ * 2. Passing coherent DMA memory into the streaming APIs is not allowed
+ *    as we will try to flush the memory through a different alias to that
+ *    actually being used (and the flushes are redundant.)
+ */
 int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
 		 void *cpu_addr, dma_addr_t handle, size_t size,
 		 struct dma_attrs *attrs)
 {
-	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
+	unsigned long pfn = dma_to_pfn(dev, handle);
+	struct page *page;
 	int ret;
 
+	/* If the PFN is not valid, we do not have a struct page */
+	if (!pfn_valid(pfn))
+		return -ENXIO;
+
+	page = pfn_to_page(pfn);
+
 	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
 	if (unlikely(ret))
 		return ret;
@@ -433,6 +433,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	struct hlist_node *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+	kprobe_opcode_t *correct_ret_addr = NULL;
 
 	INIT_HLIST_HEAD(&empty_rp);
 	kretprobe_hash_lock(current, &head, &flags);
@@ -455,15 +456,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 			/* another task is sharing our hash bucket */
 			continue;
 
-		if (ri->rp && ri->rp->handler) {
-			__this_cpu_write(current_kprobe, &ri->rp->kp);
-			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
-			ri->rp->handler(ri, regs);
-			__this_cpu_write(current_kprobe, NULL);
-		}
-
 		orig_ret_address = (unsigned long)ri->ret_addr;
-		recycle_rp_inst(ri, &empty_rp);
 
 		if (orig_ret_address != trampoline_address)
 			/*
@@ -475,6 +468,33 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	}
 
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+	correct_ret_addr = ri->ret_addr;
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		if (ri->rp && ri->rp->handler) {
+			__this_cpu_write(current_kprobe, &ri->rp->kp);
+			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+			ri->ret_addr = correct_ret_addr;
+			ri->rp->handler(ri, regs);
+			__this_cpu_write(current_kprobe, NULL);
+		}
+
+		recycle_rp_inst(ri, &empty_rp);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
 	kretprobe_hash_unlock(current, &flags);
 
 	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
@@ -976,7 +976,10 @@ static void coverage_end(void)
 void __naked __kprobes_test_case_start(void)
 {
 	__asm__ __volatile__ (
-	"stmdb	sp!, {r4-r11}	\n\t"
+	"mov	r2, sp		\n\t"
+	"bic	r3, r2, #7	\n\t"
+	"mov	sp, r3		\n\t"
+	"stmdb	sp!, {r2-r11}	\n\t"
 	"sub	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
 	"bic	r0, lr, #1  @ r0 = inline data	\n\t"
 	"mov	r1, sp		\n\t"
@@ -996,7 +999,8 @@ void __naked __kprobes_test_case_end_32(void)
 	"movne	pc, r0		\n\t"
 	"mov	r0, r4		\n\t"
 	"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
-	"ldmia	sp!, {r4-r11}	\n\t"
+	"ldmia	sp!, {r2-r11}	\n\t"
+	"mov	sp, r2		\n\t"
 	"mov	pc, r0		\n\t"
 	);
 }
@@ -1012,7 +1016,8 @@ void __naked __kprobes_test_case_end_16(void)
 	"bxne	r0		\n\t"
 	"mov	r0, r4		\n\t"
 	"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
-	"ldmia	sp!, {r4-r11}	\n\t"
+	"ldmia	sp!, {r2-r11}	\n\t"
+	"mov	sp, r2		\n\t"
 	"bx	r0		\n\t"
 	);
 }
@@ -18,7 +18,7 @@
 #ifndef __ASM_EXCEPTION_H
 #define __ASM_EXCEPTION_H
 
-#include <linux/ftrace.h>
+#include <linux/interrupt.h>
 
 #define __exception	__attribute__((section(".exception.text")))
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -154,8 +154,7 @@
 #define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)
 #endif
 
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
 #define VTTBR_VMID_SHIFT  (UL(48))
 #define VTTBR_VMID_MASK(size)	(_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
@@ -148,6 +148,11 @@ extern u64 kimage_vaddr;
 /* the offset between the kernel virtual and physical mappings */
 extern u64			kimage_voffset;
 
+static inline unsigned long kaslr_offset(void)
+{
+	return kimage_vaddr - KIMAGE_VADDR;
+}
+
 /*
  * Allow all memory at the discovery stage. We will clip it later.
  */
@@ -34,7 +34,6 @@ struct undef_hook {
 void register_undef_hook(struct undef_hook *hook);
 void unregister_undef_hook(struct undef_hook *hook);
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static inline int __in_irqentry_text(unsigned long ptr)
 {
 	extern char __irqentry_text_start[];
@@ -43,12 +42,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
 	return ptr >= (unsigned long)&__irqentry_text_start &&
 	       ptr < (unsigned long)&__irqentry_text_end;
 }
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
-	return 0;
-}
-#endif
 
 static inline int in_exception_text(unsigned long ptr)
 {
|
|||
|
||||
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
|
||||
|
||||
/*
|
||||
* In case p was allocated the same task_struct pointer as some
|
||||
* other recently-exited task, make sure p is disassociated from
|
||||
* any cpu that may have run that now-exited task recently.
|
||||
* Otherwise we could erroneously skip reloading the FPSIMD
|
||||
* registers for p.
|
||||
*/
|
||||
fpsimd_flush_task_state(p);
|
||||
|
||||
if (likely(!(p->flags & PF_KTHREAD))) {
|
||||
*childregs = *current_pt_regs();
|
||||
childregs->regs[0] = 0;
|
||||
|
|
|
@@ -429,11 +429,11 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
 static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
 			      void *p)
 {
-	u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR;
+	const unsigned long offset = kaslr_offset();
 
-	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) {
-		pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n",
-			 kaslr_offset, KIMAGE_VADDR);
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && offset > 0) {
+		pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
+			 offset, KIMAGE_VADDR);
 	} else {
 		pr_emerg("Kernel Offset: disabled\n");
 	}
@@ -131,7 +131,7 @@ ENTRY(_cpu_resume)
 
 #ifdef CONFIG_KASAN
 	mov	x0, sp
-	bl	kasan_unpoison_remaining_stack
+	bl	kasan_unpoison_task_stack_below
 #endif
 
 	ldp	x19, x20, [x29, #16]
@@ -120,6 +120,7 @@ SECTIONS
 			__exception_text_end = .;
 			IRQENTRY_TEXT
 			ENTRY_TEXT
+			SOFTIRQENTRY_TEXT
 			TEXT_TEXT
 			SCHED_TEXT
 			LOCK_TEXT
@@ -122,7 +122,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return ret;
 }
 
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+	kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
+		      hsr, esr_get_class_string(hsr));
+
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
+	[0 ... ESR_ELx_EC_MAX]	= kvm_handle_unknown_ec,
 	[ESR_ELx_EC_WFx]	= kvm_handle_wfx,
 	[ESR_ELx_EC_CP15_32]	= kvm_handle_cp15_32,
 	[ESR_ELx_EC_CP15_64]	= kvm_handle_cp15_64,
@@ -148,13 +160,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
 	u8 hsr_ec = hsr >> ESR_ELx_EC_SHIFT;
 
-	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
-	    !arm_exit_handlers[hsr_ec]) {
-		kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
-			hsr, esr_get_class_string(hsr));
-		BUG();
-	}
-
 	return arm_exit_handlers[hsr_ec];
 }
@@ -255,6 +255,7 @@ void __init arm64_memblock_init(void)
 		arm64_dma_phys_limit = max_zone_dma_phys();
 	else
 		arm64_dma_phys_limit = PHYS_MASK + 1;
+	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
 	dma_contiguous_reserve(arm64_dma_phys_limit);
 
 	memblock_allow_resize();
@@ -279,7 +280,6 @@ void __init bootmem_init(void)
 	sparse_init();
 	zone_sizes_init(min, max);
 
-	high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
 	max_pfn = max_low_pfn = max;
 }
@@ -318,11 +318,14 @@ config BF53x
 
 config GPIO_ADI
 	def_bool y
+	depends on !PINCTRL
 	depends on (BF51x || BF52x || BF53x || BF538 || BF539 || BF561)
 
-config PINCTRL
+config PINCTRL_BLACKFIN_ADI2
 	def_bool y
-	depends on BF54x || BF60x
+	depends on (BF54x || BF60x)
+	select PINCTRL
+	select PINCTRL_ADI2
 
 config MEM_MT48LC64M4A2FB_7E
 	bool
@@ -17,6 +17,7 @@ config DEBUG_VERBOSE
 
 config DEBUG_MMRS
 	tristate "Generate Blackfin MMR tree"
+	depends on !PINCTRL
 	select DEBUG_FS
 	help
 	  Create a tree of Blackfin MMRs via the debugfs tree. If
@@ -35,6 +35,7 @@ SECTIONS
 #endif
 		LOCK_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		KPROBES_TEXT
 #ifdef CONFIG_ROMKERNEL
 		__sinittext = .;
@@ -72,6 +72,7 @@ SECTIONS
 		SCHED_TEXT
 		LOCK_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		KPROBES_TEXT
 		*(.fixup)
 		*(.gnu.warning)
@@ -24,6 +24,7 @@ SECTIONS
 	LOCK_TEXT
 	KPROBES_TEXT
 	IRQENTRY_TEXT
+	SOFTIRQENTRY_TEXT
 	*(.text.*)
 	*(.gnu.warning)
 	}
@@ -36,6 +36,7 @@ SECTIONS {
 	LOCK_TEXT
 	KPROBES_TEXT
 	IRQENTRY_TEXT
+	SOFTIRQENTRY_TEXT
 	. = ALIGN (4) ;
 	_etext = . ;
 	}
@@ -58,6 +58,7 @@ SECTIONS
 	LOCK_TEXT
 	KPROBES_TEXT
 	IRQENTRY_TEXT
+	SOFTIRQENTRY_TEXT
 	*(.text.*)
 	*(.fixup)
 	*(.gnu.warning)
@@ -1777,7 +1777,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			SPFROMREG(fd, MIPSInst_FD(ir));
 			rv.s = ieee754sp_maddf(fd, fs, ft);
-			break;
+			goto copcsr;
 		}
 
 		case fmsubf_op: {
@@ -1790,7 +1790,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			SPFROMREG(fd, MIPSInst_FD(ir));
 			rv.s = ieee754sp_msubf(fd, fs, ft);
-			break;
+			goto copcsr;
 		}
 
 		case frint_op: {
@@ -1814,7 +1814,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.w = ieee754sp_2008class(fs);
 			rfmt = w_fmt;
-			break;
+			goto copcsr;
 		}
 
 		case fmin_op: {
@@ -1826,7 +1826,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			SPFROMREG(ft, MIPSInst_FT(ir));
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.s = ieee754sp_fmin(fs, ft);
-			break;
+			goto copcsr;
 		}
 
 		case fmina_op: {
@@ -1838,7 +1838,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			SPFROMREG(ft, MIPSInst_FT(ir));
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.s = ieee754sp_fmina(fs, ft);
-			break;
+			goto copcsr;
 		}
 
 		case fmax_op: {
@@ -1850,7 +1850,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			SPFROMREG(ft, MIPSInst_FT(ir));
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.s = ieee754sp_fmax(fs, ft);
-			break;
+			goto copcsr;
 		}
 
 		case fmaxa_op: {
@@ -1862,7 +1862,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			SPFROMREG(ft, MIPSInst_FT(ir));
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.s = ieee754sp_fmaxa(fs, ft);
-			break;
+			goto copcsr;
 		}
 
 		case fabs_op:
@@ -2095,7 +2095,7 @@ copcsr:
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			DPFROMREG(fd, MIPSInst_FD(ir));
 			rv.d = ieee754dp_maddf(fd, fs, ft);
-			break;
+			goto copcsr;
 		}
 
 		case fmsubf_op: {
@@ -2108,7 +2108,7 @@ copcsr:
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			DPFROMREG(fd, MIPSInst_FD(ir));
 			rv.d = ieee754dp_msubf(fd, fs, ft);
-			break;
+			goto copcsr;
 		}
 
 		case frint_op: {
@@ -2132,7 +2132,7 @@ copcsr:
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.w = ieee754dp_2008class(fs);
 			rfmt = w_fmt;
-			break;
+			goto copcsr;
 		}
 
 		case fmin_op: {
@@ -2144,7 +2144,7 @@ copcsr:
 			DPFROMREG(ft, MIPSInst_FT(ir));
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.d = ieee754dp_fmin(fs, ft);
-			break;
+			goto copcsr;
 		}
 
 		case fmina_op: {
@@ -2156,7 +2156,7 @@ copcsr:
 			DPFROMREG(ft, MIPSInst_FT(ir));
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.d = ieee754dp_fmina(fs, ft);
-			break;
+			goto copcsr;
 		}
 
 		case fmax_op: {
@@ -2168,7 +2168,7 @@ copcsr:
 			DPFROMREG(ft, MIPSInst_FT(ir));
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.d = ieee754dp_fmax(fs, ft);
-			break;
+			goto copcsr;
 		}
 
 		case fmaxa_op: {
@@ -2180,7 +2180,7 @@ copcsr:
 			DPFROMREG(ft, MIPSInst_FT(ir));
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.d = ieee754dp_fmaxa(fs, ft);
-			break;
+			goto copcsr;
 		}
 
 		case fabs_op:
@@ -39,6 +39,7 @@ SECTIONS
 		SCHED_TEXT
 		LOCK_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		KPROBES_TEXT
 	} =0
 	_etext = .;
@@ -215,7 +215,7 @@ do {									\
 	case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break;		\
 	case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break;		\
 	case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break;		\
-	case 8: __get_user_asm2(x, ptr, retval);			\
+	case 8: __get_user_asm2(x, ptr, retval); break;			\
 	default: (x) = __get_user_bad();				\
 	}								\
 } while (0)
@@ -52,6 +52,7 @@ SECTIONS
 	LOCK_TEXT
 	KPROBES_TEXT
 	IRQENTRY_TEXT
+	SOFTIRQENTRY_TEXT
 	*(.fixup)
 	*(.text.__*)
 	_etext = .;
@@ -72,6 +72,7 @@ SECTIONS
 	LOCK_TEXT
 	KPROBES_TEXT
 	IRQENTRY_TEXT
+	SOFTIRQENTRY_TEXT
 	*(.text.do_softirq)
 	*(.text.sys_exit)
 	*(.text.do_sigaltstack)
@@ -55,6 +55,7 @@ SECTIONS
 		LOCK_TEXT
 		KPROBES_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 
 #ifdef CONFIG_PPC32
 		*(.got1)
@@ -401,8 +401,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
 	int ret;
 	__u64 target;
 
-	if (is_kernel_addr(addr))
-		return branch_target((unsigned int *)addr);
+	if (is_kernel_addr(addr)) {
+		if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+			return 0;
+
+		return branch_target(&instr);
+	}
 
 	/* Userspace: need copy instruction here then translate it */
 	pagefault_disable();
@@ -514,7 +514,7 @@ static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
 {
 	if (s1 < s2)
 		return 1;
-	if (s2 > s1)
+	if (s1 > s2)
 		return -1;
 
 	return memcmp(d1, d2, s1);
@@ -39,18 +39,18 @@ int __opal_async_get_token(void)
 	int token;
 
 	spin_lock_irqsave(&opal_async_comp_lock, flags);
-	token = find_first_bit(opal_async_complete_map, opal_max_async_tokens);
+	token = find_first_zero_bit(opal_async_token_map, opal_max_async_tokens);
 	if (token >= opal_max_async_tokens) {
 		token = -EBUSY;
 		goto out;
 	}
 
-	if (__test_and_set_bit(token, opal_async_token_map)) {
+	if (!__test_and_clear_bit(token, opal_async_complete_map)) {
 		token = -EBUSY;
 		goto out;
 	}
 
-	__clear_bit(token, opal_async_complete_map);
+	__set_bit(token, opal_async_token_map);
 
 out:
 	spin_unlock_irqrestore(&opal_async_comp_lock, flags);
@@ -2270,6 +2270,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
 	level_shift = entries_shift + 3;
 	level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
 
+	if ((level_shift - 3) * levels + page_shift >= 60)
+		return -EINVAL;
+
 	/* Allocate TCE table */
 	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
 			levels, tce_table_size, &offset, &total_allocated);
@@ -295,7 +295,7 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)
 {
 	unsigned long ret_freq;
 
-	ret_freq = cpufreq_quick_get(cpu) * 1000ul;
+	ret_freq = cpufreq_get(cpu) * 1000ul;
 
 	/*
 	 * If the backend cpufreq driver does not exist,
@@ -276,7 +276,9 @@ failed:
 			if (bank->disk->major > 0)
 				unregister_blkdev(bank->disk->major,
 						bank->disk->disk_name);
-			del_gendisk(bank->disk);
+			if (bank->disk->flags & GENHD_FL_UP)
+				del_gendisk(bank->disk);
+			put_disk(bank->disk);
 		}
 		device->dev.platform_data = NULL;
 		if (bank->io_addr != 0)
@@ -301,6 +303,7 @@ axon_ram_remove(struct platform_device *device)
 	device_remove_file(&device->dev, &dev_attr_ecc);
 	free_irq(bank->irq_id, device);
 	del_gendisk(bank->disk);
+	put_disk(bank->disk);
 	iounmap((void __iomem *) bank->io_addr);
 	kfree(bank);
@@ -845,12 +845,12 @@ void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq)
 
 u32 ipic_get_mcp_status(void)
 {
-	return ipic_read(primary_ipic->regs, IPIC_SERMR);
+	return ipic_read(primary_ipic->regs, IPIC_SERSR);
 }
 
 void ipic_clear_mcp_status(u32 mask)
 {
-	ipic_write(primary_ipic->regs, IPIC_SERMR, mask);
+	ipic_write(primary_ipic->regs, IPIC_SERSR, mask);
 }
 
 /* Return an interrupt vector or NO_IRQ if no interrupt is pending. */
@@ -1,8 +0,0 @@
-#ifndef _ASM_S390_PROTOTYPES_H
-
-#include <linux/kvm_host.h>
-#include <linux/ftrace.h>
-#include <asm/fpu/api.h>
-#include <asm-generic/asm-prototypes.h>
-
-#endif /* _ASM_S390_PROTOTYPES_H */
@@ -29,17 +29,16 @@ static inline void restore_access_regs(unsigned int *acrs)
 }
 
 #define switch_to(prev,next,last) do {					\
-	if (prev->mm) {							\
-		save_fpu_regs();					\
-		save_access_regs(&prev->thread.acrs[0]);		\
-		save_ri_cb(prev->thread.ri_cb);				\
-	}								\
+	/* save_fpu_regs() sets the CIF_FPU flag, which enforces	\
+	 * a restore of the floating point / vector registers as	\
+	 * soon as the next task returns to user space			\
+	 */								\
+	save_fpu_regs();						\
+	save_access_regs(&prev->thread.acrs[0]);			\
+	save_ri_cb(prev->thread.ri_cb);					\
 	update_cr_regs(next);						\
-	if (next->mm) {							\
-		set_cpu_flag(CIF_FPU);					\
-		restore_access_regs(&next->thread.acrs[0]);		\
-		restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);	\
-	}								\
+	restore_access_regs(&next->thread.acrs[0]);			\
+	restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);		\
 	prev = __switch_to(prev,next);					\
 } while (0)
@@ -369,10 +369,10 @@ SYSCALL(sys_recvmmsg,compat_sys_recvmmsg)
 SYSCALL(sys_sendmmsg,compat_sys_sendmmsg)
 SYSCALL(sys_socket,sys_socket)
 SYSCALL(sys_socketpair,compat_sys_socketpair)		/* 360 */
-SYSCALL(sys_bind,sys_bind)
-SYSCALL(sys_connect,sys_connect)
+SYSCALL(sys_bind,compat_sys_bind)
+SYSCALL(sys_connect,compat_sys_connect)
 SYSCALL(sys_listen,sys_listen)
-SYSCALL(sys_accept4,sys_accept4)
+SYSCALL(sys_accept4,compat_sys_accept4)
 SYSCALL(sys_getsockopt,compat_sys_getsockopt)		/* 365 */
 SYSCALL(sys_setsockopt,compat_sys_setsockopt)
 SYSCALL(sys_getsockname,compat_sys_getsockname)
@@ -28,6 +28,7 @@ SECTIONS
 	LOCK_TEXT
 	KPROBES_TEXT
 	IRQENTRY_TEXT
+	SOFTIRQENTRY_TEXT
 	*(.fixup)
 	*(.gnu.warning)
 	} :text = 0x0700
@@ -39,6 +39,7 @@ SECTIONS
 	LOCK_TEXT
 	KPROBES_TEXT
 	IRQENTRY_TEXT
+	SOFTIRQENTRY_TEXT
 	*(.fixup)
 	*(.gnu.warning)
 	_etext = .;	/* End of text section */
@@ -52,6 +52,7 @@ SECTIONS
 	LOCK_TEXT
 	KPROBES_TEXT
 	IRQENTRY_TEXT
+	SOFTIRQENTRY_TEXT
 	*(.gnu.warning)
 	} = 0
 	_etext = .;
@@ -2402,9 +2402,16 @@ void __init mem_init(void)
 {
 	high_memory = __va(last_valid_pfn << PAGE_SHIFT);
 
-	register_page_bootmem_info();
 	free_all_bootmem();
 
+	/*
+	 * Must be done after boot memory is put on freelist, because here we
+	 * might set fields in deferred struct pages that have not yet been
+	 * initialized, and free_all_bootmem() initializes all the reserved
+	 * deferred pages for us.
+	 */
+	register_page_bootmem_info();
+
 	/*
 	 * Set up the zero page, mark it reserved, so that page count
 	 * is not manipulated when freeing the page from user ptes.
@@ -45,6 +45,7 @@ SECTIONS
 	LOCK_TEXT
 	KPROBES_TEXT
 	IRQENTRY_TEXT
+	SOFTIRQENTRY_TEXT
 	__fix_text_end = .;	/* tile-cpack won't rearrange before this */
 	ALIGN_FUNCTION();
 	*(.hottext*)
@@ -27,6 +27,7 @@ config X86
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
 	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_PMEM_API		if X86_64
 	select ARCH_HAS_MMIO_FLUSH
 	select ARCH_HAS_SG_CHAIN
@@ -43,7 +44,7 @@ config X86
 	select ARCH_USE_CMPXCHG_LOCKREF		if X86_64
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
-	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
+	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_IPC_PARSE_VERSION	if X86_32
@@ -11,6 +11,13 @@
 
 KASAN_SANITIZE := n
 
+# Kernel does not boot with kcov instrumentation here.
+# One of the problems observed was insertion of __sanitizer_cov_trace_pc()
+# callback into middle of per-cpu data enabling code. Thus the callback observed
+# inconsistent state and crashed. We are interested mostly in syscall coverage,
+# so boot code is not interesting anyway.
+KCOV_INSTRUMENT := n
+
 # If you want to preset the SVGA mode, uncomment the next line and
 # set SVGA_MODE to whatever number you want.
 # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
@@ -18,6 +18,9 @@
 
 KASAN_SANITIZE := n
 
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT := n
+
 targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
 	vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
@@ -9,6 +9,7 @@
  */
 #undef CONFIG_PARAVIRT
 #undef CONFIG_PARAVIRT_SPINLOCKS
+#undef CONFIG_PAGE_TABLE_ISOLATION
 #undef CONFIG_KASAN
 
 #include <linux/linkage.h>
@@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc,
 
 	salsa20_ivsetup(ctx, walk.iv);
 
-	if (likely(walk.nbytes == nbytes))
-	{
-		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-				      walk.dst.virt.addr, nbytes);
-		return blkcipher_walk_done(desc, &walk, 0);
-	}
-
 	while (walk.nbytes >= 64) {
 		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
 				      walk.dst.virt.addr,
@@ -35,6 +35,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/pgtable_types.h>
+#include <asm/kaiser.h>
 #include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
@@ -135,6 +136,7 @@ ENTRY(entry_SYSCALL_64)
 	 * it is too small to ever cause noticeable irq latency.
 	 */
 	SWAPGS_UNSAFE_STACK
+	SWITCH_KERNEL_CR3_NO_STACK
 	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
@@ -207,9 +209,17 @@ entry_SYSCALL_64_fastpath:
 	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jnz	int_ret_from_sys_call_irqs_off	/* Go to the slow path */
 
-	RESTORE_C_REGS_EXCEPT_RCX_R11
 	movq	RIP(%rsp), %rcx
 	movq	EFLAGS(%rsp), %r11
+	RESTORE_C_REGS_EXCEPT_RCX_R11
+	/*
+	 * This opens a window where we have a user CR3, but are
+	 * running in the kernel.  This makes using the CS
+	 * register useless for telling whether or not we need to
+	 * switch CR3 in NMIs.  Normal interrupts are OK because
+	 * they are off here.
+	 */
+	SWITCH_USER_CR3
 	movq	RSP(%rsp), %rsp
 	/*
	 * 64-bit SYSRET restores rip from rcx,
@@ -347,10 +357,26 @@ GLOBAL(int_ret_from_sys_call)
 
 syscall_return_via_sysret:
 	/* rcx and r11 are already restored (see code above) */
 	RESTORE_C_REGS_EXCEPT_RCX_R11
+	/*
+	 * This opens a window where we have a user CR3, but are
+	 * running in the kernel.  This makes using the CS
+	 * register useless for telling whether or not we need to
+	 * switch CR3 in NMIs.  Normal interrupts are OK because
+	 * they are off here.
+	 */
+	SWITCH_USER_CR3
 	movq	RSP(%rsp), %rsp
 	USERGS_SYSRET64
 
 opportunistic_sysret_failed:
+	/*
+	 * This opens a window where we have a user CR3, but are
+	 * running in the kernel.  This makes using the CS
+	 * register useless for telling whether or not we need to
+	 * switch CR3 in NMIs.  Normal interrupts are OK because
+	 * they are off here.
+	 */
+	SWITCH_USER_CR3
 	SWAPGS
 	jmp	restore_c_regs_and_iret
 END(entry_SYSCALL_64)
@@ -509,6 +535,7 @@ END(irq_entries_start)
 	 * tracking that we're in kernel mode.
 	 */
 	SWAPGS
+	SWITCH_KERNEL_CR3
 
 	/*
	 * We need to tell lockdep that IRQs are off.  We can't do this until
@@ -568,6 +595,7 @@ GLOBAL(retint_user)
 	mov	%rsp,%rdi
 	call	prepare_exit_to_usermode
 	TRACE_IRQS_IRETQ
+	SWITCH_USER_CR3
 	SWAPGS
 	jmp	restore_regs_and_iret
 
@@ -625,6 +653,7 @@ native_irq_return_ldt:
 	pushq	%rax
 	pushq	%rdi
 	SWAPGS
+	SWITCH_KERNEL_CR3
 	movq	PER_CPU_VAR(espfix_waddr), %rdi
 	movq	%rax, (0*8)(%rdi)		/* RAX */
 	movq	(2*8)(%rsp), %rax		/* RIP */
@@ -640,6 +669,7 @@ native_irq_return_ldt:
 	andl	$0xffff0000, %eax
 	popq	%rdi
 	orq	PER_CPU_VAR(espfix_stack), %rax
+	SWITCH_USER_CR3
 	SWAPGS
 	movq	%rax, %rsp
 	popq	%rax
@@ -672,9 +702,15 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
 .endm
 #endif
 
+/* Make sure APIC interrupt handlers end up in the irqentry section: */
+#define PUSH_SECTION_IRQENTRY	.pushsection .irqentry.text, "ax"
+#define POP_SECTION_IRQENTRY	.popsection
+
 .macro apicinterrupt num sym do_sym
+PUSH_SECTION_IRQENTRY
 apicinterrupt3 \num \sym \do_sym
 trace_apicinterrupt \num \sym
+POP_SECTION_IRQENTRY
 .endm
 
 #ifdef CONFIG_SMP
@@ -995,7 +1031,11 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vec
 /*
  * Save all registers in pt_regs, and switch gs if needed.
  * Use slow, but surefire "are we in kernel?" check.
- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ *
+ * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit
+ *         ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit
+ *         ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit
+ *         ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit
 */
ENTRY(paranoid_entry)
	cld
@@ -1008,7 +1048,26 @@ ENTRY(paranoid_entry)
 	js	1f	/* negative -> in kernel */
 	SWAPGS
 	xorl	%ebx, %ebx
-1:	ret
+1:
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/*
+	 * We might have come in between a swapgs and a SWITCH_KERNEL_CR3
+	 * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.
+	 * Do a conditional SWITCH_KERNEL_CR3: this could safely be done
+	 * unconditionally, but we need to find out whether the reverse
+	 * should be done on return (conveyed to paranoid_exit in %ebx).
+	 */
+	ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+	testl	$KAISER_SHADOW_PGD_OFFSET, %eax
+	jz	2f
+	orl	$2, %ebx
+	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+	/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
+	ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+	movq	%rax, %cr3
+2:
+#endif
+	ret
 END(paranoid_entry)
 
 /*
@@ -1021,19 +1080,26 @@ END(paranoid_entry)
 * be complicated.  Fortunately, there's no good reason
 * to try to handle preemption here.
 *
- * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
+ * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3
+ *           ebx=1: needs neither swapgs nor SWITCH_USER_CR3
+ *           ebx=2: needs both swapgs and SWITCH_USER_CR3
+ *           ebx=3: needs SWITCH_USER_CR3 but not swapgs
 */
ENTRY(paranoid_exit)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF_DEBUG
-	testl	%ebx, %ebx			/* swapgs needed? */
-	jnz	paranoid_exit_no_swapgs
-	TRACE_IRQS_IRETQ
-	SWAPGS_UNSAFE_STACK
-	jmp	paranoid_exit_restore
-paranoid_exit_no_swapgs:
-	TRACE_IRQS_IRETQ_DEBUG
-paranoid_exit_restore:
+	TRACE_IRQS_IRETQ_DEBUG
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */
+	testl	$2, %ebx			/* SWITCH_USER_CR3 needed? */
+	jz	paranoid_exit_no_switch
+	SWITCH_USER_CR3
+paranoid_exit_no_switch:
+#endif
+	testl	$1, %ebx			/* swapgs needed? */
+	jnz	paranoid_exit_no_swapgs
+	SWAPGS_UNSAFE_STACK
+paranoid_exit_no_swapgs:
	RESTORE_EXTRA_REGS
	RESTORE_C_REGS
	REMOVE_PT_GPREGS_FROM_STACK 8
@@ -1048,6 +1114,13 @@ ENTRY(error_entry)
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
+	/*
+	 * error_entry() always returns with a kernel gsbase and
+	 * CR3.  We must also have a kernel CR3/gsbase before
+	 * calling TRACE_IRQS_*.  Just unconditionally switch to
+	 * the kernel CR3 here.
+	 */
+	SWITCH_KERNEL_CR3
 	xorl	%ebx, %ebx
 	testb	$3, CS+8(%rsp)
 	jz	.Lerror_kernelspace
@@ -1210,6 +1283,10 @@ ENTRY(nmi)
 	 */
 
 	SWAPGS_UNSAFE_STACK
+	/*
+	 * percpu variables are mapped with user CR3, so no need
+	 * to switch CR3 here.
+	 */
 	cld
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1243,12 +1320,34 @@ ENTRY(nmi)
 
 	movq	%rsp, %rdi
 	movq	$-1, %rsi
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/* Unconditionally use kernel CR3 for do_nmi() */
+	/* %rax is saved above, so OK to clobber here */
+	ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+	/* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+	ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+	pushq	%rax
+	/* mask off "user" bit of pgd address and 12 PCID bits: */
+	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+	movq	%rax, %cr3
+2:
+#endif
 	call	do_nmi
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/*
+	 * Unconditionally restore CR3.  I know we return to
+	 * kernel code that needs user CR3, but do we ever return
+	 * to "user mode" where we need the kernel CR3?
+	 */
+	ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+#endif
+
 	/*
	 * Return back to user mode.  We must *not* do the normal exit
-	 * work, because we don't want to enable interrupts.  Fortunately,
-	 * do_nmi doesn't modify pt_regs.
+	 * work, because we don't want to enable interrupts.  Do not
+	 * switch to user CR3: we might be going back to kernel code
+	 * that had a user CR3 set.
	 */
 	SWAPGS
 	jmp	restore_c_regs_and_iret
@@ -1445,22 +1544,55 @@ end_repeat_nmi:
 	ALLOC_PT_GPREGS_ON_STACK
 
 	/*
-	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
-	 * as we should not be calling schedule in NMI context.
-	 * Even with normal interrupts enabled. An NMI should not be
-	 * setting NEED_RESCHED or anything that normal interrupts and
-	 * exceptions might do.
+	 * Use the same approach as paranoid_entry to handle SWAPGS, but
+	 * without CR3 handling since we do that differently in NMIs.  No
+	 * need to use paranoid_exit as we should not be calling schedule
+	 * in NMI context.  Even with normal interrupts enabled. An NMI
+	 * should not be setting NEED_RESCHED or anything that normal
+	 * interrupts and exceptions might do.
	 */
-	call	paranoid_entry
-
+	cld
+	SAVE_C_REGS
+	SAVE_EXTRA_REGS
+	movl	$1, %ebx
+	movl	$MSR_GS_BASE, %ecx
+	rdmsr
+	testl	%edx, %edx
+	js	1f				/* negative -> in kernel */
+	SWAPGS
+	xorl	%ebx, %ebx
+1:
 	movq	%rsp, %rdi
 	movq	$-1, %rsi
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/* Unconditionally use kernel CR3 for do_nmi() */
+	/* %rax is saved above, so OK to clobber here */
+	ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+	/* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+	ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+	pushq	%rax
+	/* mask off "user" bit of pgd address and 12 PCID bits: */
+	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+	movq	%rax, %cr3
+2:
+#endif
+
+	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	call	do_nmi
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/*
+	 * Unconditionally restore CR3.  We might be returning to
+	 * kernel code that needs user CR3, like just before a sysret.
+	 */
+	ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+#endif
+
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	nmi_restore
 nmi_swapgs:
+	/* We fixed up CR3 above, so no need to switch it here */
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
 	RESTORE_EXTRA_REGS
|
|||
#include <asm/irqflags.h>
|
||||
#include <asm/asm.h>
|
||||
#include <asm/smap.h>
|
||||
#include <asm/pgtable_types.h>
|
||||
#include <asm/kaiser.h>
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/err.h>
|
||||
|
||||
|
@ -50,6 +52,7 @@ ENDPROC(native_usergs_sysret32)
|
|||
ENTRY(entry_SYSENTER_compat)
|
||||
/* Interrupts are off on entry. */
|
||||
SWAPGS_UNSAFE_STACK
|
||||
SWITCH_KERNEL_CR3_NO_STACK
|
||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
|
||||
/*
|
||||
|
@ -161,6 +164,7 @@ ENDPROC(entry_SYSENTER_compat)
|
|||
ENTRY(entry_SYSCALL_compat)
|
||||
/* Interrupts are off on entry. */
|
||||
SWAPGS_UNSAFE_STACK
|
||||
SWITCH_KERNEL_CR3_NO_STACK
|
||||
|
||||
/* Stash user ESP and switch to the kernel stack. */
|
||||
movl %esp, %r8d
|
||||
|
@ -208,6 +212,7 @@ ENTRY(entry_SYSCALL_compat)
|
|||
/* Opportunistic SYSRET */
|
||||
sysret32_from_system_call:
|
||||
TRACE_IRQS_ON /* User mode traces as IRQs on. */
|
||||
SWITCH_USER_CR3
|
||||
movq RBX(%rsp), %rbx /* pt_regs->rbx */
|
||||
movq RBP(%rsp), %rbp /* pt_regs->rbp */
|
||||
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
|
||||
|
@ -269,6 +274,7 @@ ENTRY(entry_INT80_compat)
|
|||
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
||||
ASM_CLAC /* Do this early to minimize exposure */
|
||||
SWAPGS
|
||||
SWITCH_KERNEL_CR3_NO_STACK
|
||||
|
||||
/*
|
||||
* User tracing code (ptrace or signal handlers) might assume that
|
||||
|
@ -311,6 +317,7 @@ ENTRY(entry_INT80_compat)
|
|||
|
||||
/* Go back to user mode. */
|
||||
TRACE_IRQS_ON
|
||||
SWITCH_USER_CR3
|
||||
SWAPGS
|
||||
jmp restore_regs_and_iret
|
||||
END(entry_INT80_compat)
|
||||
|
|
|
@@ -5,6 +5,9 @@
 KBUILD_CFLAGS += $(DISABLE_LTO)
 KASAN_SANITIZE := n
+
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT := n

 VDSO64-$(CONFIG_X86_64) := y
 VDSOX32-$(CONFIG_X86_X32_ABI) := y
 VDSO32-$(CONFIG_X86_32) := y

@@ -36,6 +36,11 @@ static notrace cycle_t vread_hpet(void)
 }
 #endif

+#ifdef CONFIG_PARAVIRT_CLOCK
+extern u8 pvclock_page
+	__attribute__((visibility("hidden")));
+#endif
+
 #ifndef BUILD_VDSO32

 #include <linux/kernel.h>

@@ -62,63 +67,65 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)

 #ifdef CONFIG_PARAVIRT_CLOCK

-static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
+static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
 {
-	const struct pvclock_vsyscall_time_info *pvti_base;
-	int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
-	int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
-
-	BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
-
-	pvti_base = (struct pvclock_vsyscall_time_info *)
-		    __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
-
-	return &pvti_base[offset];
+	return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
 }

 static notrace cycle_t vread_pvclock(int *mode)
 {
-	const struct pvclock_vsyscall_time_info *pvti;
+	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
 	cycle_t ret;
-	u64 last;
-	u32 version;
-	u8 flags;
-	unsigned cpu, cpu1;
-
+	u64 tsc, pvti_tsc;
+	u64 last, delta, pvti_system_time;
+	u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;

 	/*
-	 * Note: hypervisor must guarantee that:
-	 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
-	 * 2. that per-CPU pvclock time info is updated if the
-	 *    underlying CPU changes.
-	 * 3. that version is increased whenever underlying CPU
-	 *    changes.
+	 * Note: The kernel and hypervisor must guarantee that cpu ID
+	 * number maps 1:1 to per-CPU pvclock time info.
+	 *
+	 * Because the hypervisor is entirely unaware of guest userspace
+	 * preemption, it cannot guarantee that per-CPU pvclock time
+	 * info is updated if the underlying CPU changes or that that
+	 * version is increased whenever underlying CPU changes.
+	 *
+	 * On KVM, we are guaranteed that pvti updates for any vCPU are
+	 * atomic as seen by *all* vCPUs.  This is an even stronger
+	 * guarantee than we get with a normal seqlock.
 	 *
+	 * On Xen, we don't appear to have that guarantee, but Xen still
+	 * supplies a valid seqlock using the version field.
+	 *
+	 * We only do pvclock vdso timing at all if
+	 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
+	 * mean that all vCPUs have matching pvti and that the TSC is
+	 * synced, so we can just look at vCPU 0's pvti.
 	 */
-	do {
-		cpu = __getcpu() & VGETCPU_CPU_MASK;
-		/* TODO: We can put vcpu id into higher bits of pvti.version.
-		 * This will save a couple of cycles by getting rid of
-		 * __getcpu() calls (Gleb).
-		 */
-
-		pvti = get_pvti(cpu);
-
-		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
-
-		/*
-		 * Test we're still on the cpu as well as the version.
-		 * We could have been migrated just after the first
-		 * vgetcpu but before fetching the version, so we
-		 * wouldn't notice a version change.
-		 */
-		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
-	} while (unlikely(cpu != cpu1 ||
-			  (pvti->pvti.version & 1) ||
-			  pvti->pvti.version != version));
-
-	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
+	if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
 		*mode = VCLOCK_NONE;
+		return 0;
+	}
+
+	do {
+		version = pvti->version;
+
+		/* This is also a read barrier, so we'll read version first. */
+		tsc = rdtsc_ordered();
+
+		pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
+		pvti_tsc_shift = pvti->tsc_shift;
+		pvti_system_time = pvti->system_time;
+		pvti_tsc = pvti->tsc_timestamp;
+
+		/* Make sure that the version double-check is last. */
+		smp_rmb();
+	} while (unlikely((version & 1) || version != pvti->version));
+
+	delta = tsc - pvti_tsc;
+	ret = pvti_system_time +
+		pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
+				    pvti_tsc_shift);

 	/* refer to tsc.c read_tsc() comment for rationale */
 	last = gtod->cycle_last;

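The new vread_pvclock() is a classic seqcount-style lock-free read: snapshot the fields between two reads of the version word and retry if a writer was active (odd version, or version changed). A stand-alone sketch of the same loop, with a fake pvti and made-up values; the real code uses rdtsc_ordered() and smp_rmb() where noted, and this assumes a 64-bit compiler with __uint128_t:

    #include <stdio.h>
    #include <stdint.h>

    /* Simplified stand-in for pvclock_vcpu_time_info; field names mirror
     * the ones read in vread_pvclock(), the values below are made up. */
    struct fake_pvti {
        volatile uint32_t version;      /* odd = writer in progress */
        uint32_t tsc_to_system_mul;
        int8_t   tsc_shift;
        uint64_t tsc_timestamp;
        uint64_t system_time;
    };

    /* scale 'delta' cycles to nanoseconds, as pvclock_scale_delta() does */
    static uint64_t scale_delta(uint64_t delta, uint32_t mul, int8_t shift)
    {
        if (shift < 0)
            delta >>= -shift;
        else
            delta <<= shift;
        return ((__uint128_t)delta * mul) >> 32;
    }

    static uint64_t read_clock(const struct fake_pvti *p, uint64_t tsc_now)
    {
        uint32_t version, mul;
        int8_t shift;
        uint64_t t0, sys;

        do {                 /* retry while the writer holds an odd version */
            version = p->version;
            mul   = p->tsc_to_system_mul;
            shift = p->tsc_shift;
            sys   = p->system_time;
            t0    = p->tsc_timestamp;
            /* the kernel issues smp_rmb() here before the double-check */
        } while ((version & 1) || version != p->version);

        return sys + scale_delta(tsc_now - t0, mul, shift);
    }

    int main(void)
    {
        struct fake_pvti p = {
            .version = 2, .tsc_to_system_mul = 1U << 31,  /* scale x0.5 */
            .tsc_shift = 0, .tsc_timestamp = 1000, .system_time = 5000,
        };
        /* delta = 2000 cycles, scaled by 0.5 -> 5000 + 1000 = 6000 */
        printf("time = %llu ns\n", (unsigned long long)read_clock(&p, 3000));
        return 0;
    }
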
@@ -25,7 +25,7 @@ SECTIONS
 	 * segment.
 	 */

-	vvar_start = . - 2 * PAGE_SIZE;
+	vvar_start = . - 3 * PAGE_SIZE;
 	vvar_page = vvar_start;

 	/* Place all vvars at the offsets in asm/vvar.h. */

@@ -36,6 +36,7 @@ SECTIONS
 #undef EMIT_VVAR

 	hpet_page = vvar_start + PAGE_SIZE;
+	pvclock_page = vvar_start + 2 * PAGE_SIZE;

 	. = SIZEOF_HEADERS;

@@ -73,6 +73,7 @@ enum {
 	sym_vvar_start,
 	sym_vvar_page,
 	sym_hpet_page,
+	sym_pvclock_page,
 	sym_VDSO_FAKE_SECTION_TABLE_START,
 	sym_VDSO_FAKE_SECTION_TABLE_END,
 };

@@ -80,6 +81,7 @@ enum {
 const int special_pages[] = {
 	sym_vvar_page,
 	sym_hpet_page,
+	sym_pvclock_page,
 };

 struct vdso_sym {

@@ -91,6 +93,7 @@ struct vdso_sym required_syms[] = {
 	[sym_vvar_start] = {"vvar_start", true},
 	[sym_vvar_page] = {"vvar_page", true},
 	[sym_hpet_page] = {"hpet_page", true},
+	[sym_pvclock_page] = {"pvclock_page", true},
 	[sym_VDSO_FAKE_SECTION_TABLE_START] = {
 		"VDSO_FAKE_SECTION_TABLE_START", false
 	},

@@ -100,6 +100,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 		.name = "[vvar]",
 		.pages = no_pages,
 	};
+	struct pvclock_vsyscall_time_info *pvti;

 	if (calculate_addr) {
 		addr = vdso_addr(current->mm->start_stack,

@@ -169,6 +170,18 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 	}
 #endif

+	pvti = pvclock_pvti_cpu0_va();
+	if (pvti && image->sym_pvclock_page) {
+		ret = remap_pfn_range(vma,
+				      text_start + image->sym_pvclock_page,
+				      __pa(pvti) >> PAGE_SHIFT,
+				      PAGE_SIZE,
+				      PAGE_READONLY);
+
+		if (ret)
+			goto up_fail;
+	}
+
 up_fail:
 	if (ret)
 		current->mm->context.vdso = NULL;

@@ -2,5 +2,7 @@
 #define _ASM_X86_CMDLINE_H

 int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
+int cmdline_find_option(const char *cmdline_ptr, const char *option,
+			char *buffer, int bufsize);

 #endif /* _ASM_X86_CMDLINE_H */

@@ -187,6 +187,7 @@
 #define X86_FEATURE_ARAT	( 7*32+ 1) /* Always Running APIC Timer */
 #define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
 #define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */
 #define X86_FEATURE_PLN		( 7*32+ 5) /* Intel Power Limit Notification */
 #define X86_FEATURE_PTS		( 7*32+ 6) /* Intel Package Thermal Status */
 #define X86_FEATURE_DTHERM	( 7*32+ 7) /* Digital Thermal Sensor */

@@ -199,6 +200,9 @@
 #define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */

+/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+#define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW	( 8*32+ 0) /* Intel TPR Shadow */
 #define X86_FEATURE_VNMI	( 8*32+ 1) /* Intel Virtual NMI */

@@ -43,7 +43,7 @@ struct gdt_page {
 	struct desc_struct gdt[GDT_ENTRIES];
 } __attribute__((aligned(PAGE_SIZE)));

-DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
+DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page);

 static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
 {

@@ -21,11 +21,13 @@
 # define DISABLE_K6_MTRR	(1<<(X86_FEATURE_K6_MTRR & 31))
 # define DISABLE_CYRIX_ARR	(1<<(X86_FEATURE_CYRIX_ARR & 31))
 # define DISABLE_CENTAUR_MCR	(1<<(X86_FEATURE_CENTAUR_MCR & 31))
+# define DISABLE_PCID		0
 #else
 # define DISABLE_VME		0
 # define DISABLE_K6_MTRR	0
 # define DISABLE_CYRIX_ARR	0
 # define DISABLE_CENTAUR_MCR	0
+# define DISABLE_PCID		(1<<(X86_FEATURE_PCID & 31))
 #endif /* CONFIG_X86_64 */

 /*

@@ -35,7 +37,7 @@
 #define DISABLED_MASK1	0
 #define DISABLED_MASK2	0
 #define DISABLED_MASK3	(DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4	0
+#define DISABLED_MASK4	(DISABLE_PCID)
 #define DISABLED_MASK5	0
 #define DISABLED_MASK6	0
 #define DISABLED_MASK7	0

@@ -3,7 +3,6 @@

 #include <asm/fpu/api.h>
 #include <asm/pgtable.h>
-#include <asm/tlb.h>

 /*
  * We map the EFI regions needed for runtime services non-contiguously,

@@ -67,17 +66,6 @@ extern u64 asmlinkage efi_call(void *fp, ...);

 #define efi_call_phys(f, args...)		efi_call((f), args)

-/*
- * Scratch space used for switching the pagetable in the EFI stub
- */
-struct efi_scratch {
-	u64	r15;
-	u64	prev_cr3;
-	pgd_t	*efi_pgt;
-	bool	use_pgd;
-	u64	phys_stack;
-} __packed;
-
 #define efi_call_virt(f, ...)						\
 ({									\
 	efi_status_t __s;						\

@@ -85,20 +73,7 @@ extern u64 asmlinkage efi_call(void *fp, ...);
 	efi_sync_low_kernel_mappings();					\
 	preempt_disable();						\
 	__kernel_fpu_begin();						\
-									\
-	if (efi_scratch.use_pgd) {					\
-		efi_scratch.prev_cr3 = read_cr3();			\
-		write_cr3((unsigned long)efi_scratch.efi_pgt);		\
-		__flush_tlb_all();					\
-	}								\
-									\
 	__s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__);	\
-									\
-	if (efi_scratch.use_pgd) {					\
-		write_cr3(efi_scratch.prev_cr3);			\
-		__flush_tlb_all();					\
-	}								\
-									\
 	__kernel_fpu_end();						\
 	preempt_enable();						\
 	__s;								\

@@ -138,7 +113,6 @@ extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
 extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
 extern void efi_sync_low_kernel_mappings(void);
-extern int __init efi_alloc_page_tables(void);
 extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
 extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
 extern void __init old_map_region(efi_memory_desc_t *md);

@@ -22,12 +22,8 @@ typedef struct {
 #ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
-	/*
-	 * irq_tlb_count is double-counted in irq_call_count, so it must be
-	 * subtracted from irq_call_count when displaying irq_call_count
-	 */
-	unsigned int irq_tlb_count;
 #endif
+	unsigned int irq_tlb_count;
 #ifdef CONFIG_X86_THERMAL_VECTOR
 	unsigned int irq_thermal_count;
 #endif

@@ -187,7 +187,7 @@ extern char irq_entries_start[];
 #define VECTOR_RETRIGGERED	((void *)~0UL)

 typedef struct irq_desc* vector_irq_t[NR_VECTORS];
-DECLARE_PER_CPU(vector_irq_t, vector_irq);
+DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq);

 #endif /* !ASSEMBLY_ */

arch/x86/include/asm/kaiser.h (new file, 141 lines)

@@ -0,0 +1,141 @@
+#ifndef _ASM_X86_KAISER_H
+#define _ASM_X86_KAISER_H
+
+#include <uapi/asm/processor-flags.h> /* For PCID constants */
+
+/*
+ * This file includes the definitions for the KAISER feature.
+ * KAISER is a counter measure against x86_64 side channel attacks on
+ * the kernel virtual memory.  It has a shadow pgd for every process: the
+ * shadow pgd has a minimalistic kernel-set mapped, but includes the whole
+ * user memory. Within a kernel context switch, or when an interrupt is handled,
+ * the pgd is switched to the normal one. When the system switches to user mode,
+ * the shadow pgd is enabled. By this, the virtual memory caches are freed,
+ * and the user may not attack the whole kernel memory.
+ *
+ * A minimalistic kernel mapping holds the parts needed to be mapped in user
+ * mode, such as the entry/exit functions of the user space, or the stacks.
+ */
+
+#define KAISER_SHADOW_PGD_OFFSET 0x1000
+
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+.macro _SWITCH_TO_KERNEL_CR3 reg
+movq %cr3, \reg
+andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
+ALTERNATIVE "", "bts $63, \reg", X86_FEATURE_PCID
+movq \reg, %cr3
+.endm
+
+.macro _SWITCH_TO_USER_CR3 reg regb
+/*
+ * regb must be the low byte portion of reg: because we have arranged
+ * for the low byte of the user PCID to serve as the high byte of NOFLUSH
+ * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are
+ * not enabled): so that the one register can update both memory and cr3.
+ */
+movq %cr3, \reg
+orq  PER_CPU_VAR(x86_cr3_pcid_user), \reg
+js   9f
+/* If PCID enabled, FLUSH this time, reset to NOFLUSH for next time */
+movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7)
+9:
+movq \reg, %cr3
+.endm
+
+.macro SWITCH_KERNEL_CR3
+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+_SWITCH_TO_KERNEL_CR3 %rax
+popq %rax
+8:
+.endm
+
+.macro SWITCH_USER_CR3
+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+_SWITCH_TO_USER_CR3 %rax %al
+popq %rax
+8:
+.endm
+
+.macro SWITCH_KERNEL_CR3_NO_STACK
+ALTERNATIVE "jmp 8f", \
+	__stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \
+	X86_FEATURE_KAISER
+_SWITCH_TO_KERNEL_CR3 %rax
+movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+8:
+.endm
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION */
+
+.macro SWITCH_KERNEL_CR3
+.endm
+.macro SWITCH_USER_CR3
+.endm
+.macro SWITCH_KERNEL_CR3_NO_STACK
+.endm
+
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Upon kernel/user mode switch, it may happen that the address
+ * space has to be switched before the registers have been
+ * stored.  To change the address space, another register is
+ * needed.  A register therefore has to be stored/restored.
+ */
+DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+
+extern int kaiser_enabled;
+extern void __init kaiser_check_boottime_disable(void);
+#else
+#define kaiser_enabled	0
+static inline void __init kaiser_check_boottime_disable(void) {}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/*
+ * Kaiser function prototypes are needed even when CONFIG_PAGE_TABLE_ISOLATION is not set,
+ * so as to build with tests on kaiser_enabled instead of #ifdefs.
+ */
+
+/**
+ *  kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+ *  @addr: the start address of the range
+ *  @size: the size of the range
+ *  @flags: The mapping flags of the pages
+ *
+ *  The mapping is done on a global scope, so no bigger
+ *  synchronization has to be done.  The pages have to be
+ *  manually unmapped again when they are not needed any longer.
+ */
+extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+
+/**
+ *  kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping
+ *  @addr: the start address of the range
+ *  @size: the size of the range
+ */
+extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
+
+/**
+ *  kaiser_init - Initialize the shadow mapping
+ *
+ *  Most parts of the shadow mapping can be mapped upon boot
+ *  time.  Only per-process things like the thread stacks
+ *  or a new LDT have to be mapped at runtime.  These boot-
+ *  time mappings are permanent and never unmapped.
+ */
+extern void kaiser_init(void);
+
+#endif /* __ASSEMBLY */
+
+#endif /* _ASM_X86_KAISER_H */

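Several later hunks (native_get_shadow_pgd() in pgtable_64.h, NEXT_PGD_PAGE in head_64.S) rely on the pgd pair being 8k long and 8k aligned, so the shadow half is reachable with a single OR of PAGE_SIZE. A small user-space sketch of that address trick, with aligned_alloc() standing in for the 8k-aligned pgd pair:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define PAGE_SIZE 4096UL

    int main(void)
    {
        /* Stand-in for an 8k-aligned, 8k-long pgd pair:
         * first 4k = kernel pgd, second 4k = shadow (user) pgd. */
        uint64_t *pgd = aligned_alloc(2 * PAGE_SIZE, 2 * PAGE_SIZE);
        if (!pgd)
            return 1;

        /* native_get_shadow_pgd(): OR-ing PAGE_SIZE lands in the second
         * page because the base has bit 12 clear by construction. */
        uint64_t *entry        = pgd + 5;  /* some pgd slot */
        uint64_t *shadow_entry = (uint64_t *)((uintptr_t)entry | PAGE_SIZE);

        printf("kernel slot %p, shadow slot %p (+%ld bytes)\n",
               (void *)entry, (void *)shadow_entry,
               (long)((uintptr_t)shadow_entry - (uintptr_t)entry));
        free(pgd);
        return 0;
    }
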
@@ -24,12 +24,6 @@ typedef struct {
 	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
 } mm_context_t;

-#ifdef CONFIG_SMP
 void leave_mm(int cpu);
-#else
-static inline void leave_mm(int cpu)
-{
-}
-#endif

 #endif /* _ASM_X86_MMU_H */

@@ -98,109 +98,16 @@ static inline void load_mm_ldt(struct mm_struct *mm)

 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
 #ifdef CONFIG_SMP
 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
 #endif
 }

-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
-			     struct task_struct *tsk)
-{
-	unsigned cpu = smp_processor_id();
-
-	if (likely(prev != next)) {
-#ifdef CONFIG_SMP
-		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		this_cpu_write(cpu_tlbstate.active_mm, next);
-#endif
-		cpumask_set_cpu(cpu, mm_cpumask(next));
-
-		/*
-		 * Re-load page tables.
-		 *
-		 * This logic has an ordering constraint:
-		 *
-		 *  CPU 0: Write to a PTE for 'next'
-		 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
-		 *  CPU 1: set bit 1 in next's mm_cpumask
-		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
-		 *
-		 * We need to prevent an outcome in which CPU 1 observes
-		 * the new PTE value and CPU 0 observes bit 1 clear in
-		 * mm_cpumask.  (If that occurs, then the IPI will never
-		 * be sent, and CPU 0's TLB will contain a stale entry.)
-		 *
-		 * The bad outcome can occur if either CPU's load is
-		 * reordered before that CPU's store, so both CPUs must
-		 * execute full barriers to prevent this from happening.
-		 *
-		 * Thus, switch_mm needs a full barrier between the
-		 * store to mm_cpumask and any operation that could load
-		 * from next->pgd.  TLB fills are special and can happen
-		 * due to instruction fetches or for no reason at all,
-		 * and neither LOCK nor MFENCE orders them.
-		 * Fortunately, load_cr3() is serializing and gives the
-		 * ordering guarantee we need.
-		 *
-		 */
-		load_cr3(next->pgd);
-
-		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-
-		/* Stop flush ipis for the previous mm */
-		cpumask_clear_cpu(cpu, mm_cpumask(prev));
-
-		/* Load per-mm CR4 state */
-		load_mm_cr4(next);
-
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-		/*
-		 * Load the LDT, if the LDT is different.
-		 *
-		 * It's possible that prev->context.ldt doesn't match
-		 * the LDT register.  This can happen if leave_mm(prev)
-		 * was called and then modify_ldt changed
-		 * prev->context.ldt but suppressed an IPI to this CPU.
-		 * In this case, prev->context.ldt != NULL, because we
-		 * never set context.ldt to NULL while the mm still
-		 * exists.  That means that next->context.ldt !=
-		 * prev->context.ldt, because mms never share an LDT.
-		 */
-		if (unlikely(prev->context.ldt != next->context.ldt))
-			load_mm_ldt(next);
-#endif
-	}
-#ifdef CONFIG_SMP
-	else {
-		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
-
-		if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
-			/*
-			 * On established mms, the mm_cpumask is only changed
-			 * from irq context, from ptep_clear_flush() while in
-			 * lazy tlb mode, and here. Irqs are blocked during
-			 * schedule, protecting us from simultaneous changes.
-			 */
-			cpumask_set_cpu(cpu, mm_cpumask(next));
-
-			/*
-			 * We were in lazy tlb mode and leave_mm disabled
-			 * tlb flush IPI delivery. We must reload CR3
-			 * to make sure to use no freed page tables.
-			 *
-			 * As above, load_cr3() is serializing and orders TLB
-			 * fills with respect to the mm_cpumask write.
-			 */
-			load_cr3(next->pgd);
-			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-			load_mm_cr4(next);
-			load_mm_ldt(next);
-		}
-	}
-#endif
-}
+extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+		      struct task_struct *tsk);
+
+extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+			       struct task_struct *tsk);
+#define switch_mm_irqs_off switch_mm_irqs_off

 #define activate_mm(prev, next)			\
 do {						\

@@ -18,6 +18,12 @@
 #ifndef __ASSEMBLY__
 #include <asm/x86_init.h>

+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern int kaiser_enabled;
+#else
+#define kaiser_enabled 0
+#endif
+
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
 void ptdump_walk_pgd_level_checkwx(void);

@@ -653,7 +659,17 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)

 static inline int pgd_bad(pgd_t pgd)
 {
-	return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+	pgdval_t ignore_flags = _PAGE_USER;
+	/*
+	 * We set NX on KAISER pgds that map userspace memory so
+	 * that userspace can not meaningfully use the kernel
+	 * page table by accident; it will fault on the first
+	 * instruction it tries to run.  See native_set_pgd().
+	 */
+	if (kaiser_enabled)
+		ignore_flags |= _PAGE_NX;
+
+	return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
 }

 static inline int pgd_none(pgd_t pgd)

@@ -855,7 +871,15 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
  */
 static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
 {
-	memcpy(dst, src, count * sizeof(pgd_t));
+	memcpy(dst, src, count * sizeof(pgd_t));
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	if (kaiser_enabled) {
+		/* Clone the shadow pgd part as well */
+		memcpy(native_get_shadow_pgd(dst),
+		       native_get_shadow_pgd(src),
+		       count * sizeof(pgd_t));
+	}
+#endif
 }

 #define PTE_SHIFT ilog2(PTRS_PER_PTE)

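The new pgd_bad() deliberately tolerates _PAGE_NX only when kaiser is enabled, because shadow pgds that map userspace get NX set on purpose. A worked check of that flag logic as a stand-alone program (flag values as on x86_64):

    #include <stdio.h>
    #include <stdint.h>

    #define _PAGE_PRESENT  0x001ULL
    #define _PAGE_RW       0x002ULL
    #define _PAGE_USER     0x004ULL
    #define _PAGE_ACCESSED 0x020ULL
    #define _PAGE_DIRTY    0x040ULL
    #define _PAGE_NX       ((uint64_t)1 << 63)
    #define _KERNPG_TABLE  (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

    static int pgd_bad(uint64_t pgd_flags, int kaiser_enabled)
    {
        uint64_t ignore_flags = _PAGE_USER;

        if (kaiser_enabled)        /* NX is deliberately set on shadow pgds */
            ignore_flags |= _PAGE_NX;
        return (pgd_flags & ~ignore_flags) != _KERNPG_TABLE;
    }

    int main(void)
    {
        uint64_t shadow = _KERNPG_TABLE | _PAGE_USER | _PAGE_NX;

        printf("shadow pgd, kaiser off: %s\n", pgd_bad(shadow, 0) ? "bad" : "ok");
        printf("shadow pgd, kaiser on:  %s\n", pgd_bad(shadow, 1) ? "bad" : "ok");
        return 0;
    }
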
@@ -106,9 +106,32 @@ static inline void native_pud_clear(pud_t *pud)
 	native_set_pud(pud, native_make_pud(0));
 }

+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
+
+static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+{
+#ifdef CONFIG_DEBUG_VM
+	/* linux/mmdebug.h may not have been included at this point */
+	BUG_ON(!kaiser_enabled);
+#endif
+	return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
+}
+#else
+static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	return pgd;
+}
+static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+{
+	BUILD_BUG_ON(1);
+	return NULL;
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
-	*pgdp = pgd;
+	*pgdp = kaiser_set_shadow_pgd(pgdp, pgd);
 }

 static inline void native_pgd_clear(pgd_t *pgd)

@@ -89,7 +89,7 @@
 #define _PAGE_NX	(_AT(pteval_t, 0))
 #endif

-#define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+#define _PAGE_PROTNONE  (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)

 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\
			 _PAGE_ACCESSED | _PAGE_DIRTY)

@@ -102,6 +102,33 @@
			 _PAGE_SOFT_DIRTY)
 #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)

+/* The ASID is the lower 12 bits of CR3 */
+#define X86_CR3_PCID_ASID_MASK  (_AC((1<<12)-1,UL))
+
+/* Mask for all the PCID-related bits in CR3: */
+#define X86_CR3_PCID_MASK       (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
+#define X86_CR3_PCID_ASID_KERN  (_AC(0x0,UL))
+
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && defined(CONFIG_X86_64)
+/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */
+#define X86_CR3_PCID_ASID_USER	(_AC(0x80,UL))
+
+#define X86_CR3_PCID_KERN_FLUSH		(X86_CR3_PCID_ASID_KERN)
+#define X86_CR3_PCID_USER_FLUSH		(X86_CR3_PCID_ASID_USER)
+#define X86_CR3_PCID_KERN_NOFLUSH	(X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
+#define X86_CR3_PCID_USER_NOFLUSH	(X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
+#else
+#define X86_CR3_PCID_ASID_USER	(_AC(0x0,UL))
+/*
+ * PCIDs are unsupported on 32-bit and none of these bits can be
+ * set in CR3:
+ */
+#define X86_CR3_PCID_KERN_FLUSH		(0)
+#define X86_CR3_PCID_USER_FLUSH		(0)
+#define X86_CR3_PCID_KERN_NOFLUSH	(0)
+#define X86_CR3_PCID_USER_NOFLUSH	(0)
+#endif
+
 /*
  * The cache modes defined here are used to translate between pure SW usage
  * and the HW defined cache mode bits and/or PAT entries.

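These CR3 constants compose as pgd address | ASID | optional NOFLUSH bit. A quick sketch printing the four combinations the entry code uses (the pgd address is a made-up sample):

    #include <stdio.h>
    #include <stdint.h>

    #define X86_CR3_PCID_NOFLUSH   (1ULL << 63)
    #define X86_CR3_PCID_ASID_KERN 0x00ULL
    #define X86_CR3_PCID_ASID_USER 0x80ULL  /* low byte doubles as NOFLUSH's */

    int main(void)
    {
        uint64_t pgd = 0x1da000ULL;  /* hypothetical pgd physical address */
        uint64_t cr3[4] = {
            pgd | X86_CR3_PCID_ASID_KERN,                        /* kern, flush */
            pgd | X86_CR3_PCID_ASID_USER,                        /* user, flush */
            pgd | X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN, /* kern, keep  */
            pgd | X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER, /* user, keep  */
        };
        const char *name[4] = { "KERN_FLUSH", "USER_FLUSH",
                                "KERN_NOFLUSH", "USER_NOFLUSH" };
        for (int i = 0; i < 4; i++)
            printf("%-12s cr3 = %#018llx\n", name[i],
                   (unsigned long long)cr3[i]);
        return 0;
    }
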
@@ -305,7 +305,7 @@ struct tss_struct {

 } ____cacheline_aligned;

-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);

 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);

@@ -4,6 +4,15 @@
 #include <linux/clocksource.h>
 #include <asm/pvclock-abi.h>

+#ifdef CONFIG_PARAVIRT_CLOCK
+extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void);
+#else
+static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
+{
+	return NULL;
+}
+#endif
+
 /* some helper functions for xen and kvm pv clock sources */
 cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
 u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);

@@ -6,6 +6,55 @@

 #include <asm/processor.h>
 #include <asm/special_insns.h>
+#include <asm/smp.h>
+
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+			     unsigned long type)
+{
+	struct { u64 d[2]; } desc = { { pcid, addr } };
+
+	/*
+	 * The memory clobber is because the whole point is to invalidate
+	 * stale TLB entries and, especially if we're flushing global
+	 * mappings, we don't want the compiler to reorder any subsequent
+	 * memory accesses before the TLB flush.
+	 *
+	 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+	 * invpcid (%rcx), %rax in long mode.
+	 */
+	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR		0
+#define INVPCID_TYPE_SINGLE_CTXT	1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL	2
+#define INVPCID_TYPE_ALL_NON_GLOBAL	3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+				     unsigned long addr)
+{
+	__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+	__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>

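__invpcid() feeds the instruction a 16-byte in-memory descriptor: PCID in the first quadword, linear address in the second. The instruction itself is privileged, but the descriptor layout is easy to sketch in user space (the address below is a made-up sample):

    #include <stdio.h>
    #include <stdint.h>

    /* Descriptor layout used by __invpcid(): two adjacent u64s in memory,
     * d[0] = PCID (low 12 bits significant), d[1] = linear address. */
    struct invpcid_desc { uint64_t d[2]; };

    enum {
        INVPCID_TYPE_INDIV_ADDR      = 0,
        INVPCID_TYPE_SINGLE_CTXT     = 1,
        INVPCID_TYPE_ALL_INCL_GLOBAL = 2,
        INVPCID_TYPE_ALL_NON_GLOBAL  = 3,
    };

    int main(void)
    {
        /* invpcid_flush_one(pcid=0x80, addr): what lands in memory before
         * the .byte-encoded invpcid instruction runs (kernel-only). */
        struct invpcid_desc desc = { { 0x80, 0x7fffdeadb000ULL } };
        unsigned long type = INVPCID_TYPE_INDIV_ADDR;

        printf("type=%lu pcid=%#llx addr=%#llx\n", type,
               (unsigned long long)desc.d[0],
               (unsigned long long)desc.d[1]);
        /* types 2 and 3 ignore the descriptor's PCID field entirely */
        return 0;
    }
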
@@ -16,10 +65,8 @@
 #endif

 struct tlb_state {
-#ifdef CONFIG_SMP
 	struct mm_struct *active_mm;
 	int state;
-#endif

 	/*
 	 * Access to this CR4 shadow and to H/W CR4 is protected by

@@ -84,6 +131,24 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 	cr4_set_bits(mask);
 }

+/*
+ * Declare a couple of kaiser interfaces here for convenience,
+ * to avoid the need for asm/kaiser.h in unexpected places.
+ */
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern int kaiser_enabled;
+extern void kaiser_setup_pcid(void);
+extern void kaiser_flush_tlb_on_return_to_user(void);
+#else
+#define kaiser_enabled 0
+static inline void kaiser_setup_pcid(void)
+{
+}
+static inline void kaiser_flush_tlb_on_return_to_user(void)
+{
+}
+#endif
+
 static inline void __native_flush_tlb(void)
 {
 	/*

@@ -92,6 +157,8 @@ static inline void __native_flush_tlb(void)
 	 * back:
 	 */
 	preempt_disable();
+	if (kaiser_enabled)
+		kaiser_flush_tlb_on_return_to_user();
 	native_write_cr3(native_read_cr3());
 	preempt_enable();
 }

@@ -101,39 +168,84 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
 	unsigned long cr4;

 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	/* clear PGE */
-	native_write_cr4(cr4 & ~X86_CR4_PGE);
-	/* write old PGE again and flush TLBs */
-	native_write_cr4(cr4);
+	if (cr4 & X86_CR4_PGE) {
+		/* clear PGE and flush TLB of all entries */
+		native_write_cr4(cr4 & ~X86_CR4_PGE);
+		/* restore PGE as it was before */
+		native_write_cr4(cr4);
+	} else {
+		/* do it with cr3, letting kaiser flush user PCID */
+		__native_flush_tlb();
+	}
 }

 static inline void __native_flush_tlb_global(void)
 {
 	unsigned long flags;

+	if (this_cpu_has(X86_FEATURE_INVPCID)) {
+		/*
+		 * Using INVPCID is considerably faster than a pair of writes
+		 * to CR4 sandwiched inside an IRQ flag save/restore.
+		 *
+		 * Note, this works with CR4.PCIDE=0 or 1.
+		 */
+		invpcid_flush_all();
+		return;
+	}
+
 	/*
 	 * Read-modify-write to CR4 - protect it from preemption and
 	 * from interrupts. (Use the raw variant because this code can
 	 * be called from deep inside debugging code.)
 	 */
 	raw_local_irq_save(flags);

 	__native_flush_tlb_global_irq_disabled();

 	raw_local_irq_restore(flags);
 }

 static inline void __native_flush_tlb_single(unsigned long addr)
 {
-	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+	/*
+	 * SIMICS #GP's if you run INVPCID with type 2/3
+	 * and X86_CR4_PCIDE clear.  Shame!
+	 *
+	 * The ASIDs used below are hard-coded.  But, we must not
+	 * call invpcid(type=1/2) before CR4.PCIDE=1.  Just call
+	 * invlpg in the case we are called early.
+	 */
+
+	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
+		if (kaiser_enabled)
+			kaiser_flush_tlb_on_return_to_user();
+		asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+		return;
+	}
+	/* Flush the address out of both PCIDs. */
+	/*
+	 * An optimization here might be to determine addresses
+	 * that are only kernel-mapped and only flush the kernel
+	 * ASID.  But, userspace flushes are probably much more
+	 * important performance-wise.
+	 *
+	 * Make sure to do only a single invpcid when KAISER is
+	 * disabled and we have only a single ASID.
+	 */
+	if (kaiser_enabled)
+		invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
+	invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
 }

 static inline void __flush_tlb_all(void)
 {
-	if (cpu_has_pge)
-		__flush_tlb_global();
-	else
-		__flush_tlb();
+	__flush_tlb_global();
+	/*
+	 * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+	 * we'd end up flushing kernel translations for the current ASID but
+	 * we might fail to flush kernel translations for other cached ASIDs.
+	 *
+	 * To avoid this issue, we force PCID off if PGE is off.
+	 */
 }

 static inline void __flush_tlb_one(unsigned long addr)

@@ -147,7 +259,6 @@ static inline void __flush_tlb_one(unsigned long addr)
 /*
  * TLB flushing:
  *
- *  - flush_tlb() flushes the current mm struct TLBs
  *  - flush_tlb_all() flushes all processes TLBs
  *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
  *  - flush_tlb_page(vma, vmaddr) flushes one page

@@ -159,84 +270,6 @@ static inline void __flush_tlb_one(unsigned long addr)
  * and page-granular flushes are available only on i486 and up.
  */

-#ifndef CONFIG_SMP
-
-/* "_up" is for UniProcessor.
- *
- * This is a helper for other header functions. *Not* intended to be called
- * directly. All global TLB flushes need to either call this, or to bump the
- * vm statistics themselves.
- */
-static inline void __flush_tlb_up(void)
-{
-	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-	__flush_tlb();
-}
-
-static inline void flush_tlb_all(void)
-{
-	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-	__flush_tlb_all();
-}
-
-static inline void flush_tlb(void)
-{
-	__flush_tlb_up();
-}
-
-static inline void local_flush_tlb(void)
-{
-	__flush_tlb_up();
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-	if (mm == current->active_mm)
-		__flush_tlb_up();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
-				  unsigned long addr)
-{
-	if (vma->vm_mm == current->active_mm)
-		__flush_tlb_one(addr);
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
-{
-	if (vma->vm_mm == current->active_mm)
-		__flush_tlb_up();
-}
-
-static inline void flush_tlb_mm_range(struct mm_struct *mm,
-	   unsigned long start, unsigned long end, unsigned long vmflag)
-{
-	if (mm == current->active_mm)
-		__flush_tlb_up();
-}
-
-static inline void native_flush_tlb_others(const struct cpumask *cpumask,
-					   struct mm_struct *mm,
-					   unsigned long start,
-					   unsigned long end)
-{
-}
-
-static inline void reset_lazy_tlbstate(void)
-{
-}
-
-static inline void flush_tlb_kernel_range(unsigned long start,
-					  unsigned long end)
-{
-	flush_tlb_all();
-}
-
-#else /* SMP */
-
-#include <asm/smp.h>
-
 #define local_flush_tlb() __flush_tlb()

 #define flush_tlb_mm(mm)	flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)

@@ -245,13 +278,14 @@ static inline void flush_tlb_kernel_range(unsigned long start,
	flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)

 extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
 extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				unsigned long end, unsigned long vmflag);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);

-#define flush_tlb()	flush_tlb_current_task()
+static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
+{
+	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
+}

 void native_flush_tlb_others(const struct cpumask *cpumask,
			     struct mm_struct *mm,

@@ -266,14 +300,6 @@ static inline void reset_lazy_tlbstate(void)
	this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
 }

-#endif /* SMP */
-
-/* Not inlined due to inc_irq_stat not being defined yet */
-#define flush_tlb_local() {		\
-	inc_irq_stat(irq_tlb_count);	\
-	local_flush_tlb();		\
-}
-
 #ifndef CONFIG_PARAVIRT
 #define flush_tlb_others(mask, mm, start, end)	\
	native_flush_tlb_others(mask, mm, start, end)

@@ -22,6 +22,7 @@ struct vdso_image {

 	long sym_vvar_page;
 	long sym_hpet_page;
+	long sym_pvclock_page;
 	long sym_VDSO32_NOTE_MASK;
 	long sym___kernel_sigreturn;
 	long sym___kernel_rt_sigreturn;

@@ -77,7 +77,8 @@
 #define X86_CR3_PWT		_BITUL(X86_CR3_PWT_BIT)
 #define X86_CR3_PCD_BIT		4 /* Page Cache Disable */
 #define X86_CR3_PCD		_BITUL(X86_CR3_PCD_BIT)
-#define X86_CR3_PCID_MASK	_AC(0x00000fff,UL) /* PCID Mask */
+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
+#define X86_CR3_PCID_NOFLUSH	_BITULL(X86_CR3_PCID_NOFLUSH_BIT)

 /*
  * Intel CPU features in CR4

@@ -16,9 +16,21 @@ CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_early_printk.o = -pg
 endif

-KASAN_SANITIZE_head$(BITS).o := n
-KASAN_SANITIZE_dumpstack.o := n
-KASAN_SANITIZE_dumpstack_$(BITS).o := n
+KASAN_SANITIZE_head$(BITS).o				:= n
+KASAN_SANITIZE_dumpstack.o				:= n
+KASAN_SANITIZE_dumpstack_$(BITS).o			:= n
+KASAN_SANITIZE_stacktrace.o				:= n
+
+OBJECT_FILES_NON_STANDARD_head_$(BITS).o		:= y
+OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o	:= y
+OBJECT_FILES_NON_STANDARD_mcount_$(BITS).o		:= y
+OBJECT_FILES_NON_STANDARD_test_nx.o			:= y
+
+# If instrumentation of this dir is enabled, boot hangs during first second.
+# Probably could be more selective here, but note that files related to irqs,
+# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to
+# non-deterministic coverage.
+KCOV_INSTRUMENT		:= n

 CFLAGS_irq.o := -I$(src)/../include/asm/trace

@@ -107,6 +107,15 @@ ENTRY(do_suspend_lowlevel)
 	movq	pt_regs_r14(%rax), %r14
 	movq	pt_regs_r15(%rax), %r15

+#ifdef CONFIG_KASAN
+	/*
+	 * The suspend path may have poisoned some areas deeper in the stack,
+	 * which we now need to unpoison.
+	 */
+	movq	%rsp, %rdi
+	call	kasan_unpoison_task_stack_below
+#endif
+
 	xorl	%eax, %eax
 	addq	$8, %rsp
 	jmp	restore_processor_state

@@ -2,6 +2,10 @@
 # Makefile for local APIC drivers and for the IO-APIC code
 #

+# Leads to non-deterministic coverage that is not a function of syscall inputs.
+# In particular, smp_apic_timer_interrupt() is called in random places.
+KCOV_INSTRUMENT		:= n
+
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o vector.o
 obj-y				+= hw_nmi.o

@@ -8,6 +8,10 @@ CFLAGS_REMOVE_common.o = -pg
 CFLAGS_REMOVE_perf_event.o = -pg
 endif

+# If these files are instrumented, boot hangs during the first second.
+KCOV_INSTRUMENT_common.o := n
+KCOV_INSTRUMENT_perf_event.o := n
+
 # Make sure load_percpu_segment has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_common.o		:= $(nostackp)

@@ -19,6 +19,14 @@

 void __init check_bugs(void)
 {
+#ifdef CONFIG_X86_32
+	/*
+	 * Regardless of whether PCID is enumerated, the SDM says
+	 * that it can't be enabled in 32-bit mode.
+	 */
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
+
 	identify_boot_cpu();
 #ifndef CONFIG_SMP
 	pr_info("CPU: ");

@@ -92,7 +92,7 @@ static const struct cpu_dev default_cpu = {

 static const struct cpu_dev *this_cpu = &default_cpu;

-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = {
 #ifdef CONFIG_X86_64
 	/*
 	 * We need valid kernel segments for data and code in long mode too

@@ -162,6 +162,40 @@ static int __init x86_mpx_setup(char *s)
 }
 __setup("nompx", x86_mpx_setup);

+#ifdef CONFIG_X86_64
+static int __init x86_pcid_setup(char *s)
+{
+	/* require an exact match without trailing characters */
+	if (strlen(s))
+		return 0;
+
+	/* do not emit a message if the feature is not present */
+	if (!boot_cpu_has(X86_FEATURE_PCID))
+		return 1;
+
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
+	pr_info("nopcid: PCID feature disabled\n");
+	return 1;
+}
+__setup("nopcid", x86_pcid_setup);
+#endif
+
+static int __init x86_noinvpcid_setup(char *s)
+{
+	/* noinvpcid doesn't accept parameters */
+	if (s)
+		return -EINVAL;
+
+	/* do not emit a message if the feature is not present */
+	if (!boot_cpu_has(X86_FEATURE_INVPCID))
+		return 0;
+
+	setup_clear_cpu_cap(X86_FEATURE_INVPCID);
+	pr_info("noinvpcid: INVPCID feature disabled\n");
+	return 0;
+}
+early_param("noinvpcid", x86_noinvpcid_setup);
+
 #ifdef CONFIG_X86_32
 static int cachesize_override = -1;
 static int disable_x86_serial_nr = 1;

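Note the asymmetry between the two handlers: __setup() passes whatever trails the matched prefix, which is why x86_pcid_setup() rejects any non-empty remainder, while early_param() handlers receive the option's value. A tiny sketch of the exact-match check:

    #include <stdio.h>
    #include <string.h>

    /* Mirrors x86_pcid_setup(): 's' is whatever follows "nopcid" on the
     * command line, so an empty string means an exact "nopcid" match. */
    static int nopcid_setup(const char *s)
    {
        if (strlen(s))     /* "nopcidfoo" or "nopcid=1": reject */
            return 0;
        printf("nopcid: PCID feature disabled\n");
        return 1;
    }

    int main(void)
    {
        nopcid_setup("");      /* exact match -> disables PCID */
        nopcid_setup("=yes");  /* trailing characters -> ignored */
        return 0;
    }
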
@@ -287,6 +321,39 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 	}
 }

+static void setup_pcid(struct cpuinfo_x86 *c)
+{
+	if (cpu_has(c, X86_FEATURE_PCID)) {
+		if (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled) {
+			cr4_set_bits(X86_CR4_PCIDE);
+			/*
+			 * INVPCID has two "groups" of types:
+			 * 1/2: Invalidate an individual address
+			 * 3/4: Invalidate all contexts
+			 *
+			 * 1/2 take a PCID, but 3/4 do not.  So, 3/4
+			 * ignore the PCID argument in the descriptor.
+			 * But, we have to be careful not to call 1/2
+			 * with an actual non-zero PCID in them before
+			 * we do the above cr4_set_bits().
+			 */
+			if (cpu_has(c, X86_FEATURE_INVPCID))
+				set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE);
+		} else {
+			/*
+			 * flush_tlb_all(), as currently implemented, won't
+			 * work if PCID is on but PGE is not.  Since that
+			 * combination doesn't exist on real hardware, there's
+			 * no reason to try to fully support it, but it's
+			 * polite to avoid corrupting data if we're on
+			 * an improperly configured VM.
+			 */
+			clear_cpu_cap(c, X86_FEATURE_PCID);
+		}
+	}
+	kaiser_setup_pcid();
+}
+
 /*
  * Some CPU features depend on higher CPUID levels, which may not always
  * be available due to CPUID level capping or broken virtualization

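setup_pcid() boils down to a small decision table: PCID is only usable together with PGE (or when kaiser needs it), otherwise the capability is cleared so flush_tlb_all() stays correct. A user-space sketch of that decision, not kernel code:

    #include <stdio.h>

    /* Decision implemented by setup_pcid(): CR4.PCIDE may only be set when
     * PGE is available too (or KAISER forces the issue); otherwise the PCID
     * capability is cleared. */
    static const char *pcid_decision(int pcid, int pge, int kaiser)
    {
        if (!pcid)
            return "no PCID: nothing to do";
        if (pge || kaiser)
            return "enable CR4.PCIDE";
        return "clear X86_FEATURE_PCID (PCID without PGE is unsupported)";
    }

    int main(void)
    {
        printf("pcid+pge:         %s\n", pcid_decision(1, 1, 0));
        printf("pcid, no pge:     %s\n", pcid_decision(1, 0, 0));
        printf("pcid+kaiser only: %s\n", pcid_decision(1, 0, 1));
        return 0;
    }
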
@@ -918,6 +985,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	setup_smep(c);
 	setup_smap(c);

+	/* Set up PCID */
+	setup_pcid(c);
+
 	/*
 	 * The vendor-specific functions might have changed features.
 	 * Now we do "generic changes."

@@ -1173,7 +1243,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
 	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
 };

-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks
 	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);

 /* May not be marked __init: used by software suspend */

@@ -1336,6 +1406,14 @@ void cpu_init(void)
 	 * try to read it.
 	 */
 	cr4_init_shadow();
+	if (!kaiser_enabled) {
+		/*
+		 * secondary_startup_64() deferred setting PGE in cr4:
+		 * probe_page_size_mask() sets it on the boot cpu,
+		 * but it needs to be set on each secondary cpu.
+		 */
+		cr4_set_bits(X86_CR4_PGE);
+	}

 	/*
 	 * Load microcode on this cpu if a valid microcode is available.

@@ -2,11 +2,15 @@
 #include <linux/types.h>
 #include <linux/slab.h>

+#include <asm/kaiser.h>
 #include <asm/perf_event.h>
 #include <asm/insn.h>

 #include "perf_event.h"

+static
+DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store);
+
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24

|
@ -268,6 +272,39 @@ void fini_debug_store_on_cpu(int cpu)
|
|||
|
||||
static DEFINE_PER_CPU(void *, insn_buffer);
|
||||
|
||||
static void *dsalloc(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
#ifdef CONFIG_PAGE_TABLE_ISOLATION
|
||||
unsigned int order = get_order(size);
|
||||
struct page *page;
|
||||
unsigned long addr;
|
||||
|
||||
page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
|
||||
if (!page)
|
||||
return NULL;
|
||||
addr = (unsigned long)page_address(page);
|
||||
if (kaiser_add_mapping(addr, size, __PAGE_KERNEL) < 0) {
|
||||
__free_pages(page, order);
|
||||
addr = 0;
|
||||
}
|
||||
return (void *)addr;
|
||||
#else
|
||||
return kmalloc_node(size, flags | __GFP_ZERO, node);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void dsfree(const void *buffer, size_t size)
|
||||
{
|
||||
#ifdef CONFIG_PAGE_TABLE_ISOLATION
|
||||
if (!buffer)
|
||||
return;
|
||||
kaiser_remove_mapping((unsigned long)buffer, size);
|
||||
free_pages((unsigned long)buffer, get_order(size));
|
||||
#else
|
||||
kfree(buffer);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int alloc_pebs_buffer(int cpu)
|
||||
{
|
||||
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
||||
|
@@ -278,7 +315,7 @@ static int alloc_pebs_buffer(int cpu)
 	if (!x86_pmu.pebs)
 		return 0;

-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+	buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
 	if (unlikely(!buffer))
 		return -ENOMEM;

@@ -289,7 +326,7 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree(buffer, x86_pmu.pebs_buffer_size);
 			return -ENOMEM;
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;

@@ -315,7 +352,8 @@ static void release_pebs_buffer(int cpu)
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;

-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	dsfree((void *)(unsigned long)ds->pebs_buffer_base,
+			x86_pmu.pebs_buffer_size);
 	ds->pebs_buffer_base = 0;
 }

@@ -329,7 +367,7 @@ static int alloc_bts_buffer(int cpu)
 	if (!x86_pmu.bts)
 		return 0;

-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;

@@ -355,19 +393,15 @@ static void release_bts_buffer(int cpu)
 	if (!ds || !x86_pmu.bts)
 		return;

-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE);
 	ds->bts_buffer_base = 0;
 }

 static int alloc_ds_buffer(int cpu)
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu);

+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;

 	return 0;

@@ -381,7 +415,6 @@ static void release_ds_buffer(int cpu)
 		return;

 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }

 void release_ds_buffers(void)

@@ -41,6 +41,7 @@
 #include <asm/pgalloc.h>
 #include <asm/setup.h>
 #include <asm/espfix.h>
+#include <asm/kaiser.h>

 /*
  * Note: we only need 6*8 = 48 bytes for the espfix stack, but round

@@ -126,6 +127,15 @@ void __init init_espfix_bsp(void)
 	/* Install the espfix pud into the kernel page directory */
 	pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
 	pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
+	/*
+	 * Just copy the top-level PGD that is mapping the espfix
+	 * area to ensure it is mapped into the shadow user page
+	 * tables.
+	 */
+	if (kaiser_enabled) {
+		set_pgd(native_get_shadow_pgd(pgd_p),
+			__pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
+	}

 	/* Randomize the locations */
 	init_espfix_random();

@@ -183,8 +183,8 @@ ENTRY(secondary_startup_64)
 	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
 1:

-	/* Enable PAE mode and PGE */
-	movl	$(X86_CR4_PAE | X86_CR4_PGE), %ecx
+	/* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */
+	movl	$(X86_CR4_PAE | X86_CR4_PSE), %ecx
 	movq	%rcx, %cr4

 	/* Setup early boot stage 4 level pagetables. */

@@ -441,6 +441,27 @@ early_idt_ripmsg:
 	.balign	PAGE_SIZE; \
 GLOBAL(name)

+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Each PGD needs to be 8k long and 8k aligned.  We do not
+ * ever go out to userspace with these, so we do not
+ * strictly *need* the second page, but this allows us to
+ * have a single set_pgd() implementation that does not
+ * need to worry about whether it has 4k or 8k to work
+ * with.
+ *
+ * This ensures PGDs are 8k long:
+ */
+#define KAISER_USER_PGD_FILL	512
+/* This ensures they are 8k-aligned: */
+#define NEXT_PGD_PAGE(name) \
+	.balign 2 * PAGE_SIZE; \
+GLOBAL(name)
+#else
+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
+#define KAISER_USER_PGD_FILL	0
+#endif
+
 /* Automate the creation of 1 to 1 mapping pmd entries */
 #define PMDS(START, PERM, COUNT)			\
 	i = 0 ;						\

@@ -450,9 +471,10 @@ GLOBAL(name)
 	.endr

 	__INITDATA
-NEXT_PAGE(early_level4_pgt)
+NEXT_PGD_PAGE(early_level4_pgt)
 	.fill	511,8,0
 	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+	.fill	KAISER_USER_PGD_FILL,8,0

 NEXT_PAGE(early_dynamic_pgts)
 	.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0

@@ -460,16 +482,18 @@ NEXT_PAGE(early_dynamic_pgts)
 	.data

 #ifndef CONFIG_XEN
-NEXT_PAGE(init_level4_pgt)
+NEXT_PGD_PAGE(init_level4_pgt)
 	.fill	512,8,0
+	.fill	KAISER_USER_PGD_FILL,8,0
 #else
-NEXT_PAGE(init_level4_pgt)
+NEXT_PGD_PAGE(init_level4_pgt)
 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
 	.org    init_level4_pgt + L4_PAGE_OFFSET*8, 0
 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
 	.org    init_level4_pgt + L4_START_KERNEL*8, 0
 	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
 	.quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+	.fill	KAISER_USER_PGD_FILL,8,0

 NEXT_PAGE(level3_ident_pgt)
 	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE

@@ -480,6 +504,7 @@ NEXT_PAGE(level2_ident_pgt)
 	 */
 	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
 #endif
+	.fill	KAISER_USER_PGD_FILL,8,0

 NEXT_PAGE(level3_kernel_pgt)
 	.fill	L3_START_KERNEL,8,0

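The arithmetic behind KAISER_USER_PGD_FILL: `.fill 512,8,0` emits 512 zero quadwords, exactly one 4k page, which together with `.balign 2 * PAGE_SIZE` gives each PGD its 8k size and alignment. A one-line check:

    #include <assert.h>
    #include <stdio.h>

    #define PAGE_SIZE            4096
    #define KAISER_USER_PGD_FILL 512   /* .fill KAISER_USER_PGD_FILL,8,0 */

    int main(void)
    {
        /* 512 quadwords of zero are exactly one extra page, making each
         * PGD 8k long (kernel half + shadow half). */
        assert(KAISER_USER_PGD_FILL * 8 == PAGE_SIZE);
        printf("shadow half: %d bytes (one page)\n", KAISER_USER_PGD_FILL * 8);
        return 0;
    }
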
@@ -353,7 +353,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer)

 		irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));
 		irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
-		disable_irq(hdev->irq);
+		disable_hardirq(hdev->irq);
 		irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
 		enable_irq(hdev->irq);
 	}

@@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	seq_puts(p, "  Rescheduling interrupts\n");
 	seq_printf(p, "%*s: ", prec, "CAL");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
-					irq_stats(j)->irq_tlb_count);
+		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
 	seq_puts(p, "  Function call interrupts\n");
 	seq_printf(p, "%*s: ", prec, "TLB");
 	for_each_online_cpu(j)

Some files were not shown because too many files have changed in this diff.