Merge android-4.4.144 (4b2d6ba) into msm-4.4

* refs/heads/tmp-4b2d6ba
  Linux 4.4.144
  ubi: fastmap: Erase outdated anchor PEBs during attach
  ubi: Fix Fastmap's update_vol()
  ubi: Fix races around ubi_refill_pools()
  ubi: Be more paranoid while seaching for the most recent Fastmap
  ubi: Rework Fastmap attach base code
  ubi: Introduce vol_ignored()
  clk: tegra: Fix PLL_U post divider and initial rate on Tegra30
  block: do not use interruptible wait anywhere
  x86/cpu: Re-apply forced caps every time CPU caps are re-read
  x86/xen: Add call of speculative_store_bypass_ht_init() to PV paths
  x86/bugs: Rename SSBD_NO to SSB_NO
  x86/speculation, KVM: Implement support for VIRT_SPEC_CTRL/LS_CFG
  x86/bugs: Rework spec_ctrl base and mask logic
  x86/bugs: Remove x86_spec_ctrl_set()
  x86/bugs: Expose x86_spec_ctrl_base directly
  x86/bugs: Unify x86_spec_ctrl_{set_guest,restore_host}
  x86/speculation: Rework speculative_store_bypass_update()
  x86/speculation: Add virtualized speculative store bypass disable support
  x86/bugs, KVM: Extend speculation control for VIRT_SPEC_CTRL
  x86/speculation: Handle HT correctly on AMD
  x86/cpufeatures: Add FEATURE_ZEN
  x86/cpu/AMD: Fix erratum 1076 (CPB bit)
  x86/cpufeatures: Disentangle SSBD enumeration
  x86/cpufeatures: Disentangle MSR_SPEC_CTRL enumeration from IBRS
  x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP
  x86/cpu: Make alternative_msr_write work for 32-bit code
  x86/bugs: Fix the parameters alignment and missing void
  x86/bugs: Make cpu_show_common() static
  x86/bugs: Fix __ssb_select_mitigation() return type
  Documentation/spec_ctrl: Do some minor cleanups
  proc: Use underscores for SSBD in 'status'
  x86/bugs: Rename _RDS to _SSBD
  x86/speculation: Make "seccomp" the default mode for Speculative Store Bypass
  seccomp: Move speculation migitation control to arch code
  seccomp: Add filter flag to opt-out of SSB mitigation
  seccomp: Use PR_SPEC_FORCE_DISABLE
  prctl: Add force disable speculation
  seccomp: Enable speculation flaw mitigations
  proc: Provide details on speculation flaw mitigations
  nospec: Allow getting/setting on non-current task
  x86/speculation: Add prctl for Speculative Store Bypass mitigation
  x86/process: Allow runtime control of Speculative Store Bypass
  x86/process: Optimize TIF_NOTSC switch
  x86/process: Correct and optimize TIF_BLOCKSTEP switch
  x86/process: Optimize TIF checks in __switch_to_xtra()
  prctl: Add speculation control prctls
  x86/speculation: Create spec-ctrl.h to avoid include hell
  x86/bugs/AMD: Add support to disable RDS on Fam[15,16,17]h if requested
  x86/bugs: Whitelist allowed SPEC_CTRL MSR values
  x86/bugs/intel: Set proper CPU features and setup RDS
  x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation
  x86/cpufeatures: Add X86_FEATURE_RDS
  x86/bugs: Expose /sys/../spec_store_bypass
  x86/cpu/intel: Add Knights Mill to Intel family
  x86/cpu: Rename Merrifield2 to Moorefield
  x86/bugs, KVM: Support the combination of guest and host IBRS
  x86/bugs: Read SPEC_CTRL MSR during boot and re-use reserved bits
  x86/bugs: Concentrate bug reporting into a separate function
  x86/bugs: Concentrate bug detection into a separate function
  x86/nospec: Simplify alternative_msr_write()
  x86/amd: don't set X86_BUG_SYSRET_SS_ATTRS when running under Xen
  xen: set cpu capabilities from xen_start_kernel()
  selftest/seccomp: Fix the seccomp(2) signature
  selftest/seccomp: Fix the flag name SECCOMP_FILTER_FLAG_TSYNC
  x86/speculation: Remove Skylake C2 from Speculation Control microcode blacklist
  x86/speculation: Move firmware_restrict_branch_speculation_*() from C to CPP
  x86/speculation: Use IBRS if available before calling into firmware
  x86/spectre_v2: Don't check microcode versions when running under hypervisors
  x86/speculation: Use Indirect Branch Prediction Barrier in context switch
  x86/mm: Give each mm TLB flush generation a unique ID
  x86/mm: Factor out LDT init from context init
  x86/xen: Zero MSR_IA32_SPEC_CTRL before suspend
  x86/speculation: Add <asm/msr-index.h> dependency
  x86/speculation: Fix up array_index_nospec_mask() asm constraint
  x86/speculation: Clean up various Spectre related details
  x86/speculation: Correct Speculation Control microcode blacklist again
  x86/speculation: Update Speculation Control microcode blacklist
  x86/entry/64/compat: Clear registers for compat syscalls, to reduce speculation attack surface
  x86/asm/entry/32: Simplify pushes of zeroed pt_regs->REGs
  x86/pti: Mark constant arrays as __initconst
  x86/cpuid: Fix up "virtual" IBRS/IBPB/STIBP feature bits on Intel
  x86/cpufeatures: Clean up Spectre v2 related CPUID flags
  x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support
  x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes
  x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown
  x86/msr: Add definitions for new speculation control MSRs
  x86/cpufeatures: Add AMD feature bits for Speculation Control
  x86/cpufeatures: Add Intel feature bits for Speculation Control
  x86/cpufeatures: Add CPUID_7_EDX CPUID leaf
  x86/paravirt: Make native_save_fl() extern inline
  xhci: Fix perceived dead host due to runtime suspend race with event handler
  skbuff: Unconditionally copy pfmemalloc in __skb_clone()
  net: Don't copy pfmemalloc flag in __copy_skb_header()
  tg3: Add higher cpu clock for 5762.
  ptp: fix missing break in switch
  net: phy: fix flag masking in __set_phy_supported
  net/ipv4: Set oif in fib_compute_spec_dst
  lib/rhashtable: consider param->min_size when setting initial table size
  ipv6: fix useless rol32 call on hash
  ipv4: Return EINVAL when ping_group_range sysctl doesn't map to user ns
  mm: memcg: fix use after free in mem_cgroup_iter()
  ARC: mm: allow mprotect to make stack mappings executable
  ARC: Fix CONFIG_SWAP
  ALSA: rawmidi: Change resized buffers atomically
  fat: fix memory allocation failure handling of match_strdup()
  x86/MCE: Remove min interval polling limitation
  KVM/Eventfd: Avoid crash when assign and deassign specific eventfd in parallel.

Conflicts:
	drivers/mtd/ubi/wl.c
	sound/core/rawmidi.c

Change-Id: I277fe9260a764e7923ddc90e7327d9aa5865a038
Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
commit facb909e66
Author: Srinivasarao P <spathi@codeaurora.org>
Date:   2018-08-02 12:00:59 +05:30

 74 changed files with 1971 additions and 274 deletions


@ -276,6 +276,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/meltdown
/sys/devices/system/cpu/vulnerabilities/spectre_v1
/sys/devices/system/cpu/vulnerabilities/spectre_v2
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
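
For context, the sysfs attribute added above can be read like any regular file. A minimal userspace sketch (not part of the original patch; the path is taken from the ABI entry above):

/* Minimal sketch: report the kernel's Speculative Store Bypass status.
 * The sysfs path comes from the ABI entry above; on kernels without this
 * series the fopen() simply fails.
 */
#include <stdio.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spec_store_bypass", "r");

	if (!f) {
		perror("spec_store_bypass");
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* e.g. "Not affected" or a "Mitigation: ..." string */
	fclose(f);
	return 0;
}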


@ -2530,6 +2530,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
allow data leaks with this option, which is equivalent
to spectre_v2=off.
nospec_store_bypass_disable
[HW] Disable all mitigations for the Speculative Store Bypass vulnerability
noxsave [BUGS=X86] Disables x86 extended register state save
and restore using xsave. The kernel will fallback to
enabling legacy floating-point and sse state.
@ -3702,6 +3705,48 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Not specifying this option is equivalent to
spectre_v2=auto.
spec_store_bypass_disable=
[HW] Control Speculative Store Bypass (SSB) Disable mitigation
(Speculative Store Bypass vulnerability)
Certain CPUs are vulnerable to an exploit against a
common industry wide performance optimization known
as "Speculative Store Bypass" in which recent stores
to the same memory location may not be observed by
later loads during speculative execution. The idea
is that such stores are unlikely and that they can
be detected prior to instruction retirement at the
end of a particular speculation execution window.
In vulnerable processors, the speculatively forwarded
store can be used in a cache side channel attack, for
example to read memory to which the attacker does not
directly have access (e.g. inside sandboxed code).
This parameter controls whether the Speculative Store
Bypass optimization is used.
on - Unconditionally disable Speculative Store Bypass
off - Unconditionally enable Speculative Store Bypass
auto - Kernel detects whether the CPU model contains an
implementation of Speculative Store Bypass and
picks the most appropriate mitigation. If the
CPU is not vulnerable, "off" is selected. If the
CPU is vulnerable the default mitigation is
architecture and Kconfig dependent. See below.
prctl - Control Speculative Store Bypass per thread
via prctl. Speculative Store Bypass is enabled
for a process by default. The state of the control
is inherited on fork.
seccomp - Same as "prctl" above, but all seccomp threads
will disable SSB unless they explicitly opt out.
Not specifying this option is equivalent to
spec_store_bypass_disable=auto.
Default mitigations:
X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl"
spia_io_base= [HW,MTD]
spia_fio_base=
spia_pedr=


@ -0,0 +1,94 @@
===================
Speculation Control
===================
Quite some CPUs have speculation-related misfeatures which are in
fact vulnerabilities causing data leaks in various forms even across
privilege domains.
The kernel provides mitigation for such vulnerabilities in various
forms. Some of these mitigations are compile-time configurable and some
can be supplied on the kernel command line.
There is also a class of mitigations which are very expensive, but they can
be restricted to a certain set of processes or tasks in controlled
environments. The mechanism to control these mitigations is via
:manpage:`prctl(2)`.
There are two prctl options which are related to this:
* PR_GET_SPECULATION_CTRL
* PR_SET_SPECULATION_CTRL
PR_GET_SPECULATION_CTRL
-----------------------
PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature
which is selected with arg2 of prctl(2). The return value uses bits 0-3 with
the following meaning:
==== ===================== ===================================================
Bit Define Description
==== ===================== ===================================================
0 PR_SPEC_PRCTL Mitigation can be controlled per task by
PR_SET_SPECULATION_CTRL.
1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is
disabled.
2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is
enabled.
3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A
subsequent prctl(..., PR_SPEC_ENABLE) will fail.
==== ===================== ===================================================
If all bits are 0 the CPU is not affected by the speculation misfeature.
If PR_SPEC_PRCTL is set, then the per-task control of the mitigation is
available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation
misfeature will fail.
PR_SET_SPECULATION_CTRL
-----------------------
PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which
is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand
in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or
PR_SPEC_FORCE_DISABLE.
Common error codes
------------------
======= =================================================================
Value Meaning
======= =================================================================
EINVAL The prctl is not implemented by the architecture or unused
prctl(2) arguments are not 0.
ENODEV arg2 is selecting a not supported speculation misfeature.
======= =================================================================
PR_SET_SPECULATION_CTRL error codes
-----------------------------------
======= =================================================================
Value Meaning
======= =================================================================
0 Success
ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor
PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE.
ENXIO Control of the selected speculation misfeature is not possible.
See PR_GET_SPECULATION_CTRL.
EPERM Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller
tried to enable it again.
======= =================================================================
Speculation misfeature controls
-------------------------------
- PR_SPEC_STORE_BYPASS: Speculative Store Bypass
Invocations:
* prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
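
The invocations above map directly onto a small userspace program. A hedged sketch follows; the fallback constant values mirror the kernel's uapi <linux/prctl.h> and are only needed when the installed headers predate this series:

/* Sketch: query, then force-disable, Speculative Store Bypass for the
 * calling task via the prctl interface documented above.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/prctl.h>

#ifndef PR_SET_SPECULATION_CTRL		/* fallbacks for older headers */
# define PR_GET_SPECULATION_CTRL	52
# define PR_SET_SPECULATION_CTRL	53
# define PR_SPEC_STORE_BYPASS		0
# define PR_SPEC_PRCTL			(1UL << 0)
# define PR_SPEC_ENABLE			(1UL << 1)
# define PR_SPEC_DISABLE		(1UL << 2)
# define PR_SPEC_FORCE_DISABLE		(1UL << 3)
#endif

int main(void)
{
	int state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);

	if (state < 0) {
		/* EINVAL or ENODEV: prctl or misfeature not supported here */
		fprintf(stderr, "PR_GET_SPECULATION_CTRL: %s\n", strerror(errno));
		return 1;
	}

	printf("state 0x%x: per-task control %savailable, mitigation %s\n",
	       state,
	       (state & PR_SPEC_PRCTL) ? "" : "not ",
	       (state & PR_SPEC_DISABLE) ? "enabled" : "disabled");

	/* Opt this task into the mitigation; FORCE_DISABLE cannot be undone. */
	if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
		  PR_SPEC_FORCE_DISABLE, 0, 0))
		fprintf(stderr, "PR_SET_SPECULATION_CTRL: %s\n", strerror(errno));

	return 0;
}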


@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 4
SUBLEVEL = 143
SUBLEVEL = 144
EXTRAVERSION =
NAME = Blurry Fish Butt


@ -102,7 +102,7 @@ typedef pte_t * pgtable_t;
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
/* Default Permissions for stack/heaps pages (Non Executable) */
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE)
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
#define WANT_PAGE_VIRTUAL 1


@ -372,7 +372,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
/* Decode a PTE containing swap "identifier "into constituents */
#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
#define __swp_offset(pte_lookalike) ((pte_lookalike).val << 13)
#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
/* NOPs, to keep generic kernel happy */
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })


@ -79,24 +79,33 @@ ENTRY(entry_SYSENTER_compat)
ASM_CLAC /* Clear AC after saving FLAGS */
pushq $__USER32_CS /* pt_regs->cs */
xorq %r8,%r8
pushq %r8 /* pt_regs->ip = 0 (placeholder) */
pushq $0 /* pt_regs->ip = 0 (placeholder) */
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 = 0 */
pushq %r8 /* pt_regs->r9 = 0 */
pushq %r8 /* pt_regs->r10 = 0 */
pushq %r8 /* pt_regs->r11 = 0 */
pushq $0 /* pt_regs->r8 = 0 */
xorq %r8, %r8 /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
xorq %r9, %r9 /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
xorq %r10, %r10 /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
xorq %r11, %r11 /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
pushq %r8 /* pt_regs->r12 = 0 */
pushq %r8 /* pt_regs->r13 = 0 */
pushq %r8 /* pt_regs->r14 = 0 */
pushq %r8 /* pt_regs->r15 = 0 */
xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
xorq %r12, %r12 /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
xorq %r13, %r13 /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
xorq %r14, %r14 /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
xorq %r15, %r15 /* nospec r15 */
cld
/*
@ -185,17 +194,26 @@ ENTRY(entry_SYSCALL_compat)
pushq %rdx /* pt_regs->dx */
pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
xorq %r8,%r8
pushq %r8 /* pt_regs->r8 = 0 */
pushq %r8 /* pt_regs->r9 = 0 */
pushq %r8 /* pt_regs->r10 = 0 */
pushq %r8 /* pt_regs->r11 = 0 */
pushq $0 /* pt_regs->r8 = 0 */
xorq %r8, %r8 /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
xorq %r9, %r9 /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
xorq %r10, %r10 /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
xorq %r11, %r11 /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
pushq %r8 /* pt_regs->r12 = 0 */
pushq %r8 /* pt_regs->r13 = 0 */
pushq %r8 /* pt_regs->r14 = 0 */
pushq %r8 /* pt_regs->r15 = 0 */
xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
xorq %r12, %r12 /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
xorq %r13, %r13 /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
xorq %r14, %r14 /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
xorq %r15, %r15 /* nospec r15 */
/*
* User mode is traced as though IRQs are on, and SYSENTER
@ -292,17 +310,26 @@ ENTRY(entry_INT80_compat)
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
xorq %r8,%r8
pushq %r8 /* pt_regs->r8 = 0 */
pushq %r8 /* pt_regs->r9 = 0 */
pushq %r8 /* pt_regs->r10 = 0 */
pushq %r8 /* pt_regs->r11 = 0 */
pushq $0 /* pt_regs->r8 = 0 */
xorq %r8, %r8 /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
xorq %r9, %r9 /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
xorq %r10, %r10 /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
xorq %r11, %r11 /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp */
xorl %ebp, %ebp /* nospec rbp */
pushq %r12 /* pt_regs->r12 */
xorq %r12, %r12 /* nospec r12 */
pushq %r13 /* pt_regs->r13 */
xorq %r13, %r13 /* nospec r13 */
pushq %r14 /* pt_regs->r14 */
xorq %r14, %r14 /* nospec r14 */
pushq %r15 /* pt_regs->r15 */
xorq %r15, %r15 /* nospec r15 */
cld
/*


@ -6,6 +6,8 @@
#ifndef _ASM_X86_MACH_DEFAULT_APM_H
#define _ASM_X86_MACH_DEFAULT_APM_H
#include <asm/nospec-branch.h>
#ifdef APM_ZERO_SEGS
# define APM_DO_ZERO_SEGS \
"pushl %%ds\n\t" \
@ -31,6 +33,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@ -43,6 +46,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
"=S" (*esi)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
firmware_restrict_branch_speculation_end();
}
static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@ -55,6 +59,7 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@ -67,6 +72,7 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
"=S" (si)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
firmware_restrict_branch_speculation_end();
return error;
}


@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
asm volatile ("cmp %1,%2; sbb %0,%0;"
:"=r" (mask)
:"r"(size),"r" (index)
:"g"(size),"r" (index)
:"cc");
return mask;
}


@ -28,6 +28,7 @@ enum cpuid_leafs
CPUID_8000_000A_EDX,
CPUID_7_ECX,
CPUID_8000_0007_EBX,
CPUID_7_EDX,
};
#ifdef CONFIG_X86_FEATURE_NAMES
@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
REQUIRED_MASK_CHECK || \
BUILD_BUG_ON_ZERO(NCAPINTS != 18))
BUILD_BUG_ON_ZERO(NCAPINTS != 19))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
DISABLED_MASK_CHECK || \
BUILD_BUG_ON_ZERO(NCAPINTS != 18))
BUILD_BUG_ON_ZERO(NCAPINTS != 19))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \


@ -12,7 +12,7 @@
/*
* Defines x86 CPU feature bits
*/
#define NCAPINTS 18 /* N 32-bit words worth of info */
#define NCAPINTS 19 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@ -194,13 +194,28 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
#define X86_FEATURE_RETPOLINE ( 7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
#define X86_FEATURE_RETPOLINE ( 7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */
/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation */
#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
@ -251,6 +266,10 @@
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@ -285,6 +304,15 @@
#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
/*
* BUG word(s)
*/
@ -302,5 +330,6 @@
#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
#endif /* _ASM_X86_CPUFEATURES_H */


@ -59,6 +59,7 @@
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
#define DISABLED_MASK17 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
#define DISABLED_MASK18 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
#endif /* _ASM_X86_DISABLED_FEATURES_H */


@ -3,6 +3,7 @@
#include <asm/fpu/api.h>
#include <asm/pgtable.h>
#include <asm/nospec-branch.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
@ -41,8 +42,10 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
({ \
efi_status_t __s; \
kernel_fpu_begin(); \
firmware_restrict_branch_speculation_start(); \
__s = ((efi_##f##_t __attribute__((regparm(0)))*) \
efi.systab->runtime->f)(args); \
firmware_restrict_branch_speculation_end(); \
kernel_fpu_end(); \
__s; \
})
@ -51,8 +54,10 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
#define __efi_call_virt(f, args...) \
({ \
kernel_fpu_begin(); \
firmware_restrict_branch_speculation_start(); \
((efi_##f##_t __attribute__((regparm(0)))*) \
efi.systab->runtime->f)(args); \
firmware_restrict_branch_speculation_end(); \
kernel_fpu_end(); \
})
@ -73,7 +78,9 @@ extern u64 asmlinkage efi_call(void *fp, ...);
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
__kernel_fpu_begin(); \
firmware_restrict_branch_speculation_start(); \
__s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \
firmware_restrict_branch_speculation_end(); \
__kernel_fpu_end(); \
preempt_enable(); \
__s; \


@ -12,6 +12,7 @@
*/
#define INTEL_FAM6_CORE_YONAH 0x0E
#define INTEL_FAM6_CORE2_MEROM 0x0F
#define INTEL_FAM6_CORE2_MEROM_L 0x16
#define INTEL_FAM6_CORE2_PENRYN 0x17
@ -20,6 +21,7 @@
#define INTEL_FAM6_NEHALEM 0x1E
#define INTEL_FAM6_NEHALEM_EP 0x1A
#define INTEL_FAM6_NEHALEM_EX 0x2E
#define INTEL_FAM6_WESTMERE 0x25
#define INTEL_FAM6_WESTMERE2 0x1F
#define INTEL_FAM6_WESTMERE_EP 0x2C
@ -36,9 +38,9 @@
#define INTEL_FAM6_HASWELL_GT3E 0x46
#define INTEL_FAM6_BROADWELL_CORE 0x3D
#define INTEL_FAM6_BROADWELL_XEON_D 0x56
#define INTEL_FAM6_BROADWELL_GT3E 0x47
#define INTEL_FAM6_BROADWELL_X 0x4F
#define INTEL_FAM6_BROADWELL_XEON_D 0x56
#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E
#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E
@ -56,13 +58,15 @@
#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */
#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
#define INTEL_FAM6_ATOM_MERRIFIELD1 0x4A /* Tangier */
#define INTEL_FAM6_ATOM_MERRIFIELD2 0x5A /* Annidale */
#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
/* Xeon Phi */
#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
#endif /* _ASM_X86_INTEL_FAMILY_H */


@ -8,7 +8,7 @@
* Interrupt control:
*/
static inline unsigned long native_save_fl(void)
extern inline unsigned long native_save_fl(void)
{
unsigned long flags;


@ -3,12 +3,18 @@
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/atomic.h>
/*
* The x86 doesn't have a mmu context, but
* we put the segment information here.
* x86 has arch-specific MMU state beyond what lives in mm_struct.
*/
typedef struct {
/*
* ctx_id uniquely identifies this mm_struct. A ctx_id will never
* be reused, and zero is not a valid ctx_id.
*/
u64 ctx_id;
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
#endif
@ -24,6 +30,11 @@ typedef struct {
atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
} mm_context_t;
#define INIT_MM_CONTEXT(mm) \
.context = { \
.ctx_id = 1, \
}
void leave_mm(int cpu);
#endif /* _ASM_X86_MMU_H */


@ -11,6 +11,9 @@
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/mpx.h>
extern atomic64_t last_mm_ctx_id;
#ifndef CONFIG_PARAVIRT
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
@ -52,15 +55,15 @@ struct ldt_struct {
/*
* Used for LDT copy/destruction.
*/
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
void destroy_context(struct mm_struct *mm);
int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
void destroy_context_ldt(struct mm_struct *mm);
#else /* CONFIG_MODIFY_LDT_SYSCALL */
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
static inline int init_new_context_ldt(struct task_struct *tsk,
struct mm_struct *mm)
{
return 0;
}
static inline void destroy_context(struct mm_struct *mm) {}
static inline void destroy_context_ldt(struct mm_struct *mm) {}
#endif
static inline void load_mm_ldt(struct mm_struct *mm)
@ -102,6 +105,18 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
}
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
init_new_context_ldt(tsk, mm);
return 0;
}
static inline void destroy_context(struct mm_struct *mm)
{
destroy_context_ldt(mm);
}
extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk);


@ -32,6 +32,15 @@
#define EFER_FFXSR (1<<_EFER_FFXSR)
/* Intel MSRs. Some also available on other CPUs */
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
@ -45,6 +54,16 @@
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
#define ARCH_CAP_SSB_NO (1 << 4) /*
* Not susceptible to Speculative Store Bypass
* attack, so no Speculative Store Bypass
* control required.
*/
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
@ -132,6 +151,7 @@
/* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
@ -308,6 +328,8 @@
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231


@ -6,6 +6,7 @@
#include <asm/alternative.h>
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
/*
* Fill the CPU return stack buffer.
@ -171,6 +172,14 @@ enum spectre_v2_mitigation {
SPECTRE_V2_IBRS,
};
/* The Speculative Store Bypass disable variants */
enum ssb_mitigation {
SPEC_STORE_BYPASS_NONE,
SPEC_STORE_BYPASS_DISABLE,
SPEC_STORE_BYPASS_PRCTL,
SPEC_STORE_BYPASS_SECCOMP,
};
extern char __indirect_thunk_start[];
extern char __indirect_thunk_end[];
@ -194,6 +203,51 @@ static inline void vmexit_fill_RSB(void)
#endif
}
static __always_inline
void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
{
asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
: : "c" (msr),
"a" ((u32)val),
"d" ((u32)(val >> 32)),
[feature] "i" (feature)
: "memory");
}
static inline void indirect_branch_prediction_barrier(void)
{
u64 val = PRED_CMD_IBPB;
alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
}
/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
/*
* With retpoline, we must use IBRS to restrict branch prediction
* before calling into firmware.
*
* (Implemented as CPP macros due to header hell.)
*/
#define firmware_restrict_branch_speculation_start() \
do { \
u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
\
preempt_disable(); \
alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
X86_FEATURE_USE_IBRS_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
u64 val = x86_spec_ctrl_base; \
\
alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
#endif /* __ASSEMBLY__ */
/*


@ -100,6 +100,7 @@
#define REQUIRED_MASK15 0
#define REQUIRED_MASK16 0
#define REQUIRED_MASK17 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
#define REQUIRED_MASK18 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */


@ -0,0 +1,80 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_SPECCTRL_H_
#define _ASM_X86_SPECCTRL_H_
#include <linux/thread_info.h>
#include <asm/nospec-branch.h>
/*
* On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
* the guest has, while on VMEXIT we restore the host view. This
* would be easier if SPEC_CTRL were architecturally maskable or
* shadowable for guests but this is not (currently) the case.
* Takes the guest view of SPEC_CTRL MSR as a parameter and also
* the guest's version of VIRT_SPEC_CTRL, if emulated.
*/
extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
/**
* x86_spec_ctrl_set_guest - Set speculation control registers for the guest
* @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
* @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
* (may get translated to MSR_AMD64_LS_CFG bits)
*
* Avoids writing to the MSR if the content/bits are the same
*/
static inline
void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
{
x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
}
/**
* x86_spec_ctrl_restore_host - Restore host speculation control registers
* @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
* @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
* (may get translated to MSR_AMD64_LS_CFG bits)
*
* Avoids writing to the MSR if the content/bits are the same
*/
static inline
void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
{
x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
}
/* AMD specific Speculative Store Bypass MSR data */
extern u64 x86_amd_ls_cfg_base;
extern u64 x86_amd_ls_cfg_ssbd_mask;
static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
{
BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}
static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
{
BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}
static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
{
return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
}
#ifdef CONFIG_SMP
extern void speculative_store_bypass_ht_init(void);
#else
static inline void speculative_store_bypass_ht_init(void) { }
#endif
extern void speculative_store_bypass_update(unsigned long tif);
static inline void speculative_store_bypass_update_current(void)
{
speculative_store_bypass_update(current_thread_info()->flags);
}
#endif
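
The two conversion helpers above work because TIF_SSBD (bit 5, see the thread_info.h hunk further down) sits above SPEC_CTRL_SSBD_SHIFT (bit 2, see the msr-index.h hunk). A standalone sketch of that arithmetic, with the constants copied from this series:

/* Illustration of ssbd_tif_to_spec_ctrl()/ssbd_spec_ctrl_to_tif() from the
 * new spec-ctrl.h: shift the per-task TIF_SSBD flag (bit 5) down into the
 * SSBD bit (bit 2) of MSR_SPEC_CTRL, and back.
 */
#include <stdio.h>
#include <stdint.h>

#define SPEC_CTRL_SSBD_SHIFT	2
#define SPEC_CTRL_SSBD		(1ULL << SPEC_CTRL_SSBD_SHIFT)
#define TIF_SSBD		5
#define _TIF_SSBD		(1UL << TIF_SSBD)

static uint64_t ssbd_tif_to_spec_ctrl(uint64_t tifn)
{
	return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}

static unsigned long ssbd_spec_ctrl_to_tif(uint64_t spec_ctrl)
{
	return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}

int main(void)
{
	printf("TIF 0x%lx -> SPEC_CTRL 0x%llx\n", _TIF_SSBD,
	       (unsigned long long)ssbd_tif_to_spec_ctrl(_TIF_SSBD));	/* 0x20 -> 0x4 */
	printf("SPEC_CTRL 0x%llx -> TIF 0x%lx\n",
	       (unsigned long long)SPEC_CTRL_SSBD,
	       ssbd_spec_ctrl_to_tif(SPEC_CTRL_SSBD));			/* 0x4 -> 0x20 */
	return 0;
}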


@ -92,6 +92,7 @@ struct thread_info {
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
#define TIF_SSBD 5 /* Reduced data speculation */
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
@ -114,8 +115,9 @@ struct thread_info {
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_SSBD (1 << TIF_SSBD)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
@ -147,7 +149,7 @@ struct thread_info {
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)


@ -68,6 +68,8 @@ static inline void invpcid_flush_all_nonglobals(void)
struct tlb_state {
struct mm_struct *active_mm;
int state;
/* last user mm's ctx id */
u64 last_ctx_id;
/*
* Access to this CR4 shadow and to H/W CR4 is protected by
@ -109,6 +111,16 @@ static inline void cr4_clear_bits(unsigned long mask)
}
}
static inline void cr4_toggle_bits(unsigned long mask)
{
unsigned long cr4;
cr4 = this_cpu_read(cpu_tlbstate.cr4);
cr4 ^= mask;
this_cpu_write(cpu_tlbstate.cr4, cr4);
__write_cr4(cr4);
}
/* Read the CR4 shadow. */
static inline unsigned long cr4_read_shadow(void)
{


@ -53,6 +53,7 @@ obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
obj-y += tsc.o tsc_msr.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
obj-y += irqflags.o
obj-y += process.o
obj-y += fpu/


@ -9,6 +9,7 @@
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
#include <asm/spec-ctrl.h>
#include <asm/smp.h>
#include <asm/pci-direct.h>
#include <asm/delay.h>
@ -519,6 +520,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_MWAITX))
use_mwaitx_delay();
if (c->x86 >= 0x15 && c->x86 <= 0x17) {
unsigned int bit;
switch (c->x86) {
case 0x15: bit = 54; break;
case 0x16: bit = 33; break;
case 0x17: bit = 10; break;
default: return;
}
/*
* Try to cache the base value so further operations can
* avoid RMW. If that faults, do not enable SSBD.
*/
if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD);
setup_force_cpu_cap(X86_FEATURE_SSBD);
x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
}
}
}
static void early_init_amd(struct cpuinfo_x86 *c)
@ -692,6 +713,17 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
}
}
static void init_amd_zn(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_ZEN);
/*
* Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
* all up to and including B1.
*/
if (c->x86_model <= 1 && c->x86_mask <= 1)
set_cpu_cap(c, X86_FEATURE_CPB);
}
static void init_amd(struct cpuinfo_x86 *c)
{
u32 dummy;
@ -722,6 +754,7 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x10: init_amd_gh(c); break;
case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
case 0x17: init_amd_zn(c); break;
}
/* Enable workaround for FXSAVE leak */
@ -791,8 +824,9 @@ static void init_amd(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
/* AMD CPUs don't reset SS attributes on SYSRET */
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
if (!cpu_has(c, X86_FEATURE_XENPV))
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
}
#ifdef CONFIG_X86_32


@ -11,8 +11,10 @@
#include <linux/utsname.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/nospec.h>
#include <linux/prctl.h>
#include <asm/nospec-branch.h>
#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
@ -26,6 +28,27 @@
#include <asm/intel-family.h>
static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
/*
* Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
* writes to SPEC_CTRL contain whatever reserved bits have been set.
*/
u64 x86_spec_ctrl_base;
EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
/*
* The vendor and possibly platform specific bits which can be modified in
* x86_spec_ctrl_base.
*/
static u64 x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
/*
* AMD specific MSR info for Speculative Store Bypass control.
* x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
*/
u64 x86_amd_ls_cfg_base;
u64 x86_amd_ls_cfg_ssbd_mask;
void __init check_bugs(void)
{
@ -36,9 +59,27 @@ void __init check_bugs(void)
print_cpu_info(&boot_cpu_data);
}
/*
* Read the SPEC_CTRL MSR to account for reserved bits which may
* have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
* init code as it is not enumerated and depends on the family.
*/
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
/* Allow STIBP in MSR_SPEC_CTRL if supported */
if (boot_cpu_has(X86_FEATURE_STIBP))
x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
/* Select the proper spectre mitigation before patching alternatives */
spectre_v2_select_mitigation();
/*
* Select proper mitigation for any exposure to the Speculative Store
* Bypass vulnerability.
*/
ssb_select_mitigation();
#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
@ -94,6 +135,73 @@ static const char *spectre_v2_strings[] = {
static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
u64 msrval, guestval, hostval = x86_spec_ctrl_base;
struct thread_info *ti = current_thread_info();
/* Is MSR_SPEC_CTRL implemented ? */
if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
/*
* Restrict guest_spec_ctrl to supported values. Clear the
* modifiable bits in the host base value and or the
* modifiable bits from the guest value.
*/
guestval = hostval & ~x86_spec_ctrl_mask;
guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
/* SSBD controlled in MSR_SPEC_CTRL */
if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
if (hostval != guestval) {
msrval = setguest ? guestval : hostval;
wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
}
}
/*
* If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
* MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported.
*/
if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
!static_cpu_has(X86_FEATURE_VIRT_SSBD))
return;
/*
* If the host has SSBD mitigation enabled, force it in the host's
* virtual MSR value. If its not permanently enabled, evaluate
* current's TIF_SSBD thread flag.
*/
if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE))
hostval = SPEC_CTRL_SSBD;
else
hostval = ssbd_tif_to_spec_ctrl(ti->flags);
/* Sanitize the guest value */
guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD;
if (hostval != guestval) {
unsigned long tif;
tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
ssbd_spec_ctrl_to_tif(hostval);
speculative_store_bypass_update(tif);
}
}
EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
static void x86_amd_ssb_disable(void)
{
u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD);
else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
wrmsrl(MSR_AMD64_LS_CFG, msrval);
}
#ifdef RETPOLINE
static bool spectre_v2_bad_module;
@ -162,8 +270,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
return SPECTRE_V2_CMD_NONE;
else {
ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
sizeof(arg));
ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
if (ret < 0)
return SPECTRE_V2_CMD_AUTO;
@ -184,8 +291,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
!IS_ENABLED(CONFIG_RETPOLINE)) {
pr_err("%s selected but not compiled in. Switching to AUTO select\n",
mitigation_options[i].option);
pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
}
@ -255,14 +361,14 @@ static void __init spectre_v2_select_mitigation(void)
goto retpoline_auto;
break;
}
pr_err("kernel not compiled with retpoline; no mitigation available!");
pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
return;
retpoline_auto:
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
retpoline_amd:
if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
pr_err("LFENCE not serializing. Switching to generic retpoline\n");
pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
goto retpoline_generic;
}
mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
@ -280,7 +386,7 @@ retpoline_auto:
pr_info("%s\n", spectre_v2_strings[mode]);
/*
* If neither SMEP or KPTI are available, there is a risk of
* If neither SMEP nor PTI are available, there is a risk of
* hitting userspace addresses in the RSB after a context switch
* from a shallow call stack to a deeper one. To prevent this fill
* the entire RSB, even when using IBRS.
@ -294,38 +400,309 @@ retpoline_auto:
if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
!boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
pr_info("Filling RSB on context switch\n");
pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
}
/* Initialize Indirect Branch Prediction Barrier if supported */
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
}
/*
* Retpoline means the kernel is safe because it has no indirect
* branches. But firmware isn't, so use IBRS to protect that.
*/
if (boot_cpu_has(X86_FEATURE_IBRS)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
}
#undef pr_fmt
#define pr_fmt(fmt) "Speculative Store Bypass: " fmt
static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE;
/* The kernel command line selection */
enum ssb_mitigation_cmd {
SPEC_STORE_BYPASS_CMD_NONE,
SPEC_STORE_BYPASS_CMD_AUTO,
SPEC_STORE_BYPASS_CMD_ON,
SPEC_STORE_BYPASS_CMD_PRCTL,
SPEC_STORE_BYPASS_CMD_SECCOMP,
};
static const char *ssb_strings[] = {
[SPEC_STORE_BYPASS_NONE] = "Vulnerable",
[SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
[SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
[SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp",
};
static const struct {
const char *option;
enum ssb_mitigation_cmd cmd;
} ssb_mitigation_options[] = {
{ "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
{ "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
{ "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
{ "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */
{ "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */
};
static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
{
enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO;
char arg[20];
int ret, i;
if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
return SPEC_STORE_BYPASS_CMD_NONE;
} else {
ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
arg, sizeof(arg));
if (ret < 0)
return SPEC_STORE_BYPASS_CMD_AUTO;
for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
if (!match_option(arg, ret, ssb_mitigation_options[i].option))
continue;
cmd = ssb_mitigation_options[i].cmd;
break;
}
if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
pr_err("unknown option (%s). Switching to AUTO select\n", arg);
return SPEC_STORE_BYPASS_CMD_AUTO;
}
}
return cmd;
}
static enum ssb_mitigation __init __ssb_select_mitigation(void)
{
enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
enum ssb_mitigation_cmd cmd;
if (!boot_cpu_has(X86_FEATURE_SSBD))
return mode;
cmd = ssb_parse_cmdline();
if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
(cmd == SPEC_STORE_BYPASS_CMD_NONE ||
cmd == SPEC_STORE_BYPASS_CMD_AUTO))
return mode;
switch (cmd) {
case SPEC_STORE_BYPASS_CMD_AUTO:
case SPEC_STORE_BYPASS_CMD_SECCOMP:
/*
* Choose prctl+seccomp as the default mode if seccomp is
* enabled.
*/
if (IS_ENABLED(CONFIG_SECCOMP))
mode = SPEC_STORE_BYPASS_SECCOMP;
else
mode = SPEC_STORE_BYPASS_PRCTL;
break;
case SPEC_STORE_BYPASS_CMD_ON:
mode = SPEC_STORE_BYPASS_DISABLE;
break;
case SPEC_STORE_BYPASS_CMD_PRCTL:
mode = SPEC_STORE_BYPASS_PRCTL;
break;
case SPEC_STORE_BYPASS_CMD_NONE:
break;
}
/*
* We have three CPU feature flags that are in play here:
* - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
* - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
* - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
*/
if (mode == SPEC_STORE_BYPASS_DISABLE) {
setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
/*
* Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
* a completely different MSR and bit dependent on family.
*/
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
break;
case X86_VENDOR_AMD:
x86_amd_ssb_disable();
break;
}
}
return mode;
}
static void ssb_select_mitigation(void)
{
ssb_mode = __ssb_select_mitigation();
if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
pr_info("%s\n", ssb_strings[ssb_mode]);
}
#undef pr_fmt
#define pr_fmt(fmt) "Speculation prctl: " fmt
static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
{
bool update;
if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
return -ENXIO;
switch (ctrl) {
case PR_SPEC_ENABLE:
/* If speculation is force disabled, enable is not allowed */
if (task_spec_ssb_force_disable(task))
return -EPERM;
task_clear_spec_ssb_disable(task);
update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
break;
case PR_SPEC_DISABLE:
task_set_spec_ssb_disable(task);
update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
break;
case PR_SPEC_FORCE_DISABLE:
task_set_spec_ssb_disable(task);
task_set_spec_ssb_force_disable(task);
update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
break;
default:
return -ERANGE;
}
/*
* If being set on non-current task, delay setting the CPU
* mitigation until it is next scheduled.
*/
if (task == current && update)
speculative_store_bypass_update_current();
return 0;
}
int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
unsigned long ctrl)
{
switch (which) {
case PR_SPEC_STORE_BYPASS:
return ssb_prctl_set(task, ctrl);
default:
return -ENODEV;
}
}
#ifdef CONFIG_SECCOMP
void arch_seccomp_spec_mitigate(struct task_struct *task)
{
if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
}
#endif
static int ssb_prctl_get(struct task_struct *task)
{
switch (ssb_mode) {
case SPEC_STORE_BYPASS_DISABLE:
return PR_SPEC_DISABLE;
case SPEC_STORE_BYPASS_SECCOMP:
case SPEC_STORE_BYPASS_PRCTL:
if (task_spec_ssb_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
if (task_spec_ssb_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
default:
if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
return PR_SPEC_ENABLE;
return PR_SPEC_NOT_AFFECTED;
}
}
int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
{
switch (which) {
case PR_SPEC_STORE_BYPASS:
return ssb_prctl_get(task);
default:
return -ENODEV;
}
}
void x86_spec_ctrl_setup_ap(void)
{
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
x86_amd_ssb_disable();
}
#ifdef CONFIG_SYSFS
ssize_t cpu_show_meltdown(struct device *dev,
struct device_attribute *attr, char *buf)
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
if (!boot_cpu_has_bug(bug))
return sprintf(buf, "Not affected\n");
if (boot_cpu_has(X86_FEATURE_KAISER))
return sprintf(buf, "Mitigation: PTI\n");
switch (bug) {
case X86_BUG_CPU_MELTDOWN:
if (boot_cpu_has(X86_FEATURE_KAISER))
return sprintf(buf, "Mitigation: PTI\n");
break;
case X86_BUG_SPECTRE_V1:
return sprintf(buf, "Mitigation: __user pointer sanitization\n");
case X86_BUG_SPECTRE_V2:
return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
spectre_v2_module_string());
case X86_BUG_SPEC_STORE_BYPASS:
return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
default:
break;
}
return sprintf(buf, "Vulnerable\n");
}
ssize_t cpu_show_spectre_v1(struct device *dev,
struct device_attribute *attr, char *buf)
ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
return sprintf(buf, "Not affected\n");
return sprintf(buf, "Mitigation: __user pointer sanitization\n");
return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN);
}
ssize_t cpu_show_spectre_v2(struct device *dev,
struct device_attribute *attr, char *buf)
ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return sprintf(buf, "Not affected\n");
return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1);
}
return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled],
spectre_v2_module_string());
ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
{
return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2);
}
ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
{
return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
}
#endif
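
The guest/host merge in x86_virt_spec_ctrl() above keeps the host's bits outside x86_spec_ctrl_mask and takes only the maskable bits from the guest. A standalone sketch of that expression with illustrative values (not taken from the patch):

/* Sketch of the MSR_SPEC_CTRL merge done in x86_virt_spec_ctrl() above.
 * The values below are illustrative; the real base and mask are computed
 * at boot in check_bugs()/ssb_select_mitigation().
 */
#include <stdio.h>
#include <stdint.h>

#define SPEC_CTRL_IBRS	(1ULL << 0)
#define SPEC_CTRL_STIBP	(1ULL << 1)
#define SPEC_CTRL_SSBD	(1ULL << 2)

int main(void)
{
	uint64_t hostval = 0;					/* x86_spec_ctrl_base: nothing forced */
	uint64_t mask	 = SPEC_CTRL_IBRS | SPEC_CTRL_STIBP;	/* x86_spec_ctrl_mask */
	uint64_t guest_spec_ctrl = SPEC_CTRL_IBRS | SPEC_CTRL_SSBD; /* what the guest wrote */

	/* Same expression as in the hunk above */
	uint64_t guestval = (hostval & ~mask) | (guest_spec_ctrl & mask);

	/* -> 0x1: the guest's SSBD request is dropped, it is not guest-modifiable here */
	printf("host 0x%llx, guest 0x%llx -> MSR written on VMENTER 0x%llx\n",
	       (unsigned long long)hostval,
	       (unsigned long long)guest_spec_ctrl,
	       (unsigned long long)guestval);
	return 0;
}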


@ -43,6 +43,8 @@
#include <asm/pat.h>
#include <asm/microcode.h>
#include <asm/microcode_intel.h>
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
@ -674,6 +676,40 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
}
}
static void init_speculation_control(struct cpuinfo_x86 *c)
{
/*
* The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
* and they also have a different bit for STIBP support. Also,
* a hypervisor might have set the individual AMD bits even on
* Intel CPUs, for finer-grained selection of what's available.
*/
if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
set_cpu_cap(c, X86_FEATURE_IBRS);
set_cpu_cap(c, X86_FEATURE_IBPB);
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
}
if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
set_cpu_cap(c, X86_FEATURE_STIBP);
if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD))
set_cpu_cap(c, X86_FEATURE_SSBD);
if (cpu_has(c, X86_FEATURE_AMD_IBRS)) {
set_cpu_cap(c, X86_FEATURE_IBRS);
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
}
if (cpu_has(c, X86_FEATURE_AMD_IBPB))
set_cpu_cap(c, X86_FEATURE_IBPB);
if (cpu_has(c, X86_FEATURE_AMD_STIBP)) {
set_cpu_cap(c, X86_FEATURE_STIBP);
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
}
}
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@ -695,6 +731,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
c->x86_capability[CPUID_7_0_EBX] = ebx;
c->x86_capability[CPUID_7_ECX] = ecx;
c->x86_capability[CPUID_7_EDX] = edx;
}
/* Extended state features: level 0x0000000d */
@ -765,6 +802,14 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
init_scattered_cpuid_features(c);
init_speculation_control(c);
/*
* Clear/Set all flags overridden by options, after probe.
* This needs to happen each time we re-probe, which may happen
* several times during CPU initialization.
*/
apply_forced_caps(c);
}
static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
@ -793,6 +838,75 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#endif
}
static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
{ X86_VENDOR_CENTAUR, 5 },
{ X86_VENDOR_INTEL, 5 },
{ X86_VENDOR_NSC, 5 },
{ X86_VENDOR_ANY, 4 },
{}
};
static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
{ X86_VENDOR_AMD },
{}
};
static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
{ X86_VENDOR_CENTAUR, 5, },
{ X86_VENDOR_INTEL, 5, },
{ X86_VENDOR_NSC, 5, },
{ X86_VENDOR_AMD, 0x12, },
{ X86_VENDOR_AMD, 0x11, },
{ X86_VENDOR_AMD, 0x10, },
{ X86_VENDOR_AMD, 0xf, },
{ X86_VENDOR_ANY, 4, },
{}
};
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = 0;
if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
!(ia32_cap & ARCH_CAP_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
if (x86_match_cpu(cpu_no_speculation))
return;
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
if (x86_match_cpu(cpu_no_meltdown))
return;
/* Rogue Data Cache Load? No! */
if (ia32_cap & ARCH_CAP_RDCL_NO)
return;
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
}
/*
* Do minimum CPU detection early.
* Fields really needed: vendor, cpuid_level, family, model, mask,
@ -839,11 +953,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
if (c->x86_vendor != X86_VENDOR_AMD)
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
cpu_set_bug_bits(c);
fpu__init_system(c);
@ -1132,6 +1242,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
enable_sep_cpu();
#endif
mtrr_ap_init();
x86_spec_ctrl_setup_ap();
}
struct msr_range {


@ -46,4 +46,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
extern void x86_spec_ctrl_setup_ap(void);
#endif /* ARCH_X86_CPU_H */


@ -13,6 +13,7 @@
#include <asm/msr.h>
#include <asm/bugs.h>
#include <asm/cpu.h>
#include <asm/intel-family.h>
#ifdef CONFIG_X86_64
#include <linux/topology.h>
@ -25,6 +26,62 @@
#include <asm/apic.h>
#endif
/*
* Early microcode releases for the Spectre v2 mitigation were broken.
* Information taken from:
* - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf
* - https://kb.vmware.com/s/article/52345
* - Microcode revisions observed in the wild
* - Release note from 20180108 microcode release
*/
struct sku_microcode {
u8 model;
u8 stepping;
u32 microcode;
};
static const struct sku_microcode spectre_bad_microcodes[] = {
{ INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 },
{ INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 },
{ INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 },
{ INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 },
{ INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 },
{ INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
{ INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
{ INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
{ INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
{ INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 },
{ INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 },
{ INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
{ INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 },
{ INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 },
{ INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 },
{ INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
{ INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
{ INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
/* Observed in the wild */
{ INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
{ INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
};
static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
{
int i;
/*
* We know that the hypervisor lies to us about the microcode version, so
* we may as well hope that it is running the correct version.
*/
if (cpu_has(c, X86_FEATURE_HYPERVISOR))
return false;
for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
if (c->x86_model == spectre_bad_microcodes[i].model &&
c->x86_mask == spectre_bad_microcodes[i].stepping)
return (c->microcode <= spectre_bad_microcodes[i].microcode);
}
return false;
}
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@ -51,6 +108,22 @@ static void early_init_intel(struct cpuinfo_x86 *c)
rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
}
/* Now if any of them are set, check the blacklist and clear the lot */
if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
setup_clear_cpu_cap(X86_FEATURE_IBRS);
setup_clear_cpu_cap(X86_FEATURE_IBPB);
setup_clear_cpu_cap(X86_FEATURE_STIBP);
setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
setup_clear_cpu_cap(X86_FEATURE_SSBD);
setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
}
/*
* Atom erratum AAE44/AAF40/AAG38/AAH41:
*


@ -2294,9 +2294,6 @@ static ssize_t store_int_with_restart(struct device *s,
if (check_interval == old_check_interval)
return ret;
if (check_interval < 1)
check_interval = 1;
mutex_lock(&mce_sysfs_mutex);
mce_restart();
mutex_unlock(&mce_sysfs_mutex);


@ -0,0 +1,26 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/asm.h>
#include <asm-generic/export.h>
#include <linux/linkage.h>
/*
* unsigned long native_save_fl(void)
*/
ENTRY(native_save_fl)
pushf
pop %_ASM_AX
ret
ENDPROC(native_save_fl)
EXPORT_SYMBOL(native_save_fl)
/*
* void native_restore_fl(unsigned long flags)
* %eax/%rdi: flags
*/
ENTRY(native_restore_fl)
push %_ASM_ARG1
popf
ret
ENDPROC(native_restore_fl)
EXPORT_SYMBOL(native_restore_fl)


@ -119,7 +119,7 @@ static void free_ldt_struct(struct ldt_struct *ldt)
* we do not have to muck with descriptors here, that is
* done in switch_mm() as needed.
*/
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
{
struct ldt_struct *new_ldt;
struct mm_struct *old_mm;
@ -160,7 +160,7 @@ out_unlock:
*
* 64bit: Don't touch the LDT register - we're already in the next thread.
*/
void destroy_context(struct mm_struct *mm)
void destroy_context_ldt(struct mm_struct *mm)
{
free_ldt_struct(mm->context.ldt);
mm->context.ldt = NULL;


@ -31,6 +31,7 @@
#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/vm86.h>
#include <asm/spec-ctrl.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@ -116,11 +117,6 @@ void flush_thread(void)
fpu__clear(&tsk->thread.fpu);
}
static void hard_disable_TSC(void)
{
cr4_set_bits(X86_CR4_TSD);
}
void disable_TSC(void)
{
preempt_disable();
@ -129,15 +125,10 @@ void disable_TSC(void)
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
hard_disable_TSC();
cr4_set_bits(X86_CR4_TSD);
preempt_enable();
}
static void hard_enable_TSC(void)
{
cr4_clear_bits(X86_CR4_TSD);
}
static void enable_TSC(void)
{
preempt_disable();
@ -146,7 +137,7 @@ static void enable_TSC(void)
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
hard_enable_TSC();
cr4_clear_bits(X86_CR4_TSD);
preempt_enable();
}
@ -174,48 +165,199 @@ int set_tsc_mode(unsigned int val)
return 0;
}
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
static inline void switch_to_bitmap(struct tss_struct *tss,
struct thread_struct *prev,
struct thread_struct *next,
unsigned long tifp, unsigned long tifn)
{
struct thread_struct *prev, *next;
prev = &prev_p->thread;
next = &next_p->thread;
if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
unsigned long debugctl = get_debugctlmsr();
debugctl &= ~DEBUGCTLMSR_BTF;
if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
debugctl |= DEBUGCTLMSR_BTF;
update_debugctlmsr(debugctl);
}
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
if (test_tsk_thread_flag(next_p, TIF_NOTSC))
hard_disable_TSC();
else
hard_enable_TSC();
}
if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
if (tifn & _TIF_IO_BITMAP) {
/*
* Copy the relevant range of the IO bitmap.
* Normally this is 128 bytes or less:
*/
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
max(prev->io_bitmap_max, next->io_bitmap_max));
} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
} else if (tifp & _TIF_IO_BITMAP) {
/*
* Clear any possible leftover bits:
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
}
#ifdef CONFIG_SMP
struct ssb_state {
struct ssb_state *shared_state;
raw_spinlock_t lock;
unsigned int disable_state;
unsigned long local_state;
};
#define LSTATE_SSB 0
static DEFINE_PER_CPU(struct ssb_state, ssb_state);
void speculative_store_bypass_ht_init(void)
{
struct ssb_state *st = this_cpu_ptr(&ssb_state);
unsigned int this_cpu = smp_processor_id();
unsigned int cpu;
st->local_state = 0;
/*
* Shared state setup happens once on the first bringup
* of the CPU. It's not destroyed on CPU hotunplug.
*/
if (st->shared_state)
return;
raw_spin_lock_init(&st->lock);
/*
* Go over HT siblings and check whether one of them has set up the
* shared state pointer already.
*/
for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) {
if (cpu == this_cpu)
continue;
if (!per_cpu(ssb_state, cpu).shared_state)
continue;
/* Link it to the state of the sibling: */
st->shared_state = per_cpu(ssb_state, cpu).shared_state;
return;
}
/*
* First HT sibling to come up on the core. Link shared state of
* the first HT sibling to itself. The siblings on the same core
* which come up later will see the shared state pointer and link
* themselves to the state of this CPU.
*/
st->shared_state = st;
}
/*
* The logic is: the first HT sibling to enable SSBD enables it for both
* siblings in the core, and the last sibling to disable it disables it for
* the whole core. This is how MSR_SPEC_CTRL works in "hardware":
*
* CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL
*/
static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
{
struct ssb_state *st = this_cpu_ptr(&ssb_state);
u64 msr = x86_amd_ls_cfg_base;
if (!static_cpu_has(X86_FEATURE_ZEN)) {
msr |= ssbd_tif_to_amd_ls_cfg(tifn);
wrmsrl(MSR_AMD64_LS_CFG, msr);
return;
}
if (tifn & _TIF_SSBD) {
/*
* Since this can race with prctl(), block reentry on the
* same CPU.
*/
if (__test_and_set_bit(LSTATE_SSB, &st->local_state))
return;
msr |= x86_amd_ls_cfg_ssbd_mask;
raw_spin_lock(&st->shared_state->lock);
/* First sibling enables SSBD: */
if (!st->shared_state->disable_state)
wrmsrl(MSR_AMD64_LS_CFG, msr);
st->shared_state->disable_state++;
raw_spin_unlock(&st->shared_state->lock);
} else {
if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state))
return;
raw_spin_lock(&st->shared_state->lock);
st->shared_state->disable_state--;
if (!st->shared_state->disable_state)
wrmsrl(MSR_AMD64_LS_CFG, msr);
raw_spin_unlock(&st->shared_state->lock);
}
}
#else
static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
{
u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
wrmsrl(MSR_AMD64_LS_CFG, msr);
}
#endif
static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
{
/*
* SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL,
* so ssbd_tif_to_spec_ctrl() just works.
*/
wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
}
static __always_inline void intel_set_ssb_state(unsigned long tifn)
{
u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
wrmsrl(MSR_IA32_SPEC_CTRL, msr);
}
static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
{
if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
amd_set_ssb_virt_state(tifn);
else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
amd_set_core_ssb_state(tifn);
else
intel_set_ssb_state(tifn);
}
void speculative_store_bypass_update(unsigned long tif)
{
preempt_disable();
__speculative_store_bypass_update(tif);
preempt_enable();
}
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
{
struct thread_struct *prev, *next;
unsigned long tifp, tifn;
prev = &prev_p->thread;
next = &next_p->thread;
tifn = READ_ONCE(task_thread_info(next_p)->flags);
tifp = READ_ONCE(task_thread_info(prev_p)->flags);
switch_to_bitmap(tss, prev, next, tifp, tifn);
propagate_user_return_notify(prev_p, next_p);
if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
arch_has_block_step()) {
unsigned long debugctl, msk;
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl &= ~DEBUGCTLMSR_BTF;
msk = tifn & _TIF_BLOCKSTEP;
debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
if ((tifp ^ tifn) & _TIF_NOTSC)
cr4_toggle_bits(X86_CR4_TSD);
if ((tifp ^ tifn) & _TIF_SSBD)
__speculative_store_bypass_update(tifn);
}
/*


@ -75,6 +75,7 @@
#include <asm/i8259.h>
#include <asm/realmode.h>
#include <asm/misc.h>
#include <asm/spec-ctrl.h>
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
@ -217,6 +218,8 @@ static void notrace start_secondary(void *unused)
*/
check_tsc_sync_target();
speculative_store_bypass_ht_init();
/*
* Lock vector_lock and initialize the vectors on this cpu
* before setting the cpu online. We must set it online with
@ -1209,6 +1212,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
set_mtrr_aps_delayed_init();
smp_quirk_init_udelay();
speculative_store_bypass_ht_init();
}
void arch_enable_nonboot_cpus_begin(void)


@ -37,7 +37,7 @@
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/nospec-branch.h>
#include <asm/spec-ctrl.h>
#include <asm/virtext.h>
#include "trace.h"


@ -48,7 +48,7 @@
#include <asm/kexec.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/nospec-branch.h>
#include <asm/spec-ctrl.h>
#include "trace.h"
#include "pmu.h"


@ -10,6 +10,7 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
@ -29,6 +30,8 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
struct flush_tlb_info {
struct mm_struct *flush_mm;
unsigned long flush_start;
@ -104,6 +107,36 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
unsigned cpu = smp_processor_id();
if (likely(prev != next)) {
u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
/*
* Avoid user/user BTB poisoning by flushing the branch
* predictor when switching between processes. This stops
* one process from doing Spectre-v2 attacks on another.
*
* As an optimization, flush indirect branches only when
* switching into processes that disable dumping. This
* protects high-value processes like gpg, without incurring
* too much performance overhead. IBPB is *expensive*!
*
* This will not flush branches when switching into kernel
* threads. It will also not flush if we switch to idle
* thread and back to the same process. It will flush if we
* switch to a different non-dumpable process.
*/
if (tsk && tsk->mm &&
tsk->mm->context.ctx_id != last_ctx_id &&
get_dumpable(tsk->mm) != SUID_DUMP_USER)
indirect_branch_prediction_barrier();
/*
* Record last user mm's context id, so we can avoid
* flushing branch buffer with IBPB if we switch back
* to the same user.
*/
if (next != &init_mm)
this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
this_cpu_write(cpu_tlbstate.active_mm, next);
cpumask_set_cpu(cpu, mm_cpumask(next));


@ -40,6 +40,7 @@
#include <asm/fixmap.h>
#include <asm/realmode.h>
#include <asm/time.h>
#include <asm/nospec-branch.h>
/*
* We allocate runtime services regions bottom-up, starting from -4G, i.e.
@ -347,6 +348,7 @@ extern efi_status_t efi64_thunk(u32, ...);
\
efi_sync_low_kernel_mappings(); \
local_irq_save(flags); \
firmware_restrict_branch_speculation_start(); \
\
efi_scratch.prev_cr3 = read_cr3(); \
write_cr3((unsigned long)efi_scratch.efi_pgt); \
@ -357,6 +359,7 @@ extern efi_status_t efi64_thunk(u32, ...);
\
write_cr3(efi_scratch.prev_cr3); \
__flush_tlb_all(); \
firmware_restrict_branch_speculation_end(); \
local_irq_restore(flags); \
\
__s; \


@ -460,6 +460,12 @@ static void __init xen_init_cpuid_mask(void)
cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
}
static void __init xen_init_capabilities(void)
{
if (xen_pv_domain())
setup_force_cpu_cap(X86_FEATURE_XENPV);
}
static void xen_set_debugreg(int reg, unsigned long val)
{
HYPERVISOR_set_debugreg(reg, val);
@ -1587,6 +1593,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_init_irq_ops();
xen_init_cpuid_mask();
xen_init_capabilities();
#ifdef CONFIG_X86_LOCAL_APIC
/*
@ -1883,14 +1890,6 @@ bool xen_hvm_need_lapic(void)
}
EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
static void xen_set_cpu_features(struct cpuinfo_x86 *c)
{
if (xen_pv_domain()) {
clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
set_cpu_cap(c, X86_FEATURE_XENPV);
}
}
const struct hypervisor_x86 x86_hyper_xen = {
.name = "Xen",
.detect = xen_platform,
@ -1898,7 +1897,6 @@ const struct hypervisor_x86 x86_hyper_xen = {
.init_platform = xen_hvm_guest_init,
#endif
.x2apic_available = xen_x2apic_para_available,
.set_cpu_features = xen_set_cpu_features,
};
EXPORT_SYMBOL(x86_hyper_xen);


@ -28,6 +28,7 @@
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>
#include <asm/spec-ctrl.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
@ -87,6 +88,8 @@ static void cpu_bringup(void)
cpu_data(cpu).x86_max_cores = 1;
set_cpu_sibling_map(cpu);
speculative_store_bypass_ht_init();
xen_setup_cpu_clockevents();
notify_cpu_starting(cpu);
@ -357,6 +360,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
}
set_cpu_sibling_map(0);
speculative_store_bypass_ht_init();
xen_pmu_init(0);
if (xen_smp_intr_init(0))


@ -1,11 +1,14 @@
#include <linux/types.h>
#include <linux/tick.h>
#include <linux/percpu-defs.h>
#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/grant_table.h>
#include <xen/events.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
#include <asm/fixmap.h>
@ -68,6 +71,8 @@ static void xen_pv_post_suspend(int suspend_cancelled)
xen_mm_unpin_all();
}
static DEFINE_PER_CPU(u64, spec_ctrl);
void xen_arch_pre_suspend(void)
{
if (xen_pv_domain())
@ -84,6 +89,9 @@ void xen_arch_post_suspend(int cancelled)
static void xen_vcpu_notify_restore(void *data)
{
if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL))
wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
/* Boot processor notified via generic timekeeping_resume() */
if (smp_processor_id() == 0)
return;
@ -93,7 +101,15 @@ static void xen_vcpu_notify_restore(void *data)
static void xen_vcpu_notify_suspend(void *data)
{
u64 tmp;
tick_suspend_local();
if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
rdmsrl(MSR_IA32_SPEC_CTRL, tmp);
this_cpu_write(spec_ctrl, tmp);
wrmsrl(MSR_IA32_SPEC_CTRL, 0);
}
}
void xen_arch_resume(void)


@ -661,21 +661,17 @@ EXPORT_SYMBOL(blk_alloc_queue);
int blk_queue_enter(struct request_queue *q, gfp_t gfp)
{
while (true) {
int ret;
if (percpu_ref_tryget_live(&q->q_usage_counter))
return 0;
if (!gfpflags_allow_blocking(gfp))
return -EBUSY;
ret = wait_event_interruptible(q->mq_freeze_wq,
!atomic_read(&q->mq_freeze_depth) ||
blk_queue_dying(q));
wait_event(q->mq_freeze_wq,
!atomic_read(&q->mq_freeze_depth) ||
blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
if (ret)
return ret;
}
}


@ -693,14 +693,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev,
return sprintf(buf, "Not affected\n");
}
ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "Not affected\n");
}
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
&dev_attr_spectre_v1.attr,
&dev_attr_spectre_v2.attr,
&dev_attr_spec_store_bypass.attr,
NULL
};


@ -333,11 +333,11 @@ static struct pdiv_map pllu_p[] = {
};
static struct tegra_clk_pll_freq_table pll_u_freq_table[] = {
{ 12000000, 480000000, 960, 12, 0, 12},
{ 13000000, 480000000, 960, 13, 0, 12},
{ 16800000, 480000000, 400, 7, 0, 5},
{ 19200000, 480000000, 200, 4, 0, 3},
{ 26000000, 480000000, 960, 26, 0, 12},
{ 12000000, 480000000, 960, 12, 2, 12 },
{ 13000000, 480000000, 960, 13, 2, 12 },
{ 16800000, 480000000, 400, 7, 2, 5 },
{ 19200000, 480000000, 200, 4, 2, 3 },
{ 26000000, 480000000, 960, 26, 2, 12 },
{ 0, 0, 0, 0, 0, 0 },
};
@ -1372,6 +1372,7 @@ static struct tegra_clk_init_table init_table[] __initdata = {
{TEGRA30_CLK_GR2D, TEGRA30_CLK_PLL_C, 300000000, 0},
{TEGRA30_CLK_GR3D, TEGRA30_CLK_PLL_C, 300000000, 0},
{TEGRA30_CLK_GR3D2, TEGRA30_CLK_PLL_C, 300000000, 0},
{ TEGRA30_CLK_PLL_U, TEGRA30_CLK_CLK_MAX, 480000000, 0 },
{TEGRA30_CLK_CLK_MAX, TEGRA30_CLK_CLK_MAX, 0, 0}, /* This MUST be the last entry. */
};


@ -174,6 +174,40 @@ static int add_corrupted(struct ubi_attach_info *ai, int pnum, int ec)
return 0;
}
/**
* add_fastmap - add a Fastmap related physical eraseblock.
* @ai: attaching information
* @pnum: physical eraseblock number the VID header came from
* @vid_hdr: the volume identifier header
* @ec: erase counter of the physical eraseblock
*
* This function allocates a 'struct ubi_ainf_peb' object for a Fastmap
* physical eraseblock @pnum and adds it to the 'fastmap' list.
* Such blocks can be Fastmap super and data blocks, either from the most
* recent Fastmap we're attaching from or from old Fastmaps which will
* be erased.
*/
static int add_fastmap(struct ubi_attach_info *ai, int pnum,
struct ubi_vid_hdr *vid_hdr, int ec)
{
struct ubi_ainf_peb *aeb;
aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL);
if (!aeb)
return -ENOMEM;
aeb->pnum = pnum;
aeb->vol_id = be32_to_cpu(vidh->vol_id);
aeb->sqnum = be64_to_cpu(vidh->sqnum);
aeb->ec = ec;
list_add(&aeb->u.list, &ai->fastmap);
dbg_bld("add to fastmap list: PEB %d, vol_id %d, sqnum: %llu", pnum,
aeb->vol_id, aeb->sqnum);
return 0;
}
/**
* validate_vid_hdr - check volume identifier header.
* @ubi: UBI device description object
@ -803,13 +837,26 @@ out_unlock:
return err;
}
static bool vol_ignored(int vol_id)
{
switch (vol_id) {
case UBI_LAYOUT_VOLUME_ID:
return true;
}
#ifdef CONFIG_MTD_UBI_FASTMAP
return ubi_is_fm_vol(vol_id);
#else
return false;
#endif
}
/**
* scan_peb - scan and process UBI headers of a PEB.
* @ubi: UBI device description object
* @ai: attaching information
* @pnum: the physical eraseblock number
* @vid: The volume ID of the found volume will be stored in this pointer
* @sqnum: The sqnum of the found volume will be stored in this pointer
* @fast: true if we're scanning for a Fastmap
*
* This function reads UBI headers of PEB @pnum, checks them, and adds
* information about this PEB to the corresponding list or RB-tree in the
@ -817,9 +864,9 @@ out_unlock:
* successfully handled and a negative error code in case of failure.
*/
static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
int pnum, int *vid, unsigned long long *sqnum)
int pnum, bool fast)
{
long long uninitialized_var(ec);
long long ec;
int err, bitflips = 0, vol_id = -1, ec_err = 0;
dbg_bld("scan PEB %d", pnum);
@ -935,6 +982,20 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
*/
ai->maybe_bad_peb_count += 1;
case UBI_IO_BAD_HDR:
/*
* If we're facing a bad VID header we have to drop *all*
* Fastmap data structures we find. The most recent Fastmap
* could be bad and therefore there is a chance that we attach
* from an old one. On a healthy MTD stack a PEB should not go
* bad all of a sudden, but the reality is different.
* So, let's be paranoid and help find the root cause by
* falling back to scanning mode instead of attaching with a
* bad EBA table and causing data corruption which is hard to
* analyze.
*/
if (fast)
ai->force_full_scan = 1;
if (ec_err)
/*
* Both headers are corrupted. There is a possibility
@ -991,21 +1052,15 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
}
vol_id = be32_to_cpu(vidh->vol_id);
if (vid)
*vid = vol_id;
if (sqnum)
*sqnum = be64_to_cpu(vidh->sqnum);
if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) {
if (vol_id > UBI_MAX_VOLUMES && !vol_ignored(vol_id)) {
int lnum = be32_to_cpu(vidh->lnum);
/* Unsupported internal volume */
switch (vidh->compat) {
case UBI_COMPAT_DELETE:
if (vol_id != UBI_FM_SB_VOLUME_ID
&& vol_id != UBI_FM_DATA_VOLUME_ID) {
ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it",
vol_id, lnum);
}
ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it",
vol_id, lnum);
err = add_to_list(ai, pnum, vol_id, lnum,
ec, 1, &ai->erase);
if (err)
@ -1037,7 +1092,12 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
if (ec_err)
ubi_warn(ubi, "valid VID header but corrupted EC header at PEB %d",
pnum);
err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips);
if (ubi_is_fm_vol(vol_id))
err = add_fastmap(ai, pnum, vidh, ec);
else
err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips);
if (err)
return err;
@ -1186,6 +1246,10 @@ static void destroy_ai(struct ubi_attach_info *ai)
list_del(&aeb->u.list);
kmem_cache_free(ai->aeb_slab_cache, aeb);
}
list_for_each_entry_safe(aeb, aeb_tmp, &ai->fastmap, u.list) {
list_del(&aeb->u.list);
kmem_cache_free(ai->aeb_slab_cache, aeb);
}
/* Destroy the volume RB-tree */
rb = ai->volumes.rb_node;
@ -1245,7 +1309,7 @@ static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai,
cond_resched();
dbg_gen("process PEB %d", pnum);
err = scan_peb(ubi, ai, pnum, NULL, NULL);
err = scan_peb(ubi, ai, pnum, false);
if (err < 0)
goto out_vidh;
}
@ -1311,6 +1375,7 @@ static struct ubi_attach_info *alloc_ai(void)
INIT_LIST_HEAD(&ai->free);
INIT_LIST_HEAD(&ai->erase);
INIT_LIST_HEAD(&ai->alien);
INIT_LIST_HEAD(&ai->fastmap);
ai->volumes = RB_ROOT;
ai->aeb_slab_cache = kmem_cache_create("ubi_aeb_slab_cache",
sizeof(struct ubi_ainf_peb),
@ -1337,52 +1402,58 @@ static struct ubi_attach_info *alloc_ai(void)
*/
static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai)
{
int err, pnum, fm_anchor = -1;
unsigned long long max_sqnum = 0;
int err, pnum;
struct ubi_attach_info *scan_ai;
err = -ENOMEM;
scan_ai = alloc_ai();
if (!scan_ai)
goto out;
ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
if (!ech)
goto out;
goto out_ai;
vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
if (!vidh)
goto out_ech;
for (pnum = 0; pnum < UBI_FM_MAX_START; pnum++) {
int vol_id = -1;
unsigned long long sqnum = -1;
cond_resched();
dbg_gen("process PEB %d", pnum);
err = scan_peb(ubi, *ai, pnum, &vol_id, &sqnum);
err = scan_peb(ubi, scan_ai, pnum, true);
if (err < 0)
goto out_vidh;
if (vol_id == UBI_FM_SB_VOLUME_ID && sqnum > max_sqnum) {
max_sqnum = sqnum;
fm_anchor = pnum;
}
}
ubi_free_vid_hdr(ubi, vidh);
kfree(ech);
if (fm_anchor < 0)
return UBI_NO_FASTMAP;
if (scan_ai->force_full_scan)
err = UBI_NO_FASTMAP;
else
err = ubi_scan_fastmap(ubi, *ai, scan_ai);
destroy_ai(*ai);
*ai = alloc_ai();
if (!*ai)
return -ENOMEM;
if (err) {
/*
* Didn't attach via fastmap, do a full scan but reuse what
* we've already scanned.
*/
destroy_ai(*ai);
*ai = scan_ai;
} else
destroy_ai(scan_ai);
return ubi_scan_fastmap(ubi, *ai, fm_anchor);
return err;
out_vidh:
ubi_free_vid_hdr(ubi, vidh);
out_ech:
kfree(ech);
out_ai:
destroy_ai(scan_ai);
out:
return err;
}


@ -1178,6 +1178,8 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
struct ubi_volume *vol;
uint32_t crc;
ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem));
vol_id = be32_to_cpu(vid_hdr->vol_id);
lnum = be32_to_cpu(vid_hdr->lnum);
@ -1346,9 +1348,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
}
ubi_assert(vol->eba_tbl[lnum] == from);
down_read(&ubi->fm_eba_sem);
vol->eba_tbl[lnum] = to;
up_read(&ubi->fm_eba_sem);
out_unlock_buf:
mutex_unlock(&ubi->buf_mutex);


@ -262,6 +262,8 @@ static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
struct ubi_fm_pool *pool = &ubi->fm_wl_pool;
int pnum;
ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem));
if (pool->used == pool->size) {
/* We cannot update the fastmap here because this
* function is called in atomic context.
@ -303,7 +305,7 @@ int ubi_ensure_anchor_pebs(struct ubi_device *ubi)
wrk->anchor = 1;
wrk->func = &wear_leveling_worker;
schedule_ubi_work(ubi, wrk);
__schedule_ubi_work(ubi, wrk);
return 0;
}
@ -344,7 +346,7 @@ int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *fm_e,
spin_unlock(&ubi->wl_lock);
vol_id = lnum ? UBI_FM_DATA_VOLUME_ID : UBI_FM_SB_VOLUME_ID;
return schedule_erase(ubi, e, vol_id, lnum, torture);
return schedule_erase(ubi, e, vol_id, lnum, torture, true);
}
/**


@ -326,6 +326,7 @@ static int update_vol(struct ubi_device *ubi, struct ubi_attach_info *ai,
aeb->pnum = new_aeb->pnum;
aeb->copy_flag = new_vh->copy_flag;
aeb->scrub = new_aeb->scrub;
aeb->sqnum = new_aeb->sqnum;
kmem_cache_free(ai->aeb_slab_cache, new_aeb);
/* new_aeb is older */
@ -850,28 +851,58 @@ fail:
return ret;
}
/**
* find_fm_anchor - find the most recent Fastmap superblock (anchor)
* @ai: UBI attach info to be filled
*/
static int find_fm_anchor(struct ubi_attach_info *ai)
{
int ret = -1;
struct ubi_ainf_peb *aeb;
unsigned long long max_sqnum = 0;
list_for_each_entry(aeb, &ai->fastmap, u.list) {
if (aeb->vol_id == UBI_FM_SB_VOLUME_ID && aeb->sqnum > max_sqnum) {
max_sqnum = aeb->sqnum;
ret = aeb->pnum;
}
}
return ret;
}
/**
* ubi_scan_fastmap - scan the fastmap.
* @ubi: UBI device object
* @ai: UBI attach info to be filled
* @fm_anchor: The fastmap starts at this PEB
* @scan_ai: UBI attach info from the first 64 PEBs,
* used to find the most recent Fastmap data structure
*
* Returns 0 on success, UBI_NO_FASTMAP if no fastmap was found,
* UBI_BAD_FASTMAP if one was found but is not usable.
* < 0 indicates an internal error.
*/
int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
int fm_anchor)
struct ubi_attach_info *scan_ai)
{
struct ubi_fm_sb *fmsb, *fmsb2;
struct ubi_vid_hdr *vh;
struct ubi_ec_hdr *ech;
struct ubi_fastmap_layout *fm;
int i, used_blocks, pnum, ret = 0;
struct ubi_ainf_peb *tmp_aeb, *aeb;
int i, used_blocks, pnum, fm_anchor, ret = 0;
size_t fm_size;
__be32 crc, tmp_crc;
unsigned long long sqnum = 0;
fm_anchor = find_fm_anchor(scan_ai);
if (fm_anchor < 0)
return UBI_NO_FASTMAP;
/* Move all (possible) fastmap blocks into our new attach structure. */
list_for_each_entry_safe(aeb, tmp_aeb, &scan_ai->fastmap, u.list)
list_move_tail(&aeb->u.list, &ai->fastmap);
down_write(&ubi->fm_protect);
memset(ubi->fm_buf, 0, ubi->fm_size);
@ -1484,22 +1515,30 @@ int ubi_update_fastmap(struct ubi_device *ubi)
struct ubi_wl_entry *tmp_e;
down_write(&ubi->fm_protect);
down_write(&ubi->work_sem);
down_write(&ubi->fm_eba_sem);
ubi_refill_pools(ubi);
if (ubi->ro_mode || ubi->fm_disabled) {
up_write(&ubi->fm_eba_sem);
up_write(&ubi->work_sem);
up_write(&ubi->fm_protect);
return 0;
}
ret = ubi_ensure_anchor_pebs(ubi);
if (ret) {
up_write(&ubi->fm_eba_sem);
up_write(&ubi->work_sem);
up_write(&ubi->fm_protect);
return ret;
}
new_fm = kzalloc(sizeof(*new_fm), GFP_KERNEL);
if (!new_fm) {
up_write(&ubi->fm_eba_sem);
up_write(&ubi->work_sem);
up_write(&ubi->fm_protect);
return -ENOMEM;
}
@ -1608,16 +1647,14 @@ int ubi_update_fastmap(struct ubi_device *ubi)
new_fm->e[0] = tmp_e;
}
down_write(&ubi->work_sem);
down_write(&ubi->fm_eba_sem);
ret = ubi_write_fastmap(ubi, new_fm);
up_write(&ubi->fm_eba_sem);
up_write(&ubi->work_sem);
if (ret)
goto err;
out_unlock:
up_write(&ubi->fm_eba_sem);
up_write(&ubi->work_sem);
up_write(&ubi->fm_protect);
kfree(old_fm);
return ret;


@ -705,6 +705,8 @@ struct ubi_ainf_volume {
* @erase: list of physical eraseblocks which have to be erased
* @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
* those belonging to "preserve"-compatible internal volumes)
* @fastmap: list of physical eraseblocks which relate to fastmap (e.g.,
* eraseblocks of the current and not yet erased old fastmap blocks)
* @corr_peb_count: count of PEBs in the @corr list
* @empty_peb_count: count of PEBs which are presumably empty (contain only
* 0xFF bytes)
@ -715,6 +717,8 @@ struct ubi_ainf_volume {
* @vols_found: number of volumes found
* @highest_vol_id: highest volume ID
* @is_empty: flag indicating whether the MTD device is empty or not
* @force_full_scan: flag indicating whether we need to do a full scan and drop
* all existing Fastmap data structures
* @min_ec: lowest erase counter value
* @max_ec: highest erase counter value
* @max_sqnum: highest sequence number value
@ -733,6 +737,7 @@ struct ubi_attach_info {
struct list_head free;
struct list_head erase;
struct list_head alien;
struct list_head fastmap;
int corr_peb_count;
int empty_peb_count;
int alien_peb_count;
@ -741,6 +746,7 @@ struct ubi_attach_info {
int vols_found;
int highest_vol_id;
int is_empty;
int force_full_scan;
int min_ec;
int max_ec;
unsigned long long max_sqnum;
@ -919,7 +925,7 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb,
size_t ubi_calc_fm_size(struct ubi_device *ubi);
int ubi_update_fastmap(struct ubi_device *ubi);
int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
int fm_anchor);
struct ubi_attach_info *scan_ai);
#else
static inline int ubi_update_fastmap(struct ubi_device *ubi) { return 0; }
#endif
@ -1113,4 +1119,42 @@ static inline int idx2vol_id(const struct ubi_device *ubi, int idx)
return idx;
}
/**
* ubi_is_fm_vol - check whether a volume ID is a Fastmap volume.
* @vol_id: volume ID
*/
static inline bool ubi_is_fm_vol(int vol_id)
{
switch (vol_id) {
case UBI_FM_SB_VOLUME_ID:
case UBI_FM_DATA_VOLUME_ID:
return true;
}
return false;
}
/**
* ubi_find_fm_block - check whether a PEB is part of the current Fastmap.
* @ubi: UBI device description object
* @pnum: physical eraseblock to look for
*
* This function returns a wear leveling object if @pnum relates to the current
* fastmap, %NULL otherwise.
*/
static inline struct ubi_wl_entry *ubi_find_fm_block(const struct ubi_device *ubi,
int pnum)
{
int i;
if (ubi->fm) {
for (i = 0; i < ubi->fm->used_blocks; i++) {
if (ubi->fm->e[i]->pnum == pnum)
return ubi->fm->e[i];
}
}
return NULL;
}
#endif /* !__UBI_UBI_H__ */


@ -599,7 +599,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
* failure.
*/
static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
int vol_id, int lnum, int torture)
int vol_id, int lnum, int torture, bool nested)
{
struct ubi_work *wl_wrk;
@ -618,7 +618,10 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
wl_wrk->lnum = lnum;
wl_wrk->torture = torture;
schedule_ubi_work(ubi, wl_wrk);
if (nested)
__schedule_ubi_work(ubi, wl_wrk);
else
schedule_ubi_work(ubi, wl_wrk);
return 0;
}
@ -679,6 +682,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
if (!vid_hdr)
return -ENOMEM;
down_read(&ubi->fm_eba_sem);
mutex_lock(&ubi->move_mutex);
spin_lock(&ubi->wl_lock);
ubi_assert(!ubi->move_from && !ubi->move_to);
@ -919,6 +923,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
dbg_wl("done");
mutex_unlock(&ubi->move_mutex);
up_read(&ubi->fm_eba_sem);
return 0;
/*
@ -969,6 +974,7 @@ out_not_moved:
}
mutex_unlock(&ubi->move_mutex);
up_read(&ubi->fm_eba_sem);
return 0;
out_error:
@ -990,6 +996,7 @@ out_error:
out_ro:
ubi_ro_mode(ubi);
mutex_unlock(&ubi->move_mutex);
up_read(&ubi->fm_eba_sem);
ubi_assert(err != 0);
return err < 0 ? err : -EIO;
@ -997,6 +1004,7 @@ out_cancel:
ubi->wl_scheduled = 0;
spin_unlock(&ubi->wl_lock);
mutex_unlock(&ubi->move_mutex);
up_read(&ubi->fm_eba_sem);
ubi_free_vid_hdr(ubi, vid_hdr);
return 0;
}
@ -1119,7 +1127,7 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
int err1;
/* Re-schedule the LEB for erasure */
err1 = schedule_erase(ubi, e, vol_id, lnum, 0);
err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false);
if (err1) {
wl_entry_destroy(ubi, e);
err = err1;
@ -1315,7 +1323,7 @@ retry:
}
spin_unlock(&ubi->wl_lock);
err = schedule_erase(ubi, e, vol_id, lnum, torture);
err = schedule_erase(ubi, e, vol_id, lnum, torture, false);
if (err) {
spin_lock(&ubi->wl_lock);
wl_tree_add(e, &ubi->used);
@ -1750,6 +1758,48 @@ static void shutdown_work(struct ubi_device *ubi)
}
}
/**
* erase_aeb - erase a PEB given in UBI attach info PEB
* @ubi: UBI device description object
* @aeb: UBI attach info PEB
* @sync: If true, erase synchronously. Otherwise schedule for erasure
*/
static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync)
{
struct ubi_wl_entry *e;
int err;
e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
if (!e)
return -ENOMEM;
e->pnum = aeb->pnum;
e->ec = aeb->ec;
e->tagged_scrub_all = 0;
e->sqnum = aeb->sqnum;
ubi->lookuptbl[e->pnum] = e;
if (sync) {
err = sync_erase(ubi, e, false);
if (err)
goto out_free;
wl_tree_add(e, &ubi->free);
ubi->free_count++;
} else {
err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false);
if (err)
goto out_free;
}
return 0;
out_free:
wl_entry_destroy(ubi, e);
return err;
}
/**
* ubi_wl_init - initialize the WL sub-system using attaching information.
* @ubi: UBI device description object
@ -1787,20 +1837,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) {
cond_resched();
e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
if (!e)
err = erase_aeb(ubi, aeb, false);
if (err)
goto out_free;
e->pnum = aeb->pnum;
e->ec = aeb->ec;
e->tagged_scrub_all = 0;
e->sqnum = aeb->sqnum;
ubi->lookuptbl[e->pnum] = e;
if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) {
wl_entry_destroy(ubi, e);
goto out_free;
}
found_pebs++;
}
@ -1854,19 +1894,49 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
}
}
list_for_each_entry(aeb, &ai->fastmap, u.list) {
cond_resched();
e = ubi_find_fm_block(ubi, aeb->pnum);
if (e) {
ubi_assert(!ubi->lookuptbl[e->pnum]);
ubi->lookuptbl[e->pnum] = e;
} else {
bool sync = false;
/*
* Usually old Fastmap PEBs are scheduled for erasure
* and we don't have to care about them, but if we face
* a power cut before scheduling them we need to
* take care of them here.
*/
if (ubi->lookuptbl[aeb->pnum])
continue;
/*
* The fastmap update code might not find a free PEB for
* writing the fastmap anchor to and then reuses the
* current fastmap anchor PEB. When this PEB gets erased
* and a power cut happens before it is written again we
* must make sure that the fastmap attach code doesn't
* find any outdated fastmap anchors, hence we erase the
* outdated fastmap anchor PEBs synchronously here.
*/
if (aeb->vol_id == UBI_FM_SB_VOLUME_ID)
sync = true;
err = erase_aeb(ubi, aeb, sync);
if (err)
goto out_free;
}
found_pebs++;
}
dbg_wl("found %i PEBs", found_pebs);
if (ubi->fm) {
ubi_assert(ubi->good_peb_count ==
found_pebs + ubi->fm->used_blocks);
for (i = 0; i < ubi->fm->used_blocks; i++) {
e = ubi->fm->e[i];
ubi->lookuptbl[e->pnum] = e;
}
}
else
ubi_assert(ubi->good_peb_count == found_pebs);
ubi_assert(ubi->good_peb_count == found_pebs);
reserved_pebs = WL_RESERVED_PEBS;
ubi_fastmap_init(ubi, &reserved_pebs);


@ -9278,6 +9278,15 @@ static int tg3_chip_reset(struct tg3 *tp)
tg3_restore_clk(tp);
/* Increase the core clock speed to fix tx timeout issue for 5762
* with 100Mbps link speed.
*/
if (tg3_asic_rev(tp) == ASIC_REV_5762) {
val = tr32(TG3_CPMU_CLCK_ORIDE_ENABLE);
tw32(TG3_CPMU_CLCK_ORIDE_ENABLE, val |
TG3_CPMU_MAC_ORIDE_ENABLE);
}
/* Reprobe ASF enable state. */
tg3_flag_clear(tp, ENABLE_ASF);
tp->phy_flags &= ~(TG3_PHYFLG_1G_ON_VAUX_OK |


@ -1265,11 +1265,8 @@ static int gen10g_resume(struct phy_device *phydev)
static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
{
/* The default values for phydev->supported are provided by the PHY
* driver "features" member, we want to reset to sane defaults first
* before supporting higher speeds.
*/
phydev->supported &= PHY_DEFAULT_FEATURES;
phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES |
PHY_10BT_FEATURES);
switch (max_speed) {
default:


@ -88,6 +88,7 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin,
case PTP_PF_PHYSYNC:
if (chan != 0)
return -EINVAL;
break;
default:
return -EINVAL;
}


@ -909,6 +909,41 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci)
spin_unlock_irqrestore(&xhci->lock, flags);
}
static bool xhci_pending_portevent(struct xhci_hcd *xhci)
{
__le32 __iomem **port_array;
int port_index;
u32 status;
u32 portsc;
status = readl(&xhci->op_regs->status);
if (status & STS_EINT)
return true;
/*
* Checking STS_EINT is not enough as there is a lag between a change
* bit being set and the Port Status Change Event that it generated
* being written to the Event Ring. See note in xhci 1.1 section 4.19.2.
*/
port_index = xhci->num_usb2_ports;
port_array = xhci->usb2_ports;
while (port_index--) {
portsc = readl(port_array[port_index]);
if (portsc & PORT_CHANGE_MASK ||
(portsc & PORT_PLS_MASK) == XDEV_RESUME)
return true;
}
port_index = xhci->num_usb3_ports;
port_array = xhci->usb3_ports;
while (port_index--) {
portsc = readl(port_array[port_index]);
if (portsc & PORT_CHANGE_MASK ||
(portsc & PORT_PLS_MASK) == XDEV_RESUME)
return true;
}
return false;
}
/*
* Stop HC (not bus-specific)
*
@ -1006,7 +1041,7 @@ EXPORT_SYMBOL_GPL(xhci_suspend);
*/
int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
{
u32 command, temp = 0, status;
u32 command, temp = 0;
struct usb_hcd *hcd = xhci_to_hcd(xhci);
struct usb_hcd *secondary_hcd;
int retval = 0;
@ -1128,8 +1163,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
done:
if (retval == 0) {
/* Resume root hubs only when have pending events. */
status = readl(&xhci->op_regs->status);
if (status & STS_EINT) {
if (xhci_pending_portevent(xhci)) {
usb_hcd_resume_root_hub(xhci->shared_hcd);
usb_hcd_resume_root_hub(hcd);
}


@ -382,6 +382,10 @@ struct xhci_op_regs {
#define PORT_PLC (1 << 22)
/* port configure error change - port failed to configure its link partner */
#define PORT_CEC (1 << 23)
#define PORT_CHANGE_MASK (PORT_CSC | PORT_PEC | PORT_WRC | PORT_OCC | \
PORT_RC | PORT_PLC | PORT_CEC)
/* Cold Attach Status - xHC can set this bit to report device attached during
* Sx state. Warm port reset should be performed to clear this bit and move port
* to connected state.


@ -613,13 +613,21 @@ static void fat_set_state(struct super_block *sb,
brelse(bh);
}
static void fat_reset_iocharset(struct fat_mount_options *opts)
{
if (opts->iocharset != fat_default_iocharset) {
/* Note: opts->iocharset can be NULL here */
kfree(opts->iocharset);
opts->iocharset = fat_default_iocharset;
}
}
static void delayed_free(struct rcu_head *p)
{
struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu);
unload_nls(sbi->nls_disk);
unload_nls(sbi->nls_io);
if (sbi->options.iocharset != fat_default_iocharset)
kfree(sbi->options.iocharset);
fat_reset_iocharset(&sbi->options);
kfree(sbi);
}
@ -1035,7 +1043,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
opts->fs_fmask = opts->fs_dmask = current_umask();
opts->allow_utime = -1;
opts->codepage = fat_default_codepage;
opts->iocharset = fat_default_iocharset;
fat_reset_iocharset(opts);
if (is_vfat) {
opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95;
opts->rodir = 0;
@ -1185,8 +1193,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
/* vfat specific */
case Opt_charset:
if (opts->iocharset != fat_default_iocharset)
kfree(opts->iocharset);
fat_reset_iocharset(opts);
iocharset = match_strdup(&args[0]);
if (!iocharset)
return -ENOMEM;
@ -1777,8 +1784,7 @@ out_fail:
iput(fat_inode);
unload_nls(sbi->nls_io);
unload_nls(sbi->nls_disk);
if (sbi->options.iocharset != fat_default_iocharset)
kfree(sbi->options.iocharset);
fat_reset_iocharset(&sbi->options);
sb->s_fs_info = NULL;
kfree(sbi);
return error;


@ -79,6 +79,7 @@
#include <linux/delayacct.h>
#include <linux/seq_file.h>
#include <linux/pid_namespace.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/string_helpers.h>
@ -332,6 +333,31 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
#ifdef CONFIG_SECCOMP
seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode);
#endif
seq_printf(m, "\nSpeculation_Store_Bypass:\t");
switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
case -EINVAL:
seq_printf(m, "unknown");
break;
case PR_SPEC_NOT_AFFECTED:
seq_printf(m, "not vulnerable");
break;
case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE:
seq_printf(m, "thread force mitigated");
break;
case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
seq_printf(m, "thread mitigated");
break;
case PR_SPEC_PRCTL | PR_SPEC_ENABLE:
seq_printf(m, "thread vulnerable");
break;
case PR_SPEC_DISABLE:
seq_printf(m, "globally mitigated");
break;
default:
seq_printf(m, "vulnerable");
break;
}
seq_putc(m, '\n');
}
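
As a rough illustration of how the new field surfaces to user space (an illustrative sketch, not part of the patch), a small program can read it back out of /proc/self/status:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/status", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}

	/*
	 * The field is emitted by task_seccomp() above, e.g.
	 * "Speculation_Store_Bypass:  thread vulnerable".
	 */
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "Speculation_Store_Bypass:", 25))
			fputs(line, stdout);
	}

	fclose(f);
	return 0;
}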
static inline void task_context_switch_counts(struct seq_file *m,


@ -59,6 +59,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_spectre_v2(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,


@ -7,6 +7,8 @@
#define _LINUX_NOSPEC_H
#include <asm/barrier.h>
struct task_struct;
/**
* array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
* @index: array element index
@ -55,4 +57,12 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
\
(typeof(_i)) (_i & _mask); \
})
/* Speculation control prctl */
int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which);
int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
unsigned long ctrl);
/* Speculation control for seccomp enforced mitigation */
void arch_seccomp_spec_mitigate(struct task_struct *task);
#endif /* _LINUX_NOSPEC_H */


@ -2457,6 +2457,8 @@ static inline void memalloc_noio_restore(unsigned int flags)
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
#define PFA_SPEC_SSB_DISABLE 4 /* Speculative Store Bypass disabled */
#define PFA_SPEC_SSB_FORCE_DISABLE 5 /* Speculative Store Bypass force disabled*/
#define TASK_PFA_TEST(name, func) \
@ -2480,6 +2482,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
TASK_PFA_SET(SPREAD_SLAB, spread_slab)
TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
/*
* task->jobctl flags
*/


@ -3,7 +3,8 @@
#include <uapi/linux/seccomp.h>
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
SECCOMP_FILTER_FLAG_SPEC_ALLOW)
#ifdef CONFIG_SECCOMP


@ -514,6 +514,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
* @hash: the packet hash
* @queue_mapping: Queue mapping for multiqueue devices
* @xmit_more: More SKBs are pending for this queue
* @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
* @ndisc_nodetype: router type (from link layer)
* @ooo_okay: allow the mapping of a socket to a queue to be changed
* @l4_hash: indicate hash is a canonical 4-tuple hash over transport
@ -594,8 +595,8 @@ struct sk_buff {
fclone:2,
peeked:1,
head_frag:1,
xmit_more:1;
/* one bit hole */
xmit_more:1,
pfmemalloc:1;
kmemcheck_bitfield_end(flags1);
/* fields enclosed in headers_start/headers_end are copied
@ -615,19 +616,18 @@ struct sk_buff {
__u8 __pkt_type_offset[0];
__u8 pkt_type:3;
__u8 pfmemalloc:1;
__u8 ignore_df:1;
__u8 nfctinfo:3;
__u8 nf_trace:1;
__u8 ip_summed:2;
__u8 ooo_okay:1;
__u8 l4_hash:1;
__u8 sw_hash:1;
__u8 wifi_acked_valid:1;
__u8 wifi_acked:1;
__u8 no_fcs:1;
/* Indicates the inner headers are valid in the skbuff. */
__u8 encapsulation:1;
__u8 encap_hdr_csum:1;
@ -635,11 +635,11 @@ struct sk_buff {
__u8 csum_complete_sw:1;
__u8 csum_level:2;
__u8 csum_bad:1;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
#endif
__u8 ipvs_property:1;
__u8 inner_protocol_type:1;
__u8 remcsum_offload:1;
/* 3 or 5 bit hole */


@ -762,7 +762,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
* to minimize the possibility that any useful information to an
* attacker is leaked. Only lower 20 bits are relevant.
*/
rol32(hash, 16);
hash = rol32(hash, 16);
flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;


@ -206,4 +206,16 @@ struct prctl_mm_map {
#define PR_SET_VMA 0x53564d41
# define PR_SET_VMA_ANON_NAME 0
/* Per task speculation control */
#define PR_GET_SPECULATION_CTRL 52
#define PR_SET_SPECULATION_CTRL 53
/* Speculation control variants */
# define PR_SPEC_STORE_BYPASS 0
/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
# define PR_SPEC_NOT_AFFECTED 0
# define PR_SPEC_PRCTL (1UL << 0)
# define PR_SPEC_ENABLE (1UL << 1)
# define PR_SPEC_DISABLE (1UL << 2)
# define PR_SPEC_FORCE_DISABLE (1UL << 3)
#endif /* _LINUX_PRCTL_H */
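
For illustration, here is a minimal user-space sketch of the interface these constants define (not part of the patch; it assumes nothing beyond the glibc prctl() wrapper and the values listed above, with fallback defines for older headers):

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_SPECULATION_CTRL
# define PR_GET_SPECULATION_CTRL 52
# define PR_SET_SPECULATION_CTRL 53
# define PR_SPEC_STORE_BYPASS    0
# define PR_SPEC_PRCTL           (1UL << 0)
# define PR_SPEC_DISABLE         (1UL << 2)
#endif

int main(void)
{
	/* Query the current Speculative Store Bypass state of this task. */
	long state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);

	if (state < 0) {
		perror("PR_GET_SPECULATION_CTRL");
		return 1;
	}

	/* If per-task control is available, request the SSB mitigation. */
	if ((state & PR_SPEC_PRCTL) &&
	    prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
		  PR_SPEC_DISABLE, 0, 0))
		perror("PR_SET_SPECULATION_CTRL");

	printf("speculation ctrl state: 0x%lx\n", state);
	return 0;
}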


@ -15,7 +15,9 @@
#define SECCOMP_SET_MODE_FILTER 1
/* Valid flags for SECCOMP_SET_MODE_FILTER */
#define SECCOMP_FILTER_FLAG_TSYNC 1
#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
/* In v4.14+ SECCOMP_FILTER_FLAG_LOG is (1UL << 1) */
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
/*
* All BPF programs must return a 32-bit value.


@ -16,6 +16,8 @@
#include <linux/atomic.h>
#include <linux/audit.h>
#include <linux/compat.h>
#include <linux/nospec.h>
#include <linux/prctl.h>
#include <linux/sched.h>
#include <linux/seccomp.h>
#include <linux/slab.h>
@ -214,8 +216,11 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
return true;
}
void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
static inline void seccomp_assign_mode(struct task_struct *task,
unsigned long seccomp_mode)
unsigned long seccomp_mode,
unsigned long flags)
{
assert_spin_locked(&task->sighand->siglock);
@ -225,6 +230,9 @@ static inline void seccomp_assign_mode(struct task_struct *task,
* filter) is set.
*/
smp_mb__before_atomic();
/* Assume default seccomp processes want spec flaw mitigation. */
if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
arch_seccomp_spec_mitigate(task);
set_tsk_thread_flag(task, TIF_SECCOMP);
}
@ -292,7 +300,7 @@ static inline pid_t seccomp_can_sync_threads(void)
* without dropping the locks.
*
*/
static inline void seccomp_sync_threads(void)
static inline void seccomp_sync_threads(unsigned long flags)
{
struct task_struct *thread, *caller;
@ -333,7 +341,8 @@ static inline void seccomp_sync_threads(void)
* allow one thread to transition the other.
*/
if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
flags);
}
}
@ -452,7 +461,7 @@ static long seccomp_attach_filter(unsigned int flags,
/* Now that the new filter is in place, synchronize to all threads. */
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
seccomp_sync_threads();
seccomp_sync_threads(flags);
return 0;
}
@ -747,7 +756,7 @@ static long seccomp_set_mode_strict(void)
#ifdef TIF_NOTSC
disable_TSC();
#endif
seccomp_assign_mode(current, seccomp_mode);
seccomp_assign_mode(current, seccomp_mode, 0);
ret = 0;
out:
@ -805,7 +814,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
/* Do not free the successfully attached filter. */
prepared = NULL;
seccomp_assign_mode(current, seccomp_mode);
seccomp_assign_mode(current, seccomp_mode, flags);
out:
spin_unlock_irq(&current->sighand->siglock);
if (flags & SECCOMP_FILTER_FLAG_TSYNC)


@ -2224,6 +2224,17 @@ static int prctl_set_vma(unsigned long opt, unsigned long start,
}
#endif
int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
{
return -EINVAL;
}
int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
unsigned long ctrl)
{
return -EINVAL;
}
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@ -2444,6 +2455,15 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
break;
case PR_SET_VMA:
error = prctl_set_vma(arg2, arg3, arg4, arg5);
case PR_GET_SPECULATION_CTRL:
if (arg3 || arg4 || arg5)
return -EINVAL;
error = arch_prctl_spec_ctrl_get(me, arg2);
break;
case PR_SET_SPECULATION_CTRL:
if (arg4 || arg5)
return -EINVAL;
error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
break;
default:
error = -EINVAL;


@ -670,8 +670,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
static size_t rounded_hashtable_size(const struct rhashtable_params *params)
{
return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
(unsigned long)params->min_size);
size_t retsize;
if (params->nelem_hint)
retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
(unsigned long)params->min_size);
else
retsize = max(HASH_DEFAULT_SIZE,
(unsigned long)params->min_size);
return retsize;
}
static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
@ -728,8 +736,6 @@ int rhashtable_init(struct rhashtable *ht,
struct bucket_table *tbl;
size_t size;
size = HASH_DEFAULT_SIZE;
if ((!params->key_len && !params->obj_hashfn) ||
(params->obj_hashfn && !params->obj_cmpfn))
return -EINVAL;
@ -756,8 +762,7 @@ int rhashtable_init(struct rhashtable *ht,
ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
if (params->nelem_hint)
size = rounded_hashtable_size(&ht->p);
size = rounded_hashtable_size(&ht->p);
/* The maximum (not average) chain length grows with the
* size of the hash table, at a rate of (log N)/(log log N).


@ -996,7 +996,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
int nid, zid;
int i;
while ((memcg = parent_mem_cgroup(memcg))) {
for (; memcg; memcg = parent_mem_cgroup(memcg)) {
for_each_node(nid) {
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
mz = &memcg->nodeinfo[nid]->zoneinfo[zid];


@ -854,6 +854,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
n->cloned = 1;
n->nohdr = 0;
n->peeked = 0;
C(pfmemalloc);
n->destructor = NULL;
C(tail);
C(end);


@ -297,6 +297,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
struct flowi4 fl4 = {
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
.flowi4_scope = scope,


@ -145,8 +145,9 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
if (write && ret == 0) {
low = make_kgid(user_ns, urange[0]);
high = make_kgid(user_ns, urange[1]);
if (!gid_valid(low) || !gid_valid(high) ||
(urange[1] < urange[0]) || gid_lt(high, low)) {
if (!gid_valid(low) || !gid_valid(high))
return -EINVAL;
if (urange[1] < urange[0] || gid_lt(high, low)) {
low = make_kgid(&init_user_ns, 1);
high = make_kgid(&init_user_ns, 0);
}


@@ -1476,15 +1476,19 @@ TEST_F(TRACE_syscall, syscall_dropped)
 #define SECCOMP_SET_MODE_FILTER 1
 #endif
-#ifndef SECCOMP_FLAG_FILTER_TSYNC
-#define SECCOMP_FLAG_FILTER_TSYNC 1
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
+#endif
+#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
 #endif
 #ifndef seccomp
-int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter)
+int seccomp(unsigned int op, unsigned int flags, void *args)
 {
 	errno = 0;
-	return syscall(__NR_seccomp, op, flags, filter);
+	return syscall(__NR_seccomp, op, flags, args);
 }
 #endif
@@ -1576,6 +1580,78 @@ TEST(seccomp_syscall_mode_lock)
 	}
 }
+/*
+ * Test detection of known and unknown filter flags. Userspace needs to be able
+ * to check if a filter flag is supported by the current kernel and a good way
+ * of doing that is by attempting to enter filter mode, with the flag bit in
+ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
+ * that the flag is valid and EINVAL indicates that the flag is invalid.
+ */
+TEST(detect_seccomp_filter_flags)
+{
+	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
+				 SECCOMP_FILTER_FLAG_SPEC_ALLOW };
+	unsigned int flag, all_flags;
+	int i;
+	long ret;
+	/* Test detection of known-good filter flags */
+	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+		int bits = 0;
+		flag = flags[i];
+		/* Make sure the flag is a single bit! */
+		while (flag) {
+			if (flag & 0x1)
+				bits ++;
+			flag >>= 1;
+		}
+		ASSERT_EQ(1, bits);
+		flag = flags[i];
+		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+		ASSERT_NE(ENOSYS, errno) {
+			TH_LOG("Kernel does not support seccomp syscall!");
+		}
+		EXPECT_EQ(-1, ret);
+		EXPECT_EQ(EFAULT, errno) {
+			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
+			       flag);
+		}
+		all_flags |= flag;
+	}
+	/* Test detection of all known-good filter flags */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EFAULT, errno) {
+		TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
+		       all_flags);
+	}
+	/* Test detection of an unknown filter flag */
+	flag = -1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
+		       flag);
+	}
+	/*
+	 * Test detection of an unknown filter flag that may simply need to be
+	 * added to this test
+	 */
+	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
+		       flag);
+	}
+}
 TEST(TSYNC_first)
 {
 	struct sock_filter filter[] = {
@@ -1592,7 +1668,7 @@ TEST(TSYNC_first)
 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 	}
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &prog);
 	ASSERT_NE(ENOSYS, errno) {
 		TH_LOG("Kernel does not support seccomp syscall!");
@@ -1810,7 +1886,7 @@ TEST_F(TSYNC, two_siblings_with_ancestor)
 		self->sibling_count++;
 	}
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(0, ret) {
 		TH_LOG("Could install filter on all threads!");
@@ -1871,7 +1947,7 @@ TEST_F(TSYNC, two_siblings_with_no_filter)
 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 	}
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_NE(ENOSYS, errno) {
 		TH_LOG("Kernel does not support seccomp syscall!");
@@ -1919,7 +1995,7 @@ TEST_F(TSYNC, two_siblings_with_one_divergence)
 		self->sibling_count++;
 	}
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(self->sibling[0].system_tid, ret) {
 		TH_LOG("Did not fail on diverged sibling.");
@@ -1971,7 +2047,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter)
 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
 	}
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(ret, self->sibling[0].system_tid) {
 		TH_LOG("Did not fail on diverged sibling.");
@@ -2000,7 +2076,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter)
 	/* Switch to the remaining sibling */
 	sib = !sib;
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(0, ret) {
 		TH_LOG("Expected the remaining sibling to sync");
@@ -2023,7 +2099,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter)
 	while (!kill(self->sibling[sib].system_tid, 0))
 		sleep(0.1);
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(0, ret); /* just us chickens */
 }
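
Editor's note (illustration only): the comment added above TEST(detect_seccomp_filter_flags) documents the probing convention — issue SECCOMP_SET_MODE_FILTER with the flag under test and a NULL args pointer, then treat EFAULT as "flag recognised" and EINVAL as "flag unknown". A standalone sketch of that probe for the new SECCOMP_FILTER_FLAG_SPEC_ALLOW bit might look as follows; the fallback #defines copy the selftest's values, and __NR_seccomp is assumed to be provided by the toolchain's syscall headers.

#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/seccomp.h>

#ifndef SECCOMP_SET_MODE_FILTER
# define SECCOMP_SET_MODE_FILTER 1
#endif
#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
# define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif

int main(void)
{
	long ret;

	errno = 0;
	/* NULL filter pointer: EFAULT means the flag itself was accepted. */
	ret = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
		      SECCOMP_FILTER_FLAG_SPEC_ALLOW, NULL);
	if (ret == -1 && errno == EFAULT)
		puts("SECCOMP_FILTER_FLAG_SPEC_ALLOW: supported");
	else if (ret == -1 && errno == EINVAL)
		puts("SECCOMP_FILTER_FLAG_SPEC_ALLOW: not supported");
	else if (ret == -1 && errno == ENOSYS)
		puts("seccomp(2) not available on this kernel");
	else
		printf("unexpected result: ret=%ld errno=%d\n", ret, errno);
	return 0;
}

This is the same convention the selftest relies on, so the probe degrades cleanly on kernels that predate the flag.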


@@ -119,8 +119,12 @@ irqfd_shutdown(struct work_struct *work)
 {
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(work, struct kvm_kernel_irqfd, shutdown);
+	struct kvm *kvm = irqfd->kvm;
 	u64 cnt;
+	/* Make sure irqfd has been initalized in assign path. */
+	synchronize_srcu(&kvm->irq_srcu);
 	/*
 	 * Synchronize with the wait-queue and unhook ourselves to prevent
 	 * further events.
@@ -387,7 +391,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	irqfd_update(kvm, irqfd);
-	srcu_read_unlock(&kvm->irq_srcu, idx);
 	list_add_tail(&irqfd->list, &kvm->irqfds.items);
@@ -419,6 +422,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 					   irqfd->consumer.token, ret);
 #endif
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 	return 0;
 fail: