This is the 4.4.180 stable release

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAlzdoa4ACgkQONu9yGCS
 aT6Rdg/+Ph+FR5Y8FI3lAJWSbdWw7ePiiN6LQ7NjM46nWaVWE3KzQoMHTN+qE69Q
 bZLnjSs73uFD2CdVZdxWG/2nkn6XNauEKOCZNeVXxvwbHokyWsTQv4cLaGqngbLw
 1DO02kgs5IXJJuvn41M0R+jU2Sxnj7yck/JDowB3OiMtM3nkdggBAr318ebPXrKb
 ZSWjRqI3d02MJxJO+/sUrrSfiVBiOtpj/2YbSw64vATg5mLrEfsmIuv2H2LZFcGK
 D8v6LKS1m2yERfs8Q361ha/NPKRW9jRsde9VvwKLTisXMDIFJNZL0XcvnfXO/oye
 VWf3TLk7dWUiyxD53LIQ/9S+9Pgv0qP8vgzw5s7BEYGDlUBcqFoZrKyMzCQgogon
 5lo0YQZkvQvXLKqbJVp1OFDASdD6YXXRgvtxGqzlhtS5513MT2p+gWBv164ZPjQn
 TCmqlJiR15QIoahxfDvSHzRssOofNCVVl1SS0Lx30N8++DopuEUc+86xmjE4iOq7
 gNHyrsKqHH/LRnVLaippnVm/fL/6zgiQ/ZaiZ2AWIGzGLGhqgbXTStsTNh6XZ0+c
 mIVUV3Tya/bP1II63R7PaQqoLBtJbARW/bDnbpHgnTDaBH30UcgI0LZx549/QELl
 0SNn8K0QkGqE5EgqGvchX+KMBIqMf1NXoAffeoZopSkdK3DMGB4=
 =37ec
 -----END PGP SIGNATURE-----

Merge 4.4.180 into android-4.4-p

Changes in 4.4.180
	kbuild: simplify ld-option implementation
	KVM: fail KVM_SET_VCPU_EVENTS with invalid exception number
	cifs: do not attempt cifs operation on smb2+ rename error
	MIPS: scall64-o32: Fix indirect syscall number load
	trace: Fix preempt_enable_no_resched() abuse
	sched/numa: Fix a possible divide-by-zero
	ceph: ensure d_name stability in ceph_dentry_hash()
	ceph: fix ci->i_head_snapc leak
	nfsd: Don't release the callback slot unless it was actually held
	sunrpc: don't mark uninitialised items as VALID.
	USB: Add new USB LPM helpers
	USB: Consolidate LPM checks to avoid enabling LPM twice
	powerpc/xmon: Add RFI flush related fields to paca dump
	powerpc/64s: Improve RFI L1-D cache flush fallback
	powerpc/pseries: Support firmware disable of RFI flush
	powerpc/powernv: Support firmware disable of RFI flush
	powerpc/rfi-flush: Move the logic to avoid a redo into the debugfs code
	powerpc/rfi-flush: Make it possible to call setup_rfi_flush() again
	powerpc/rfi-flush: Always enable fallback flush on pseries
	powerpc/rfi-flush: Differentiate enabled and patched flush types
	powerpc/pseries: Add new H_GET_CPU_CHARACTERISTICS flags
	powerpc/rfi-flush: Call setup_rfi_flush() after LPM migration
	powerpc: Add security feature flags for Spectre/Meltdown
	powerpc/pseries: Set or clear security feature flags
	powerpc/powernv: Set or clear security feature flags
	powerpc/64s: Move cpu_show_meltdown()
	powerpc/64s: Enhance the information in cpu_show_meltdown()
	powerpc/powernv: Use the security flags in pnv_setup_rfi_flush()
	powerpc/pseries: Use the security flags in pseries_setup_rfi_flush()
	powerpc/64s: Wire up cpu_show_spectre_v1()
	powerpc/64s: Wire up cpu_show_spectre_v2()
	powerpc/pseries: Fix clearing of security feature flags
	powerpc: Move default security feature flags
	powerpc/pseries: Restore default security feature flags on setup
	powerpc/64s: Fix section mismatch warnings from setup_rfi_flush()
	powerpc/64s: Add support for a store forwarding barrier at kernel entry/exit
	powerpc/64s: Add barrier_nospec
	powerpc/64s: Add support for ori barrier_nospec patching
	powerpc/64s: Patch barrier_nospec in modules
	powerpc/64s: Enable barrier_nospec based on firmware settings
	powerpc/64: Use barrier_nospec in syscall entry
	powerpc: Use barrier_nospec in copy_from_user()
	powerpc/64s: Enhance the information in cpu_show_spectre_v1()
	powerpc64s: Show ori31 availability in spectre_v1 sysfs file not v2
	powerpc/64: Disable the speculation barrier from the command line
	powerpc/64: Make stf barrier PPC_BOOK3S_64 specific.
	powerpc/64: Add CONFIG_PPC_BARRIER_NOSPEC
	powerpc/64: Call setup_barrier_nospec() from setup_arch()
	powerpc/64: Make meltdown reporting Book3S 64 specific
	powerpc/fsl: Add barrier_nospec implementation for NXP PowerPC Book3E
	powerpc/asm: Add a patch_site macro & helpers for patching instructions
	powerpc/64s: Add new security feature flags for count cache flush
	powerpc/64s: Add support for software count cache flush
	powerpc/pseries: Query hypervisor for count cache flush settings
	powerpc/powernv: Query firmware for count cache flush settings
	powerpc: Avoid code patching freed init sections
	powerpc/fsl: Add infrastructure to fixup branch predictor flush
	powerpc/fsl: Add macro to flush the branch predictor
	powerpc/fsl: Fix spectre_v2 mitigations reporting
	powerpc/fsl: Add nospectre_v2 command line argument
	powerpc/fsl: Flush the branch predictor at each kernel entry (64bit)
	powerpc/fsl: Update Spectre v2 reporting
	powerpc/security: Fix spectre_v2 reporting
	powerpc/fsl: Fix the flush of branch predictor.
	tipc: handle the err returned from cmd header function
	slip: make slhc_free() silently accept an error pointer
	intel_th: gth: Fix an off-by-one in output unassigning
	fs/proc/proc_sysctl.c: Fix a NULL pointer dereference
	NFS: Forbid setting AF_INET6 to "struct sockaddr_in"->sin_family.
	netfilter: ebtables: CONFIG_COMPAT: drop a bogus WARN_ON
	tipc: check bearer name with right length in tipc_nl_compat_bearer_enable
	tipc: check link name with right length in tipc_nl_compat_link_set
	bpf: reject wrong sized filters earlier
	Revert "block/loop: Use global lock for ioctl() operation."
	ipv4: add sanity checks in ipv4_link_failure()
	team: fix possible recursive locking when add slaves
	net: stmmac: move stmmac_check_ether_addr() to driver probe
	ipv4: set the tcp_min_rtt_wlen range from 0 to one day
	powerpc/fsl: Enable runtime patching if nospectre_v2 boot arg is used
	powerpc/fsl: Flush branch predictor when entering KVM
	powerpc/fsl: Emulate SPRN_BUCSR register
	powerpc/fsl: Flush the branch predictor at each kernel entry (32 bit)
	powerpc/fsl: Sanitize the syscall table for NXP PowerPC 32 bit platforms
	powerpc/fsl: Fixed warning: orphan section `__btb_flush_fixup'
	powerpc/fsl: Add FSL_PPC_BOOK3E as supported arch for nospectre_v2 boot arg
	Documentation: Add nospectre_v1 parameter
	usbnet: ipheth: prevent TX queue timeouts when device not ready
	usbnet: ipheth: fix potential null pointer dereference in ipheth_carrier_set
	qlcnic: Avoid potential NULL pointer dereference
	netfilter: bridge: set skb transport_header before entering NF_INET_PRE_ROUTING
	sc16is7xx: missing unregister/delete driver on error in sc16is7xx_init()
	usb: gadget: net2280: Fix overrun of OUT messages
	usb: gadget: net2280: Fix net2280_dequeue()
	usb: gadget: net2272: Fix net2272_dequeue()
	ARM: dts: pfla02: increase phy reset duration
	net: ks8851: Dequeue RX packets explicitly
	net: ks8851: Reassert reset pin if chip ID check fails
	net: ks8851: Delay requesting IRQ until opened
	net: ks8851: Set initial carrier state to down
	net: xilinx: fix possible object reference leak
	net: ibm: fix possible object reference leak
	net: ethernet: ti: fix possible object reference leak
	scsi: qla4xxx: fix a potential NULL pointer dereference
	usb: u132-hcd: fix resource leak
	ceph: fix use-after-free on symlink traversal
	scsi: zfcp: reduce flood of fcrscn1 trace records on multi-element RSCN
	libata: fix using DMA buffers on stack
	kconfig/[mn]conf: handle backspace (^H) key
	vfio/type1: Limit DMA mappings per container
	ALSA: line6: use dynamic buffers
	ipv4: ip_do_fragment: Preserve skb_iif during fragmentation
	ipv6/flowlabel: wait rcu grace period before put_pid()
	ipv6: invert flowlabel sharing check in process and user mode
	bnxt_en: Improve multicast address setup logic.
	packet: validate msg_namelen in send directly
	USB: yurex: Fix protection fault after device removal
	USB: w1 ds2490: Fix bug caused by improper use of altsetting array
	USB: core: Fix unterminated string returned by usb_string()
	USB: core: Fix bug caused by duplicate interface PM usage counter
	HID: debug: fix race condition with between rdesc_show() and device removal
	rtc: sh: Fix invalid alarm warning for non-enabled alarm
	igb: Fix WARN_ONCE on runtime suspend
	bonding: show full hw address in sysfs for slave entries
	jffs2: fix use-after-free on symlink traversal
	debugfs: fix use-after-free on symlink traversal
	rtc: da9063: set uie_unsupported when relevant
	vfio/pci: use correct format characters
	scsi: storvsc: Fix calculation of sub-channel count
	net: hns: Use NAPI_POLL_WEIGHT for hns driver
	net: hns: Fix WARNING when remove HNS driver with SMMU enabled
	hugetlbfs: fix memory leak for resv_map
	xsysace: Fix error handling in ace_setup
	ARM: orion: don't use using 64-bit DMA masks
	ARM: iop: don't use using 64-bit DMA masks
	usb: usbip: fix isoc packet num validation in get_pipe
	staging: iio: adt7316: allow adt751x to use internal vref for all dacs
	staging: iio: adt7316: fix the dac read calculation
	staging: iio: adt7316: fix the dac write calculation
	Input: snvs_pwrkey - initialize necessary driver data before enabling IRQ
	selinux: never allow relabeling on context mounts
	x86/mce: Improve error message when kernel cannot recover, p2
	media: v4l2: i2c: ov7670: Fix PLL bypass register values
	scsi: libsas: fix a race condition when smp task timeout
	ASoC:soc-pcm:fix a codec fixup issue in TDM case
	ASoC: cs4270: Set auto-increment bit for register writes
	ASoC: tlv320aic32x4: Fix Common Pins
	perf/x86/intel: Fix handling of wakeup_events for multi-entry PEBS
	scsi: csiostor: fix missing data copy in csio_scsi_err_handler()
	iommu/amd: Set exclusion range correctly
	genirq: Prevent use-after-free and work list corruption
	usb: dwc3: Fix default lpm_nyet_threshold value
	scsi: qla2xxx: Fix incorrect region-size setting in optrom SYSFS routines
	Bluetooth: hidp: fix buffer overflow
	Bluetooth: Align minimum encryption key size for LE and BR/EDR connections
	UAS: fix alignment of scatter/gather segments
	ipv6: fix a potential deadlock in do_ipv6_setsockopt()
	ASoC: Intel: avoid Oops if DMA setup fails
	timer/debug: Change /proc/timer_stats from 0644 to 0600
	netfilter: compat: initialize all fields in xt_init
	platform/x86: sony-laptop: Fix unintentional fall-through
	iio: adc: xilinx: fix potential use-after-free on remove
	HID: input: add mapping for Expose/Overview key
	HID: input: add mapping for keyboard Brightness Up/Down/Toggle keys
	libnvdimm/btt: Fix a kmemdup failure check
	s390/dasd: Fix capacity calculation for large volumes
	s390/3270: fix lockdep false positive on view->lock
	KVM: x86: avoid misreporting level-triggered irqs as edge-triggered in tracing
	tools lib traceevent: Fix missing equality check for strcmp
	init: initialize jump labels before command line option parsing
	ipvs: do not schedule icmp errors from tunnels
	s390: ctcm: fix ctcm_new_device error return code
	selftests/net: correct the return value for run_netsocktests
	gpu: ipu-v3: dp: fix CSC handling
	cw1200: fix missing unlock on error in cw1200_hw_scan()
	x86/vdso: Pass --eh-frame-hdr to the linker
	Don't jump to compute_result state from check_result state
	locking/static_keys: Provide DECLARE and well as DEFINE macros
	x86/microcode/intel: Add a helper which gives the microcode revision
	x86: stop exporting msr-index.h to userland
	bitops: avoid integer overflow in GENMASK(_ULL)
	x86/microcode/intel: Check microcode revision before updating sibling threads
	x86/MCE: Save microcode revision in machine check records
	x86/cpufeatures: Hide AMD-specific speculation flags
	x86/speculation: Support Enhanced IBRS on future CPUs
	x86/speculation: Simplify the CPU bug detection logic
	x86/bugs: Add AMD's variant of SSB_NO
	x86/bugs: Add AMD's SPEC_CTRL MSR usage
	x86/bugs: Switch the selection of mitigation from CPU vendor to CPU features
	locking/atomics, asm-generic: Move some macros from <linux/bitops.h> to a new <linux/bits.h> file
	x86/bugs: Fix the AMD SSBD usage of the SPEC_CTRL MSR
	x86/speculation: Remove SPECTRE_V2_IBRS in enum spectre_v2_mitigation
	x86/microcode: Make sure boot_cpu_data.microcode is up-to-date
	x86/microcode: Update the new microcode revision unconditionally
	x86/cpu: Sanitize FAM6_ATOM naming
	KVM: x86: SVM: Call x86_spec_ctrl_set_guest/host() with interrupts disabled
	x86/mm: Use WRITE_ONCE() when setting PTEs
	x86/speculation: Apply IBPB more strictly to avoid cross-process data leak
	x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation
	x86/speculation: Propagate information about RSB filling mitigation to sysfs
	x86/speculation: Update the TIF_SSBD comment
	x86/speculation: Clean up spectre_v2_parse_cmdline()
	x86/speculation: Remove unnecessary ret variable in cpu_show_common()
	x86/speculation: Move STIPB/IBPB string conditionals out of cpu_show_common()
	x86/speculation: Disable STIBP when enhanced IBRS is in use
	x86/speculation: Rename SSBD update functions
	x86/speculation: Reorganize speculation control MSRs update
	x86/Kconfig: Select SCHED_SMT if SMP enabled
	sched: Add sched_smt_active()
	x86/speculation: Rework SMT state change
	x86/speculation: Reorder the spec_v2 code
	x86/speculation: Mark string arrays const correctly
	x86/speculataion: Mark command line parser data __initdata
	x86/speculation: Unify conditional spectre v2 print functions
	x86/speculation: Add command line control for indirect branch speculation
	x86/speculation: Prepare for per task indirect branch speculation control
	x86/process: Consolidate and simplify switch_to_xtra() code
	x86/speculation: Avoid __switch_to_xtra() calls
	x86/speculation: Prepare for conditional IBPB in switch_mm()
	x86/speculation: Split out TIF update
	x86/speculation: Prepare arch_smt_update() for PRCTL mode
	x86/speculation: Prevent stale SPEC_CTRL msr content
	x86/speculation: Add prctl() control for indirect branch speculation
	x86/speculation: Enable prctl mode for spectre_v2_user
	x86/speculation: Add seccomp Spectre v2 user space protection mode
	x86/speculation: Provide IBPB always command line options
	kvm: x86: Report STIBP on GET_SUPPORTED_CPUID
	x86/msr-index: Cleanup bit defines
	x86/speculation: Consolidate CPU whitelists
	x86/speculation/mds: Add basic bug infrastructure for MDS
	x86/speculation/mds: Add BUG_MSBDS_ONLY
	x86/kvm: Expose X86_FEATURE_MD_CLEAR to guests
	x86/speculation/mds: Add mds_clear_cpu_buffers()
	x86/speculation/mds: Clear CPU buffers on exit to user
	x86/speculation/mds: Conditionally clear CPU buffers on idle entry
	x86/speculation/mds: Add mitigation control for MDS
	x86/speculation/l1tf: Document l1tf in sysfs
	x86/speculation/mds: Add sysfs reporting for MDS
	x86/speculation/mds: Add mitigation mode VMWERV
	Documentation: Move L1TF to separate directory
	Documentation: Add MDS vulnerability documentation
	x86/cpu/bugs: Use __initconst for 'const' init data
	x86/speculation: Move arch_smt_update() call to after mitigation decisions
	x86/speculation/mds: Add SMT warning message
	x86/speculation/mds: Fix comment
	x86/speculation/mds: Print SMT vulnerable on MSBDS with mitigations off
	cpu/speculation: Add 'mitigations=' cmdline option
	x86/speculation: Support 'mitigations=' cmdline option
	x86/speculation/mds: Add 'mitigations=' support for MDS
	x86/mds: Add MDSUM variant to the MDS documentation
	Documentation: Correct the possible MDS sysfs values
	x86/speculation/mds: Fix documentation typo
	x86/bugs: Change L1TF mitigation string to match upstream
	USB: serial: use variable for status
	USB: serial: fix unthrottle races
	powerpc/64s: Include cpu header
	bridge: Fix error path for kobject_init_and_add()
	net: ucc_geth - fix Oops when changing number of buffers in the ring
	packet: Fix error path in packet_init
	vlan: disable SIOCSHWTSTAMP in container
	vrf: sit mtu should not be updated when vrf netdev is the link
	ipv4: Fix raw socket lookup for local traffic
	bonding: fix arp_validate toggling in active-backup mode
	drivers/virt/fsl_hypervisor.c: dereferencing error pointers in ioctl
	drivers/virt/fsl_hypervisor.c: prevent integer overflow in ioctl
	powerpc/booke64: set RI in default MSR
	powerpc/lib: fix book3s/32 boot failure due to code patching
	Linux 4.4.180

Change-Id: I72f6c596cc992689d95abc8b5d1303d6ec22b051
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Greg Kroah-Hartman 2019-05-16 22:34:35 +02:00
commit 4521d273cf
231 changed files with 4182 additions and 989 deletions


@@ -277,6 +277,8 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/spectre_v1
/sys/devices/system/cpu/vulnerabilities/spectre_v2
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
/sys/devices/system/cpu/vulnerabilities/l1tf
/sys/devices/system/cpu/vulnerabilities/mds
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities


@@ -0,0 +1,305 @@
MDS - Microarchitectural Data Sampling
======================================
Microarchitectural Data Sampling is a hardware vulnerability which allows
unprivileged speculative access to data which is available in various CPU
internal buffers.
Affected processors
-------------------
This vulnerability affects a wide range of Intel processors. The
vulnerability is not present on:
- Processors from AMD, Centaur and other non-Intel vendors
- Older processor models, where the CPU family is < 6
- Some Atoms (Bonnell, Saltwell, Goldmont, GoldmontPlus)
- Intel processors which have the ARCH_CAP_MDS_NO bit set in the
IA32_ARCH_CAPABILITIES MSR.
Whether a processor is affected or not can be read out from the MDS
vulnerability file in sysfs. See :ref:`mds_sys_info`.
Not all processors are affected by all variants of MDS, but the mitigation
is identical for all of them so the kernel treats them as a single
vulnerability.
Related CVEs
------------
The following CVE entries are related to the MDS vulnerability:
============== ===== ===================================================
CVE-2018-12126 MSBDS Microarchitectural Store Buffer Data Sampling
CVE-2018-12130 MFBDS Microarchitectural Fill Buffer Data Sampling
CVE-2018-12127 MLPDS Microarchitectural Load Port Data Sampling
CVE-2019-11091 MDSUM Microarchitectural Data Sampling Uncacheable Memory
============== ===== ===================================================
Problem
-------
When performing store, load or L1 refill operations, processors write data
into temporary microarchitectural structures (buffers). The data in the
buffer can be forwarded to load operations as an optimization.
Under certain conditions, usually a fault/assist caused by a load
operation, data unrelated to the load memory address can be speculatively
forwarded from the buffers. Because the load operation causes a fault or
assist and its result will be discarded, the forwarded data will not cause
incorrect program execution or state changes. But a malicious operation
may be able to forward this speculative data to a disclosure gadget which
allows in turn to infer the value via a cache side channel attack.
Because the buffers are potentially shared between Hyper-Threads, cross
Hyper-Thread attacks are possible.
Deeper technical information is available in the MDS specific x86
architecture section: :ref:`Documentation/x86/mds.rst <mds>`.
Attack scenarios
----------------
Attacks against the MDS vulnerabilities can be mounted from malicious
non-privileged user space applications running on hosts or guests.
Malicious guest OSes can obviously mount attacks as well.
Contrary to other speculation based vulnerabilities the MDS vulnerability
does not allow the attacker to control the memory target address. As a
consequence the attacks are purely sampling based, but as demonstrated with
the TLBleed attack samples can be postprocessed successfully.
Web-Browsers
^^^^^^^^^^^^
It's unclear whether attacks through Web-Browsers are possible at
all. The exploitation through JavaScript is considered very unlikely,
but other widely used web technologies like WebAssembly could possibly
be abused.
.. _mds_sys_info:
MDS system information
-----------------------
The Linux kernel provides a sysfs interface to enumerate the current MDS
status of the system: whether the system is vulnerable, and which
mitigations are active. The relevant sysfs file is:
/sys/devices/system/cpu/vulnerabilities/mds
The possible values in this file are:
.. list-table::

   * - 'Not affected'
     - The processor is not vulnerable
   * - 'Vulnerable'
     - The processor is vulnerable, but no mitigation enabled
   * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
     - The processor is vulnerable but microcode is not updated.
       The mitigation is enabled on a best effort basis. See :ref:`vmwerv`
   * - 'Mitigation: Clear CPU buffers'
     - The processor is vulnerable and the CPU buffer clearing mitigation is
       enabled.
If the processor is vulnerable then the following information is appended
to the above information:
======================== ============================================
'SMT vulnerable'         SMT is enabled
'SMT mitigated'          SMT is enabled and mitigated
'SMT disabled'           SMT is disabled
'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown
======================== ============================================
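For scripting, the file can be read like any other sysfs attribute. A
minimal user space sketch (not part of this patch set; the path is the
one documented above)::

	#include <stdio.h>

	int main(void)
	{
		char buf[128];
		FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/mds", "r");

		if (!f) {
			/* Kernels without the MDS patches do not have this file. */
			perror("mds sysfs");
			return 1;
		}
		if (fgets(buf, sizeof(buf), f))
			/* e.g. "Mitigation: Clear CPU buffers; SMT vulnerable" */
			printf("MDS status: %s", buf);
		fclose(f);
		return 0;
	}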
.. _vmwerv:
Best effort mitigation mode
^^^^^^^^^^^^^^^^^^^^^^^^^^^
If the processor is vulnerable, but the availability of the microcode-based
mitigation mechanism is not advertised via CPUID, the kernel selects a best
effort mitigation mode. This mode invokes the mitigation instructions
without a guarantee that they clear the CPU buffers.
This is done to address virtualization scenarios where the host has the
microcode update applied, but the hypervisor is not yet updated to expose
the CPUID to the guest. If the host has updated microcode the protection
takes effect; otherwise a few CPU cycles are wasted pointlessly.
The state in the mds sysfs file reflects this situation accordingly.
Mitigation mechanism
-------------------------
The kernel detects the affected CPUs and the presence of the microcode
which is required.
If a CPU is affected and the microcode is available, then the kernel
enables the mitigation by default. The mitigation can be controlled at boot
time via a kernel command line option. See
:ref:`mds_mitigation_control_command_line`.
.. _cpu_buffer_clear:
CPU buffer clearing
^^^^^^^^^^^^^^^^^^^
The mitigation for MDS clears the affected CPU buffers on return to user
space and when entering a guest.
If SMT is enabled it also clears the buffers on idle entry when the CPU
is only affected by MSBDS and not any other MDS variant, because the
other variants cannot be protected against cross Hyper-Thread attacks.
For CPUs which are only affected by MSBDS the user space, guest and idle
transition mitigations are sufficient and SMT is not affected.
.. _virt_mechanism:
Virtualization mitigation
^^^^^^^^^^^^^^^^^^^^^^^^^
The protection for host to guest transition depends on the L1TF
vulnerability of the CPU:
- CPU is affected by L1TF:
If the L1D flush mitigation is enabled and up to date microcode is
available, the L1D flush mitigation is automatically protecting the
guest transition.
If the L1D flush mitigation is disabled then the MDS mitigation is
invoked explicitly when the host MDS mitigation is enabled.
For details on L1TF and virtualization see:
:ref:`Documentation/hw-vuln/l1tf.rst <mitigation_control_kvm>`.
- CPU is not affected by L1TF:
CPU buffers are flushed before entering the guest when the host MDS
mitigation is enabled.
The resulting MDS protection matrix for the host to guest transition:
============ ===== ============= ============ =================
L1TF         MDS   VMX-L1FLUSH   Host MDS     MDS-State

Don't care   No    Don't care    N/A          Not affected
Yes          Yes   Disabled      Off          Vulnerable
Yes          Yes   Disabled      Full         Mitigated
Yes          Yes   Enabled       Don't care   Mitigated
No           Yes   N/A           Off          Vulnerable
No           Yes   N/A           Full         Mitigated
============ ===== ============= ============ =================
This only covers the host to guest transition, i.e. prevents leakage from
host to guest, but does not protect the guest internally. Guests need to
have their own protections.
.. _xeon_phi:
XEON PHI specific considerations
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The XEON PHI processor family is affected by MSBDS which can be exploited
cross Hyper-Threads when entering idle states. Some XEON PHI variants allow
to use MWAIT in user space (Ring 3), which opens a potential attack vector
for malicious user space. The exposure can be disabled on the kernel
command line with the 'ring3mwait=disable' command line option.
XEON PHI is not affected by the other MDS variants and MSBDS is mitigated
before the CPU enters an idle state. As XEON PHI is not affected by L1TF
either, disabling SMT is not required for full protection.
.. _mds_smt_control:
SMT control
^^^^^^^^^^^
All MDS variants except MSBDS can be attacked cross Hyper-Threads. That
means on CPUs which are affected by MFBDS or MLPDS it is necessary to
disable SMT for full protection. These are most of the affected CPUs; the
exception is XEON PHI, see :ref:`xeon_phi`.
Disabling SMT can have a significant performance impact, but the impact
depends on the type of workloads.
See the relevant chapter in the L1TF mitigation documentation for details:
:ref:`Documentation/hw-vuln/l1tf.rst <smt_control>`.
.. _mds_mitigation_control_command_line:
Mitigation control on the kernel command line
---------------------------------------------
The kernel command line allows control of the MDS mitigations at boot
time with the option "mds=". The valid arguments for this option are:
============ =============================================================
full         If the CPU is vulnerable, enable all available mitigations
             for the MDS vulnerability, CPU buffer clearing on exit to
             userspace and when entering a VM. Idle transitions are
             protected as well if SMT is enabled.

             It does not automatically disable SMT.

off          Disables MDS mitigations completely.
============ =============================================================
Not specifying this option is equivalent to "mds=full".
Mitigation selection guide
--------------------------
1. Trusted userspace
^^^^^^^^^^^^^^^^^^^^
If all userspace applications are from a trusted source and do not
execute untrusted code which is supplied externally, then the mitigation
can be disabled.
2. Virtualization with trusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The same considerations as above versus trusted user space apply.
3. Virtualization with untrusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The protection depends on the state of the L1TF mitigations.
See :ref:`virt_mechanism`.
If the MDS mitigation is enabled and SMT is disabled, guest to host and
guest to guest attacks are prevented.
.. _mds_default_mitigations:
Default mitigations
-------------------
The kernel default mitigations for vulnerable processors are:
- Enable CPU buffer clearing
The kernel does not by default enforce the disabling of SMT, which leaves
SMT systems vulnerable when running untrusted code. The same rationale as
for L1TF applies.
See :ref:`Documentation/hw-vuln/l1tf.rst <default_mitigations>`.


@@ -2076,6 +2076,30 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Format: <first>,<last>
Specifies range of consoles to be captured by the MDA.
mds= [X86,INTEL]
Control mitigation for the Micro-architectural Data
Sampling (MDS) vulnerability.
Certain CPUs are vulnerable to an exploit against CPU
internal buffers which can forward information to a
disclosure gadget under certain conditions.
In vulnerable processors, the speculatively
forwarded data can be used in a cache side channel
attack, to access data to which the attacker does
not have direct access.
This parameter controls the MDS mitigation. The
options are:
full - Enable MDS mitigation on vulnerable CPUs
off - Unconditionally disable MDS mitigation
Not specifying this option is equivalent to
mds=full.
For details see: Documentation/hw-vuln/mds.rst
mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
Amount of memory to be used when the kernel is not able
to see the whole system memory or for test.
@@ -2190,6 +2214,30 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
in the "bleeding edge" mini2440 support kernel at
http://repo.or.cz/w/linux-2.6/mini2440.git
mitigations=
[X86] Control optional mitigations for CPU
vulnerabilities. This is a set of curated,
arch-independent options, each of which is an
aggregation of existing arch-specific options.
off
Disable all optional CPU mitigations. This
improves system performance, but it may also
expose users to several CPU vulnerabilities.
Equivalent to: nopti [X86]
nospectre_v2 [X86]
spectre_v2_user=off [X86]
spec_store_bypass_disable=off [X86]
mds=off [X86]
auto (default)
Mitigate all CPU vulnerabilities, but leave SMT
enabled, even if it's vulnerable. This is for
users who don't want to be surprised by SMT
getting disabled across kernel upgrades, or who
have other ways of avoiding SMT-based attacks.
Equivalent to: (default behavior)
mminit_loglevel=
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
parameter allows control of the logging verbosity for
@@ -2510,7 +2558,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
-nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
+nospectre_v1 [PPC] Disable mitigations for Spectre Variant 1 (bounds
+ check bypass). With this option data leaks are possible
+ in the system.
+nospectre_v2 [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2
(indirect branch prediction) vulnerability. System may
allow data leaks with this option, which is equivalent
to spectre_v2=off.
@@ -3664,9 +3716,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
spectre_v2= [X86] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
The default operation protects the kernel from
user space attacks.
-on - unconditionally enable
-off - unconditionally disable
+on - unconditionally enable, implies
+ spectre_v2_user=on
+off - unconditionally disable, implies
+ spectre_v2_user=off
auto - kernel detects whether your CPU model is
vulnerable
@@ -3676,6 +3732,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
CONFIG_RETPOLINE configuration option, and the
compiler with which the kernel was built.
Selecting 'on' will also enable the mitigation
against user space to user space task attacks.
Selecting 'off' will disable both the kernel and
the user space protections.
Specific mitigations can also be selected manually:
retpoline - replace indirect branches
@@ -3685,6 +3747,48 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Not specifying this option is equivalent to
spectre_v2=auto.
spectre_v2_user=
[X86] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability between
user space tasks
on - Unconditionally enable mitigations. Is
enforced by spectre_v2=on
off - Unconditionally disable mitigations. Is
enforced by spectre_v2=off
prctl - Indirect branch speculation is enabled,
but mitigation can be enabled via prctl
per thread. The mitigation control state
is inherited on fork.
prctl,ibpb
- Like "prctl" above, but only STIBP is
controlled per thread. IBPB is issued
always when switching between different user
space processes.
seccomp
- Same as "prctl" above, but all seccomp
threads will enable the mitigation unless
they explicitly opt out.
seccomp,ibpb
- Like "seccomp" above, but only STIBP is
controlled per thread. IBPB is issued
always when switching between different
user space processes.
auto - Kernel selects the mitigation depending on
the available CPU features and vulnerability.
Default mitigation:
If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl"
Not specifying this option is equivalent to
spectre_v2_user=auto.
spec_store_bypass_disable=
[HW] Control Speculative Store Bypass (SSB) Disable mitigation
(Speculative Store Bypass vulnerability)


@@ -387,6 +387,7 @@ tcp_min_rtt_wlen - INTEGER
minimum RTT when it is moved to a longer path (e.g., due to traffic
engineering). A longer window makes the filter more resistant to RTT
inflations such as transient congestion. The unit is seconds.
Possible values: 0 - 86400 (1 day)
Default: 300
tcp_moderate_rcvbuf - BOOLEAN


@@ -92,3 +92,12 @@ Speculation misfeature controls
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
- PR_SPEC_INDIRECT_BRANCH: Indirect Branch Speculation in User Processes
(Mitigate Spectre V2 style attacks against user processes)
Invocations:
* prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
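A minimal user space sketch of the indirect branch invocations above (not
part of this patch; the constants are duplicated for builds against uapi
headers that predate this prctl)::

	#include <stdio.h>
	#include <sys/prctl.h>

	#ifndef PR_SPEC_INDIRECT_BRANCH
	#define PR_GET_SPECULATION_CTRL 52
	#define PR_SET_SPECULATION_CTRL 53
	#define PR_SPEC_INDIRECT_BRANCH 1
	#define PR_SPEC_DISABLE         (1UL << 2)
	#endif

	int main(void)
	{
		/* Query the per-task indirect branch speculation state. */
		long state = prctl(PR_GET_SPECULATION_CTRL,
				   PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);

		if (state < 0)
			perror("PR_GET_SPECULATION_CTRL");
		else
			printf("indirect branch speculation state: 0x%lx\n", state);

		/* Opt this task into the mitigation (effective in prctl/seccomp mode). */
		if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
			  PR_SPEC_DISABLE, 0, 0))
			perror("PR_SET_SPECULATION_CTRL");
		return 0;
	}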


@@ -365,11 +365,15 @@ autosuspend the interface's device. When the usage counter is = 0
then the interface is considered to be idle, and the kernel may
autosuspend the device.
-Drivers need not be concerned about balancing changes to the usage
-counter; the USB core will undo any remaining "get"s when a driver
-is unbound from its interface. As a corollary, drivers must not call
-any of the usb_autopm_* functions after their disconnect() routine has
-returned.
+Drivers must be careful to balance their overall changes to the usage
+counter. Unbalanced "get"s will remain in effect when a driver is
+unbound from its interface, preventing the device from going into
+runtime suspend should the interface be bound to a driver again. On
+the other hand, drivers are allowed to achieve this balance by calling
+the ``usb_autopm_*`` functions even after their ``disconnect`` routine
+has returned -- say from within a work-queue routine -- provided they
+retain an active reference to the interface (via ``usb_get_intf`` and
+``usb_put_intf``).
Drivers using the async routines are responsible for their own
synchronization and mutual exclusion.
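A hedged sketch of the deferred-release pattern described above; the
``my_dev`` structure and ``my_release_work`` function are hypothetical,
only the ``usb_*`` calls are real kernel API::

	#include <linux/slab.h>
	#include <linux/usb.h>
	#include <linux/workqueue.h>

	struct my_dev {				/* hypothetical driver state */
		struct usb_interface *intf;	/* ref taken via usb_get_intf() */
		struct work_struct release_work;
	};

	/* May legitimately run after disconnect() has returned, because we
	 * still hold an active reference to the interface.
	 */
	static void my_release_work(struct work_struct *work)
	{
		struct my_dev *dev = container_of(work, struct my_dev, release_work);

		usb_autopm_put_interface(dev->intf);	/* balance an earlier "get" */
		usb_put_intf(dev->intf);		/* drop our interface reference */
		kfree(dev);
	}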

Documentation/x86/mds.rst (new file)

@@ -0,0 +1,225 @@
Microarchitectural Data Sampling (MDS) mitigation
=================================================
.. _mds:
Overview
--------
Microarchitectural Data Sampling (MDS) is a family of side channel attacks
on internal buffers in Intel CPUs. The variants are:
- Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126)
- Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130)
- Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127)
- Microarchitectural Data Sampling Uncacheable Memory (MDSUM) (CVE-2019-11091)
MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a
dependent load (store-to-load forwarding) as an optimization. The forward
can also happen to a faulting or assisting load operation for a different
memory address, which can be exploited under certain conditions. Store
buffers are partitioned between Hyper-Threads so cross thread forwarding is
not possible. But if a thread enters or exits a sleep state the store
buffer is repartitioned which can expose data from one thread to the other.
MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage
L1 miss situations and to hold data which is returned or sent in response
to a memory or I/O operation. Fill buffers can forward data to a load
operation and also write data to the cache. When the fill buffer is
deallocated it can retain the stale data of the preceding operations which
can then be forwarded to a faulting or assisting load operation, which can
be exploited under certain conditions. Fill buffers are shared between
Hyper-Threads so cross thread leakage is possible.
MLPDS leaks Load Port Data. Load ports are used to perform load operations
from memory or I/O. The received data is then forwarded to the register
file or a subsequent operation. In some implementations the Load Port can
contain stale data from a previous operation which can be forwarded to
faulting or assisting loads under certain conditions, which again can be
exploited eventually. Load ports are shared between Hyper-Threads so cross
thread leakage is possible.
MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from
memory that takes a fault or assist can leave data in a microarchitectural
structure that may later be observed using one of the same methods used by
MSBDS, MFBDS or MLPDS.
Exposure assumptions
--------------------
It is assumed that attack code resides in user space or in a guest with one
exception. The rationale behind this assumption is that the code construct
needed for exploiting MDS requires:
- to control the load to trigger a fault or assist
- to have a disclosure gadget which exposes the speculatively accessed
data for consumption through a side channel.
- to control the pointer through which the disclosure gadget exposes the
data
The existence of such a construct in the kernel cannot be excluded with
100% certainty, but the complexity involved makes it extremely unlikely.
There is one exception, which is untrusted BPF. The functionality of
untrusted BPF is limited, but it needs to be thoroughly investigated
whether it can be used to create such a construct.
Mitigation strategy
-------------------
All variants have the same mitigation strategy at least for the single CPU
thread case (SMT off): Force the CPU to clear the affected buffers.
This is achieved by using the otherwise unused and obsolete VERW
instruction in combination with a microcode update. The microcode clears
the affected CPU buffers when the VERW instruction is executed.
For virtualization there are two ways to achieve CPU buffer clearing:
either via the modified VERW instruction or via the L1D Flush command.
The latter is issued when L1TF mitigation is enabled so the extra
VERW can be avoided. If the CPU is not affected by L1TF then VERW needs to
be issued.
If the VERW instruction with the supplied segment selector argument is
executed on a CPU without the microcode update there is no side effect
other than a small number of pointlessly wasted CPU cycles.
This does not protect against cross Hyper-Thread attacks except for MSBDS
which is only exploitable cross Hyper-thread when one of the Hyper-Threads
enters a C-state.
The kernel provides a function to invoke the buffer clearing:
mds_clear_cpu_buffers()
The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
(idle) transitions.
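On CPUs with the updated microcode this boils down to a VERW with a
memory operand; the mainline x86 helper is essentially the following
(shown here only as a reference sketch)::

	#include <asm/segment.h>	/* __KERNEL_DS */

	static inline void mds_clear_cpu_buffers(void)
	{
		static const u16 ds = __KERNEL_DS;

		/*
		 * The memory-operand form of VERW is required; the register
		 * form does not trigger the microcode buffer clearing.
		 */
		asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
	}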
As a special quirk to address virtualization scenarios where the host has
the microcode updated, but the hypervisor does not (yet) expose the
MD_CLEAR CPUID bit to guests, the kernel issues the VERW instruction in the
hope that it might actually clear the buffers. The state is reflected
accordingly.
According to current knowledge additional mitigations inside the kernel
itself are not required because the necessary gadgets to expose the leaked
data cannot be controlled in a way which allows exploitation from malicious
user space or VM guests.
Kernel internal mitigation modes
--------------------------------
======= ============================================================
off     Mitigation is disabled. Either the CPU is not affected or
        mds=off is supplied on the kernel command line

full    Mitigation is enabled. CPU is affected and MD_CLEAR is
        advertised in CPUID.

vmwerv  Mitigation is enabled. CPU is affected and MD_CLEAR is not
        advertised in CPUID. That is mainly for virtualization
        scenarios where the host has the updated microcode but the
        hypervisor does not expose MD_CLEAR in CPUID. It's a best
        effort approach without guarantee.
======= ============================================================
If the CPU is affected and mds=off is not supplied on the kernel command
line then the kernel selects the appropriate mitigation mode depending on
the availability of the MD_CLEAR CPUID bit.
Mitigation points
-----------------
1. Return to user space
^^^^^^^^^^^^^^^^^^^^^^^
When transitioning from kernel to user space the CPU buffers are flushed
on affected CPUs when the mitigation is not disabled on the kernel
command line. The mitigation is enabled through the static key
mds_user_clear.
The mitigation is invoked in prepare_exit_to_usermode() which covers
most of the kernel to user space transitions. There are a few exceptions
which are not invoking prepare_exit_to_usermode() on return to user
space. These exceptions use the paranoid exit code.
- Non Maskable Interrupt (NMI):
Access to sensitive data like keys or credentials in the NMI context
is mostly theoretical: the CPU can do prefetching or execute a
misspeculated code path and thereby fetch data which might end up
leaking through a buffer.
But for mounting other attacks the kernel stack address of the task is
already valuable information. So in full mitigation mode, the NMI is
mitigated on the return from do_nmi() to provide almost complete
coverage.
- Double fault (#DF):
A double fault is usually fatal, but the ESPFIX workaround, which can
be triggered from user space through modify_ldt(2) is a recoverable
double fault. #DF uses the paranoid exit path, so explicit mitigation
in the double fault handler is required.
- Machine Check Exception (#MC):
Another corner case is a #MC which hits between the CPU buffer clear
invocation and the actual return to user. As this still is in kernel
space it takes the paranoid exit path which does not clear the CPU
buffers. So the #MC handler repopulates the buffers to some
extent. Machine checks are not reliably controllable and the window is
extremely small so mitigation would just tick a checkbox that this
theoretical corner case is covered. To keep the amount of special
cases small, ignore #MC.
- Debug Exception (#DB):
This takes the paranoid exit path only when the INT1 breakpoint is in
kernel space. #DB on a user space address takes the regular exit path,
so no extra mitigation required.
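For reference, the mds_user_clear static key mentioned above gates the
flush in a trivial wrapper, roughly as follows (mainline sketch; the
idle path in the next section is gated the same way by mds_idle_clear)::

	DECLARE_STATIC_KEY_FALSE(mds_user_clear);

	static inline void mds_user_clear_cpu_buffers(void)
	{
		if (static_branch_likely(&mds_user_clear))
			mds_clear_cpu_buffers();
	}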
2. C-State transition
^^^^^^^^^^^^^^^^^^^^^
When a CPU goes idle and enters a C-State the CPU buffers need to be
cleared on affected CPUs when SMT is active. This addresses the
repartitioning of the store buffer when one of the Hyper-Threads enters
a C-State.
When SMT is inactive, i.e. either the CPU does not support it or all
sibling threads are offline, CPU buffer clearing is not required.
The idle clearing is enabled on CPUs which are only affected by MSBDS
and not by any other MDS variant. The other MDS variants cannot be
protected against cross Hyper-Thread attacks because the Fill Buffer and
the Load Ports are shared. So on CPUs affected by other variants, the
idle clearing would be a window dressing exercise and is therefore not
activated.
The invocation is controlled by the static key mds_idle_clear which is
switched depending on the chosen mitigation mode and the SMT state of
the system.
The buffer clear is only invoked before entering the C-State, to prevent
stale data from the idling CPU from spilling to the Hyper-Thread sibling
after the store buffer got repartitioned and all entries are available
to the non-idle sibling.
When coming out of idle the store buffer is partitioned again so each
sibling has half of it available. The CPU returning from idle could then
be speculatively exposed to contents of the sibling. The buffers are
flushed either on exit to user space or on VMENTER so malicious code
in user space or the guest cannot speculatively access them.
The mitigation is hooked into all variants of halt()/mwait(), but does
not cover the legacy ACPI IO-Port mechanism, because the ACPI idle driver
was superseded around 2010 by the intel_idle driver, which is preferred
on all affected CPUs expected to gain the MD_CLEAR functionality in
microcode. Aside from that, the IO-Port mechanism is a legacy interface
which is only used on older systems which are either not affected or do
not receive microcode updates anymore.


@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 4
-SUBLEVEL = 179
+SUBLEVEL = 180
EXTRAVERSION =
NAME = Blurry Fish Butt


@@ -90,6 +90,7 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet>;
phy-mode = "rgmii";
phy-reset-duration = <10>; /* in msecs */
phy-reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>;
phy-supply = <&vdd_eth_io_reg>;
status = "disabled";


@@ -300,7 +300,7 @@ static struct resource iop13xx_adma_2_resources[] = {
}
};
-static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64);
+static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(32);
static struct iop_adma_platform_data iop13xx_adma_0_data = {
.hw_id = 0,
.pool_size = PAGE_SIZE,
@@ -324,7 +324,7 @@ static struct platform_device iop13xx_adma_0_channel = {
.resource = iop13xx_adma_0_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
-.coherent_dma_mask = DMA_BIT_MASK(64),
+.coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_0_data,
},
};
@@ -336,7 +336,7 @@ static struct platform_device iop13xx_adma_1_channel = {
.resource = iop13xx_adma_1_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
-.coherent_dma_mask = DMA_BIT_MASK(64),
+.coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_1_data,
},
};
@@ -348,7 +348,7 @@ static struct platform_device iop13xx_adma_2_channel = {
.resource = iop13xx_adma_2_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
-.coherent_dma_mask = DMA_BIT_MASK(64),
+.coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_2_data,
},
};


@@ -152,7 +152,7 @@ static struct resource iop13xx_tpmi_3_resources[] = {
}
};
-u64 iop13xx_tpmi_mask = DMA_BIT_MASK(64);
+u64 iop13xx_tpmi_mask = DMA_BIT_MASK(32);
static struct platform_device iop13xx_tpmi_0_device = {
.name = "iop-tpmi",
.id = 0,
@@ -160,7 +160,7 @@ static struct platform_device iop13xx_tpmi_0_device = {
.resource = iop13xx_tpmi_0_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
-.coherent_dma_mask = DMA_BIT_MASK(64),
+.coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -171,7 +171,7 @@ static struct platform_device iop13xx_tpmi_1_device = {
.resource = iop13xx_tpmi_1_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
-.coherent_dma_mask = DMA_BIT_MASK(64),
+.coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -182,7 +182,7 @@ static struct platform_device iop13xx_tpmi_2_device = {
.resource = iop13xx_tpmi_2_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
-.coherent_dma_mask = DMA_BIT_MASK(64),
+.coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -193,7 +193,7 @@ static struct platform_device iop13xx_tpmi_3_device = {
.resource = iop13xx_tpmi_3_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
-.coherent_dma_mask = DMA_BIT_MASK(64),
+.coherent_dma_mask = DMA_BIT_MASK(32),
},
};


@@ -143,7 +143,7 @@ struct platform_device iop3xx_dma_0_channel = {
.resource = iop3xx_dma_0_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
-.coherent_dma_mask = DMA_BIT_MASK(64),
+.coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_dma_0_data,
},
};
@@ -155,7 +155,7 @@ struct platform_device iop3xx_dma_1_channel = {
.resource = iop3xx_dma_1_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
-.coherent_dma_mask = DMA_BIT_MASK(64),
+.coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_dma_1_data,
},
};
@@ -167,7 +167,7 @@ struct platform_device iop3xx_aau_channel = {
.resource = iop3xx_aau_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
-.coherent_dma_mask = DMA_BIT_MASK(64),
+.coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_aau_data,
},
};


@@ -645,7 +645,7 @@ static struct platform_device orion_xor0_shared = {
.resource = orion_xor0_shared_resources,
.dev = {
.dma_mask = &orion_xor_dmamask,
-.coherent_dma_mask = DMA_BIT_MASK(64),
+.coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = &orion_xor0_pdata,
},
};
@@ -706,7 +706,7 @@ static struct platform_device orion_xor1_shared = {
.resource = orion_xor1_shared_resources,
.dev = {
.dma_mask = &orion_xor_dmamask,
-.coherent_dma_mask = DMA_BIT_MASK(64),
+.coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = &orion_xor1_pdata,
},
};


@@ -126,7 +126,7 @@ trace_a_syscall:
subu t1, v0, __NR_O32_Linux
move a1, v0
bnez t1, 1f /* __NR_syscall at offset 0 */
-lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
+ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
.set pop
1: jal syscall_trace_enter


@@ -137,7 +137,7 @@ config PPC
select GENERIC_SMP_IDLE_THREAD
select GENERIC_CMOS_UPDATE
select GENERIC_TIME_VSYSCALL_OLD
-select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64
+select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC
select GENERIC_CLOCKEVENTS
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
@@ -163,6 +163,11 @@ config PPC
select ARCH_HAS_DMA_SET_COHERENT_MASK
select HAVE_ARCH_SECCOMP_FILTER
config PPC_BARRIER_NOSPEC
bool
default y
depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN


@@ -0,0 +1,21 @@
#ifndef _ASM_POWERPC_ASM_PROTOTYPES_H
#define _ASM_POWERPC_ASM_PROTOTYPES_H
/*
* This file is for prototypes of C functions that are only called
* from asm, and any associated variables.
*
* Copyright 2016, Daniel Axtens, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*/
/* Patch sites */
extern s32 patch__call_flush_count_cache;
extern s32 patch__flush_count_cache_return;
extern long flush_count_cache;
#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */


@@ -92,4 +92,25 @@ do { \
#define smp_mb__after_atomic() smp_mb()
#define smp_mb__before_spinlock() smp_mb()
#ifdef CONFIG_PPC_BOOK3S_64
#define NOSPEC_BARRIER_SLOT nop
#elif defined(CONFIG_PPC_FSL_BOOK3E)
#define NOSPEC_BARRIER_SLOT nop; nop
#endif
#ifdef CONFIG_PPC_BARRIER_NOSPEC
/*
* Prevent execution of subsequent instructions until preceding branches have
* been fully resolved and are no longer executing speculatively.
*/
#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; NOSPEC_BARRIER_SLOT
// This also acts as a compiler barrier due to the memory clobber.
#define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory")
#else /* !CONFIG_PPC_BARRIER_NOSPEC */
#define barrier_nospec_asm
#define barrier_nospec()
#endif /* CONFIG_PPC_BARRIER_NOSPEC */
#endif /* _ASM_POWERPC_BARRIER_H */
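As an illustrative sketch (not part of this diff), the barrier is meant to
be dropped in after a user-controlled bounds check, in the style of the
__get_user() hunk at the end of this patch; the function below is
hypothetical:

	#include <asm/barrier.h>	/* barrier_nospec() */

	/* Hypothetical bounds-checked load hardened against Spectre v1. */
	static unsigned long load_checked(const unsigned long *array,
					  unsigned long idx, unsigned long size)
	{
		unsigned long val = 0;

		if (idx < size) {
			/* No speculation past the bounds check. */
			barrier_nospec();
			val = array[idx];
		}
		return val;
	}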


@@ -0,0 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Copyright 2018, Michael Ellerman, IBM Corporation.
*/
#ifndef _ASM_POWERPC_CODE_PATCHING_ASM_H
#define _ASM_POWERPC_CODE_PATCHING_ASM_H
/* Define a "site" that can be patched */
.macro patch_site label name
.pushsection ".rodata"
.balign 4
.global \name
\name:
.4byte \label - .
.popsection
.endm
#endif /* _ASM_POWERPC_CODE_PATCHING_ASM_H */


@@ -28,6 +28,8 @@ unsigned int create_cond_branch(const unsigned int *addr,
unsigned long target, int flags);
int patch_branch(unsigned int *addr, unsigned long target, int flags);
int patch_instruction(unsigned int *addr, unsigned int instr);
int patch_instruction_site(s32 *addr, unsigned int instr);
int patch_branch_site(s32 *site, unsigned long target, int flags);
int instr_is_relative_branch(unsigned int instr);
int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr);
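As a usage sketch (modelled on what the count cache flush patches in this
series do with these helpers), a site declared with the patch_site
assembler macro can later be rewritten at runtime, e.g. turned back into
a nop:

	#include <asm/asm-prototypes.h>	/* patch__call_flush_count_cache */
	#include <asm/code-patching.h>	/* patch_instruction_site() */
	#include <asm/ppc-opcode.h>	/* PPC_INST_NOP */

	/* Disable the count cache flush by nop-ing out its call site. */
	static void disable_count_cache_flush(void)
	{
		patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP);
	}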


@@ -50,6 +50,27 @@
#define EX_PPR 88 /* SMT thread status register (priority) */
#define EX_CTR 96
#define STF_ENTRY_BARRIER_SLOT \
STF_ENTRY_BARRIER_FIXUP_SECTION; \
nop; \
nop; \
nop
#define STF_EXIT_BARRIER_SLOT \
STF_EXIT_BARRIER_FIXUP_SECTION; \
nop; \
nop; \
nop; \
nop; \
nop; \
nop
/*
* r10 must be free to use, r13 must be paca
*/
#define INTERRUPT_TO_KERNEL \
STF_ENTRY_BARRIER_SLOT
/*
* Macros for annotating the expected destination of (h)rfid
*
@@ -66,16 +87,19 @@
rfid
#define RFI_TO_USER \
STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
rfid; \
b rfi_flush_fallback
#define RFI_TO_USER_OR_KERNEL \
STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
rfid; \
b rfi_flush_fallback
#define RFI_TO_GUEST \
STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
rfid; \
b rfi_flush_fallback
@@ -84,21 +108,25 @@
hrfid
#define HRFI_TO_USER \
STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
hrfid; \
b hrfi_flush_fallback
#define HRFI_TO_USER_OR_KERNEL \
STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
hrfid; \
b hrfi_flush_fallback
#define HRFI_TO_GUEST \
STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
hrfid; \
b hrfi_flush_fallback
#define HRFI_TO_UNKNOWN \
STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
hrfid; \
b hrfi_flush_fallback
@@ -226,6 +254,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define __EXCEPTION_PROLOG_1(area, extra, vec) \
OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \
OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \
INTERRUPT_TO_KERNEL; \
SAVE_CTR(r10, area); \
mfcr r9; \
extra(vec); \
@@ -512,6 +541,12 @@ label##_relon_hv: \
#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \
__MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra)
#define MASKABLE_EXCEPTION_OOL(vec, label) \
.globl label##_ool; \
label##_ool: \
EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec); \
EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_STD);
#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \
. = loc; \
.globl label##_pSeries; \


@@ -184,6 +184,22 @@ label##3: \
FTR_ENTRY_OFFSET label##1b-label##3b; \
.popsection;
#define STF_ENTRY_BARRIER_FIXUP_SECTION \
953: \
.pushsection __stf_entry_barrier_fixup,"a"; \
.align 2; \
954: \
FTR_ENTRY_OFFSET 953b-954b; \
.popsection;
#define STF_EXIT_BARRIER_FIXUP_SECTION \
955: \
.pushsection __stf_exit_barrier_fixup,"a"; \
.align 2; \
956: \
FTR_ENTRY_OFFSET 955b-956b; \
.popsection;
#define RFI_FLUSH_FIXUP_SECTION \
951: \
.pushsection __rfi_flush_fixup,"a"; \
@@ -192,10 +208,34 @@ label##3: \
FTR_ENTRY_OFFSET 951b-952b; \
.popsection;
#define NOSPEC_BARRIER_FIXUP_SECTION \
953: \
.pushsection __barrier_nospec_fixup,"a"; \
.align 2; \
954: \
FTR_ENTRY_OFFSET 953b-954b; \
.popsection;
#define START_BTB_FLUSH_SECTION \
955: \
#define END_BTB_FLUSH_SECTION \
956: \
.pushsection __btb_flush_fixup,"a"; \
.align 2; \
957: \
FTR_ENTRY_OFFSET 955b-957b; \
FTR_ENTRY_OFFSET 956b-957b; \
.popsection;
#ifndef __ASSEMBLY__
extern long stf_barrier_fallback;
extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
extern long __start__btb_flush_fixup, __stop__btb_flush_fixup;
#endif


@@ -292,10 +292,15 @@
#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
#define H_CPU_CHAR_BRANCH_HINTS_HONORED (1ull << 58) // IBM bit 5
#define H_CPU_CHAR_THREAD_RECONFIG_CTRL (1ull << 57) // IBM bit 6
#define H_CPU_CHAR_COUNT_CACHE_DISABLED (1ull << 56) // IBM bit 7
#define H_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54) // IBM bit 9
#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
#define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5
#ifndef __ASSEMBLY__
#include <linux/types.h>


@@ -199,8 +199,7 @@ struct paca_struct {
*/
u64 exrfi[13] __aligned(0x80);
void *rfi_flush_fallback_area;
-u64 l1d_flush_congruence;
-u64 l1d_flush_sets;
+u64 l1d_flush_size;
#endif
};


@@ -147,6 +147,7 @@
#define PPC_INST_LWSYNC 0x7c2004ac
#define PPC_INST_SYNC 0x7c0004ac
#define PPC_INST_SYNC_MASK 0xfc0007fe
#define PPC_INST_ISYNC 0x4c00012c
#define PPC_INST_LXVD2X 0x7c000698
#define PPC_INST_MCRXR 0x7c000400
#define PPC_INST_MCRXR_MASK 0xfc0007fe


@@ -821,4 +821,15 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945)
.long 0x2400004c /* rfid */
#endif /* !CONFIG_PPC_BOOK3E */
#endif /* __ASSEMBLY__ */
#ifdef CONFIG_PPC_FSL_BOOK3E
#define BTB_FLUSH(reg) \
lis reg,BUCSR_INIT@h; \
ori reg,reg,BUCSR_INIT@l; \
mtspr SPRN_BUCSR,reg; \
isync;
#else
#define BTB_FLUSH(reg)
#endif /* CONFIG_PPC_FSL_BOOK3E */
#endif /* _ASM_POWERPC_PPC_ASM_H */


@@ -41,7 +41,7 @@
#if defined(CONFIG_PPC_BOOK3E_64)
#define MSR_64BIT MSR_CM
-#define MSR_ (MSR_ME | MSR_CE)
+#define MSR_ (MSR_ME | MSR_RI | MSR_CE)
#define MSR_KERNEL (MSR_ | MSR_64BIT)
#define MSR_USER32 (MSR_ | MSR_PR | MSR_EE)
#define MSR_USER64 (MSR_USER32 | MSR_64BIT)


@@ -0,0 +1,92 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Security related feature bit definitions.
*
* Copyright 2018, Michael Ellerman, IBM Corporation.
*/
#ifndef _ASM_POWERPC_SECURITY_FEATURES_H
#define _ASM_POWERPC_SECURITY_FEATURES_H
extern unsigned long powerpc_security_features;
extern bool rfi_flush;
/* These are bit flags */
enum stf_barrier_type {
STF_BARRIER_NONE = 0x1,
STF_BARRIER_FALLBACK = 0x2,
STF_BARRIER_EIEIO = 0x4,
STF_BARRIER_SYNC_ORI = 0x8,
};
void setup_stf_barrier(void);
void do_stf_barrier_fixups(enum stf_barrier_type types);
void setup_count_cache_flush(void);
static inline void security_ftr_set(unsigned long feature)
{
powerpc_security_features |= feature;
}
static inline void security_ftr_clear(unsigned long feature)
{
powerpc_security_features &= ~feature;
}
static inline bool security_ftr_enabled(unsigned long feature)
{
return !!(powerpc_security_features & feature);
}
// Features indicating support for Spectre/Meltdown mitigations
// The L1-D cache can be flushed with ori r30,r30,0
#define SEC_FTR_L1D_FLUSH_ORI30 0x0000000000000001ull
// The L1-D cache can be flushed with mtspr 882,r0 (aka SPRN_TRIG2)
#define SEC_FTR_L1D_FLUSH_TRIG2 0x0000000000000002ull
// ori r31,r31,0 acts as a speculation barrier
#define SEC_FTR_SPEC_BAR_ORI31 0x0000000000000004ull
// Speculation past bctr is disabled
#define SEC_FTR_BCCTRL_SERIALISED 0x0000000000000008ull
// Entries in L1-D are private to a SMT thread
#define SEC_FTR_L1D_THREAD_PRIV 0x0000000000000010ull
// Indirect branch prediction cache disabled
#define SEC_FTR_COUNT_CACHE_DISABLED 0x0000000000000020ull
// bcctr 2,0,0 triggers a hardware assisted count cache flush
#define SEC_FTR_BCCTR_FLUSH_ASSIST 0x0000000000000800ull
// Features indicating need for Spectre/Meltdown mitigations
// The L1-D cache should be flushed on MSR[HV] 1->0 transition (hypervisor to guest)
#define SEC_FTR_L1D_FLUSH_HV 0x0000000000000040ull
// The L1-D cache should be flushed on MSR[PR] 0->1 transition (kernel to userspace)
#define SEC_FTR_L1D_FLUSH_PR 0x0000000000000080ull
// A speculation barrier should be used for bounds checks (Spectre variant 1)
#define SEC_FTR_BNDS_CHK_SPEC_BAR 0x0000000000000100ull
// Firmware configuration indicates user favours security over performance
#define SEC_FTR_FAVOUR_SECURITY 0x0000000000000200ull
// Software required to flush count cache on context switch
#define SEC_FTR_FLUSH_COUNT_CACHE 0x0000000000000400ull
// Features enabled by default
#define SEC_FTR_DEFAULT \
(SEC_FTR_L1D_FLUSH_HV | \
SEC_FTR_L1D_FLUSH_PR | \
SEC_FTR_BNDS_CHK_SPEC_BAR | \
SEC_FTR_FAVOUR_SECURITY)
#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
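A short usage sketch of the accessors above (illustrative only; the particular flag choices are arbitrary):

	static void example_init(void)
	{
		/* Firmware reported that ori 31,31,0 works as a barrier. */
		security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);

		/* Assume this platform does not need the kernel-to-user flush. */
		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);

		if (security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY))
			pr_info("firmware favours security over performance\n");
	}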


@@ -8,6 +8,7 @@ extern void ppc_printk_progress(char *s, unsigned short hex);
extern unsigned int rtas_data;
extern unsigned long long memory_limit;
extern bool init_mem_is_free;
extern unsigned long klimit;
extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
@@ -36,8 +37,28 @@ enum l1d_flush_type {
L1D_FLUSH_MTTRIG = 0x8,
};
void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
void setup_rfi_flush(enum l1d_flush_type, bool enable);
void do_rfi_flush_fixups(enum l1d_flush_type types);
#ifdef CONFIG_PPC_BARRIER_NOSPEC
void setup_barrier_nospec(void);
#else
static inline void setup_barrier_nospec(void) { };
#endif
void do_barrier_nospec_fixups(bool enable);
extern bool barrier_nospec_enabled;
#ifdef CONFIG_PPC_BARRIER_NOSPEC
void do_barrier_nospec_fixups_range(bool enable, void *start, void *end);
#else
static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { };
#endif
#ifdef CONFIG_PPC_FSL_BOOK3E
void setup_spectre_v2(void);
#else
static inline void setup_spectre_v2(void) {};
#endif
void do_btb_flush_fixups(void);
#endif /* !__ASSEMBLY__ */


@@ -269,6 +269,7 @@ do { \
__chk_user_ptr(ptr); \
if (!is_kernel_addr((unsigned long)__gu_addr)) \
might_fault(); \
barrier_nospec(); \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
(x) = (__typeof__(*(ptr)))__gu_val; \
__gu_err; \
@@ -283,6 +284,7 @@ do { \
__chk_user_ptr(ptr); \
if (!is_kernel_addr((unsigned long)__gu_addr)) \
might_fault(); \
barrier_nospec(); \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
__gu_err; \
@@ -295,8 +297,10 @@ do { \
unsigned long __gu_val = 0; \
__typeof__(*(ptr)) __user *__gu_addr = (ptr); \
might_fault(); \
if (access_ok(VERIFY_READ, __gu_addr, (size))) \
if (access_ok(VERIFY_READ, __gu_addr, (size))) { \
barrier_nospec(); \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
} \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
__gu_err; \
})
@@ -307,6 +311,7 @@ do { \
unsigned long __gu_val; \
__typeof__(*(ptr)) __user *__gu_addr = (ptr); \
__chk_user_ptr(ptr); \
barrier_nospec(); \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
__gu_err; \
@@ -323,8 +328,10 @@ extern unsigned long __copy_tofrom_user(void __user *to,
static inline unsigned long copy_from_user(void *to,
const void __user *from, unsigned long n)
{
if (likely(access_ok(VERIFY_READ, from, n)))
if (likely(access_ok(VERIFY_READ, from, n))) {
barrier_nospec();
return __copy_tofrom_user((__force void __user *)to, from, n);
}
memset(to, 0, n);
return n;
}
@@ -359,21 +366,27 @@ static inline unsigned long __copy_from_user_inatomic(void *to,
switch (n) {
case 1:
barrier_nospec();
__get_user_size(*(u8 *)to, from, 1, ret);
break;
case 2:
barrier_nospec();
__get_user_size(*(u16 *)to, from, 2, ret);
break;
case 4:
barrier_nospec();
__get_user_size(*(u32 *)to, from, 4, ret);
break;
case 8:
barrier_nospec();
__get_user_size(*(u64 *)to, from, 8, ret);
break;
}
if (ret == 0)
return 0;
}
barrier_nospec();
return __copy_tofrom_user((__force void __user *)to, from, n);
}
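The barrier placement above targets the classic Spectre v1 pattern, sketched below (hypothetical gadget, not from this patch; 'table' and 'table_size' are invented names):

	extern unsigned long table[];
	extern unsigned long table_size;

	unsigned long gadget(unsigned long index)
	{
		if (index < table_size) {
			/*
			 * Without the barrier, the bounds check can be
			 * speculatively bypassed, letting 'index' select an
			 * out-of-bounds, secret-dependent load.
			 */
			barrier_nospec();
			return table[index];
		}
		return 0;
	}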
@@ -400,6 +413,7 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to,
if (ret == 0)
return 0;
}
return __copy_tofrom_user(to, (__force const void __user *)from, n);
}


@@ -44,6 +44,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o
obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o
obj-$(CONFIG_PPC_970_NAP) += idle_power4.o


@@ -245,8 +245,7 @@ int main(void)
DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce));
DEFINE(PACA_RFI_FLUSH_FALLBACK_AREA, offsetof(struct paca_struct, rfi_flush_fallback_area));
DEFINE(PACA_EXRFI, offsetof(struct paca_struct, exrfi));
DEFINE(PACA_L1D_FLUSH_CONGRUENCE, offsetof(struct paca_struct, l1d_flush_congruence));
DEFINE(PACA_L1D_FLUSH_SETS, offsetof(struct paca_struct, l1d_flush_sets));
DEFINE(PACA_L1D_FLUSH_SIZE, offsetof(struct paca_struct, l1d_flush_size));
#endif
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));


@@ -33,6 +33,7 @@
#include <asm/unistd.h>
#include <asm/ftrace.h>
#include <asm/ptrace.h>
#include <asm/barrier.h>
/*
* MSR_KERNEL is > 0x10000 on 4xx/Book-E since it includes MSR_CE.
@@ -340,6 +341,15 @@ syscall_dotrace_cont:
ori r10,r10,sys_call_table@l
slwi r0,r0,2
bge- 66f
barrier_nospec_asm
/*
* Prevent the load of the handler below (based on the user-passed
* system call number) being speculatively executed until the test
* against NR_syscalls and branch to .66f above has
* committed.
*/
lwzx r10,r10,r0 /* Fetch system call handler [ptr] */
mtlr r10
addi r9,r1,STACK_FRAME_OVERHEAD


@@ -25,6 +25,7 @@
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/thread_info.h>
#include <asm/code-patching-asm.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
@@ -36,6 +37,7 @@
#include <asm/hw_irq.h>
#include <asm/context_tracking.h>
#include <asm/tm.h>
#include <asm/barrier.h>
#ifdef CONFIG_PPC_BOOK3S
#include <asm/exception-64s.h>
#else
@@ -75,6 +77,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
std r0,GPR0(r1)
std r10,GPR1(r1)
beq 2f /* if from kernel mode */
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
ACCOUNT_CPU_USER_ENTRY(r10, r11)
2: std r2,GPR2(r1)
std r3,GPR3(r1)
@@ -177,6 +184,15 @@ system_call: /* label this so stack traces look sane */
clrldi r8,r8,32
15:
slwi r0,r0,4
barrier_nospec_asm
/*
* Prevent the load of the handler below (based on the user-passed
* system call number) being speculatively executed until the test
* against NR_syscalls and branch to .Lsyscall_enosys above has
* committed.
*/
ldx r12,r11,r0 /* Fetch system call handler [ptr] */
mtctr r12
bctrl /* Call handler */
@@ -440,6 +456,57 @@ _GLOBAL(ret_from_kernel_thread)
li r3,0
b .Lsyscall_exit
#ifdef CONFIG_PPC_BOOK3S_64
#define FLUSH_COUNT_CACHE \
1: nop; \
patch_site 1b, patch__call_flush_count_cache
#define BCCTR_FLUSH .long 0x4c400420
.macro nops number
.rept \number
nop
.endr
.endm
.balign 32
.global flush_count_cache
flush_count_cache:
/* Save LR into r9 */
mflr r9
.rept 64
bl .+4
.endr
b 1f
nops 6
.balign 32
/* Restore LR */
1: mtlr r9
li r9,0x7fff
mtctr r9
BCCTR_FLUSH
2: nop
patch_site 2b, patch__flush_count_cache_return
nops 3
.rept 278
.balign 32
BCCTR_FLUSH
nops 7
.endr
blr
#else
#define FLUSH_COUNT_CACHE
#endif /* CONFIG_PPC_BOOK3S_64 */
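The "1: nop; patch_site 1b, ..." idiom above records the nop's address in a table as a 32-bit self-relative offset, so it can later be patched into a branch to flush_count_cache (see toggle_count_cache_flush() and patch_branch_site() further down in this series). Resolving a recorded site, as a sketch:

	/* Recover the instruction address a patch_site entry points at. */
	static unsigned int *site_to_addr(s32 *site)
	{
		return (unsigned int *)((unsigned long)site + *site);
	}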
/*
* This routine switches between two different tasks. The process
* state of one is saved on its kernel stack. Then the state
@@ -503,6 +570,8 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
#endif
FLUSH_COUNT_CACHE
#ifdef CONFIG_SMP
/* We need a sync somewhere here to make sure that if the
* previous task gets rescheduled on another CPU, it sees all


@@ -295,7 +295,8 @@ ret_from_mc_except:
andi. r10,r11,MSR_PR; /* save stack pointer */ \
beq 1f; /* branch around if supervisor */ \
ld r1,PACAKSAVE(r13); /* get kernel stack coming from usr */\
1: cmpdi cr1,r1,0; /* check if SP makes sense */ \
1: type##_BTB_FLUSH \
cmpdi cr1,r1,0; /* check if SP makes sense */ \
bge- cr1,exc_##n##_bad_stack;/* bad stack (TODO: out of line) */ \
mfspr r10,SPRN_##type##_SRR0; /* read SRR0 before touching stack */
@@ -327,6 +328,30 @@ ret_from_mc_except:
#define SPRN_MC_SRR0 SPRN_MCSRR0
#define SPRN_MC_SRR1 SPRN_MCSRR1
#ifdef CONFIG_PPC_FSL_BOOK3E
#define GEN_BTB_FLUSH \
START_BTB_FLUSH_SECTION \
beq 1f; \
BTB_FLUSH(r10) \
1: \
END_BTB_FLUSH_SECTION
#define CRIT_BTB_FLUSH \
START_BTB_FLUSH_SECTION \
BTB_FLUSH(r10) \
END_BTB_FLUSH_SECTION
#define DBG_BTB_FLUSH CRIT_BTB_FLUSH
#define MC_BTB_FLUSH CRIT_BTB_FLUSH
#define GDBELL_BTB_FLUSH GEN_BTB_FLUSH
#else
#define GEN_BTB_FLUSH
#define CRIT_BTB_FLUSH
#define DBG_BTB_FLUSH
#define MC_BTB_FLUSH
#define GDBELL_BTB_FLUSH
#endif
#define NORMAL_EXCEPTION_PROLOG(n, intnum, addition) \
EXCEPTION_PROLOG(n, intnum, GEN, addition##_GEN(n))


@@ -36,6 +36,7 @@ BEGIN_FTR_SECTION \
END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
mr r9,r13 ; \
GET_PACA(r13) ; \
INTERRUPT_TO_KERNEL ; \
mfspr r11,SPRN_SRR0 ; \
0:
@@ -292,7 +293,9 @@ hardware_interrupt_hv:
. = 0x900
.globl decrementer_pSeries
decrementer_pSeries:
_MASKABLE_EXCEPTION_PSERIES(0x900, decrementer, EXC_STD, SOFTEN_TEST_PR)
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b decrementer_ool
STD_EXCEPTION_HV(0x980, 0x982, hdecrementer)
@@ -319,6 +322,7 @@ system_call_pSeries:
OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);
HMT_MEDIUM;
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR);
mfcr r9
KVMTEST(0xc00)
@@ -607,6 +611,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.align 7
/* moved from 0xe00 */
MASKABLE_EXCEPTION_OOL(0x900, decrementer)
STD_EXCEPTION_HV_OOL(0xe02, h_data_storage)
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02)
STD_EXCEPTION_HV_OOL(0xe22, h_instr_storage)
@@ -1564,6 +1569,21 @@ power4_fixup_nap:
blr
#endif
.balign 16
.globl stf_barrier_fallback
stf_barrier_fallback:
std r9,PACA_EXRFI+EX_R9(r13)
std r10,PACA_EXRFI+EX_R10(r13)
sync
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
ori 31,31,0
.rept 14
b 1f
1:
.endr
blr
.globl rfi_flush_fallback
rfi_flush_fallback:
SET_SCRATCH0(r13);
@@ -1571,39 +1591,37 @@ rfi_flush_fallback:
std r9,PACA_EXRFI+EX_R9(r13)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
std r12,PACA_EXRFI+EX_R12(r13)
std r8,PACA_EXRFI+EX_R13(r13)
mfctr r9
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
ld r11,PACA_L1D_FLUSH_SETS(r13)
ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
/*
* The load addresses are at staggered offsets within cachelines,
* which suits some pipelines better (on others it should not
* hurt).
*/
addi r12,r12,8
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
1: li r8,0
.rept 8 /* 8-way set associative */
ldx r11,r10,r8
add r8,r8,r12
xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
add r8,r8,r11 // Add 0, this creates a dependency on the ldx
.endr
addi r10,r10,128 /* 128 byte cache line */
/*
* The load addresses are at staggered offsets within cachelines,
* which suits some pipelines better (on others it should not
* hurt).
*/
1:
ld r11,(0x80 + 8)*0(r10)
ld r11,(0x80 + 8)*1(r10)
ld r11,(0x80 + 8)*2(r10)
ld r11,(0x80 + 8)*3(r10)
ld r11,(0x80 + 8)*4(r10)
ld r11,(0x80 + 8)*5(r10)
ld r11,(0x80 + 8)*6(r10)
ld r11,(0x80 + 8)*7(r10)
addi r10,r10,0x80*8
bdnz 1b
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
ld r11,PACA_EXRFI+EX_R11(r13)
ld r12,PACA_EXRFI+EX_R12(r13)
ld r8,PACA_EXRFI+EX_R13(r13)
GET_SCRATCH0(r13);
rfid
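On the arithmetic of the rewritten loop: srdi r11,r11,(7 + 3) divides the flush size by 1024, i.e. one 128-byte line times the 8x unroll. Assuming, say, a 64 KiB L1-D, that gives 64 iterations; each iteration issues eight loads staggered 0x88 bytes apart (one per 128-byte line, at increasing intra-line offsets) and then advances the base by 0x80 * 8 = 1 KiB, so every line of a cache-sized region is touched exactly once. The identical sequence appears in hrfi_flush_fallback below.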
@@ -1614,39 +1632,37 @@ hrfi_flush_fallback:
std r9,PACA_EXRFI+EX_R9(r13)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
std r12,PACA_EXRFI+EX_R12(r13)
std r8,PACA_EXRFI+EX_R13(r13)
mfctr r9
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
ld r11,PACA_L1D_FLUSH_SETS(r13)
ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
/*
* The load addresses are at staggered offsets within cachelines,
* which suits some pipelines better (on others it should not
* hurt).
*/
addi r12,r12,8
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
1: li r8,0
.rept 8 /* 8-way set associative */
ldx r11,r10,r8
add r8,r8,r12
xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
add r8,r8,r11 // Add 0, this creates a dependency on the ldx
.endr
addi r10,r10,128 /* 128 byte cache line */
/*
* The load addresses are at staggered offsets within cachelines,
* which suits some pipelines better (on others it should not
* hurt).
*/
1:
ld r11,(0x80 + 8)*0(r10)
ld r11,(0x80 + 8)*1(r10)
ld r11,(0x80 + 8)*2(r10)
ld r11,(0x80 + 8)*3(r10)
ld r11,(0x80 + 8)*4(r10)
ld r11,(0x80 + 8)*5(r10)
ld r11,(0x80 + 8)*6(r10)
ld r11,(0x80 + 8)*7(r10)
addi r10,r10,0x80*8
bdnz 1b
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
ld r11,PACA_EXRFI+EX_R11(r13)
ld r12,PACA_EXRFI+EX_R12(r13)
ld r8,PACA_EXRFI+EX_R13(r13)
GET_SCRATCH0(r13);
hrfid


@@ -31,6 +31,16 @@
*/
#define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4))
#ifdef CONFIG_PPC_FSL_BOOK3E
#define BOOKE_CLEAR_BTB(reg) \
START_BTB_FLUSH_SECTION \
BTB_FLUSH(reg) \
END_BTB_FLUSH_SECTION
#else
#define BOOKE_CLEAR_BTB(reg)
#endif
#define NORMAL_EXCEPTION_PROLOG(intno) \
mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
mfspr r10, SPRN_SPRG_THREAD; \
@@ -42,6 +52,7 @@
andi. r11, r11, MSR_PR; /* check whether user or kernel */\
mr r11, r1; \
beq 1f; \
BOOKE_CLEAR_BTB(r11) \
/* if from user, start at top of this thread's kernel stack */ \
lwz r11, THREAD_INFO-THREAD(r10); \
ALLOC_STACK_FRAME(r11, THREAD_SIZE); \
@@ -127,6 +138,7 @@
stw r9,_CCR(r8); /* save CR on stack */\
mfspr r11,exc_level_srr1; /* check whether user or kernel */\
DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \
BOOKE_CLEAR_BTB(r10) \
andi. r11,r11,MSR_PR; \
mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\


@@ -451,6 +451,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfcr r13
stw r13, THREAD_NORMSAVE(3)(r10)
DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1
START_BTB_FLUSH_SECTION
mfspr r11, SPRN_SRR1
andi. r10,r11,MSR_PR
beq 1f
BTB_FLUSH(r10)
1:
END_BTB_FLUSH_SECTION
mfspr r10, SPRN_DEAR /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
@@ -545,6 +552,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfcr r13
stw r13, THREAD_NORMSAVE(3)(r10)
DO_KVM BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1
START_BTB_FLUSH_SECTION
mfspr r11, SPRN_SRR1
andi. r10,r11,MSR_PR
beq 1f
BTB_FLUSH(r10)
1:
END_BTB_FLUSH_SECTION
mfspr r10, SPRN_SRR0 /* Get faulting address */
/* If we are faulting a kernel address, we have to use the


@@ -67,7 +67,15 @@ int module_finalize(const Elf_Ehdr *hdr,
do_feature_fixups(powerpc_firmware_features,
(void *)sect->sh_addr,
(void *)sect->sh_addr + sect->sh_size);
#endif
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_PPC_BARRIER_NOSPEC
sect = find_section(hdr, sechdrs, "__spec_barrier_fixup");
if (sect != NULL)
do_barrier_nospec_fixups_range(barrier_nospec_enabled,
(void *)sect->sh_addr,
(void *)sect->sh_addr + sect->sh_size);
#endif /* CONFIG_PPC_BARRIER_NOSPEC */
sect = find_section(hdr, sechdrs, "__lwsync_fixup");
if (sect != NULL)


@@ -0,0 +1,434 @@
// SPDX-License-Identifier: GPL-2.0+
//
// Security related flags and so on.
//
// Copyright 2018, Michael Ellerman, IBM Corporation.
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/seq_buf.h>
#include <asm/debug.h>
#include <asm/asm-prototypes.h>
#include <asm/code-patching.h>
#include <asm/security_features.h>
#include <asm/setup.h>
unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
enum count_cache_flush_type {
COUNT_CACHE_FLUSH_NONE = 0x1,
COUNT_CACHE_FLUSH_SW = 0x2,
COUNT_CACHE_FLUSH_HW = 0x4,
};
static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
bool barrier_nospec_enabled;
static bool no_nospec;
static bool btb_flush_enabled;
#ifdef CONFIG_PPC_FSL_BOOK3E
static bool no_spectrev2;
#endif
static void enable_barrier_nospec(bool enable)
{
barrier_nospec_enabled = enable;
do_barrier_nospec_fixups(enable);
}
void setup_barrier_nospec(void)
{
bool enable;
/*
* It would make sense to check SEC_FTR_SPEC_BAR_ORI31 below as well.
* But there's a good reason not to. The two flags we check below are
* both enabled by default in the kernel, so if the hcall is not
* functional they will be enabled.
* On a system where the host firmware has been updated (so the ori
* functions as a barrier), but on which the hypervisor (KVM/Qemu) has
* not been updated, we would like to enable the barrier. Dropping the
* check for SEC_FTR_SPEC_BAR_ORI31 achieves that. The only downside is
* we potentially enable the barrier on systems where the host firmware
* is not updated, but that's harmless as it's a no-op.
*/
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR);
if (!no_nospec)
enable_barrier_nospec(enable);
}
static int __init handle_nospectre_v1(char *p)
{
no_nospec = true;
return 0;
}
early_param("nospectre_v1", handle_nospectre_v1);
#ifdef CONFIG_DEBUG_FS
static int barrier_nospec_set(void *data, u64 val)
{
switch (val) {
case 0:
case 1:
break;
default:
return -EINVAL;
}
if (!!val == !!barrier_nospec_enabled)
return 0;
enable_barrier_nospec(!!val);
return 0;
}
static int barrier_nospec_get(void *data, u64 *val)
{
*val = barrier_nospec_enabled ? 1 : 0;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_barrier_nospec,
barrier_nospec_get, barrier_nospec_set, "%llu\n");
static __init int barrier_nospec_debugfs_init(void)
{
debugfs_create_file("barrier_nospec", 0600, powerpc_debugfs_root, NULL,
&fops_barrier_nospec);
return 0;
}
device_initcall(barrier_nospec_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
#ifdef CONFIG_PPC_FSL_BOOK3E
static int __init handle_nospectre_v2(char *p)
{
no_spectrev2 = true;
return 0;
}
early_param("nospectre_v2", handle_nospectre_v2);
void setup_spectre_v2(void)
{
if (no_spectrev2)
do_btb_flush_fixups();
else
btb_flush_enabled = true;
}
#endif /* CONFIG_PPC_FSL_BOOK3E */
#ifdef CONFIG_PPC_BOOK3S_64
ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
{
bool thread_priv;
thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV);
if (rfi_flush || thread_priv) {
struct seq_buf s;
seq_buf_init(&s, buf, PAGE_SIZE - 1);
seq_buf_printf(&s, "Mitigation: ");
if (rfi_flush)
seq_buf_printf(&s, "RFI Flush");
if (rfi_flush && thread_priv)
seq_buf_printf(&s, ", ");
if (thread_priv)
seq_buf_printf(&s, "L1D private per thread");
seq_buf_printf(&s, "\n");
return s.len;
}
if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
!security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
return sprintf(buf, "Not affected\n");
return sprintf(buf, "Vulnerable\n");
}
#endif
ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
{
struct seq_buf s;
seq_buf_init(&s, buf, PAGE_SIZE - 1);
if (security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) {
if (barrier_nospec_enabled)
seq_buf_printf(&s, "Mitigation: __user pointer sanitization");
else
seq_buf_printf(&s, "Vulnerable");
if (security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31))
seq_buf_printf(&s, ", ori31 speculation barrier enabled");
seq_buf_printf(&s, "\n");
} else
seq_buf_printf(&s, "Not affected\n");
return s.len;
}
ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
{
struct seq_buf s;
bool bcs, ccd;
seq_buf_init(&s, buf, PAGE_SIZE - 1);
bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
if (bcs || ccd) {
seq_buf_printf(&s, "Mitigation: ");
if (bcs)
seq_buf_printf(&s, "Indirect branch serialisation (kernel only)");
if (bcs && ccd)
seq_buf_printf(&s, ", ");
if (ccd)
seq_buf_printf(&s, "Indirect branch cache disabled");
} else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) {
seq_buf_printf(&s, "Mitigation: Software count cache flush");
if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW)
seq_buf_printf(&s, " (hardware accelerated)");
} else if (btb_flush_enabled) {
seq_buf_printf(&s, "Mitigation: Branch predictor state flush");
} else {
seq_buf_printf(&s, "Vulnerable");
}
seq_buf_printf(&s, "\n");
return s.len;
}
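These handlers back /sys/devices/system/cpu/vulnerabilities/spectre_v1 and .../spectre_v2. As an example of the strings they produce, a guest using the software count cache flush with the hardware assist would report "Mitigation: Software count cache flush (hardware accelerated)".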
#ifdef CONFIG_PPC_BOOK3S_64
/*
* Store-forwarding barrier support.
*/
static enum stf_barrier_type stf_enabled_flush_types;
static bool no_stf_barrier;
bool stf_barrier;
static int __init handle_no_stf_barrier(char *p)
{
pr_info("stf-barrier: disabled on command line.");
no_stf_barrier = true;
return 0;
}
early_param("no_stf_barrier", handle_no_stf_barrier);
/* This is the generic flag used by other architectures */
static int __init handle_ssbd(char *p)
{
if (!p || strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0) {
/* Until firmware tells us, we have the barrier with auto */
return 0;
} else if (strncmp(p, "off", 3) == 0) {
handle_no_stf_barrier(NULL);
return 0;
} else
return 1;
return 0;
}
early_param("spec_store_bypass_disable", handle_ssbd);
/* This is the generic flag used by other architectures */
static int __init handle_no_ssbd(char *p)
{
handle_no_stf_barrier(NULL);
return 0;
}
early_param("nospec_store_bypass_disable", handle_no_ssbd);
static void stf_barrier_enable(bool enable)
{
if (enable)
do_stf_barrier_fixups(stf_enabled_flush_types);
else
do_stf_barrier_fixups(STF_BARRIER_NONE);
stf_barrier = enable;
}
void setup_stf_barrier(void)
{
enum stf_barrier_type type;
bool enable, hv;
hv = cpu_has_feature(CPU_FTR_HVMODE);
/* Default to fallback in case fw-features are not available */
if (cpu_has_feature(CPU_FTR_ARCH_207S))
type = STF_BARRIER_SYNC_ORI;
else if (cpu_has_feature(CPU_FTR_ARCH_206))
type = STF_BARRIER_FALLBACK;
else
type = STF_BARRIER_NONE;
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
(security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) ||
(security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv));
if (type == STF_BARRIER_FALLBACK) {
pr_info("stf-barrier: fallback barrier available\n");
} else if (type == STF_BARRIER_SYNC_ORI) {
pr_info("stf-barrier: hwsync barrier available\n");
} else if (type == STF_BARRIER_EIEIO) {
pr_info("stf-barrier: eieio barrier available\n");
}
stf_enabled_flush_types = type;
if (!no_stf_barrier)
stf_barrier_enable(enable);
}
ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
{
if (stf_barrier && stf_enabled_flush_types != STF_BARRIER_NONE) {
const char *type;
switch (stf_enabled_flush_types) {
case STF_BARRIER_EIEIO:
type = "eieio";
break;
case STF_BARRIER_SYNC_ORI:
type = "hwsync";
break;
case STF_BARRIER_FALLBACK:
type = "fallback";
break;
default:
type = "unknown";
}
return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", type);
}
if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
!security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
return sprintf(buf, "Not affected\n");
return sprintf(buf, "Vulnerable\n");
}
#ifdef CONFIG_DEBUG_FS
static int stf_barrier_set(void *data, u64 val)
{
bool enable;
if (val == 1)
enable = true;
else if (val == 0)
enable = false;
else
return -EINVAL;
/* Only do anything if we're changing state */
if (enable != stf_barrier)
stf_barrier_enable(enable);
return 0;
}
static int stf_barrier_get(void *data, u64 *val)
{
*val = stf_barrier ? 1 : 0;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n");
static __init int stf_barrier_debugfs_init(void)
{
debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier);
return 0;
}
device_initcall(stf_barrier_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
static void toggle_count_cache_flush(bool enable)
{
if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP);
count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
pr_info("count-cache-flush: software flush disabled.\n");
return;
}
patch_branch_site(&patch__call_flush_count_cache,
(u64)&flush_count_cache, BRANCH_SET_LINK);
if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
count_cache_flush_type = COUNT_CACHE_FLUSH_SW;
pr_info("count-cache-flush: full software flush sequence enabled.\n");
return;
}
patch_instruction_site(&patch__flush_count_cache_return, PPC_INST_BLR);
count_cache_flush_type = COUNT_CACHE_FLUSH_HW;
pr_info("count-cache-flush: hardware assisted flush sequence enabled\n");
}
void setup_count_cache_flush(void)
{
toggle_count_cache_flush(true);
}
#ifdef CONFIG_DEBUG_FS
static int count_cache_flush_set(void *data, u64 val)
{
bool enable;
if (val == 1)
enable = true;
else if (val == 0)
enable = false;
else
return -EINVAL;
toggle_count_cache_flush(enable);
return 0;
}
static int count_cache_flush_get(void *data, u64 *val)
{
if (count_cache_flush_type == COUNT_CACHE_FLUSH_NONE)
*val = 0;
else
*val = 1;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get,
count_cache_flush_set, "%llu\n");
static __init int count_cache_flush_debugfs_init(void)
{
debugfs_create_file("count_cache_flush", 0600, powerpc_debugfs_root,
NULL, &fops_count_cache_flush);
return 0;
}
device_initcall(count_cache_flush_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
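Taken together, the DEFINE_SIMPLE_ATTRIBUTE files registered above (barrier_nospec, stf_barrier, count_cache_flush) provide runtime on/off switches under powerpc's debugfs directory; for instance, writing 0 to /sys/kernel/debug/powerpc/stf_barrier re-patches the barrier sites back to nops (assuming debugfs is mounted at the usual location).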
#endif /* CONFIG_PPC_BOOK3S_64 */


@@ -322,6 +322,9 @@ void __init setup_arch(char **cmdline_p)
ppc_md.setup_arch();
if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab);
setup_barrier_nospec();
setup_spectre_v2();
paging_init();
/* Initialize the MMU context management stuff */


@@ -736,6 +736,9 @@ void __init setup_arch(char **cmdline_p)
if (ppc_md.setup_arch)
ppc_md.setup_arch();
setup_barrier_nospec();
setup_spectre_v2();
paging_init();
/* Initialize the MMU context management stuff */
@@ -873,9 +876,6 @@ static void do_nothing(void *unused)
void rfi_flush_enable(bool enable)
{
if (rfi_flush == enable)
return;
if (enable) {
do_rfi_flush_fixups(enabled_flush_types);
on_each_cpu(do_nothing, NULL, 1);
@@ -885,11 +885,15 @@ void rfi_flush_enable(bool enable)
rfi_flush = enable;
}
static void init_fallback_flush(void)
static void __ref init_fallback_flush(void)
{
u64 l1d_size, limit;
int cpu;
/* Only allocate the fallback flush area once (at boot time). */
if (l1d_flush_fallback_area)
return;
l1d_size = ppc64_caches.dsize;
limit = min(safe_stack_limit(), ppc64_rma_size);
@@ -902,34 +906,23 @@ static void init_fallback_flush(void)
memset(l1d_flush_fallback_area, 0, l1d_size * 2);
for_each_possible_cpu(cpu) {
/*
* The fallback flush is currently coded for 8-way
* associativity. Different associativity is possible, but it
* will be treated as 8-way and may not evict the lines as
* effectively.
*
* 128 byte lines are mandatory.
*/
u64 c = l1d_size / 8;
paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
paca[cpu].l1d_flush_congruence = c;
paca[cpu].l1d_flush_sets = c / 128;
paca[cpu].l1d_flush_size = l1d_size;
}
}
void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
void setup_rfi_flush(enum l1d_flush_type types, bool enable)
{
if (types & L1D_FLUSH_FALLBACK) {
pr_info("rfi-flush: Using fallback displacement flush\n");
pr_info("rfi-flush: fallback displacement flush available\n");
init_fallback_flush();
}
if (types & L1D_FLUSH_ORI)
pr_info("rfi-flush: Using ori type flush\n");
pr_info("rfi-flush: ori type flush available\n");
if (types & L1D_FLUSH_MTTRIG)
pr_info("rfi-flush: Using mttrig type flush\n");
pr_info("rfi-flush: mttrig type flush available\n");
enabled_flush_types = types;
@@ -940,13 +933,19 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
#ifdef CONFIG_DEBUG_FS
static int rfi_flush_set(void *data, u64 val)
{
bool enable;
if (val == 1)
rfi_flush_enable(true);
enable = true;
else if (val == 0)
rfi_flush_enable(false);
enable = false;
else
return -EINVAL;
/* Only do anything if we're changing state */
if (enable != rfi_flush)
rfi_flush_enable(enable);
return 0;
}
@@ -965,12 +964,4 @@ static __init int rfi_flush_debugfs_init(void)
}
device_initcall(rfi_flush_debugfs_init);
#endif
ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
{
if (rfi_flush)
return sprintf(buf, "Mitigation: RFI Flush\n");
return sprintf(buf, "Vulnerable\n");
}
#endif /* CONFIG_PPC_BOOK3S_64 */


@@ -74,14 +74,45 @@ SECTIONS
RODATA
#ifdef CONFIG_PPC64
. = ALIGN(8);
__stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
__start___stf_entry_barrier_fixup = .;
*(__stf_entry_barrier_fixup)
__stop___stf_entry_barrier_fixup = .;
}
. = ALIGN(8);
__stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
__start___stf_exit_barrier_fixup = .;
*(__stf_exit_barrier_fixup)
__stop___stf_exit_barrier_fixup = .;
}
. = ALIGN(8);
__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
__start___rfi_flush_fixup = .;
*(__rfi_flush_fixup)
__stop___rfi_flush_fixup = .;
}
#endif
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_PPC_BARRIER_NOSPEC
. = ALIGN(8);
__spec_barrier_fixup : AT(ADDR(__spec_barrier_fixup) - LOAD_OFFSET) {
__start___barrier_nospec_fixup = .;
*(__barrier_nospec_fixup)
__stop___barrier_nospec_fixup = .;
}
#endif /* CONFIG_PPC_BARRIER_NOSPEC */
#ifdef CONFIG_PPC_FSL_BOOK3E
. = ALIGN(8);
__spec_btb_flush_fixup : AT(ADDR(__spec_btb_flush_fixup) - LOAD_OFFSET) {
__start__btb_flush_fixup = .;
*(__btb_flush_fixup)
__stop__btb_flush_fixup = .;
}
#endif
EXCEPTION_TABLE(0)
NOTES :kernel :notes


@@ -75,6 +75,10 @@
PPC_LL r1, VCPU_HOST_STACK(r4)
PPC_LL r2, HOST_R2(r1)
START_BTB_FLUSH_SECTION
BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
mfspr r10, SPRN_PID
lwz r8, VCPU_HOST_PID(r4)
PPC_LL r11, VCPU_SHARED(r4)


@@ -277,6 +277,13 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va
vcpu->arch.pwrmgtcr0 = spr_val;
break;
case SPRN_BUCSR:
/*
* If we are here, it means that we have already flushed the
* branch predictor, so just return to guest.
*/
break;
/* extra exceptions */
#ifdef CONFIG_SPE_POSSIBLE
case SPRN_IVOR32:


@@ -14,12 +14,25 @@
#include <asm/page.h>
#include <asm/code-patching.h>
#include <asm/uaccess.h>
#include <asm/setup.h>
#include <asm/sections.h>
static inline bool is_init(unsigned int *addr)
{
return addr >= (unsigned int *)__init_begin && addr < (unsigned int *)__init_end;
}
int patch_instruction(unsigned int *addr, unsigned int instr)
{
int err;
/* Make sure we aren't patching a freed init section */
if (*PTRRELOC(&init_mem_is_free) && is_init(addr)) {
pr_debug("Skipping init section patching addr: 0x%px\n", addr);
return 0;
}
__put_user_size(instr, addr, 4, err);
if (err)
return err;
@@ -32,6 +45,22 @@ int patch_branch(unsigned int *addr, unsigned long target, int flags)
return patch_instruction(addr, create_branch(addr, target, flags));
}
int patch_branch_site(s32 *site, unsigned long target, int flags)
{
unsigned int *addr;
addr = (unsigned int *)((unsigned long)site + *site);
return patch_instruction(addr, create_branch(addr, target, flags));
}
int patch_instruction_site(s32 *site, unsigned int instr)
{
unsigned int *addr;
addr = (unsigned int *)((unsigned long)site + *site);
return patch_instruction(addr, instr);
}
unsigned int create_branch(const unsigned int *addr,
unsigned long target, int flags)
{


@@ -21,7 +21,7 @@
#include <asm/page.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/security_features.h>
struct fixup_entry {
unsigned long mask;
@@ -115,6 +115,120 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
}
#ifdef CONFIG_PPC_BOOK3S_64
void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
{
unsigned int instrs[3], *dest;
long *start, *end;
int i;
start = PTRRELOC(&__start___stf_entry_barrier_fixup),
end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
instrs[0] = 0x60000000; /* nop */
instrs[1] = 0x60000000; /* nop */
instrs[2] = 0x60000000; /* nop */
i = 0;
if (types & STF_BARRIER_FALLBACK) {
instrs[i++] = 0x7d4802a6; /* mflr r10 */
instrs[i++] = 0x60000000; /* branch patched below */
instrs[i++] = 0x7d4803a6; /* mtlr r10 */
} else if (types & STF_BARRIER_EIEIO) {
instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
} else if (types & STF_BARRIER_SYNC_ORI) {
instrs[i++] = 0x7c0004ac; /* hwsync */
instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */
instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
}
for (i = 0; start < end; start++, i++) {
dest = (void *)start + *start;
pr_devel("patching dest %lx\n", (unsigned long)dest);
patch_instruction(dest, instrs[0]);
if (types & STF_BARRIER_FALLBACK)
patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback,
BRANCH_SET_LINK);
else
patch_instruction(dest + 1, instrs[1]);
patch_instruction(dest + 2, instrs[2]);
}
printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
(types == STF_BARRIER_NONE) ? "no" :
(types == STF_BARRIER_FALLBACK) ? "fallback" :
(types == STF_BARRIER_EIEIO) ? "eieio" :
(types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
: "unknown");
}
void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
{
unsigned int instrs[6], *dest;
long *start, *end;
int i;
start = PTRRELOC(&__start___stf_exit_barrier_fixup),
end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
instrs[0] = 0x60000000; /* nop */
instrs[1] = 0x60000000; /* nop */
instrs[2] = 0x60000000; /* nop */
instrs[3] = 0x60000000; /* nop */
instrs[4] = 0x60000000; /* nop */
instrs[5] = 0x60000000; /* nop */
i = 0;
if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) {
if (cpu_has_feature(CPU_FTR_HVMODE)) {
instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */
instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */
} else {
instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */
instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */
}
instrs[i++] = 0x7c0004ac; /* hwsync */
instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */
instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
if (cpu_has_feature(CPU_FTR_HVMODE)) {
instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */
} else {
instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */
}
} else if (types & STF_BARRIER_EIEIO) {
instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
}
for (i = 0; start < end; start++, i++) {
dest = (void *)start + *start;
pr_devel("patching dest %lx\n", (unsigned long)dest);
patch_instruction(dest, instrs[0]);
patch_instruction(dest + 1, instrs[1]);
patch_instruction(dest + 2, instrs[2]);
patch_instruction(dest + 3, instrs[3]);
patch_instruction(dest + 4, instrs[4]);
patch_instruction(dest + 5, instrs[5]);
}
printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
(types == STF_BARRIER_NONE) ? "no" :
(types == STF_BARRIER_FALLBACK) ? "fallback" :
(types == STF_BARRIER_EIEIO) ? "eieio" :
(types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
: "unknown");
}
void do_stf_barrier_fixups(enum stf_barrier_type types)
{
do_stf_entry_barrier_fixups(types);
do_stf_exit_barrier_fixups(types);
}
void do_rfi_flush_fixups(enum l1d_flush_type types)
{
unsigned int instrs[3], *dest;
@@ -151,10 +265,110 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
patch_instruction(dest + 2, instrs[2]);
}
printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
(types == L1D_FLUSH_NONE) ? "no" :
(types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
(types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
? "ori+mttrig type"
: "ori type" :
(types & L1D_FLUSH_MTTRIG) ? "mttrig type"
: "unknown");
}
void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
{
unsigned int instr, *dest;
long *start, *end;
int i;
start = fixup_start;
end = fixup_end;
instr = 0x60000000; /* nop */
if (enable) {
pr_info("barrier-nospec: using ORI speculation barrier\n");
instr = 0x63ff0000; /* ori 31,31,0 speculation barrier */
}
for (i = 0; start < end; start++, i++) {
dest = (void *)start + *start;
pr_devel("patching dest %lx\n", (unsigned long)dest);
patch_instruction(dest, instr);
}
printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
}
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_BARRIER_NOSPEC
void do_barrier_nospec_fixups(bool enable)
{
void *start, *end;
start = PTRRELOC(&__start___barrier_nospec_fixup),
end = PTRRELOC(&__stop___barrier_nospec_fixup);
do_barrier_nospec_fixups_range(enable, start, end);
}
#endif /* CONFIG_PPC_BARRIER_NOSPEC */
#ifdef CONFIG_PPC_FSL_BOOK3E
void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
{
unsigned int instr[2], *dest;
long *start, *end;
int i;
start = fixup_start;
end = fixup_end;
instr[0] = PPC_INST_NOP;
instr[1] = PPC_INST_NOP;
if (enable) {
pr_info("barrier-nospec: using isync; sync as speculation barrier\n");
instr[0] = PPC_INST_ISYNC;
instr[1] = PPC_INST_SYNC;
}
for (i = 0; start < end; start++, i++) {
dest = (void *)start + *start;
pr_devel("patching dest %lx\n", (unsigned long)dest);
patch_instruction(dest, instr[0]);
patch_instruction(dest + 1, instr[1]);
}
printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
}
static void patch_btb_flush_section(long *curr)
{
unsigned int *start, *end;
start = (void *)curr + *curr;
end = (void *)curr + *(curr + 1);
for (; start < end; start++) {
pr_devel("patching dest %lx\n", (unsigned long)start);
patch_instruction(start, PPC_INST_NOP);
}
}
void do_btb_flush_fixups(void)
{
long *start, *end;
start = PTRRELOC(&__start__btb_flush_fixup);
end = PTRRELOC(&__stop__btb_flush_fixup);
for (; start < end; start += 2)
patch_btb_flush_section(start);
}
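Note that the BTB-flush table differs from the other fixup tables: its entries come in (start, end) pairs describing a whole instruction range to turn into nops, which is why START_BTB_FLUSH_SECTION and END_BTB_FLUSH_SECTION each emit an FTR_ENTRY_OFFSET and the walk above strides by two entries.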
#endif /* CONFIG_PPC_FSL_BOOK3E */
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
{
long *start, *end;


@@ -62,6 +62,7 @@
#endif
unsigned long long memory_limit;
bool init_mem_is_free;
#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
@@ -381,6 +382,7 @@ void __init mem_init(void)
void free_initmem(void)
{
ppc_md.progress = ppc_printk_progress;
init_mem_is_free = true;
free_initmem_default(POISON_FREE_INITMEM);
}


@@ -69,6 +69,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
std r15,EX_TLB_R15(r12)
std r10,EX_TLB_CR(r12)
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
mfspr r11, SPRN_SRR1
andi. r10,r11,MSR_PR
beq 1f
BTB_FLUSH(r10)
1:
END_BTB_FLUSH_SECTION
std r7,EX_TLB_R7(r12)
#endif
TLB_MISS_PROLOG_STATS


@@ -37,53 +37,99 @@
#include <asm/smp.h>
#include <asm/tm.h>
#include <asm/setup.h>
#include <asm/security_features.h>
#include "powernv.h"
static bool fw_feature_is(const char *state, const char *name,
struct device_node *fw_features)
{
struct device_node *np;
bool rc = false;
np = of_get_child_by_name(fw_features, name);
if (np) {
rc = of_property_read_bool(np, state);
of_node_put(np);
}
return rc;
}
static void init_fw_feat_flags(struct device_node *np)
{
if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
if (fw_feature_is("enabled", "fw-bcctrl-serialized", np))
security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np))
security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np))
security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
if (fw_feature_is("enabled", "fw-l1d-thread-split", np))
security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
if (fw_feature_is("enabled", "fw-count-cache-flush-bcctr2,0,0", np))
security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
if (fw_feature_is("enabled", "needs-count-cache-flush-on-context-switch", np))
security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
/*
* The features below are enabled by default, so we instead look to see
* if firmware has *disabled* them, and clear them if so.
*/
if (fw_feature_is("disabled", "speculation-policy-favor-security", np))
security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np))
security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np))
security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np))
security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}
static void pnv_setup_rfi_flush(void)
{
struct device_node *np, *fw_features;
enum l1d_flush_type type;
int enable;
bool enable;
/* Default to fallback in case fw-features are not available */
type = L1D_FLUSH_FALLBACK;
enable = 1;
np = of_find_node_by_name(NULL, "ibm,opal");
fw_features = of_get_child_by_name(np, "fw-features");
of_node_put(np);
if (fw_features) {
np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
if (np && of_property_read_bool(np, "enabled"))
init_fw_feat_flags(fw_features);
of_node_put(fw_features);
if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
type = L1D_FLUSH_MTTRIG;
of_node_put(np);
np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
if (np && of_property_read_bool(np, "enabled"))
if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
type = L1D_FLUSH_ORI;
of_node_put(np);
/* Enable unless firmware says NOT to */
enable = 2;
np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
if (np && of_property_read_bool(np, "disabled"))
enable--;
of_node_put(np);
np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
if (np && of_property_read_bool(np, "disabled"))
enable--;
of_node_put(np);
of_node_put(fw_features);
}
setup_rfi_flush(type, enable > 0);
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
(security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \
security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
setup_rfi_flush(type, enable);
setup_count_cache_flush();
}
static void __init pnv_setup_arch(void)
@@ -91,6 +137,7 @@ static void __init pnv_setup_arch(void)
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
pnv_setup_rfi_flush();
setup_stf_barrier();
/* Initialize SMP */
pnv_smp_init();


@@ -314,6 +314,9 @@ void post_mobility_fixup(void)
printk(KERN_ERR "Post-mobility device tree update "
"failed: %d\n", rc);
/* Possibly switch to a new RFI flush type */
pseries_setup_rfi_flush();
return;
}


@@ -81,4 +81,6 @@ extern struct pci_controller_ops pseries_pci_controller_ops;
unsigned long pseries_memory_block_size(void);
void pseries_setup_rfi_flush(void);
#endif /* _PSERIES_PSERIES_H */


@@ -67,6 +67,7 @@
#include <asm/eeh.h>
#include <asm/reg.h>
#include <asm/plpar_wrappers.h>
#include <asm/security_features.h>
#include "pseries.h"
@@ -499,37 +500,87 @@ static void __init find_and_init_phbs(void)
of_pci_check_probe_only();
}
static void pseries_setup_rfi_flush(void)
static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
{
/*
* The features below are disabled by default, so we instead look to see
* if firmware has *enabled* them, and set them if so.
*/
if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST)
security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE)
security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
/*
* The features below are enabled by default, so we instead look to see
* if firmware has *disabled* them, and clear them if so.
*/
if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}
void pseries_setup_rfi_flush(void)
{
struct h_cpu_char_result result;
enum l1d_flush_type types;
bool enable;
long rc;
/* Enable by default */
enable = true;
/*
* Set features to the defaults assumed by init_cpu_char_feature_flags()
* so it can set/clear again any features that might have changed after
* migration, and in case the hypercall fails and it is not even called.
*/
powerpc_security_features = SEC_FTR_DEFAULT;
rc = plpar_get_cpu_characteristics(&result);
if (rc == H_SUCCESS) {
types = L1D_FLUSH_NONE;
if (rc == H_SUCCESS)
init_cpu_char_feature_flags(&result);
if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
types |= L1D_FLUSH_MTTRIG;
if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
types |= L1D_FLUSH_ORI;
/*
* We're the guest so this doesn't apply to us, clear it to simplify
* handling of it elsewhere.
*/
security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
/* Use fallback if nothing set in hcall */
if (types == L1D_FLUSH_NONE)
types = L1D_FLUSH_FALLBACK;
types = L1D_FLUSH_FALLBACK;
if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
enable = false;
} else {
/* Default to fallback in case the hcall is not available */
types = L1D_FLUSH_FALLBACK;
}
if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
types |= L1D_FLUSH_MTTRIG;
if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
types |= L1D_FLUSH_ORI;
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);
setup_rfi_flush(types, enable);
setup_count_cache_flush();
}
static void __init pSeries_setup_arch(void)
@@ -549,6 +600,7 @@ static void __init pSeries_setup_arch(void)
fwnmi_init();
pseries_setup_rfi_flush();
setup_stf_barrier();
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);


@@ -2144,6 +2144,8 @@ static void dump_one_paca(int cpu)
DUMP(p, slb_cache_ptr, "x");
for (i = 0; i < SLB_CACHE_ENTRIES; i++)
printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]);
DUMP(p, rfi_flush_fallback_area, "px");
#endif
DUMP(p, dscr_default, "llx");
#ifdef CONFIG_PPC_BOOK3E


@@ -916,13 +916,7 @@ config NR_CPUS
approximately eight kilobytes to the kernel image.
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
depends on SMP
---help---
SMT scheduler support improves the CPU scheduler's decision making
when dealing with Intel Pentium 4 chips with HyperThreading at a
cost of slightly increased overhead in some places. If unsure say
N here.
def_bool y if SMP
config SCHED_MC
def_bool y


@@ -28,6 +28,7 @@
#include <asm/vdso.h>
#include <asm/uaccess.h>
#include <asm/cpufeature.h>
#include <asm/nospec-branch.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -295,6 +296,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
#endif
user_enter();
mds_user_clear_cpu_buffers();
}
#define SYSCALL_EXIT_WORK_FLAGS \


@@ -162,7 +162,8 @@ quiet_cmd_vdso = VDSO $@
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
$(call ld-option, --build-id) -Bsymbolic
$(call ld-option, --build-id) $(call ld-option, --eh-frame-hdr) \
-Bsymbolic
GCOV_PROFILE := n
#


@@ -214,6 +214,7 @@
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -265,10 +266,12 @@
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -307,6 +310,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
@@ -332,5 +336,7 @@
#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
#endif /* _ASM_X86_CPUFEATURES_H */


@@ -50,19 +50,23 @@
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_PINEVIEW 0x1C
#define INTEL_FAM6_ATOM_LINCROFT 0x26
#define INTEL_FAM6_ATOM_PENWELL 0x27
#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35
#define INTEL_FAM6_ATOM_CEDARVIEW 0x36
#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */
#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avoton/Rangely */
#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */
#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */
#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avoton, Rangely */
#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merrifield */
#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
/* Xeon Phi */


@@ -4,6 +4,9 @@
#include <asm/processor-flags.h>
#ifndef __ASSEMBLY__
#include <asm/nospec-branch.h>
/*
* Interrupt control:
*/
@@ -49,11 +52,13 @@ static inline void native_safe_halt(void)
static inline void native_safe_halt(void)
{
mds_idle_clear_cpu_buffers();
asm volatile("sti; hlt": : :"memory");
}
static inline void native_halt(void)
{
mds_idle_clear_cpu_buffers();
asm volatile("hlt": : :"memory");
}


@@ -53,6 +53,21 @@ struct extended_sigtable {
#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
static inline u32 intel_get_microcode_revision(void)
{
u32 rev, dummy;
native_wrmsrl(MSR_IA32_UCODE_REV, 0);
/* As documented in the SDM: Do a CPUID 1 here */
sync_core();
/* get the current revision from MSR 0x8B */
native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev);
return rev;
}
extern int has_newer_microcode(void *mc, unsigned int csig, int cpf, int rev);
extern int microcode_sanity_check(void *mc, int print_err);
extern int find_matching_signature(void *mc, unsigned int csig, int cpf);
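intel_get_microcode_revision() centralises the SDM-documented read sequence: zero MSR_IA32_UCODE_REV, serialise with CPUID (sync_core()), then read the revision back. A caller might use it like this (hypothetical helper, not from this patch):

	/* Check whether the CPU runs at least the expected revision. */
	static bool microcode_at_least(u32 expected_rev)
	{
		return intel_get_microcode_revision() >= expected_rev;
	}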


@@ -1,6 +1,8 @@
#ifndef _ASM_X86_MSR_INDEX_H
#define _ASM_X86_MSR_INDEX_H
#include <linux/bits.h>
/* CPU model specific register (MSR) numbers */
/* x86-64 specific MSRs */
@@ -33,13 +35,14 @@
/* Intel MSRs. Some also available on other CPUs */
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
@@ -56,13 +59,18 @@
#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
#define ARCH_CAP_SSB_NO (1 << 4) /*
* Not susceptible to Speculative Store Bypass
* attack, so no Speculative Store Bypass
* control required.
*/
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
#define ARCH_CAP_SSB_NO BIT(4) /*
* Not susceptible to Speculative Store Bypass
* attack, so no Speculative Store Bypass
* control required.
*/
#define ARCH_CAP_MDS_NO BIT(5) /*
* Not susceptible to
* Microarchitectural Data
* Sampling (MDS) vulnerabilities.
*/
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e


@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <asm/cpufeature.h>
#include <asm/nospec-branch.h>
#define MWAIT_SUBSTATE_MASK 0xf
#define MWAIT_CSTATE_MASK 0xf
@@ -38,6 +39,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
static inline void __mwait(unsigned long eax, unsigned long ecx)
{
mds_idle_clear_cpu_buffers();
/* "mwait %eax, %ecx;" */
asm volatile(".byte 0x0f, 0x01, 0xc9;"
:: "a" (eax), "c" (ecx));
@@ -72,6 +75,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
static inline void __mwaitx(unsigned long eax, unsigned long ebx,
unsigned long ecx)
{
/* No MDS buffer clear as this is AMD/HYGON only */
/* "mwaitx %eax, %ebx, %ecx;" */
asm volatile(".byte 0x0f, 0x01, 0xfb;"
:: "a" (eax), "b" (ebx), "c" (ecx));
@ -79,6 +84,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
mds_idle_clear_cpu_buffers();
trace_hardirqs_on();
/* "mwait %eax, %ecx;" */
asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"


@ -3,6 +3,8 @@
#ifndef _ASM_X86_NOSPEC_BRANCH_H_
#define _ASM_X86_NOSPEC_BRANCH_H_
#include <linux/static_key.h>
#include <asm/alternative.h>
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
@ -169,7 +171,15 @@ enum spectre_v2_mitigation {
SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
SPECTRE_V2_RETPOLINE_GENERIC,
SPECTRE_V2_RETPOLINE_AMD,
SPECTRE_V2_IBRS,
SPECTRE_V2_IBRS_ENHANCED,
};
/* The indirect branch speculation control variants */
enum spectre_v2_user_mitigation {
SPECTRE_V2_USER_NONE,
SPECTRE_V2_USER_STRICT,
SPECTRE_V2_USER_PRCTL,
SPECTRE_V2_USER_SECCOMP,
};
/* The Speculative Store Bypass disable variants */
@ -248,6 +258,60 @@ do { \
preempt_enable(); \
} while (0)
DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
DECLARE_STATIC_KEY_FALSE(mds_user_clear);
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
#include <asm/segment.h>
/**
* mds_clear_cpu_buffers - Mitigation for MDS vulnerability
*
* This uses the otherwise unused and obsolete VERW instruction in
* combination with microcode which triggers a CPU buffer flush when the
* instruction is executed.
*/
static inline void mds_clear_cpu_buffers(void)
{
static const u16 ds = __KERNEL_DS;
/*
* Has to be the memory-operand variant because only that
* guarantees the CPU buffer flush functionality according to
* documentation. The register-operand variant does not.
* Works with any segment selector, but a valid writable
* data segment is the fastest variant.
*
* "cc" clobber is required because VERW modifies ZF.
*/
asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
}
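VERW is an unprivileged instruction, so the flush sequence above can be exercised from user space as well; whether it actually clears the CPU buffers depends on the MD_CLEAR microcode being present. A minimal sketch under that assumption (0x2b is the usual x86-64 Linux user data segment selector, assumed here purely for illustration):

#include <stdint.h>

static inline void clear_cpu_buffers(void)
{
	static const uint16_t ds = 0x2b;	/* assumed user data segment selector */

	/* Memory-operand form, as the comment above requires; VERW clobbers ZF. */
	__asm__ __volatile__("verw %[ds]" : : [ds] "m" (ds) : "cc");
}

int main(void)
{
	clear_cpu_buffers();
	return 0;
}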
/**
* mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
*
* Clear CPU buffers if the corresponding static key is enabled
*/
static inline void mds_user_clear_cpu_buffers(void)
{
if (static_branch_likely(&mds_user_clear))
mds_clear_cpu_buffers();
}
/**
* mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
*
* Clear CPU buffers if the corresponding static key is enabled
*/
static inline void mds_idle_clear_cpu_buffers(void)
{
if (static_branch_likely(&mds_idle_clear))
mds_clear_cpu_buffers();
}
#endif /* __ASSEMBLY__ */
/*


@ -44,15 +44,15 @@ struct mm_struct;
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
WRITE_ONCE(*ptep, pte);
}
static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
*ptep = native_make_pte(0);
}
static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
*ptep = pte;
native_set_pte(ptep, native_make_pte(0));
}
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
@ -62,7 +62,7 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
*pmdp = pmd;
WRITE_ONCE(*pmdp, pmd);
}
static inline void native_pmd_clear(pmd_t *pmd)
@ -98,7 +98,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
*pudp = pud;
WRITE_ONCE(*pudp, pud);
}
static inline void native_pud_clear(pud_t *pud)
@ -131,7 +131,7 @@ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
*pgdp = kaiser_set_shadow_pgd(pgdp, pgd);
WRITE_ONCE(*pgdp, kaiser_set_shadow_pgd(pgdp, pgd));
}
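The switch from plain assignment to WRITE_ONCE() in these accessors forces a single, non-torn store that the compiler cannot split or re-order, which matters for entries the hardware page-table walker may read concurrently. A user-space analogue of the pattern (WRITE_ONCE is re-created here; in the kernel it lives in compiler.h):

#include <stdint.h>

#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))

static uint64_t pte;	/* stand-in for a page-table entry */

static void set_pte(uint64_t val)
{
	WRITE_ONCE(pte, val);	/* one volatile store, no tearing */
}

int main(void)
{
	set_pte(0x8000000012345067ULL);	/* hypothetical PTE value */
	return 0;
}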
static inline void native_pgd_clear(pgd_t *pgd)


@ -845,4 +845,11 @@ bool xen_set_default_idle(void);
void stop_this_cpu(void *dummy);
void df_debug(struct pt_regs *regs, long error_code);
enum mds_mitigations {
MDS_MITIGATION_OFF,
MDS_MITIGATION_FULL,
MDS_MITIGATION_VMWERV,
};
#endif /* _ASM_X86_PROCESSOR_H */


@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}
static inline u64 stibp_tif_to_spec_ctrl(u64 tifn)
{
BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
}
static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
{
BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}
static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl)
{
BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
}
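The BUILD_BUG_ON()s guard the direction of these shifts: with TIF_SSBD = 5 and TIF_SPEC_IB = 9 (as defined later in this series) against SPEC_CTRL shifts of 2 and 1, each conversion is a plain right shift. A stand-alone check of the arithmetic:

#include <stdio.h>

#define TIF_SSBD              5
#define TIF_SPEC_IB           9
#define SPEC_CTRL_STIBP_SHIFT 1
#define SPEC_CTRL_SSBD_SHIFT  2
#define _TIF_SSBD             (1UL << TIF_SSBD)
#define _TIF_SPEC_IB          (1UL << TIF_SPEC_IB)

int main(void)
{
	unsigned long tifn = _TIF_SSBD | _TIF_SPEC_IB;

	/* Expect 0x4 (SPEC_CTRL_SSBD) and 0x2 (SPEC_CTRL_STIBP). */
	printf("ssbd=%#lx stibp=%#lx\n",
	       (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT),
	       (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT));
	return 0;
}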
static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
{
return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
@ -70,11 +82,7 @@ extern void speculative_store_bypass_ht_init(void);
static inline void speculative_store_bypass_ht_init(void) { }
#endif
extern void speculative_store_bypass_update(unsigned long tif);
static inline void speculative_store_bypass_update_current(void)
{
speculative_store_bypass_update(current_thread_info()->flags);
}
extern void speculation_ctrl_update(unsigned long tif);
extern void speculation_ctrl_update_current(void);
#endif


@ -6,9 +6,6 @@
struct task_struct; /* one of the stranger aspects of C forward declarations */
__visible struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next);
struct tss_struct;
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss);
#ifdef CONFIG_X86_32


@ -92,10 +92,12 @@ struct thread_info {
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
#define TIF_SSBD 5 /* Reduced data speculation */
#define TIF_SSBD 5 /* Speculative store bypass disable */
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
@ -121,6 +123,8 @@ struct thread_info {
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_NOTSC (1 << TIF_NOTSC)
@ -148,8 +152,18 @@ struct thread_info {
_TIF_NOHZ)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
#define _TIF_WORK_CTXSW_BASE \
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP| \
_TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
/*
* Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
*/
#ifdef CONFIG_SMP
# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB)
#else
# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE)
#endif
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)


@ -68,8 +68,12 @@ static inline void invpcid_flush_all_nonglobals(void)
struct tlb_state {
struct mm_struct *active_mm;
int state;
/* last user mm's ctx id */
u64 last_ctx_id;
/* Last user mm for optimizing IBPB */
union {
struct mm_struct *last_user_mm;
unsigned long last_user_mm_ibpb;
};
/*
* Access to this CR4 shadow and to H/W CR4 is protected by


@ -27,7 +27,6 @@ header-y += ldt.h
header-y += mce.h
header-y += mman.h
header-y += msgbuf.h
header-y += msr-index.h
header-y += msr.h
header-y += mtrr.h
header-y += param.h


@ -26,6 +26,10 @@ struct mce {
__u32 socketid; /* CPU socket ID */
__u32 apicid; /* CPU initial apic ID */
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
__u64 ppin; /* Protected Processor Inventory Number */
__u32 microcode;/* Microcode revision */
};
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)


@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/nospec.h>
#include <linux/prctl.h>
#include <linux/sched/smt.h>
#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
@ -23,6 +24,7 @@
#include <asm/msr.h>
#include <asm/paravirt.h>
#include <asm/alternative.h>
#include <asm/hypervisor.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/intel-family.h>
@ -31,13 +33,12 @@
static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
/*
* Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
* writes to SPEC_CTRL contain whatever reserved bits have been set.
*/
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
u64 x86_spec_ctrl_base;
EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
static DEFINE_MUTEX(spec_ctrl_mutex);
/*
* The vendor and possibly platform specific bits which can be modified in
@ -52,6 +53,19 @@ static u64 x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
u64 x86_amd_ls_cfg_base;
u64 x86_amd_ls_cfg_ssbd_mask;
/* Control conditional STIBP in switch_to() */
DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
/* Control conditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
/* Control unconditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
/* Control MDS CPU buffer clear before returning to user space */
DEFINE_STATIC_KEY_FALSE(mds_user_clear);
/* Control MDS CPU buffer clear before idling (halt, mwait) */
DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
EXPORT_SYMBOL_GPL(mds_idle_clear);
void __init check_bugs(void)
{
identify_boot_cpu();
@ -84,6 +98,10 @@ void __init check_bugs(void)
l1tf_select_mitigation();
mds_select_mitigation();
arch_smt_update();
#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
@ -116,29 +134,6 @@ void __init check_bugs(void)
#endif
}
/* The kernel command line selection */
enum spectre_v2_mitigation_cmd {
SPECTRE_V2_CMD_NONE,
SPECTRE_V2_CMD_AUTO,
SPECTRE_V2_CMD_FORCE,
SPECTRE_V2_CMD_RETPOLINE,
SPECTRE_V2_CMD_RETPOLINE_GENERIC,
SPECTRE_V2_CMD_RETPOLINE_AMD,
};
static const char *spectre_v2_strings[] = {
[SPECTRE_V2_NONE] = "Vulnerable",
[SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
[SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
[SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
[SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
};
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
@ -156,9 +151,14 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
/* SSBD controlled in MSR_SPEC_CTRL */
if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
static_cpu_has(X86_FEATURE_AMD_SSBD))
hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
/* Conditional STIBP enabled? */
if (static_branch_unlikely(&switch_to_cond_stibp))
hostval |= stibp_tif_to_spec_ctrl(ti->flags);
if (hostval != guestval) {
msrval = setguest ? guestval : hostval;
wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
@ -192,7 +192,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
ssbd_spec_ctrl_to_tif(hostval);
speculative_store_bypass_update(tif);
speculation_ctrl_update(tif);
}
}
EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
@ -207,6 +207,57 @@ static void x86_amd_ssb_disable(void)
wrmsrl(MSR_AMD64_LS_CFG, msrval);
}
#undef pr_fmt
#define pr_fmt(fmt) "MDS: " fmt
/* Default mitigation for MDS-affected CPUs */
static enum mds_mitigations mds_mitigation = MDS_MITIGATION_FULL;
static const char * const mds_strings[] = {
[MDS_MITIGATION_OFF] = "Vulnerable",
[MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers",
[MDS_MITIGATION_VMWERV] = "Vulnerable: Clear CPU buffers attempted, no microcode",
};
static void __init mds_select_mitigation(void)
{
if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) {
mds_mitigation = MDS_MITIGATION_OFF;
return;
}
if (mds_mitigation == MDS_MITIGATION_FULL) {
if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
mds_mitigation = MDS_MITIGATION_VMWERV;
static_branch_enable(&mds_user_clear);
}
pr_info("%s\n", mds_strings[mds_mitigation]);
}
static int __init mds_cmdline(char *str)
{
if (!boot_cpu_has_bug(X86_BUG_MDS))
return 0;
if (!str)
return -EINVAL;
if (!strcmp(str, "off"))
mds_mitigation = MDS_MITIGATION_OFF;
else if (!strcmp(str, "full"))
mds_mitigation = MDS_MITIGATION_FULL;
return 0;
}
early_param("mds", mds_cmdline);
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
static enum spectre_v2_user_mitigation spectre_v2_user = SPECTRE_V2_USER_NONE;
#ifdef RETPOLINE
static bool spectre_v2_bad_module;
@ -228,15 +279,192 @@ static inline const char *spectre_v2_module_string(void)
static inline const char *spectre_v2_module_string(void) { return ""; }
#endif
static void __init spec2_print_if_insecure(const char *reason)
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
pr_info("%s selected on command line.\n", reason);
int len = strlen(opt);
return len == arglen && !strncmp(arg, opt, len);
}
static void __init spec2_print_if_secure(const char *reason)
/* The kernel command line selection for spectre v2 */
enum spectre_v2_mitigation_cmd {
SPECTRE_V2_CMD_NONE,
SPECTRE_V2_CMD_AUTO,
SPECTRE_V2_CMD_FORCE,
SPECTRE_V2_CMD_RETPOLINE,
SPECTRE_V2_CMD_RETPOLINE_GENERIC,
SPECTRE_V2_CMD_RETPOLINE_AMD,
};
enum spectre_v2_user_cmd {
SPECTRE_V2_USER_CMD_NONE,
SPECTRE_V2_USER_CMD_AUTO,
SPECTRE_V2_USER_CMD_FORCE,
SPECTRE_V2_USER_CMD_PRCTL,
SPECTRE_V2_USER_CMD_PRCTL_IBPB,
SPECTRE_V2_USER_CMD_SECCOMP,
SPECTRE_V2_USER_CMD_SECCOMP_IBPB,
};
static const char * const spectre_v2_user_strings[] = {
[SPECTRE_V2_USER_NONE] = "User space: Vulnerable",
[SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection",
[SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl",
[SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl",
};
static const struct {
const char *option;
enum spectre_v2_user_cmd cmd;
bool secure;
} v2_user_options[] __initconst = {
{ "auto", SPECTRE_V2_USER_CMD_AUTO, false },
{ "off", SPECTRE_V2_USER_CMD_NONE, false },
{ "on", SPECTRE_V2_USER_CMD_FORCE, true },
{ "prctl", SPECTRE_V2_USER_CMD_PRCTL, false },
{ "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false },
{ "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false },
{ "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false },
};
static void __init spec_v2_user_print_cond(const char *reason, bool secure)
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
pr_info("spectre_v2_user=%s forced on command line.\n", reason);
}
static enum spectre_v2_user_cmd __init
spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
{
char arg[20];
int ret, i;
switch (v2_cmd) {
case SPECTRE_V2_CMD_NONE:
return SPECTRE_V2_USER_CMD_NONE;
case SPECTRE_V2_CMD_FORCE:
return SPECTRE_V2_USER_CMD_FORCE;
default:
break;
}
ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
arg, sizeof(arg));
if (ret < 0)
return SPECTRE_V2_USER_CMD_AUTO;
for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
if (match_option(arg, ret, v2_user_options[i].option)) {
spec_v2_user_print_cond(v2_user_options[i].option,
v2_user_options[i].secure);
return v2_user_options[i].cmd;
}
}
pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg);
return SPECTRE_V2_USER_CMD_AUTO;
}
static void __init
spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
{
enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
bool smt_possible = IS_ENABLED(CONFIG_SMP);
enum spectre_v2_user_cmd cmd;
if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
return;
if (!IS_ENABLED(CONFIG_SMP))
smt_possible = false;
cmd = spectre_v2_parse_user_cmdline(v2_cmd);
switch (cmd) {
case SPECTRE_V2_USER_CMD_NONE:
goto set_mode;
case SPECTRE_V2_USER_CMD_FORCE:
mode = SPECTRE_V2_USER_STRICT;
break;
case SPECTRE_V2_USER_CMD_PRCTL:
case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
mode = SPECTRE_V2_USER_PRCTL;
break;
case SPECTRE_V2_USER_CMD_AUTO:
case SPECTRE_V2_USER_CMD_SECCOMP:
case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
if (IS_ENABLED(CONFIG_SECCOMP))
mode = SPECTRE_V2_USER_SECCOMP;
else
mode = SPECTRE_V2_USER_PRCTL;
break;
}
/* Initialize Indirect Branch Prediction Barrier */
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
switch (cmd) {
case SPECTRE_V2_USER_CMD_FORCE:
case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
static_branch_enable(&switch_mm_always_ibpb);
break;
case SPECTRE_V2_USER_CMD_PRCTL:
case SPECTRE_V2_USER_CMD_AUTO:
case SPECTRE_V2_USER_CMD_SECCOMP:
static_branch_enable(&switch_mm_cond_ibpb);
break;
default:
break;
}
pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
static_key_enabled(&switch_mm_always_ibpb) ?
"always-on" : "conditional");
}
/* If enhanced IBRS is enabled, no STIBP is required */
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return;
/*
* If SMT is not possible or STIBP is not available, clear the STIBP
* mode.
*/
if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
mode = SPECTRE_V2_USER_NONE;
set_mode:
spectre_v2_user = mode;
/* Only print the STIBP mode when SMT possible */
if (smt_possible)
pr_info("%s\n", spectre_v2_user_strings[mode]);
}
static const char * const spectre_v2_strings[] = {
[SPECTRE_V2_NONE] = "Vulnerable",
[SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
[SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
[SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
[SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
[SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
};
static const struct {
const char *option;
enum spectre_v2_mitigation_cmd cmd;
bool secure;
} mitigation_options[] __initconst = {
{ "off", SPECTRE_V2_CMD_NONE, false },
{ "on", SPECTRE_V2_CMD_FORCE, true },
{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
{ "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
{ "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
{ "auto", SPECTRE_V2_CMD_AUTO, false },
};
static void __init spec_v2_print_cond(const char *reason, bool secure)
{
if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
pr_info("%s selected on command line.\n", reason);
}
@ -245,50 +473,30 @@ static inline bool retp_compiler(void)
return __is_defined(RETPOLINE);
}
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
int len = strlen(opt);
return len == arglen && !strncmp(arg, opt, len);
}
static const struct {
const char *option;
enum spectre_v2_mitigation_cmd cmd;
bool secure;
} mitigation_options[] = {
{ "off", SPECTRE_V2_CMD_NONE, false },
{ "on", SPECTRE_V2_CMD_FORCE, true },
{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
{ "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
{ "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
{ "auto", SPECTRE_V2_CMD_AUTO, false },
};
static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
{
enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
char arg[20];
int ret, i;
enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") ||
cpu_mitigations_off())
return SPECTRE_V2_CMD_NONE;
else {
ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
if (ret < 0)
return SPECTRE_V2_CMD_AUTO;
for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
if (!match_option(arg, ret, mitigation_options[i].option))
continue;
cmd = mitigation_options[i].cmd;
break;
}
ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
if (ret < 0)
return SPECTRE_V2_CMD_AUTO;
if (i >= ARRAY_SIZE(mitigation_options)) {
pr_err("unknown option (%s). Switching to AUTO select\n", arg);
return SPECTRE_V2_CMD_AUTO;
}
for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
if (!match_option(arg, ret, mitigation_options[i].option))
continue;
cmd = mitigation_options[i].cmd;
break;
}
if (i >= ARRAY_SIZE(mitigation_options)) {
pr_err("unknown option (%s). Switching to AUTO select\n", arg);
return SPECTRE_V2_CMD_AUTO;
}
if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
@ -305,11 +513,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}
if (mitigation_options[i].secure)
spec2_print_if_secure(mitigation_options[i].option);
else
spec2_print_if_insecure(mitigation_options[i].option);
spec_v2_print_cond(mitigation_options[i].option,
mitigation_options[i].secure);
return cmd;
}
@ -332,6 +537,13 @@ static void __init spectre_v2_select_mitigation(void)
case SPECTRE_V2_CMD_FORCE:
case SPECTRE_V2_CMD_AUTO:
if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
mode = SPECTRE_V2_IBRS_ENHANCED;
/* Force it so VMEXIT will restore correctly */
x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
goto specv2_set_mode;
}
if (IS_ENABLED(CONFIG_RETPOLINE))
goto retpoline_auto;
break;
@ -369,6 +581,7 @@ retpoline_auto:
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
}
specv2_set_mode:
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
@ -383,20 +596,114 @@ retpoline_auto:
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
/* Initialize Indirect Branch Prediction Barrier if supported */
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
}
/*
* Retpoline means the kernel is safe because it has no indirect
* branches. But firmware isn't, so use IBRS to protect that.
* branches. Enhanced IBRS protects firmware too, so enable restricted
* speculation around firmware calls only when Enhanced IBRS isn't
* supported.
*
* Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
* the user might select retpoline on the kernel command line and if
* the CPU supports Enhanced IBRS, the kernel might unintentionally not
* enable IBRS around firmware calls.
*/
if (boot_cpu_has(X86_FEATURE_IBRS)) {
if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
/* Set up IBPB and STIBP depending on the general spectre V2 command */
spectre_v2_user_select_mitigation(cmd);
}
static void update_stibp_msr(void * __unused)
{
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
}
/* Update x86_spec_ctrl_base in case SMT state changed. */
static void update_stibp_strict(void)
{
u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
if (sched_smt_active())
mask |= SPEC_CTRL_STIBP;
if (mask == x86_spec_ctrl_base)
return;
pr_info("Update user space SMT mitigation: STIBP %s\n",
mask & SPEC_CTRL_STIBP ? "always-on" : "off");
x86_spec_ctrl_base = mask;
on_each_cpu(update_stibp_msr, NULL, 1);
}
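In strict mode the STIBP bit lives in x86_spec_ctrl_base itself, so the SMT notifier only has to flip that one bit and broadcast the MSR when it actually changed. A reduced sketch of the mask logic (the base value is hypothetical):

#include <stdio.h>

#define SPEC_CTRL_STIBP (1UL << 1)

static unsigned long spec_ctrl_base = 0x1;	/* hypothetical: IBRS already set */

static void update_stibp_strict(int smt_active)
{
	unsigned long mask = spec_ctrl_base & ~SPEC_CTRL_STIBP;

	if (smt_active)
		mask |= SPEC_CTRL_STIBP;
	if (mask == spec_ctrl_base)
		return;			/* skip the cross-CPU MSR write */
	spec_ctrl_base = mask;
	printf("write SPEC_CTRL=%#lx on every CPU\n", mask);
}

int main(void)
{
	update_stibp_strict(1);		/* SMT on  -> 0x3 */
	update_stibp_strict(0);		/* SMT off -> 0x1 */
	return 0;
}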
/* Update the static key controlling the evaluation of TIF_SPEC_IB */
static void update_indir_branch_cond(void)
{
if (sched_smt_active())
static_branch_enable(&switch_to_cond_stibp);
else
static_branch_disable(&switch_to_cond_stibp);
}
#undef pr_fmt
#define pr_fmt(fmt) fmt
/* Update the static key controlling the MDS CPU buffer clear in idle */
static void update_mds_branch_idle(void)
{
/*
* Enable the idle clearing if SMT is active on CPUs which are
* affected only by MSBDS and not any other MDS variant.
*
* The other variants cannot be mitigated when SMT is enabled, so
* clearing the buffers on idle just to prevent the Store Buffer
* repartitioning leak would be a window dressing exercise.
*/
if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY))
return;
if (sched_smt_active())
static_branch_enable(&mds_idle_clear);
else
static_branch_disable(&mds_idle_clear);
}
#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
void arch_smt_update(void)
{
/* Enhanced IBRS implies STIBP. No update required. */
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return;
mutex_lock(&spec_ctrl_mutex);
switch (spectre_v2_user) {
case SPECTRE_V2_USER_NONE:
break;
case SPECTRE_V2_USER_STRICT:
update_stibp_strict();
break;
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
update_indir_branch_cond();
break;
}
switch (mds_mitigation) {
case MDS_MITIGATION_FULL:
case MDS_MITIGATION_VMWERV:
if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY))
pr_warn_once(MDS_MSG_SMT);
update_mds_branch_idle();
break;
case MDS_MITIGATION_OFF:
break;
}
mutex_unlock(&spec_ctrl_mutex);
}
#undef pr_fmt
@ -413,7 +720,7 @@ enum ssb_mitigation_cmd {
SPEC_STORE_BYPASS_CMD_SECCOMP,
};
static const char *ssb_strings[] = {
static const char * const ssb_strings[] = {
[SPEC_STORE_BYPASS_NONE] = "Vulnerable",
[SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
[SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
@ -423,7 +730,7 @@ static const char *ssb_strings[] = {
static const struct {
const char *option;
enum ssb_mitigation_cmd cmd;
} ssb_mitigation_options[] = {
} ssb_mitigation_options[] __initconst = {
{ "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
{ "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
{ "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
@ -437,7 +744,8 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
char arg[20];
int ret, i;
if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") ||
cpu_mitigations_off()) {
return SPEC_STORE_BYPASS_CMD_NONE;
} else {
ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
@ -507,18 +815,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
if (mode == SPEC_STORE_BYPASS_DISABLE) {
setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
/*
* Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
* a completely different MSR and bit dependent on family.
* Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
* use a completely different MSR and bit dependent on family.
*/
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
!static_cpu_has(X86_FEATURE_AMD_SSBD)) {
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
break;
case X86_VENDOR_AMD:
x86_amd_ssb_disable();
break;
}
}
@ -536,10 +842,25 @@ static void ssb_select_mitigation(void)
#undef pr_fmt
#define pr_fmt(fmt) "Speculation prctl: " fmt
static void task_update_spec_tif(struct task_struct *tsk)
{
/* Force the update of the real TIF bits */
set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE);
/*
* Immediately update the speculation control MSRs for the current
* task, but for a non-current task delay setting the CPU
* mitigation until it is scheduled next.
*
* This can only happen for SECCOMP mitigation. For PRCTL it's
* always the current task.
*/
if (tsk == current)
speculation_ctrl_update_current();
}
static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
{
bool update;
if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
return -ENXIO;
@ -550,28 +871,56 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
if (task_spec_ssb_force_disable(task))
return -EPERM;
task_clear_spec_ssb_disable(task);
update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
task_update_spec_tif(task);
break;
case PR_SPEC_DISABLE:
task_set_spec_ssb_disable(task);
update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
task_update_spec_tif(task);
break;
case PR_SPEC_FORCE_DISABLE:
task_set_spec_ssb_disable(task);
task_set_spec_ssb_force_disable(task);
update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
task_update_spec_tif(task);
break;
default:
return -ERANGE;
}
return 0;
}
/*
* If being set on non-current task, delay setting the CPU
* mitigation until it is next scheduled.
*/
if (task == current && update)
speculative_store_bypass_update_current();
static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
{
switch (ctrl) {
case PR_SPEC_ENABLE:
if (spectre_v2_user == SPECTRE_V2_USER_NONE)
return 0;
/*
* Indirect branch speculation is always disabled in strict
* mode.
*/
if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
return -EPERM;
task_clear_spec_ib_disable(task);
task_update_spec_tif(task);
break;
case PR_SPEC_DISABLE:
case PR_SPEC_FORCE_DISABLE:
/*
* Indirect branch speculation is always allowed when
* mitigation is force disabled.
*/
if (spectre_v2_user == SPECTRE_V2_USER_NONE)
return -EPERM;
if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
return 0;
task_set_spec_ib_disable(task);
if (ctrl == PR_SPEC_FORCE_DISABLE)
task_set_spec_ib_force_disable(task);
task_update_spec_tif(task);
break;
default:
return -ERANGE;
}
return 0;
}
@ -581,6 +930,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
switch (which) {
case PR_SPEC_STORE_BYPASS:
return ssb_prctl_set(task, ctrl);
case PR_SPEC_INDIRECT_BRANCH:
return ib_prctl_set(task, ctrl);
default:
return -ENODEV;
}
@ -591,6 +942,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
{
if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
}
#endif
@ -613,11 +966,35 @@ static int ssb_prctl_get(struct task_struct *task)
}
}
static int ib_prctl_get(struct task_struct *task)
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return PR_SPEC_NOT_AFFECTED;
switch (spectre_v2_user) {
case SPECTRE_V2_USER_NONE:
return PR_SPEC_ENABLE;
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (task_spec_ib_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
if (task_spec_ib_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
case SPECTRE_V2_USER_STRICT:
return PR_SPEC_DISABLE;
default:
return PR_SPEC_NOT_AFFECTED;
}
}
int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
{
switch (which) {
case PR_SPEC_STORE_BYPASS:
return ssb_prctl_get(task);
case PR_SPEC_INDIRECT_BRANCH:
return ib_prctl_get(task);
default:
return -ENODEV;
}
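From user space the new PR_SPEC_INDIRECT_BRANCH control added by ib_prctl_set()/ib_prctl_get() above is driven through the existing speculation prctl pair. A minimal caller; the PR_* values match the uapi definitions, and the fallback defines are only for building against older headers:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SPEC_INDIRECT_BRANCH
#define PR_GET_SPECULATION_CTRL 52
#define PR_SET_SPECULATION_CTRL 53
#define PR_SPEC_INDIRECT_BRANCH 1
#define PR_SPEC_DISABLE         (1UL << 2)
#endif

int main(void)
{
	/* Opt this task out of indirect branch speculation (STIBP). */
	if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
		  PR_SPEC_DISABLE, 0, 0))
		perror("PR_SET_SPECULATION_CTRL");

	printf("ib state: %d\n",
	       prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0));
	return 0;
}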
@ -694,16 +1071,66 @@ static void __init l1tf_select_mitigation(void)
pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n",
half_pa);
pr_info("However, doing so will make a part of your RAM unusable.\n");
pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n");
pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html might help you decide.\n");
return;
}
setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
}
#undef pr_fmt
#define pr_fmt(fmt) fmt
#ifdef CONFIG_SYSFS
static ssize_t mds_show_state(char *buf)
{
#ifdef CONFIG_HYPERVISOR_GUEST
if (x86_hyper) {
return sprintf(buf, "%s; SMT Host state unknown\n",
mds_strings[mds_mitigation]);
}
#endif
if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) {
return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
(mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" :
sched_smt_active() ? "mitigated" : "disabled"));
}
return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
}
static char *stibp_state(void)
{
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return "";
switch (spectre_v2_user) {
case SPECTRE_V2_USER_NONE:
return ", STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
return ", STIBP: forced";
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (static_key_enabled(&switch_to_cond_stibp))
return ", STIBP: conditional";
}
return "";
}
static char *ibpb_state(void)
{
if (boot_cpu_has(X86_FEATURE_IBPB)) {
if (static_key_enabled(&switch_mm_always_ibpb))
return ", IBPB: always-on";
if (static_key_enabled(&switch_mm_cond_ibpb))
return ", IBPB: conditional";
return ", IBPB: disabled";
}
return "";
}
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@ -721,9 +1148,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
return sprintf(buf, "Mitigation: __user pointer sanitization\n");
case X86_BUG_SPECTRE_V2:
return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
stibp_state(),
boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
spectre_v2_module_string());
case X86_BUG_SPEC_STORE_BYPASS:
@ -731,9 +1160,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_L1TF:
if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
return sprintf(buf, "Mitigation: Page Table Inversion\n");
return sprintf(buf, "Mitigation: PTE Inversion\n");
break;
case X86_BUG_MDS:
return mds_show_state(buf);
default:
break;
}
@ -765,4 +1197,9 @@ ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *b
{
return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
}
ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf)
{
return cpu_show_common(dev, attr, buf, X86_BUG_MDS);
}
#endif
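cpu_show_mds() above backs a new sysfs file; reading it is the supported way to check the MDS state at runtime. A small reader, using the standard vulnerabilities directory:

#include <stdio.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/mds", "r");

	if (f && fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* e.g. "Mitigation: Clear CPU buffers; SMT vulnerable" */
	if (f)
		fclose(f);
	return f ? 0 : 1;
}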


@ -709,6 +709,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_STIBP);
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
}
if (cpu_has(c, X86_FEATURE_AMD_SSBD)) {
set_cpu_cap(c, X86_FEATURE_SSBD);
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD);
}
}
void get_cpu_cap(struct cpuinfo_x86 *c)
@ -841,81 +847,95 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#endif
}
static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
{ X86_VENDOR_CENTAUR, 5 },
{ X86_VENDOR_INTEL, 5 },
{ X86_VENDOR_NSC, 5 },
{ X86_VENDOR_ANY, 4 },
#define NO_SPECULATION BIT(0)
#define NO_MELTDOWN BIT(1)
#define NO_SSB BIT(2)
#define NO_L1TF BIT(3)
#define NO_MDS BIT(4)
#define MSBDS_ONLY BIT(5)
#define VULNWL(_vendor, _family, _model, _whitelist) \
{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
#define VULNWL_INTEL(model, whitelist) \
VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist)
#define VULNWL_AMD(family, whitelist) \
VULNWL(AMD, family, X86_MODEL_ANY, whitelist)
static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION),
VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION),
VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION),
VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION),
/* Intel Family 6 */
VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION),
VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION),
VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION),
VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION),
VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION),
VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY),
VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY),
VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY),
VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY),
VULNWL_INTEL(CORE_YONAH, NO_SSB),
VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY),
VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF),
VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF),
VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF),
/* AMD Family 0xf - 0x12 */
VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS),
{}
};
static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
{ X86_VENDOR_AMD },
{}
};
static bool __init cpu_matches(unsigned long which)
{
const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist);
static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
{ X86_VENDOR_CENTAUR, 5, },
{ X86_VENDOR_INTEL, 5, },
{ X86_VENDOR_NSC, 5, },
{ X86_VENDOR_AMD, 0x12, },
{ X86_VENDOR_AMD, 0x11, },
{ X86_VENDOR_AMD, 0x10, },
{ X86_VENDOR_AMD, 0xf, },
{ X86_VENDOR_ANY, 4, },
{}
};
static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
/* in addition to cpu_no_speculation */
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
{}
};
return m && !!(m->driver_data & which);
}
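The whitelist replaces the separate per-issue tables with one table whose driver_data carries the NO_*/MSBDS_ONLY bits, so every query becomes a single match plus a bit test. A reduced model of the lookup, with the table row shrunk to just its flags:

#include <stdio.h>

#define BIT(n)       (1UL << (n))
#define NO_SSB       BIT(2)
#define NO_L1TF      BIT(3)
#define NO_MDS       BIT(4)
#define MSBDS_ONLY   BIT(5)

/* Pretend x86_match_cpu() matched the ATOM_SILVERMONT row above: */
static const unsigned long matched_flags = NO_SSB | NO_L1TF | MSBDS_ONLY;

static int cpu_matches(unsigned long which)
{
	return !!(matched_flags & which);
}

int main(void)
{
	printf("NO_MDS=%d MSBDS_ONLY=%d\n",
	       cpu_matches(NO_MDS), cpu_matches(MSBDS_ONLY));
	return 0;	/* prints NO_MDS=0 MSBDS_ONLY=1 */
}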
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = 0;
if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
!(ia32_cap & ARCH_CAP_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
if (x86_match_cpu(cpu_no_speculation))
if (cpu_matches(NO_SPECULATION))
return;
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
if (x86_match_cpu(cpu_no_meltdown))
if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
if (ia32_cap & ARCH_CAP_IBRS_ALL)
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) {
setup_force_cpu_bug(X86_BUG_MDS);
if (cpu_matches(MSBDS_ONLY))
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
}
if (cpu_matches(NO_MELTDOWN))
return;
/* Rogue Data Cache Load? No! */
@ -924,7 +944,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
if (x86_match_cpu(cpu_no_l1tf))
if (cpu_matches(NO_L1TF))
return;
setup_force_cpu_bug(X86_BUG_L1TF);


@ -14,6 +14,7 @@
#include <asm/bugs.h>
#include <asm/cpu.h>
#include <asm/intel-family.h>
#include <asm/microcode_intel.h>
#ifdef CONFIG_X86_64
#include <linux/topology.h>
@ -102,14 +103,8 @@ static void early_init_intel(struct cpuinfo_x86 *c)
(c->x86 == 0x6 && c->x86_model >= 0x0e))
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) {
unsigned lower_word;
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* Required by the SDM */
sync_core();
rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
}
if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
c->microcode = intel_get_microcode_revision();
/* Now if any of them are set, check the blacklist and clear the lot */
if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||


@ -132,6 +132,11 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
USER
),
MCESEV(
PANIC, "Instruction fetch error in kernel",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
KERNEL
),
#endif
MCESEV(
PANIC, "Action required: unknown MCACOD",

View file

@ -138,6 +138,8 @@ void mce_setup(struct mce *m)
m->socketid = cpu_data(m->extcpu).phys_proc_id;
m->apicid = cpu_data(m->extcpu).initial_apicid;
rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
m->microcode = boot_cpu_data.microcode;
}
DEFINE_PER_CPU(struct mce, injectm);
@ -258,7 +260,7 @@ static void print_mce(struct mce *m)
*/
pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
cpu_data(m->extcpu).microcode);
m->microcode);
/*
* Print out human-readable details about the MCE error,


@ -695,22 +695,26 @@ int apply_microcode_amd(int cpu)
return -1;
/* need to apply patch? */
if (rev >= mc_amd->hdr.patch_id) {
c->microcode = rev;
uci->cpu_sig.rev = rev;
return 0;
}
if (rev >= mc_amd->hdr.patch_id)
goto out;
if (__apply_microcode_amd(mc_amd)) {
pr_err("CPU%d: update failed for patch_level=0x%08x\n",
cpu, mc_amd->hdr.patch_id);
return -1;
}
pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
mc_amd->hdr.patch_id);
uci->cpu_sig.rev = mc_amd->hdr.patch_id;
c->microcode = mc_amd->hdr.patch_id;
rev = mc_amd->hdr.patch_id;
pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
out:
uci->cpu_sig.rev = rev;
c->microcode = rev;
/* Update boot_cpu_data's revision too, if we're on the BSP: */
if (c->cpu_index == boot_cpu_data.cpu_index)
boot_cpu_data.microcode = rev;
return 0;
}


@ -376,15 +376,8 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
csig.pf = 1 << ((val[1] >> 18) & 7);
}
native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* As documented in the SDM: Do a CPUID 1 here */
sync_core();
/* get the current revision from MSR 0x8B */
native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
csig.rev = val[1];
csig.rev = intel_get_microcode_revision();
uci->cpu_sig = csig;
uci->valid = 1;
@ -654,31 +647,37 @@ static inline void print_ucode(struct ucode_cpu_info *uci)
static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
{
struct microcode_intel *mc_intel;
unsigned int val[2];
u32 rev;
mc_intel = uci->mc;
if (mc_intel == NULL)
return 0;
/*
* Save us the MSR write below - which is a particularly expensive
* operation - when the other hyperthread has updated the microcode
* already.
*/
rev = intel_get_microcode_revision();
if (rev >= mc_intel->hdr.rev) {
uci->cpu_sig.rev = rev;
return 0;
}
/* write microcode via MSR 0x79 */
native_wrmsr(MSR_IA32_UCODE_WRITE,
(unsigned long) mc_intel->bits,
(unsigned long) mc_intel->bits >> 16 >> 16);
native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* As documented in the SDM: Do a CPUID 1 here */
sync_core();
/* get the current revision from MSR 0x8B */
native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
if (val[1] != mc_intel->hdr.rev)
rev = intel_get_microcode_revision();
if (rev != mc_intel->hdr.rev)
return -1;
#ifdef CONFIG_X86_64
/* Flush global tlb. This is precaution. */
flush_tlb_early();
#endif
uci->cpu_sig.rev = val[1];
uci->cpu_sig.rev = rev;
if (early)
print_ucode(uci);
@ -852,7 +851,7 @@ static int apply_microcode_intel(int cpu)
{
struct microcode_intel *mc_intel;
struct ucode_cpu_info *uci;
unsigned int val[2];
u32 rev;
int cpu_num = raw_smp_processor_id();
struct cpuinfo_x86 *c = &cpu_data(cpu_num);
@ -873,31 +872,40 @@ static int apply_microcode_intel(int cpu)
if (get_matching_mc(mc_intel, cpu) == 0)
return 0;
/*
* Save us the MSR write below - which is a particularly expensive
* operation - when the other hyperthread has updated the microcode
* already.
*/
rev = intel_get_microcode_revision();
if (rev >= mc_intel->hdr.rev)
goto out;
/* write microcode via MSR 0x79 */
wrmsr(MSR_IA32_UCODE_WRITE,
(unsigned long) mc_intel->bits,
(unsigned long) mc_intel->bits >> 16 >> 16);
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* As documented in the SDM: Do a CPUID 1 here */
sync_core();
rev = intel_get_microcode_revision();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
if (val[1] != mc_intel->hdr.rev) {
if (rev != mc_intel->hdr.rev) {
pr_err("CPU%d update to revision 0x%x failed\n",
cpu_num, mc_intel->hdr.rev);
return -1;
}
pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n",
cpu_num, val[1],
cpu_num, rev,
mc_intel->hdr.date & 0xffff,
mc_intel->hdr.date >> 24,
(mc_intel->hdr.date >> 16) & 0xff);
uci->cpu_sig.rev = val[1];
c->microcode = val[1];
out:
uci->cpu_sig.rev = rev;
c->microcode = rev;
/* Update boot_cpu_data's revision too, if we're on the BSP: */
if (c->cpu_index == boot_cpu_data.cpu_index)
boot_cpu_data.microcode = rev;
return 0;
}


@ -2513,7 +2513,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
return ret;
if (event->attr.precise_ip) {
if (!event->attr.freq) {
if (!(event->attr.freq || event->attr.wakeup_events)) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
if (!(event->attr.sample_type &
~intel_pmu_free_running_flags(event)))


@ -29,6 +29,7 @@
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/nospec-branch.h>
#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>
@ -522,6 +523,9 @@ nmi_restart:
write_cr2(this_cpu_read(nmi_cr2));
if (this_cpu_dec_return(nmi_state))
goto nmi_restart;
if (user_mode(regs))
mds_user_clear_cpu_buffers();
}
NOKPROBE_SYMBOL(do_nmi);


@ -33,6 +33,8 @@
#include <asm/vm86.h>
#include <asm/spec-ctrl.h>
#include "process.h"
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
* no more per-task TSS's. The TSS size is kept cacheline-aligned
@ -165,11 +167,12 @@ int set_tsc_mode(unsigned int val)
return 0;
}
static inline void switch_to_bitmap(struct tss_struct *tss,
struct thread_struct *prev,
static inline void switch_to_bitmap(struct thread_struct *prev,
struct thread_struct *next,
unsigned long tifp, unsigned long tifn)
{
struct tss_struct *tss = this_cpu_ptr(&cpu_tss);
if (tifn & _TIF_IO_BITMAP) {
/*
* Copy the relevant range of the IO bitmap.
@ -303,32 +306,85 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
}
static __always_inline void intel_set_ssb_state(unsigned long tifn)
/*
* Update the MSRs managing speculation control, during context switch.
*
* tifp: Previous task's thread flags
* tifn: Next task's thread flags
*/
static __always_inline void __speculation_ctrl_update(unsigned long tifp,
unsigned long tifn)
{
u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
unsigned long tif_diff = tifp ^ tifn;
u64 msr = x86_spec_ctrl_base;
bool updmsr = false;
wrmsrl(MSR_IA32_SPEC_CTRL, msr);
/*
* If TIF_SSBD is different, select the proper mitigation
* method. Note that if SSBD mitigation is disabled or permanently
* enabled this branch can't be taken because nothing can set
* TIF_SSBD.
*/
if (tif_diff & _TIF_SSBD) {
if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
amd_set_ssb_virt_state(tifn);
} else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
amd_set_core_ssb_state(tifn);
} else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
static_cpu_has(X86_FEATURE_AMD_SSBD)) {
msr |= ssbd_tif_to_spec_ctrl(tifn);
updmsr = true;
}
}
/*
* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled,
* otherwise avoid the MSR write.
*/
if (IS_ENABLED(CONFIG_SMP) &&
static_branch_unlikely(&switch_to_cond_stibp)) {
updmsr |= !!(tif_diff & _TIF_SPEC_IB);
msr |= stibp_tif_to_spec_ctrl(tifn);
}
if (updmsr)
wrmsrl(MSR_IA32_SPEC_CTRL, msr);
}
static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
{
if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
amd_set_ssb_virt_state(tifn);
else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
amd_set_core_ssb_state(tifn);
else
intel_set_ssb_state(tifn);
if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) {
if (task_spec_ssb_disable(tsk))
set_tsk_thread_flag(tsk, TIF_SSBD);
else
clear_tsk_thread_flag(tsk, TIF_SSBD);
if (task_spec_ib_disable(tsk))
set_tsk_thread_flag(tsk, TIF_SPEC_IB);
else
clear_tsk_thread_flag(tsk, TIF_SPEC_IB);
}
/* Return the updated threadinfo flags */
return task_thread_info(tsk)->flags;
}
void speculative_store_bypass_update(unsigned long tif)
void speculation_ctrl_update(unsigned long tif)
{
/* Forced update. Make sure all relevant TIF flags are different */
preempt_disable();
__speculative_store_bypass_update(tif);
__speculation_ctrl_update(~tif, tif);
preempt_enable();
}
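Passing (~tif, tif) makes tif_diff = ~tif ^ tif come out as all ones inside __speculation_ctrl_update(), so every flag tests as changed and the full mitigation state is re-evaluated. A one-line sanity check of that trick:

#include <stdio.h>

int main(void)
{
	unsigned long tif = 0x220;	/* hypothetical thread flags */

	printf("tif_diff=%#lx\n", ~tif ^ tif);	/* all bits set */
	return 0;
}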
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
/* Called from seccomp/prctl update */
void speculation_ctrl_update_current(void)
{
preempt_disable();
speculation_ctrl_update(speculation_ctrl_update_tif(current));
preempt_enable();
}
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev, *next;
unsigned long tifp, tifn;
@ -338,7 +394,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
tifn = READ_ONCE(task_thread_info(next_p)->flags);
tifp = READ_ONCE(task_thread_info(prev_p)->flags);
switch_to_bitmap(tss, prev, next, tifp, tifn);
switch_to_bitmap(prev, next, tifp, tifn);
propagate_user_return_notify(prev_p, next_p);
@ -356,8 +412,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
if ((tifp ^ tifn) & _TIF_NOTSC)
cr4_toggle_bits(X86_CR4_TSD);
if ((tifp ^ tifn) & _TIF_SSBD)
__speculative_store_bypass_update(tifn);
if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) {
__speculation_ctrl_update(tifp, tifn);
} else {
speculation_ctrl_update_tif(prev_p);
tifn = speculation_ctrl_update_tif(next_p);
/* Enforce MSR update to ensure consistent state */
__speculation_ctrl_update(~tifn, tifn);
}
}
/*

arch/x86/kernel/process.h (new file)

@ -0,0 +1,39 @@
// SPDX-License-Identifier: GPL-2.0
//
// Code shared between 32 and 64 bit
#include <asm/spec-ctrl.h>
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);
/*
* This needs to be inline to optimize for the common case where no extra
* work needs to be done.
*/
static inline void switch_to_extra(struct task_struct *prev,
struct task_struct *next)
{
unsigned long next_tif = task_thread_info(next)->flags;
unsigned long prev_tif = task_thread_info(prev)->flags;
if (IS_ENABLED(CONFIG_SMP)) {
/*
* Avoid __switch_to_xtra() invocation when conditional
* STIBP is disabled and the only different bit is
* TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not
* in the TIF_WORK_CTXSW masks.
*/
if (!static_branch_likely(&switch_to_cond_stibp)) {
prev_tif &= ~_TIF_SPEC_IB;
next_tif &= ~_TIF_SPEC_IB;
}
}
/*
* __switch_to_xtra() handles debug registers, i/o bitmaps,
* speculation mitigations etc.
*/
if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT ||
prev_tif & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev, next);
}


@ -55,6 +55,8 @@
#include <asm/switch_to.h>
#include <asm/vm86.h>
#include "process.h"
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
@ -279,12 +281,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
set_iopl_mask(next->iopl);
/*
* Now maybe handle debug registers and/or IO bitmaps
*/
if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);
switch_to_extra(prev_p, next_p);
/*
* Leave lazy mode, flushing any hypercalls made here.


@ -50,6 +50,8 @@
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include "process.h"
asmlinkage extern void ret_from_fork(void);
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
@ -406,12 +408,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* Reload esp0 and ss1. This changes current_thread_info(). */
load_sp0(tss, next);
/*
* Now maybe reload the debug registers and handle I/O bitmaps
*/
if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
switch_to_extra(prev_p, next_p);
#ifdef CONFIG_XEN
/*


@ -61,6 +61,7 @@
#include <asm/alternative.h>
#include <asm/fpu/xstate.h>
#include <asm/trace/mpx.h>
#include <asm/nospec-branch.h>
#include <asm/mpx.h>
#include <asm/vm86.h>
@ -337,6 +338,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
regs->ip = (unsigned long)general_protection;
regs->sp = (unsigned long)&normal_regs->orig_ax;
/*
* This situation can be triggered by userspace via
* modify_ldt(2) and the return does not take the regular
* user space exit, so a CPU buffer clear is required when
* MDS mitigation is enabled.
*/
mds_user_clear_cpu_buffers();
return;
}
#endif


@ -343,7 +343,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD);
F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
F(AMD_SSB_NO) | F(AMD_STIBP);
/* cpuid 0xC0000001.edx */
const u32 kvm_supported_word5_x86_features =
@ -364,7 +365,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES);
F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) |
F(INTEL_STIBP) | F(MD_CLEAR);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@ -607,7 +609,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ebx |= F(VIRT_SSBD);
entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
/*
* The preference is to use SPEC CTRL MSR instead of the
* VIRT_SPEC MSR.
*/
if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
!boot_cpu_has(X86_FEATURE_AMD_SSBD))
entry->ebx |= F(VIRT_SSBD);
break;
}


@ -175,7 +175,7 @@ static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu)
struct kvm_cpuid_entry2 *best;
best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
if (best && (best->ebx & bit(X86_FEATURE_AMD_IBRS)))
if (best && (best->ebx & (bit(X86_FEATURE_AMD_IBRS) | bit(X86_FEATURE_AMD_SSBD))))
return true;
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SPEC_CTRL_SSBD)));


@ -3197,7 +3197,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
return 1;
/* The STIBP bit doesn't fault even if it's not advertised */
if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
return 1;
svm->spec_ctrl = data;
@ -3928,8 +3928,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
clgi();
local_irq_enable();
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
* it's non-zero. Since vmentry is serialising on affected CPUs, there
@@ -3938,6 +3936,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
*/
x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
+local_irq_enable();
asm volatile (
"push %%" _ASM_BP "; \n\t"
"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
@@ -4060,12 +4060,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
-x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
reload_tss(vcpu);
local_irq_disable();
+x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
vcpu->arch.cr2 = svm->vmcb->save.cr2;
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
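
Taken together, the three svm.c hunks move both SPEC_CTRL transitions inside the interrupts-disabled window: the guest value is now written between clgi() and local_irq_enable(), and the host value is restored only after local_irq_disable(), so no host interrupt handler can run with the guest's SPEC_CTRL setting still live. A hedged sketch of what the two helpers boil down to when the guest and host values differ (the real logic, including the VIRT_SPEC_CTRL/SSBD handling, lives in arch/x86/kernel/cpu/bugs.c; the variable names here are illustrative, not the diff's):

/* entry path, IRQs off */
if (guest_spec_ctrl != host_spec_ctrl)
	wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl);
/* ... VMRUN ... VMEXIT ... */
/* exit path, IRQs off again */
if (guest_spec_ctrl != host_spec_ctrl)
	wrmsrl(MSR_IA32_SPEC_CTRL, host_spec_ctrl);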

diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h

@@ -434,13 +434,13 @@ TRACE_EVENT(kvm_apic_ipi,
);
TRACE_EVENT(kvm_apic_accept_irq,
-TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec),
+TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec),
TP_ARGS(apicid, dm, tm, vec),
TP_STRUCT__entry(
__field( __u32, apicid )
__field( __u16, dm )
-__field( __u8, tm )
+__field( __u16, tm )
__field( __u8, vec )
),
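
The widening from __u8 to __u16 matters because some callers hand this tracepoint the raw trigger-mode bit rather than a 0/1 flag: APIC_INT_LEVELTRIG is (1 << 15), and (__u8)(1 << 15) truncates to 0, so level-triggered interrupts were recorded as edge-triggered. With a __u16 field the bit survives intact, since (__u16)(1 << 15) == 0x8000.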

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

@@ -2972,6 +2972,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
| KVM_VCPUEVENT_VALID_SMM))
return -EINVAL;
if (events->exception.injected &&
(events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
return -EINVAL;
/* INITs are latched while in SMM */
if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
(events->smi.smm || events->smi.pending) &&

diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c

@@ -10,6 +10,7 @@
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
#undef pr_fmt
#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
@@ -297,7 +298,8 @@ void __init kaiser_check_boottime_disable(void)
goto skip;
}
-if (cmdline_find_option_bool(boot_command_line, "nopti"))
+if (cmdline_find_option_bool(boot_command_line, "nopti") ||
+cpu_mitigations_off())
goto disable;
skip:
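
cpu_mitigations_off() is the generic query for the kernel-wide "mitigations=" boot parameter, so "mitigations=off" now disables KPTI here just as "nopti" does. For reference, a minimal sketch of the helper as it is defined upstream in kernel/cpu.c:

/* returns true when the user booted with mitigations=off */
bool cpu_mitigations_off(void)
{
	return cpu_mitigations == CPU_MITIGATIONS_OFF;
}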

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c

@@ -247,7 +247,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
if (pgd_val(pgd) != 0) {
pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
-pgdp[i] = native_make_pgd(0);
+pgd_clear(&pgdp[i]);
paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
pmd_free(mm, pmd);
@@ -424,7 +424,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
int changed = !pte_same(*ptep, entry);
if (changed && dirty) {
-*ptep = entry;
+set_pte(ptep, entry);
pte_update_defer(vma->vm_mm, address, ptep);
}
@@ -441,7 +441,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
if (changed && dirty) {
-*pmdp = entry;
+set_pmd(pmdp, entry);
pmd_update_defer(vma->vm_mm, address, pmdp);
/*
* We had a write-protection fault here and changed the pmd

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c

@@ -30,6 +30,12 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/
/*
* Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
* stored in cpu_tlb_state.last_user_mm_ibpb.
*/
#define LAST_USER_MM_IBPB 0x1UL
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
struct flush_tlb_info {
@@ -101,41 +107,101 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
local_irq_restore(flags);
}
static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
{
unsigned long next_tif = task_thread_info(next)->flags;
unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
return (unsigned long)next->mm | ibpb;
}
static void cond_ibpb(struct task_struct *next)
{
if (!next || !next->mm)
return;
/*
* Both, the conditional and the always IBPB mode use the mm
* pointer to avoid the IBPB when switching between tasks of the
* same process. Using the mm pointer instead of mm->context.ctx_id
* opens a hypothetical hole vs. mm_struct reuse, which is more or
* less impossible to control by an attacker. Aside of that it
* would only affect the first schedule so the theoretically
* exposed data is not really interesting.
*/
if (static_branch_likely(&switch_mm_cond_ibpb)) {
unsigned long prev_mm, next_mm;
/*
* This is a bit more complex than the always mode because
* it has to handle two cases:
*
* 1) Switch from a user space task (potential attacker)
* which has TIF_SPEC_IB set to a user space task
* (potential victim) which has TIF_SPEC_IB not set.
*
* 2) Switch from a user space task (potential attacker)
* which has TIF_SPEC_IB not set to a user space task
* (potential victim) which has TIF_SPEC_IB set.
*
* This could be done by unconditionally issuing IBPB when
* a task which has TIF_SPEC_IB set is either scheduled in
* or out. Though that results in two flushes when:
*
* - the same user space task is scheduled out and later
* scheduled in again and only a kernel thread ran in
* between.
*
* - a user space task belonging to the same process is
* scheduled in after a kernel thread ran in between
*
* - a user space task belonging to the same process is
* scheduled in immediately.
*
* Optimize this with reasonably small overhead for the
* above cases. Mangle the TIF_SPEC_IB bit into the mm
* pointer of the incoming task which is stored in
* cpu_tlbstate.last_user_mm_ibpb for comparison.
*/
next_mm = mm_mangle_tif_spec_ib(next);
prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
/*
* Issue IBPB only if the mm's are different and one or
* both have the IBPB bit set.
*/
if (next_mm != prev_mm &&
(next_mm | prev_mm) & LAST_USER_MM_IBPB)
indirect_branch_prediction_barrier();
this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
}
if (static_branch_unlikely(&switch_mm_always_ibpb)) {
/*
* Only flush when switching to a user space task with a
* different context than the user space task which ran
* last on this CPU.
*/
if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
indirect_branch_prediction_barrier();
this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
}
}
}
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
unsigned cpu = smp_processor_id();
if (likely(prev != next)) {
-u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
-/*
-* Avoid user/user BTB poisoning by flushing the branch
-* predictor when switching between processes. This stops
-* one process from doing Spectre-v2 attacks on another.
-*
-* As an optimization, flush indirect branches only when
-* switching into processes that disable dumping. This
-* protects high value processes like gpg, without having
-* too high performance overhead. IBPB is *expensive*!
-*
-* This will not flush branches when switching into kernel
-* threads. It will also not flush if we switch to idle
-* thread and back to the same process. It will flush if we
-* switch to a different non-dumpable process.
-*/
-if (tsk && tsk->mm &&
-tsk->mm->context.ctx_id != last_ctx_id &&
-get_dumpable(tsk->mm) != SUID_DUMP_USER)
-indirect_branch_prediction_barrier();
-/*
-* Record last user mm's context id, so we can avoid
-* flushing branch buffer with IBPB if we switch back
-* to the same user.
-*/
-if (next != &init_mm)
-this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+cond_ibpb(tsk);
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
this_cpu_write(cpu_tlbstate.active_mm, next);
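
A worked example of the mangling scheme above, with an illustrative mm address: mm_struct pointers are at least word-aligned, so bit 0 is always free. For an mm at 0xffff880012345000, a task with TIF_SPEC_IB set mangles to next_mm == 0xffff880012345001, and one without it to 0xffff880012345000. Threads of one process therefore differ at most in bit 0, and the check

if (next_mm != prev_mm && (next_mm | prev_mm) & LAST_USER_MM_IBPB)
	indirect_branch_prediction_barrier();

fires only when the mm actually changes and at least one side requested IBPB, which covers exactly the two attacker/victim switch cases enumerated in the comment while skipping same-process and kernel-thread switches.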

diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c

@@ -51,38 +51,52 @@ static int eject_tray(struct ata_device *dev)
/* Per the spec, only slot type and drawer type ODD can be supported */
static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev)
{
-char buf[16];
+char *buf;
unsigned int ret;
-struct rm_feature_desc *desc = (void *)(buf + 8);
+struct rm_feature_desc *desc;
struct ata_taskfile tf;
static const char cdb[] = { GPCMD_GET_CONFIGURATION,
2, /* only 1 feature descriptor requested */
0, 3, /* 3, removable medium feature */
0, 0, 0,/* reserved */
-0, sizeof(buf),
+0, 16,
0, 0, 0,
};
+buf = kzalloc(16, GFP_KERNEL);
+if (!buf)
+return ODD_MECH_TYPE_UNSUPPORTED;
+desc = (void *)(buf + 8);
ata_tf_init(dev, &tf);
tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
tf.command = ATA_CMD_PACKET;
tf.protocol = ATAPI_PROT_PIO;
-tf.lbam = sizeof(buf);
+tf.lbam = 16;
ret = ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
-buf, sizeof(buf), 0);
-if (ret)
+buf, 16, 0);
+if (ret) {
+kfree(buf);
return ODD_MECH_TYPE_UNSUPPORTED;
+}
-if (be16_to_cpu(desc->feature_code) != 3)
+if (be16_to_cpu(desc->feature_code) != 3) {
+kfree(buf);
return ODD_MECH_TYPE_UNSUPPORTED;
+}
-if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1)
+if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1) {
+kfree(buf);
return ODD_MECH_TYPE_SLOT;
-else if (desc->mech_type == 1 && desc->load == 0 && desc->eject == 1)
+} else if (desc->mech_type == 1 && desc->load == 0 &&
+desc->eject == 1) {
+kfree(buf);
return ODD_MECH_TYPE_DRAWER;
-else
+} else {
+kfree(buf);
return ODD_MECH_TYPE_UNSUPPORTED;
+}
}
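
Two coordinated changes above: the buffer moves from the stack to the heap because memory handed to ata_exec_internal() for DMA must be DMA-safe (a stack buffer is not, particularly once stacks are vmalloc'ed), and every sizeof(buf) becomes a literal 16 because after the switch from `char buf[16]` to `char *buf`, sizeof(buf) would silently evaluate to the size of a pointer. The resulting shape, as a sketch:

char *buf = kzalloc(16, GFP_KERNEL);	/* DMA-safe, unlike the stack */
if (!buf)
	return ODD_MECH_TYPE_UNSUPPORTED;
/* ... issue the ATAPI GET CONFIGURATION command, parse desc ... */
kfree(buf);	/* every return path must free it, as above */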
/* Test if ODD is zero power ready by sense code */

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c

@@ -530,11 +530,18 @@ ssize_t __weak cpu_show_l1tf(struct device *dev,
return sprintf(buf, "Not affected\n");
}
ssize_t __weak cpu_show_mds(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "Not affected\n");
}
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@@ -542,6 +549,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_spectre_v2.attr,
&dev_attr_spec_store_bypass.attr,
&dev_attr_l1tf.attr,
&dev_attr_mds.attr,
NULL
};
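
The __weak stub above is the fallback for architectures that do not implement an MDS report; x86 overrides cpu_show_mds() with the real mitigation state. Illustrative userspace view once the attribute is registered (output depends on the CPU and kernel):

$ cat /sys/devices/system/cpu/vulnerabilities/mds
Not affected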

diff --git a/drivers/block/loop.c b/drivers/block/loop.c

@@ -82,7 +82,6 @@
static DEFINE_IDR(loop_index_idr);
static DEFINE_MUTEX(loop_index_mutex);
-static DEFINE_MUTEX(loop_ctl_mutex);
static int max_part;
static int part_shift;
@@ -1045,7 +1044,7 @@ static int loop_clr_fd(struct loop_device *lo)
*/
if (atomic_read(&lo->lo_refcnt) > 1) {
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
-mutex_unlock(&loop_ctl_mutex);
+mutex_unlock(&lo->lo_ctl_mutex);
return 0;
}
@@ -1094,12 +1093,12 @@ static int loop_clr_fd(struct loop_device *lo)
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
loop_unprepare_queue(lo);
-mutex_unlock(&loop_ctl_mutex);
+mutex_unlock(&lo->lo_ctl_mutex);
/*
-* Need not hold loop_ctl_mutex to fput backing file.
-* Calling fput holding loop_ctl_mutex triggers a circular
+* Need not hold lo_ctl_mutex to fput backing file.
+* Calling fput holding lo_ctl_mutex triggers a circular
* lock dependency possibility warning as fput can take
-* bd_mutex which is usually taken before loop_ctl_mutex.
+* bd_mutex which is usually taken before lo_ctl_mutex.
*/
fput(filp);
return 0;
@@ -1362,7 +1361,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
struct loop_device *lo = bdev->bd_disk->private_data;
int err;
-mutex_lock_nested(&loop_ctl_mutex, 1);
+mutex_lock_nested(&lo->lo_ctl_mutex, 1);
switch (cmd) {
case LOOP_SET_FD:
err = loop_set_fd(lo, mode, bdev, arg);
@@ -1371,7 +1370,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
err = loop_change_fd(lo, bdev, arg);
break;
case LOOP_CLR_FD:
-/* loop_clr_fd would have unlocked loop_ctl_mutex on success */
+/* loop_clr_fd would have unlocked lo_ctl_mutex on success */
err = loop_clr_fd(lo);
if (!err)
goto out_unlocked;
@@ -1407,7 +1406,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
default:
err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
}
-mutex_unlock(&loop_ctl_mutex);
+mutex_unlock(&lo->lo_ctl_mutex);
out_unlocked:
return err;
@@ -1540,16 +1539,16 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
switch(cmd) {
case LOOP_SET_STATUS:
-mutex_lock(&loop_ctl_mutex);
+mutex_lock(&lo->lo_ctl_mutex);
err = loop_set_status_compat(
lo, (const struct compat_loop_info __user *) arg);
-mutex_unlock(&loop_ctl_mutex);
+mutex_unlock(&lo->lo_ctl_mutex);
break;
case LOOP_GET_STATUS:
-mutex_lock(&loop_ctl_mutex);
+mutex_lock(&lo->lo_ctl_mutex);
err = loop_get_status_compat(
lo, (struct compat_loop_info __user *) arg);
-mutex_unlock(&loop_ctl_mutex);
+mutex_unlock(&lo->lo_ctl_mutex);
break;
case LOOP_SET_CAPACITY:
case LOOP_CLR_FD:
@@ -1593,7 +1592,7 @@ static void __lo_release(struct loop_device *lo)
if (atomic_dec_return(&lo->lo_refcnt))
return;
-mutex_lock(&loop_ctl_mutex);
+mutex_lock(&lo->lo_ctl_mutex);
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
/*
* In autoclear mode, stop the loop thread
@@ -1610,7 +1609,7 @@ static void __lo_release(struct loop_device *lo)
loop_flush(lo);
}
-mutex_unlock(&loop_ctl_mutex);
+mutex_unlock(&lo->lo_ctl_mutex);
}
static void lo_release(struct gendisk *disk, fmode_t mode)
@@ -1656,10 +1655,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data)
struct loop_device *lo = ptr;
struct loop_func_table *xfer = data;
-mutex_lock(&loop_ctl_mutex);
+mutex_lock(&lo->lo_ctl_mutex);
if (lo->lo_encryption == xfer)
loop_release_xfer(lo);
-mutex_unlock(&loop_ctl_mutex);
+mutex_unlock(&lo->lo_ctl_mutex);
return 0;
}
@@ -1821,6 +1820,7 @@ static int loop_add(struct loop_device **l, int i)
if (!part_shift)
disk->flags |= GENHD_FL_NO_PART_SCAN;
disk->flags |= GENHD_FL_EXT_DEVT;
mutex_init(&lo->lo_ctl_mutex);
atomic_set(&lo->lo_refcnt, 0);
lo->lo_number = i;
spin_lock_init(&lo->lo_lock);
@@ -1933,19 +1933,19 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
ret = loop_lookup(&lo, parm);
if (ret < 0)
break;
-mutex_lock(&loop_ctl_mutex);
+mutex_lock(&lo->lo_ctl_mutex);
if (lo->lo_state != Lo_unbound) {
ret = -EBUSY;
-mutex_unlock(&loop_ctl_mutex);
+mutex_unlock(&lo->lo_ctl_mutex);
break;
}
if (atomic_read(&lo->lo_refcnt) > 0) {
ret = -EBUSY;
-mutex_unlock(&loop_ctl_mutex);
+mutex_unlock(&lo->lo_ctl_mutex);
break;
}
lo->lo_disk->private_data = NULL;
-mutex_unlock(&loop_ctl_mutex);
+mutex_unlock(&lo->lo_ctl_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
loop_remove(lo);
break;
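
One detail worth noting in the restored per-device locking: mutex_lock_nested(&lo->lo_ctl_mutex, 1) in lo_ioctl() behaves exactly like mutex_lock() at runtime; the subclass argument only tells lockdep to treat the acquisition as a separate class, which avoids false-positive deadlock reports when one loop device is stacked on top of another:

/* lockdep subclass 1: lo-on-lo stacking must not self-report */
mutex_lock_nested(&lo->lo_ctl_mutex, 1);
/* ... */
mutex_unlock(&lo->lo_ctl_mutex);	/* unlock has no nested variant */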

diff --git a/drivers/block/loop.h b/drivers/block/loop.h

@@ -55,6 +55,7 @@ struct loop_device {
spinlock_t lo_lock;
int lo_state;
struct mutex lo_ctl_mutex;
struct kthread_worker worker;
struct task_struct *worker_task;
bool use_dio;

diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c

@@ -1062,6 +1062,8 @@ static int ace_setup(struct ace_device *ace)
return 0;
err_read:
/* prevent double queue cleanup */
ace->gd->queue = NULL;
put_disk(ace->gd);
err_alloc_disk:
blk_cleanup_queue(ace->queue);
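
The in-diff comment states the intent: prevent double queue cleanup. A hedged reading of the fix: once ace->gd->queue has been assigned, the disk release triggered by put_disk() also drops a reference on that queue, after which the blk_cleanup_queue() under err_alloc_disk would tear it down a second time. Clearing the pointer before put_disk() leaves blk_cleanup_queue() as the single owner of the queue teardown on this error path.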

diff --git a/drivers/gpu/ipu-v3/ipu-dp.c b/drivers/gpu/ipu-v3/ipu-dp.c

@@ -195,7 +195,8 @@ int ipu_dp_setup_channel(struct ipu_dp *dp,
ipu_dp_csc_init(flow, flow->foreground.in_cs, flow->out_cs,
DP_COM_CONF_CSC_DEF_BOTH);
} else {
-if (flow->foreground.in_cs == flow->out_cs)
+if (flow->foreground.in_cs == IPUV3_COLORSPACE_UNKNOWN ||
+flow->foreground.in_cs == flow->out_cs)
/*
* foreground identical to output, apply color
* conversion on background
@@ -261,6 +262,8 @@ void ipu_dp_disable_channel(struct ipu_dp *dp)
struct ipu_dp_priv *priv = flow->priv;
u32 reg, csc;
dp->in_cs = IPUV3_COLORSPACE_UNKNOWN;
if (!dp->foreground)
return;
@@ -268,8 +271,9 @@ void ipu_dp_disable_channel(struct ipu_dp *dp)
reg = readl(flow->base + DP_COM_CONF);
csc = reg & DP_COM_CONF_CSC_DEF_MASK;
-if (csc == DP_COM_CONF_CSC_DEF_FG)
-reg &= ~DP_COM_CONF_CSC_DEF_MASK;
+reg &= ~DP_COM_CONF_CSC_DEF_MASK;
+if (csc == DP_COM_CONF_CSC_DEF_BOTH || csc == DP_COM_CONF_CSC_DEF_BG)
+reg |= DP_COM_CONF_CSC_DEF_BG;
reg &= ~DP_COM_CONF_FG_EN;
writel(reg, flow->base + DP_COM_CONF);
@@ -350,6 +354,8 @@ int ipu_dp_init(struct ipu_soc *ipu, struct device *dev, unsigned long base)
mutex_init(&priv->mutex);
for (i = 0; i < IPUV3_NUM_FLOWS; i++) {
priv->flow[i].background.in_cs = IPUV3_COLORSPACE_UNKNOWN;
priv->flow[i].foreground.in_cs = IPUV3_COLORSPACE_UNKNOWN;
priv->flow[i].foreground.foreground = true;
priv->flow[i].base = priv->base + ipu_dp_flow_base[i];
priv->flow[i].priv = priv;

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c

@@ -1058,10 +1058,15 @@ static int hid_debug_rdesc_show(struct seq_file *f, void *p)
seq_printf(f, "\n\n");
/* dump parsed data and input mappings */
if (down_interruptible(&hdev->driver_input_lock))
return 0;
hid_dump_device(hdev, f);
seq_printf(f, "\n");
hid_dump_input_mapping(hdev, f);
up(&hdev->driver_input_lock);
return 0;
}
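
This is the standard pattern for a debugfs/seq_file show() callback that can race with device removal: take the same lock the removal path takes, but interruptibly, so a reader blocked behind a disappearing device can still be killed. down_interruptible() returns nonzero (-EINTR) when the sleep is interrupted by a signal, hence the early bail-out:

if (down_interruptible(&hdev->driver_input_lock))
	return 0;	/* interrupted: dump nothing rather than block */
/* ... hdev's parsed report data is stable here ... */
up(&hdev->driver_input_lock);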

Some files were not shown because too many files changed in this diff.