* msm-4.4/tmp-2bf7955:
Linux 4.4.8
Revert "usb: hub: do not clear BOS field during reset device"
usbvision: fix crash on detecting device with invalid configuration
staging: android: ion: Set the length of the DMA sg entries in buffer
Revert "PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()"
Revert "PCI: Add helpers to manage pci_dev->irq and pci_dev->irq_managed"
Revert "x86/PCI: Don't alloc pcibios-irq when MSI is enabled"
HID: usbhid: fix inconsistent reset/resume/reset-resume behavior
HID: wacom: fix Bamboo ONE oops
ALSA: usb-audio: Skip volume controls triggers hangup on Dell USB Dock
ALSA: usb-audio: Add a quirk for Plantronics BT300
ALSA: usb-audio: Add a sample rate quirk for Phoenix Audio TMX320
ALSA: hda/realtek - Enable the ALC292 dock fixup on the Thinkpad T460s
ALSA: hda - fix front mic problem for a HP desktop
ALSA: hda - Fix headset support and noise on HP EliteBook 755 G2
ALSA: hda - Fixup speaker pass-through control for nid 0x14 on ALC225
mmc: sdhci-pci: Add support and PCI IDs for more Broxton host controllers
perf: Cure event->pending_disable race
perf: Do not double free
arm64: replace read_lock to rcu lock in call_step_hook
Btrfs: fix file/data loss caused by fsync after rename and new inode
iommu: Don't overwrite domain pointer when there is no default_domain
ext4: ignore quota mount options if the quota feature is enabled
ext4: add lockdep annotations for i_data_sem
btrfs: fix crash/invalid memory access on fsync when using overlayfs
nfs: use file_dentry()
fs: add file_dentry()
sd: Fix excessive capacity printing on devices with blocks bigger than 512 bytes
iio: gyro: bmg160: fix endianness when reading axes
iio: gyro: bmg160: fix buffer read values
iio: accel: bmc150: fix endianness when reading axes
iio: st_magn: always define ST_MAGN_TRIGGER_SET_STATE
usb: renesas_usbhs: fix to avoid using a disabled ep in usbhsg_queue_done()
usb: renesas_usbhs: disable TX IRQ before starting TX DMAC transfer
usb: renesas_usbhs: avoid NULL pointer derefernce in usbhsf_pkt_handler()
mac80211: fix txq queue related crashes
mac80211: fix unnecessary frame drops in mesh fwding
mac80211: fix ibss scan parameters
mac80211: avoid excessive stack usage in sta_info
mac80211: properly deal with station hashtable insert errors
virtio: virtio 1.0 cs04 spec compliance for reset
rbd: use GFP_NOIO consistently for request allocations
pcmcia: db1xxx_ss: fix last irq_to_gpio user
v4l: vsp1: Set the SRU CTRL0 register when starting the stream
coda: fix error path in case of missing pdata on non-DT platform
au0828: Fix dev_state handling
au0828: fix au0828_v4l2_close() dev_state race condition
pinctrl: freescale: imx: fix bogus check of of_iomap() return value
pinctrl: nomadik: fix pull debug print inversion
pinctrl: sunxi: Fix A33 external interrupts not working
pinctrl: sh-pfc: only use dummy states for non-DT platforms
pinctrl: pistachio: fix mfio84-89 function description and pinmux.
MIPS: Fix MSA ld unaligned failure cases
KVM: x86: reduce default value of halt_poll_ns parameter
KVM: x86: Inject pending interrupt even if pending nmi exist
cdc-acm: fix NULL pointer reference
USB: uas: Add a new NO_REPORT_LUNS quirk
USB: uas: Limit qdepth at the scsi-host level
mpls: find_outdev: check for err ptr in addition to NULL check
ipv6: Count in extension headers in skb->network_header
ip6_tunnel: set rtnl_link_ops before calling register_netdevice
ipv6: l2tp: fix a potential issue in l2tp_ip6_recv
ipv4: l2tp: fix a potential issue in l2tp_ip_recv
tuntap: restore default qdisc
tun, bpf: fix suspicious RCU usage in tun_{attach, detach}_filter
rtnl: fix msg size calculation in if_nlmsg_size()
bridge: Allow set bridge ageing time when switchdev disabled
ipv6: udp: fix UDP_MIB_IGNOREDMULTI updates
qmi_wwan: add "D-Link DWM-221 B1" device id
xfrm: Fix crash observed during device unregistration and decryption
ppp: take reference on channels netns
ipv4: initialize flowi4_flags before calling fib_lookup()
ipv4: fix broadcast packets reception
bonding: fix bond_get_stats()
net: bcmgenet: fix dma api length mismatch
qlge: Fix receive packets drop.
tcp/dccp: remove obsolete WARN_ON() in icmp handlers
ppp: ensure file->private_data can't be overridden
ath9k: fix buffer overrun for ar9287
farsync: fix off-by-one bug in fst_add_one
mlx4: add missing braces in verify_qp_parameters
net: Fix use after free in the recvmmsg exit path
ipv4: Don't do expensive useless work during inetdev destroy.
bridge: allow zero ageing time
rocker: set FDB cleanup timer according to lowest ageing time
mlxsw: spectrum: Check requested ageing time is valid
macvtap: always pass ethernet header in linear
qlcnic: Fix mailbox completion handling during spurious interrupt
qlcnic: Remove unnecessary usage of atomic_t
sh_eth: advance 'rxdesc' later in sh_eth_ring_format()
sh_eth: fix NULL pointer dereference in sh_eth_ring_format()
bpf: avoid copying junk bytes in bpf_get_current_comm()
packet: validate variable length ll headers
ax25: add link layer header validation function
net: validate variable length ll headers
ppp: release rtnl mutex when interface creation fails
tcp: fix tcpi_segs_in after connection establishment
udp6: fix UDP/IPv6 encap resubmit path
usbnet: cleanup after bind() in probe()
cdc_ncm: toggle altsetting to force reset before setup
vxlan: fix missing options_len update on RX with collect metadata
ipv6: re-enable fragment header matching in ipv6_find_hdr
qmi_wwan: add Sierra Wireless EM74xx device ID
tipc: Revert "tipc: use existing sk_write_queue for outgoing packet chain"
mld, igmp: Fix reserved tailroom calculation
sctp: lack the check for ports in sctp_v6_cmp_addr
net: fix bridge multicast packet checksum validation
net: qca_spi: clear IFF_TX_SKB_SHARING
net: qca_spi: Don't clear IFF_BROADCAST
net: vrf: Remove direct access to skb->data
net: jme: fix suspend/resume on JMC260
ipv4: only create late gso-skb if skb is already set up with CHECKSUM_PARTIAL
tunnel: Clear IPCB(skb)->opt before dst_link_failure called
tcp: convert cached rtt from usec to jiffies when feeding initial rto
xen/events: Mask a moving irq
drm/amdgpu/gmc: use proper register for vram type on Fiji
drm/amdgpu/gmc: move vram type fetching into sw_init
drm/radeon: add a dpm quirk for all R7 370 parts
drm/radeon: add another R7 370 quirk
drm/radeon: add a dpm quirk for sapphire Dual-X R7 370 2G D5
drm/udl: Use unlocked gem unreferencing
drm/dp: move hw_mutex up the call stack
arm64: opcodes.h: Add arm big-endian config options before including arm header
compiler-gcc: disable -ftracer for __noclone functions
libnvdimm, pfn: fix uuid validation
libnvdimm: fix smart data retrieval
powerpc/mm: Fixup preempt underflow with huge pages
mm: fix invalid node in alloc_migrate_target()
ALSA: hda - Apply fix for white noise on Asus N550JV, too
ALSA: hda - Fix white noise on Asus N750JV headphone
ALSA: hda - Asus N750JV external subwoofer fixup
ALSA: timer: Use mod_timer() for rearming the system timer
parisc: Unbreak handling exceptions from kernel modules
parisc: Fix kernel crash with reversed copy_from_user()
parisc: Avoid function pointers for kernel exception routines
PKCS#7: pkcs7_validate_trust(): initialize the _trusted output argument
hwmon: (max1111) Return -ENODEV from max1111_read_channel if not instantiated
Linux 4.4.7
perf/x86/intel: Fix PEBS data source interpretation on Nehalem/Westmere
perf/x86/intel: Use PAGE_SIZE for PEBS buffer size on Core2
perf/x86/intel: Fix PEBS warning by only restoring active PMU in pmi
perf/x86/pebs: Add workaround for broken OVFL status on HSW+
sched/cputime: Fix steal time accounting vs. CPU hotplug
scsi_common: do not clobber fixed sense information
PM / sleep: Clear pm_suspend_global_flags upon hibernate
intel_idle: prevent SKL-H boot failure when C8+C9+C10 enabled
mtd: onenand: fix deadlock in onenand_block_markbad
mm/page_alloc: prevent merging between isolated and other pageblocks
ocfs2/dlm: fix BUG in dlm_move_lockres_to_recovery_list
ocfs2/dlm: fix race between convert and recovery
Input: ati_remote2 - fix crashes on detecting device with invalid descriptor
Input: ims-pcu - sanity check against missing interfaces
Input: synaptics - handle spurious release of trackstick buttons, again
writeback, cgroup: fix use of the wrong bdi_writeback which mismatches the inode
writeback, cgroup: fix premature wb_put() in locked_inode_to_wb_and_lock_list()
ACPI / PM: Runtime resume devices when waking from hibernate
ARM: dts: at91: sama5d4 Xplained: don't disable hsmci regulator
ARM: dts: at91: sama5d3 Xplained: don't disable hsmci regulator
nfsd: fix deadlock secinfo+readdir compound
nfsd4: fix bad bounds checking
iser-target: Rework connection termination
iser-target: Separate flows for np listeners and connections cma events
iser-target: Add new state ISER_CONN_BOUND to isert_conn
iser-target: Fix identification of login rx descriptor type
target: Fix target_release_cmd_kref shutdown comp leak
clk: bcm2835: Fix setting of PLL divider clock rates
clk: rockchip: add hclk_cpubus to the list of rk3188 critical clocks
clk: rockchip: rk3368: fix hdmi_cec gate-register
clk: rockchip: rk3368: fix parents of video encoder/decoder
clk: rockchip: rk3368: fix cpuclk core dividers
clk: rockchip: rk3368: fix cpuclk mux bit of big cpu-cluster
mmc: sdhci: Fix override of timeout clk wrt max_busy_timeout
mmc: sdhci: fix data timeout (part 2)
mmc: sdhci: fix data timeout (part 1)
mmc: mmc_spi: Add Card Detect comments and fix CD GPIO case
mmc: block: fix ABI regression of mmc_blk_ioctl
ideapad-laptop: Add ideapad Y700 (15) to the no_hw_rfkill DMI list
MAINTAINERS: Update mailing list and web page for hwmon subsystem
kbuild/mkspec: fix grub2 installkernel issue
scripts/kconfig: allow building with make 3.80 again
scripts/coccinelle: modernize &
bitops: Do not default to __clear_bit() for __clear_bit_unlock()
tracing: Fix trace_printk() to print when not using bprintk()
tracing: Fix crash from reading trace_pipe with sendfile
tracing: Have preempt(irqs)off trace preempt disabled functions
IB/ipoib: fix for rare multicast join race condition
drm/amdgpu: include the right version of gmc header files for iceland
drm/amdgpu: disable runtime pm on PX laptops without dGPU power control
drm/radeon: Don't drop DP 2.7 Ghz link setup on some cards.
drm/radeon: disable runtime pm on PX laptops without dGPU power control
iwlwifi: mvm: Fix paging memory leak
ipr: Fix regression when loading firmware
ipr: Fix out-of-bounds null overwrite
rapidio/rionet: fix deadlock on SMP
fs/coredump: prevent fsuid=0 dumps into user-controlled directories
fuse: Add reference counting for fuse_io_priv
fuse: do not use iocb after it may have been freed
md: multipath: don't hardcopy bio in .make_request path
md/raid5: preserve STRIPE_PREREAD_ACTIVE in break_stripe_batch_list
raid10: include bio_end_io_list in nr_queued to prevent freeze_array hang
RAID5: revert e9e4c377e2 to fix a livelock
RAID5: check_reshape() shouldn't call mddev_suspend
md/raid5: Compare apples to apples (or sectors to sectors)
raid1: include bio_end_io_list in nr_queued to prevent freeze_array hang
xfs: fix two memory leaks in xfs_attr_list.c error paths
quota: Fix possible GPF due to uninitialised pointers
ARC: bitops: Remove non relevant comments
ARC: [BE] readl()/writel() to work in Big Endian CPU configuration
xtensa: clear all DBREAKC registers on start
xtensa: fix preemption in {clear,copy}_user_highpage
xtensa: ISS: don't hang if stdin EOF is reached
splice: handle zero nr_pages in splice_to_pipe()
vfs: show_vfsstat: do not ignore errors from show_devname method
of: alloc anywhere from memblock if range not specified
net: mvneta: enable change MAC address when interface is up
cgroup: ignore css_sets associated with dead cgroups during migration
Bluetooth: Fix potential buffer overflow with Add Advertising
Bluetooth: Add new AR3012 ID 0489:e095
watchdog: rc32434_wdt: fix ioctl error handling
watchdog: don't run proc_watchdog_update if new value is same as old
ia64: define ioremap_uc()
mm: memcontrol: reclaim and OOM kill when shrinking memory.max below usage
mm: memcontrol: reclaim when shrinking memory.high below usage
bcache: fix cache_set_flush() NULL pointer dereference on OOM
bcache: fix race of writeback thread starting before complete initialization
bcache: cleaned up error handling around register_cache()
IB/srpt: Simplify srpt_handle_tsk_mgmt()
brd: Fix discard request processing
jbd2: fix FS corruption possibility in jbd2_journal_destroy() on umount path
tools/hv: Use include/uapi with __EXPORTED_HEADERS__
ALSA: hda - Fix unconditional GPIO toggle via automute
ALSA: hda - fix the mic mute button and led problem for a Lenovo AIO
ALSA: hda - Don't handle ELD notify from invalid port
ALSA: intel8x0: Add clock quirk entry for AD1981B on IBM ThinkPad X41.
ALSA: pcm: Avoid "BUG:" string for warnings again
ALSA: hda - Apply reboot D3 fix for CX20724 codec, too
mtip32xx: Cleanup queued requests after surprise removal
mtip32xx: Implement timeout handler
mtip32xx: Handle FTL rebuild failure state during device initialization
mtip32xx: Handle safe removal during IO
mtip32xx: Fix for rmmod crash when drive is in FTL rebuild
mtip32xx: Print exact time when an internal command is interrupted
mtip32xx: Remove unwanted code from taskfile error handler
mtip32xx: Fix broken service thread handling
mtip32xx: Avoid issuing standby immediate cmd during FTL rebuild
media: v4l2-compat-ioctl32: fix missing length copy in put_v4l2_buffer32
coda: fix first encoded frame payload
bttv: Width must be a multiple of 16 when capturing planar formats
adv7511: TX_EDID_PRESENT is still 1 after a disconnect
saa7134: Fix bytesperline not being set correctly for planar formats
8250: use callbacks to access UART_DLL/UART_DLM
net: irda: Fix use-after-free in irtty_open()
tty: Fix GPF in flush_to_ldisc(), part 2
staging: comedi: ni_mio_common: fix the ni_write[blw]() functions
staging: android: ion_test: fix check of platform_device_register_simple() error code
staging: comedi: ni_tiocmd: change mistaken use of start_src for start_arg
HID: fix hid_ignore_special_drivers module parameter
HID: multitouch: force retrieving of Win8 signature blob
HID: i2c-hid: fix OOB write in i2c_hid_set_or_send_report()
HID: logitech: fix Dual Action gamepad support
tpm: fix the cleanup of struct tpm_chip
tpm_eventlog.c: fix binary_bios_measurements
tpm_crb: tpm2_shutdown() must be called before tpm_chip_unregister()
tpm: fix the rollback in tpm_chip_register()
mei: bus: check if the device is enabled before data transfer
X.509: Fix leap year handling again
crypto: marvell/cesa - forward devm_ioremap_resource() error code
crypto: ux500 - fix checks of error code returned by devm_ioremap_resource()
crypto: atmel - fix checks of error code returned by devm_ioremap_resource()
crypto: keywrap - memzero the correct memory
crypto: ccp - memset request context to zero during import
crypto: ccp - Don't assume export/import areas are aligned
crypto: ccp - Limit the amount of information exported
crypto: ccp - Add hash state import and export support
Bluetooth: btusb: Add a new AR3012 ID 13d3:3472
Bluetooth: btusb: Add a new AR3012 ID 04ca:3014
Bluetooth: btusb: Add new AR3012 ID 13d3:3395
ALSA: usb-audio: Fix double-free in error paths after snd_usb_add_audio_stream() call
ALSA: usb-audio: Minor code cleanup in create_fixed_stream_quirk()
ALSA: usb-audio: add Microsoft HD-5001 to quirks
ALSA: usb-audio: Add sanity checks for endpoint accesses
ALSA: usb-audio: Fix NULL dereference in create_fixed_stream_quirk()
Input: powermate - fix oops with malicious USB descriptors
pwc: Add USB id for Philips Spc880nc webcam
USB: option: add "D-Link DWM-221 B1" device id
USB: serial: ftdi_sio: Add support for ICP DAS I-756xU devices
USB: serial: cp210x: Adding GE Healthcare Device ID
USB: cypress_m8: add endpoint sanity check
USB: digi_acceleport: do sanity checking for the number of ports
USB: mct_u232: add sanity checking in probe
USB: usb_driver_claim_interface: add sanity checking
USB: iowarrior: fix oops with malicious USB descriptors
USB: cdc-acm: more sanity checking
USB: uas: Reduce can_queue to MAX_CMNDS
usb: hub: fix a typo in hub_port_init() leading to wrong logic
usb: retry reset if a device times out
dm: fix rq_end_stats() NULL pointer in dm_requeue_original_request()
dm cache: make sure every metadata function checks fail_io
dm thin metadata: don't issue prefetches if a transaction abort has failed
dm: fix excessive dm-mq context switching
dm snapshot: disallow the COW and origin devices from being identical
libnvdimm: Fix security issue with DSM IOCTL.
aic7xxx: Fix queue depth handling
be2iscsi: set the boot_kset pointer to NULL in case of failure
scsi: storvsc: fix SRB_STATUS_ABORTED handling
sd: Fix discard granularity when LBPRZ=1
aacraid: Set correct msix count for EEH recovery
aacraid: Fix memory leak in aac_fib_map_free
aacraid: Fix RRQ overload
sg: fix dxferp in from_to case
x86/mm: TLB_REMOTE_SEND_IPI should count pages
x86/iopl: Fix iopl capability check on Xen PV
x86/iopl/64: Properly context-switch IOPL on Xen PV
x86/apic: Fix suspicious RCU usage in smp_trace_call_function_interrupt()
x86/irq: Cure live lock in fixup_irqs()
PCI: ACPI: IA64: fix IO port generic range check
PCI: Disable IO/MEM decoding for devices with non-compliant BARs
pinctrl-bcm2835: Fix cut-and-paste error in "pull" parsing
s390/pci: enforce fmb page boundary rule
s390/cpumf: add missing lpp magic initialization
s390: fix floating pointer register corruption (again)
EDAC, amd64_edac: Shift wrapping issue in f1x_get_norm_dct_addr()
EDAC/sb_edac: Fix computation of channel address
sched/preempt, sh: kmap_coherent relies on disabled preemption
sched/cputime: Fix steal_account_process_tick() to always return jiffies
Thermal: Ignore invalid trip points
perf tools: Fix python extension build
perf tools: Fix checking asprintf return value
perf tools: Dont stop PMU parsing on alias parse error
perf/core: Fix perf_sched_count derailment
KVM: VMX: fix nested vpid for old KVM guests
KVM: VMX: avoid guest hang on invalid invvpid instruction
KVM: VMX: avoid guest hang on invalid invept instruction
KVM: fix spin_lock_init order on x86
KVM: i8254: change PIT discard tick policy
KVM: x86: fix missed hardware breakpoints
x86/PCI: Mark Broadwell-EP Home Agent & PCU as having non-compliant BARs
perf/x86/intel: Add definition for PT PMI bit
x86/entry/compat: Keep TS_COMPAT set during signal delivery
x86/microcode: Untangle from BLK_DEV_INITRD
x86/microcode/intel: Make early loader look for builtin microcode too
mmc: sh_mmcif: Correct TX DMA channel allocation
mmc: sh_mmcif: rework dma channel handling
ASoC: samsung: pass DMA channels as pointers
regulator: core: Fix nested locking of supplies
regulator: core: avoid unused variable warning
s390/cpumf: Fix lpp detection
cpufreq: dt: No need to allocate resources anymore
cpufreq: dt: No need to fetch voltage-tolerance
cpufreq: dt: Use dev_pm_opp_set_rate() to switch frequency
cpufreq: dt: Reuse dev_pm_opp_get_max_transition_latency()
cpufreq: dt: Unsupported OPPs are already disabled
cpufreq: dt: Pass regulator name to the OPP core
cpufreq: dt: OPP layers handles clock-latency for V1 bindings as well
cpufreq: dt: Rename 'need_update' to 'opp_v1'
cpufreq: dt: Convert few pr_debug/err() calls to dev_dbg/err()
cpufreq-dt: fix handling regulator_get_voltage() result
cpufreq-dt: Supply power coefficient when registering cooling devices
PM / OPP: Rename structures for clarity
PM / OPP: Fix incorrect comments
PM / OPP: Initialize regulator pointer to an error value
PM / OPP: Initialize u_volt_min/max to a valid value
PM / OPP: Fix NULL pointer dereference crash when disabling OPPs
PM / OPP: Add dev_pm_opp_set_rate()
PM / OPP: Manage device clk
PM / OPP: Parse clock-latency and voltage-tolerance for v1 bindings
PM / OPP: Introduce dev_pm_opp_get_max_transition_latency()
PM / OPP: Introduce dev_pm_opp_get_max_volt_latency()
PM / OPP: Disable OPPs that aren't supported by the regulator
PM / OPP: get/put regulators from OPP core
cpufreq: cpufreq-dt: avoid uninitialized variable warnings:
PM / OPP: Use snprintf() instead of sprintf()
PM / OPP: Set cpu_dev->id in cpumask first
PM / OPP: Fix parsing of opp-microvolt and opp-microamp properties
PM / OPP: Parse 'opp-<prop>-<name>' bindings
PM / OPP: Parse 'opp-supported-hw' binding
PM / OPP: Add missing doc comments
PM / OPP: Rename OPP nodes as opp@<opp-hz>
PM / OPP: Remove 'operating-points-names' binding
PM / OPP: Add {opp-microvolt|opp-microamp}-<name> binding
PM / OPP: Add "opp-supported-hw" binding
PM / OPP: Add debugfs support
arm64: vdso: Mark vDSO code as read-only
Conflicts:
drivers/staging/android/ion/ion.c
mm/page_alloc.c
CRs-Fixed: 1010239
Change-Id: Id59539cad642885e1e41340cebae4159ba1f7eaf
Signed-off-by: Trilok Soni <tsoni@codeaurora.org>
1185 lines
33 KiB
C
1185 lines
33 KiB
C
/*
|
|
* Detect hard and soft lockups on a system
|
|
*
|
|
* started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
|
|
*
|
|
* Note: Most of this code is borrowed heavily from the original softlockup
|
|
* detector, so thanks to Ingo for the initial implementation.
|
|
* Some chunks also taken from the old x86-specific nmi watchdog code, thanks
|
|
* to those contributors as well.
|
|
*/
|
|
|
|
#define pr_fmt(fmt) "NMI watchdog: " fmt
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/nmi.h>
|
|
#include <linux/init.h>
|
|
#include <linux/module.h>
|
|
#include <linux/sysctl.h>
|
|
#include <linux/smpboot.h>
|
|
#include <linux/sched/rt.h>
|
|
#include <linux/tick.h>
|
|
#include <linux/workqueue.h>
|
|
|
|
#include <asm/irq_regs.h>
|
|
#include <linux/kvm_para.h>
|
|
#include <linux/perf_event.h>
|
|
#include <linux/kthread.h>
|
|
|
|
/*
|
|
* The run state of the lockup detectors is controlled by the content of the
|
|
* 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
|
|
* bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
|
|
*
|
|
* 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
|
|
* are variables that are only used as an 'interface' between the parameters
|
|
* in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
|
|
* 'watchdog_thresh' variable is handled differently because its value is not
|
|
* boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
|
|
* is equal to zero.
|
|
*/
|
|
/* Bit numbers of the two lockup detectors inside 'watchdog_enabled'. */
#define NMI_WATCHDOG_ENABLED_BIT	0
#define SOFT_WATCHDOG_ENABLED_BIT	1

/* Single-bit masks built from the bit numbers above. */
#define NMI_WATCHDOG_ENABLED		(1 << NMI_WATCHDOG_ENABLED_BIT)
#define SOFT_WATCHDOG_ENABLED		(1 << SOFT_WATCHDOG_ENABLED_BIT)
|
|
|
|
/* NOTE(review): presumably serializes the /proc/sys/kernel handlers - confirm against its users. */
static DEFINE_MUTEX(watchdog_proc_mutex);
|
|
|
|
#ifdef CONFIG_HARDLOCKUP_DETECTOR
|
|
static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
|
|
#else
|
|
static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
|
|
#endif
|
|
int __read_mostly nmi_watchdog_enabled;
|
|
int __read_mostly soft_watchdog_enabled;
|
|
int __read_mostly watchdog_user_enabled;
|
|
int __read_mostly watchdog_thresh = 10;
|
|
|
|
/*
 * All-CPU backtrace switches. They are real variables on SMP builds and
 * compile-time zeros otherwise.
 */
#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
#else
#define sysctl_softlockup_all_cpu_backtrace	0
#define sysctl_hardlockup_all_cpu_backtrace	0
#endif
|
|
static struct cpumask watchdog_cpumask __read_mostly;
|
|
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
|
|
|
|
/* Helper for online, unparked cpus. */
|
|
/* Iterate over CPUs that are both online and set in 'watchdog_cpumask'. */
#define for_each_watchdog_cpu(cpu)					\
	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
|
|
|
|
/*
|
|
* The 'watchdog_running' variable is set to 1 when the watchdog threads
|
|
* are registered/started and is set to 0 when the watchdog threads are
|
|
* unregistered/stopped, so it is an indicator whether the threads exist.
|
|
*/
|
|
/* 1 while the watchdog threads are registered/started, 0 otherwise. */
static int __read_mostly watchdog_running;
|
|
/*
|
|
* If a subsystem has a need to deactivate the watchdog temporarily, it
|
|
* can use the suspend/resume interface to achieve this. The content of
|
|
* the 'watchdog_suspended' variable reflects this state. Existing threads
|
|
* are parked/unparked by the lockup_detector_{suspend|resume} functions
|
|
* (see comment blocks pertaining to those functions for further details).
|
|
*
|
|
* 'watchdog_suspended' also prevents threads from being registered/started
|
|
* or unregistered/stopped via parameters in /proc/sys/kernel, so the state
|
|
* of 'watchdog_running' cannot change while the watchdog is deactivated
|
|
* temporarily (see related code in 'proc' handlers).
|
|
*/
|
|
/* Reflects temporary deactivation through the suspend/resume interface. */
static int __read_mostly watchdog_suspended;
|
|
|
|
/* Sample period in nanoseconds; written by set_sample_period(). */
static u64 __read_mostly sample_period;
|
|
|
|
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
|
|
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
|
|
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
|
|
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
|
|
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
|
|
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
|
|
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
|
|
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
|
|
#ifdef CONFIG_HARDLOCKUP_DETECTOR
|
|
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
|
|
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
|
|
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
|
|
#endif
|
|
#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
|
|
static cpumask_t __read_mostly watchdog_cpus;
|
|
#endif
|
|
#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
|
|
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
|
|
#endif
|
|
static unsigned long soft_lockup_nmi_warn;
|
|
|
|
/* boot commands */
|
|
/*
|
|
* Should we panic when a soft-lockup or hard-lockup occurs:
|
|
*/
|
|
#ifdef CONFIG_HARDLOCKUP_DETECTOR
|
|
/* Non-zero: panic on a hard lockup. The default comes from Kconfig. */
unsigned int __read_mostly hardlockup_panic =
	CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;

#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
static unsigned long hardlockup_allcpu_dumped;
#endif
|
|
/*
|
|
* We may not want to enable hard lockup detection by default in all cases,
|
|
* for example when running the kernel as a guest on a hypervisor. In these
|
|
* cases this function can be called to disable hard lockup detection. This
|
|
* function should only be executed once by the boot processor before the
|
|
* kernel command line parameters are parsed, because otherwise it is not
|
|
* possible to override this in hardlockup_panic_setup().
|
|
*/
|
|
void hardlockup_detector_disable(void)
|
|
{
|
|
watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
|
|
}
|
|
|
|
static int __init hardlockup_panic_setup(char *str)
|
|
{
|
|
if (!strncmp(str, "panic", 5))
|
|
hardlockup_panic = 1;
|
|
else if (!strncmp(str, "nopanic", 7))
|
|
hardlockup_panic = 0;
|
|
else if (!strncmp(str, "0", 1))
|
|
watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
|
|
else if (!strncmp(str, "1", 1))
|
|
watchdog_enabled |= NMI_WATCHDOG_ENABLED;
|
|
return 1;
|
|
}
|
|
__setup("nmi_watchdog=", hardlockup_panic_setup);
|
|
#endif
|
|
|
|
/* Soft lockup panic setting. The default comes from Kconfig. */
unsigned int __read_mostly softlockup_panic =
	CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
|
|
|
|
static int __init softlockup_panic_setup(char *str)
|
|
{
|
|
softlockup_panic = simple_strtoul(str, NULL, 0);
|
|
|
|
return 1;
|
|
}
|
|
__setup("softlockup_panic=", softlockup_panic_setup);
|
|
|
|
static int __init nowatchdog_setup(char *str)
|
|
{
|
|
watchdog_enabled = 0;
|
|
return 1;
|
|
}
|
|
__setup("nowatchdog", nowatchdog_setup);
|
|
|
|
static int __init nosoftlockup_setup(char *str)
|
|
{
|
|
watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;
|
|
return 1;
|
|
}
|
|
__setup("nosoftlockup", nosoftlockup_setup);
|
|
|
|
#ifdef CONFIG_SMP
|
|
static int __init softlockup_all_cpu_backtrace_setup(char *str)
|
|
{
|
|
sysctl_softlockup_all_cpu_backtrace =
|
|
!!simple_strtol(str, NULL, 0);
|
|
return 1;
|
|
}
|
|
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
|
|
static int __init hardlockup_all_cpu_backtrace_setup(char *str)
|
|
{
|
|
sysctl_hardlockup_all_cpu_backtrace =
|
|
!!simple_strtol(str, NULL, 0);
|
|
return 1;
|
|
}
|
|
__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
|
|
#endif
|
|
|
|
/*
|
|
* Hard-lockup warnings should be triggered after just a few seconds. Soft-
|
|
* lockups can have false positives under extreme conditions. So we generally
|
|
* want a higher threshold for soft lockups than for hard lockups. So we couple
|
|
* the thresholds with a factor: we make the soft threshold twice the amount of
|
|
* time the hard threshold is.
|
|
*/
|
|
static int get_softlockup_thresh(void)
|
|
{
|
|
return watchdog_thresh * 2;
|
|
}
|
|
|
|
/*
|
|
* Returns seconds, approximately. We don't need nanosecond
|
|
* resolution, and we don't need to waste time with a big divide when
|
|
* 2^30ns == 1.074s.
|
|
*/
|
|
static unsigned long get_timestamp(void)
|
|
{
|
|
return running_clock() >> 30LL; /* 2^30 ~= 10^9 */
|
|
}
|
|
|
|
static void set_sample_period(void)
|
|
{
|
|
/*
|
|
* convert watchdog_thresh from seconds to ns
|
|
* the divide by 5 is to give hrtimer several chances (two
|
|
* or three with the current relation between the soft
|
|
* and hard thresholds) to increment before the
|
|
* hardlockup detector generates a warning
|
|
*/
|
|
sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
|
|
}
|
|
|
|
/* Commands for resetting the watchdog */
|
|
static void __touch_watchdog(void)
|
|
{
|
|
__this_cpu_write(watchdog_touch_ts, get_timestamp());
|
|
}
|
|
|
|
/**
|
|
* touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
|
|
*
|
|
* Call when the scheduler may have stalled for legitimate reasons
|
|
* preventing the watchdog task from executing - e.g. the scheduler
|
|
* entering idle state. This should only be used for scheduler events.
|
|
* Use touch_softlockup_watchdog() for everything else.
|
|
*/
|
|
void touch_softlockup_watchdog_sched(void)
|
|
{
|
|
/*
|
|
* Preemption can be enabled. It doesn't matter which CPU's timestamp
|
|
* gets zeroed here, so use the raw_ operation.
|
|
*/
|
|
raw_cpu_write(watchdog_touch_ts, 0);
|
|
}
|
|
|
|
/*
 * Zero the soft lockup timestamp (see touch_softlockup_watchdog_sched())
 * and also touch the workqueue watchdog for the current CPU.
 */
void touch_softlockup_watchdog(void)
{
	int cpu;

	touch_softlockup_watchdog_sched();

	cpu = raw_smp_processor_id();
	wq_watchdog_touch(cpu);
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
|
|
|
|
void touch_all_softlockup_watchdogs(void)
|
|
{
|
|
int cpu;
|
|
|
|
/*
|
|
* this is done lockless
|
|
* do we care if a 0 races with a timestamp?
|
|
* all it means is the softlock check starts one cycle later
|
|
*/
|
|
for_each_watchdog_cpu(cpu)
|
|
per_cpu(watchdog_touch_ts, cpu) = 0;
|
|
wq_watchdog_touch(-1);
|
|
}
|
|
|
|
#ifdef CONFIG_HARDLOCKUP_DETECTOR
|
|
void touch_nmi_watchdog(void)
|
|
{
|
|
/*
|
|
* Using __raw here because some code paths have
|
|
* preemption enabled. If preemption is enabled
|
|
* then interrupts should be enabled too, in which
|
|
* case we shouldn't have to worry about the watchdog
|
|
* going off.
|
|
*/
|
|
raw_cpu_write(watchdog_nmi_touch, true);
|
|
touch_softlockup_watchdog();
|
|
}
|
|
EXPORT_SYMBOL(touch_nmi_watchdog);
|
|
|
|
#endif
|
|
|
|
void touch_softlockup_watchdog_sync(void)
|
|
{
|
|
__this_cpu_write(softlockup_touch_sync, true);
|
|
__this_cpu_write(watchdog_touch_ts, 0);
|
|
}
|
|
|
|
#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
|
|
/* watchdog detector functions */
|
|
static bool is_hardlockup(void)
|
|
{
|
|
unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
|
|
|
|
if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
|
|
return true;
|
|
|
|
__this_cpu_write(hrtimer_interrupts_saved, hrint);
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
|
|
static unsigned int watchdog_next_cpu(unsigned int cpu)
|
|
{
|
|
cpumask_t cpus = watchdog_cpus;
|
|
unsigned int next_cpu;
|
|
|
|
next_cpu = cpumask_next(cpu, &cpus);
|
|
if (next_cpu >= nr_cpu_ids)
|
|
next_cpu = cpumask_first(&cpus);
|
|
|
|
if (next_cpu == cpu)
|
|
return nr_cpu_ids;
|
|
|
|
return next_cpu;
|
|
}
|
|
|
|
static int is_hardlockup_other_cpu(unsigned int cpu)
|
|
{
|
|
unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
|
|
|
|
if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
|
|
return 1;
|
|
|
|
per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
|
|
return 0;
|
|
}
|
|
|
|
static void watchdog_check_hardlockup_other_cpu(void)
|
|
{
|
|
unsigned int next_cpu;
|
|
|
|
/*
|
|
* Test for hardlockups every 3 samples. The sample period is
|
|
* watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over
|
|
* watchdog_thresh (over by 20%).
|
|
*/
|
|
if (__this_cpu_read(hrtimer_interrupts) % 3 != 0)
|
|
return;
|
|
|
|
/* check for a hardlockup on the next cpu */
|
|
next_cpu = watchdog_next_cpu(smp_processor_id());
|
|
if (next_cpu >= nr_cpu_ids)
|
|
return;
|
|
|
|
smp_rmb();
|
|
|
|
if (per_cpu(watchdog_nmi_touch, next_cpu) == true) {
|
|
per_cpu(watchdog_nmi_touch, next_cpu) = false;
|
|
return;
|
|
}
|
|
|
|
if (is_hardlockup_other_cpu(next_cpu)) {
|
|
/* only warn once */
|
|
if (per_cpu(hard_watchdog_warn, next_cpu) == true)
|
|
return;
|
|
|
|
if (hardlockup_panic)
|
|
panic("Watchdog detected hard LOCKUP on cpu %u", next_cpu);
|
|
else
|
|
WARN(1, "Watchdog detected hard LOCKUP on cpu %u", next_cpu);
|
|
|
|
per_cpu(hard_watchdog_warn, next_cpu) = true;
|
|
} else {
|
|
per_cpu(hard_watchdog_warn, next_cpu) = false;
|
|
}
|
|
}
|
|
#else
|
|
/* No-op when CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU is not set. */
static inline void watchdog_check_hardlockup_other_cpu(void)
{
}
|
|
#endif
|
|
|
|
static int is_softlockup(unsigned long touch_ts)
|
|
{
|
|
unsigned long now = get_timestamp();
|
|
|
|
if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){
|
|
/* Warn about unreasonable delays. */
|
|
if (time_after(now, touch_ts + get_softlockup_thresh()))
|
|
return now - touch_ts;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
|
|
|
|
static struct perf_event_attr wd_hw_attr = {
|
|
.type = PERF_TYPE_HARDWARE,
|
|
.config = PERF_COUNT_HW_CPU_CYCLES,
|
|
.size = sizeof(struct perf_event_attr),
|
|
.pinned = 1,
|
|
.disabled = 1,
|
|
};
|
|
|
|
/* Callback function for perf event subsystem */
|
|
static void watchdog_overflow_callback(struct perf_event *event,
|
|
struct perf_sample_data *data,
|
|
struct pt_regs *regs)
|
|
{
|
|
/* Ensure the watchdog never gets throttled */
|
|
event->hw.interrupts = 0;
|
|
|
|
if (__this_cpu_read(watchdog_nmi_touch) == true) {
|
|
__this_cpu_write(watchdog_nmi_touch, false);
|
|
return;
|
|
}
|
|
|
|
/* check for a hardlockup
|
|
* This is done by making sure our timer interrupt
|
|
* is incrementing. The timer interrupt should have
|
|
* fired multiple times before we overflow'd. If it hasn't
|
|
* then this is a good indication the cpu is stuck
|
|
*/
|
|
if (is_hardlockup()) {
|
|
int this_cpu = smp_processor_id();
|
|
struct pt_regs *regs = get_irq_regs();
|
|
|
|
/* only print hardlockups once */
|
|
if (__this_cpu_read(hard_watchdog_warn) == true)
|
|
return;
|
|
|
|
pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
|
|
print_modules();
|
|
print_irqtrace_events(current);
|
|
if (regs)
|
|
show_regs(regs);
|
|
else
|
|
dump_stack();
|
|
|
|
/*
|
|
* Perform all-CPU dump only once to avoid multiple hardlockups
|
|
* generating interleaving traces
|
|
*/
|
|
if (sysctl_hardlockup_all_cpu_backtrace &&
|
|
!test_and_set_bit(0, &hardlockup_allcpu_dumped))
|
|
trigger_allbutself_cpu_backtrace();
|
|
|
|
if (hardlockup_panic)
|
|
panic("Hard LOCKUP");
|
|
|
|
__this_cpu_write(hard_watchdog_warn, true);
|
|
return;
|
|
}
|
|
|
|
__this_cpu_write(hard_watchdog_warn, false);
|
|
return;
|
|
}
|
|
#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
|
|
|
|
static void watchdog_interrupt_count(void)
|
|
{
|
|
__this_cpu_inc(hrtimer_interrupts);
|
|
}
|
|
|
|
/* Forward declarations of functions defined later in this file. */
static int watchdog_enable_all_cpus(void);
static void watchdog_disable_all_cpus(void);

static int watchdog_nmi_enable(unsigned int cpu);
static void watchdog_nmi_disable(unsigned int cpu);
|
|
|
|
/* watchdog kicker functions */
|
|
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
|
{
|
|
unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
|
|
struct pt_regs *regs = get_irq_regs();
|
|
int duration;
|
|
int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
|
|
|
|
/* kick the hardlockup detector */
|
|
watchdog_interrupt_count();
|
|
|
|
/* test for hardlockups on the next cpu */
|
|
watchdog_check_hardlockup_other_cpu();
|
|
|
|
/* kick the softlockup detector */
|
|
wake_up_process(__this_cpu_read(softlockup_watchdog));
|
|
|
|
/* .. and repeat */
|
|
hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
|
|
|
|
if (touch_ts == 0) {
|
|
if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
|
|
/*
|
|
* If the time stamp was touched atomically
|
|
* make sure the scheduler tick is up to date.
|
|
*/
|
|
__this_cpu_write(softlockup_touch_sync, false);
|
|
sched_clock_tick();
|
|
}
|
|
|
|
/* Clear the guest paused flag on watchdog reset */
|
|
kvm_check_and_clear_guest_paused();
|
|
__touch_watchdog();
|
|
return HRTIMER_RESTART;
|
|
}
|
|
|
|
/* check for a softlockup
|
|
* This is done by making sure a high priority task is
|
|
* being scheduled. The task touches the watchdog to
|
|
* indicate it is getting cpu time. If it hasn't then
|
|
* this is a good indication some task is hogging the cpu
|
|
*/
|
|
duration = is_softlockup(touch_ts);
|
|
if (unlikely(duration)) {
|
|
/*
|
|
* If a virtual machine is stopped by the host it can look to
|
|
* the watchdog like a soft lockup, check to see if the host
|
|
* stopped the vm before we issue the warning
|
|
*/
|
|
if (kvm_check_and_clear_guest_paused())
|
|
return HRTIMER_RESTART;
|
|
|
|
/* only warn once */
|
|
if (__this_cpu_read(soft_watchdog_warn) == true) {
|
|
/*
|
|
* When multiple processes are causing softlockups the
|
|
* softlockup detector only warns on the first one
|
|
* because the code relies on a full quiet cycle to
|
|
* re-arm. The second process prevents the quiet cycle
|
|
* and never gets reported. Use task pointers to detect
|
|
* this.
|
|
*/
|
|
if (__this_cpu_read(softlockup_task_ptr_saved) !=
|
|
current) {
|
|
__this_cpu_write(soft_watchdog_warn, false);
|
|
__touch_watchdog();
|
|
}
|
|
return HRTIMER_RESTART;
|
|
}
|
|
|
|
if (softlockup_all_cpu_backtrace) {
|
|
/* Prevent multiple soft-lockup reports if one cpu is already
|
|
* engaged in dumping cpu back traces
|
|
*/
|
|
if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
|
|
/* Someone else will report us. Let's give up */
|
|
__this_cpu_write(soft_watchdog_warn, true);
|
|
return HRTIMER_RESTART;
|
|
}
|
|
}
|
|
|
|
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
|
|
smp_processor_id(), duration,
|
|
current->comm, task_pid_nr(current));
|
|
__this_cpu_write(softlockup_task_ptr_saved, current);
|
|
print_modules();
|
|
print_irqtrace_events(current);
|
|
if (regs)
|
|
show_regs(regs);
|
|
else
|
|
dump_stack();
|
|
|
|
if (softlockup_all_cpu_backtrace) {
|
|
/* Avoid generating two back traces for current
|
|
* given that one is already made above
|
|
*/
|
|
trigger_allbutself_cpu_backtrace();
|
|
|
|
clear_bit(0, &soft_lockup_nmi_warn);
|
|
/* Barrier to sync with other cpus */
|
|
smp_mb__after_atomic();
|
|
}
|
|
|
|
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
|
|
if (softlockup_panic)
|
|
panic("softlockup: hung tasks");
|
|
__this_cpu_write(soft_watchdog_warn, true);
|
|
} else
|
|
__this_cpu_write(soft_watchdog_warn, false);
|
|
|
|
return HRTIMER_RESTART;
|
|
}
|
|
|
|
static void watchdog_set_prio(unsigned int policy, unsigned int prio)
|
|
{
|
|
struct sched_param param = { .sched_priority = prio };
|
|
|
|
sched_setscheduler(current, policy, ¶m);
|
|
}
|
|
|
|
static void watchdog_enable(unsigned int cpu)
|
|
{
|
|
struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
|
|
|
|
/* kick off the timer for the hardlockup detector */
|
|
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
|
hrtimer->function = watchdog_timer_fn;
|
|
|
|
/* Enable the perf event */
|
|
watchdog_nmi_enable(cpu);
|
|
|
|
/* done here because hrtimer_start can only pin to smp_processor_id() */
|
|
hrtimer_start(hrtimer, ns_to_ktime(sample_period),
|
|
HRTIMER_MODE_REL_PINNED);
|
|
|
|
/* initialize timestamp */
|
|
watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
|
|
__touch_watchdog();
|
|
}
|
|
|
|
static void watchdog_disable(unsigned int cpu)
|
|
{
|
|
struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
|
|
|
|
watchdog_set_prio(SCHED_NORMAL, 0);
|
|
hrtimer_cancel(hrtimer);
|
|
/* disable the perf event */
|
|
watchdog_nmi_disable(cpu);
|
|
}
|
|
|
|
/*
 * smpboot cleanup callback (see watchdog_threads).  @online is not
 * used; the work is the same as for parking.
 */
static void watchdog_cleanup(unsigned int cpu, bool online)
{
	watchdog_disable(cpu);
}
|
|
|
|
static int watchdog_should_run(unsigned int cpu)
|
|
{
|
|
return __this_cpu_read(hrtimer_interrupts) !=
|
|
__this_cpu_read(soft_lockup_hrtimer_cnt);
|
|
}
|
|
|
|
/*
|
|
* The watchdog thread function - touches the timestamp.
|
|
*
|
|
* It only runs once every sample_period seconds (4 seconds by
|
|
* default) to reset the softlockup timestamp. If this gets delayed
|
|
* for more than 2*watchdog_thresh seconds then the debug-printout
|
|
* triggers in watchdog_timer_fn().
|
|
*/
|
|
static void watchdog(unsigned int cpu)
|
|
{
|
|
__this_cpu_write(soft_lockup_hrtimer_cnt,
|
|
__this_cpu_read(hrtimer_interrupts));
|
|
__touch_watchdog();
|
|
|
|
/*
|
|
* watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
|
|
* failure path. Check for failures that can occur asynchronously -
|
|
* for example, when CPUs are on-lined - and shut down the hardware
|
|
* perf event on each CPU accordingly.
|
|
*
|
|
* The only non-obvious place this bit can be cleared is through
|
|
* watchdog_nmi_enable(), so a pr_info() is placed there. Placing a
|
|
* pr_info here would be too noisy as it would result in a message
|
|
* every few seconds if the hardlockup was disabled but the softlockup
|
|
* enabled.
|
|
*/
|
|
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
|
|
watchdog_nmi_disable(cpu);
|
|
}
|
|
|
|
#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
|
|
/*
 * People like the simple clean cpu node info on boot.
 * Reduce the watchdog noise by only printing messages
 * that are different from what cpu0 displayed.
 *
 * Holds the error cpu0 got when creating its perf event, or 0.
 */
static unsigned long cpu0_err;
|
|
|
|
static int watchdog_nmi_enable(unsigned int cpu)
|
|
{
|
|
struct perf_event_attr *wd_attr;
|
|
struct perf_event *event = per_cpu(watchdog_ev, cpu);
|
|
|
|
/* nothing to do if the hard lockup detector is disabled */
|
|
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
|
|
goto out;
|
|
|
|
/* is it already setup and enabled? */
|
|
if (event && event->state > PERF_EVENT_STATE_OFF)
|
|
goto out;
|
|
|
|
/* it is setup but not enabled */
|
|
if (event != NULL)
|
|
goto out_enable;
|
|
|
|
wd_attr = &wd_hw_attr;
|
|
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
|
|
|
|
/* Try to register using hardware perf events */
|
|
event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
|
|
|
|
/* save cpu0 error for future comparision */
|
|
if (cpu == 0 && IS_ERR(event))
|
|
cpu0_err = PTR_ERR(event);
|
|
|
|
if (!IS_ERR(event)) {
|
|
/* only print for cpu0 or different than cpu0 */
|
|
if (cpu == 0 || cpu0_err)
|
|
pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
|
|
goto out_save;
|
|
}
|
|
|
|
/*
|
|
* Disable the hard lockup detector if _any_ CPU fails to set up
|
|
* set up the hardware perf event. The watchdog() function checks
|
|
* the NMI_WATCHDOG_ENABLED bit periodically.
|
|
*
|
|
* The barriers are for syncing up watchdog_enabled across all the
|
|
* cpus, as clear_bit() does not use barriers.
|
|
*/
|
|
smp_mb__before_atomic();
|
|
clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
|
|
smp_mb__after_atomic();
|
|
|
|
/* skip displaying the same error again */
|
|
if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
|
|
return PTR_ERR(event);
|
|
|
|
/* vary the KERN level based on the returned errno */
|
|
if (PTR_ERR(event) == -EOPNOTSUPP)
|
|
pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
|
|
else if (PTR_ERR(event) == -ENOENT)
|
|
pr_warn("disabled (cpu%i): hardware events not enabled\n",
|
|
cpu);
|
|
else
|
|
pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
|
|
cpu, PTR_ERR(event));
|
|
|
|
pr_info("Shutting down hard lockup detector on all cpus\n");
|
|
|
|
return PTR_ERR(event);
|
|
|
|
/* success path */
|
|
out_save:
|
|
per_cpu(watchdog_ev, cpu) = event;
|
|
out_enable:
|
|
perf_event_enable(per_cpu(watchdog_ev, cpu));
|
|
out:
|
|
return 0;
|
|
}
|
|
|
|
static void watchdog_nmi_disable(unsigned int cpu)
|
|
{
|
|
struct perf_event *event = per_cpu(watchdog_ev, cpu);
|
|
|
|
if (event) {
|
|
perf_event_disable(event);
|
|
per_cpu(watchdog_ev, cpu) = NULL;
|
|
|
|
/* should be in cleanup, but blocks oprofile */
|
|
perf_event_release_kernel(event);
|
|
}
|
|
if (cpu == 0) {
|
|
/* watchdog_nmi_enable() expects this to be zero initially. */
|
|
cpu0_err = 0;
|
|
}
|
|
}
|
|
|
|
#else
|
|
#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
|
|
static int watchdog_nmi_enable(unsigned int cpu)
|
|
{
|
|
/*
|
|
* The new cpu will be marked online before the first hrtimer interrupt
|
|
* runs on it. If another cpu tests for a hardlockup on the new cpu
|
|
* before it has run its first hrtimer, it will get a false positive.
|
|
* Touch the watchdog on the new cpu to delay the first check for at
|
|
* least 3 sampling periods to guarantee one hrtimer has run on the new
|
|
* cpu.
|
|
*/
|
|
per_cpu(watchdog_nmi_touch, cpu) = true;
|
|
smp_wmb();
|
|
cpumask_set_cpu(cpu, &watchdog_cpus);
|
|
return 0;
|
|
}
|
|
|
|
static void watchdog_nmi_disable(unsigned int cpu)
|
|
{
|
|
unsigned int next_cpu = watchdog_next_cpu(cpu);
|
|
|
|
/*
|
|
* Offlining this cpu will cause the cpu before this one to start
|
|
* checking the one after this one. If this cpu just finished checking
|
|
* the next cpu and updating hrtimer_interrupts_saved, and then the
|
|
* previous cpu checks it within one sample period, it will trigger a
|
|
* false positive. Touch the watchdog on the next cpu to prevent it.
|
|
*/
|
|
if (next_cpu < nr_cpu_ids)
|
|
per_cpu(watchdog_nmi_touch, next_cpu) = true;
|
|
smp_wmb();
|
|
cpumask_clear_cpu(cpu, &watchdog_cpus);
|
|
}
|
|
#else
|
|
/* Neither hard lockup detector flavour is configured: nothing to do. */
static int watchdog_nmi_enable(unsigned int cpu)
{
	return 0;
}

static void watchdog_nmi_disable(unsigned int cpu)
{
}
|
|
#endif /* CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU */
|
|
#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
|
|
|
|
static struct smp_hotplug_thread watchdog_threads = {
|
|
.store = &softlockup_watchdog,
|
|
.thread_should_run = watchdog_should_run,
|
|
.thread_fn = watchdog,
|
|
.thread_comm = "watchdog/%u",
|
|
.setup = watchdog_enable,
|
|
.cleanup = watchdog_cleanup,
|
|
.park = watchdog_disable,
|
|
.unpark = watchdog_enable,
|
|
};
|
|
|
|
/*
|
|
* park all watchdog threads that are specified in 'watchdog_cpumask'
|
|
*
|
|
* This function returns an error if kthread_park() of a watchdog thread
|
|
* fails. In this situation, the watchdog threads of some CPUs can already
|
|
* be parked and the watchdog threads of other CPUs can still be runnable.
|
|
* Callers are expected to handle this special condition as appropriate in
|
|
* their context.
|
|
*
|
|
* This function may only be called in a context that is protected against
|
|
* races with CPU hotplug - for example, via get_online_cpus().
|
|
*/
|
|
static int watchdog_park_threads(void)
|
|
{
|
|
int cpu, ret = 0;
|
|
|
|
for_each_watchdog_cpu(cpu) {
|
|
ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
|
|
if (ret)
|
|
break;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* unpark all watchdog threads that are specified in 'watchdog_cpumask'
|
|
*
|
|
* This function may only be called in a context that is protected against
|
|
* races with CPU hotplug - for example, via get_online_cpus().
|
|
*/
|
|
static void watchdog_unpark_threads(void)
|
|
{
|
|
int cpu;
|
|
|
|
for_each_watchdog_cpu(cpu)
|
|
kthread_unpark(per_cpu(softlockup_watchdog, cpu));
|
|
}
|
|
|
|
/*
|
|
* Suspend the hard and soft lockup detector by parking the watchdog threads.
|
|
*/
|
|
int lockup_detector_suspend(void)
|
|
{
|
|
int ret = 0;
|
|
|
|
get_online_cpus();
|
|
mutex_lock(&watchdog_proc_mutex);
|
|
/*
|
|
* Multiple suspend requests can be active in parallel (counted by
|
|
* the 'watchdog_suspended' variable). If the watchdog threads are
|
|
* running, the first caller takes care that they will be parked.
|
|
* The state of 'watchdog_running' cannot change while a suspend
|
|
* request is active (see related code in 'proc' handlers).
|
|
*/
|
|
if (watchdog_running && !watchdog_suspended)
|
|
ret = watchdog_park_threads();
|
|
|
|
if (ret == 0)
|
|
watchdog_suspended++;
|
|
else {
|
|
watchdog_disable_all_cpus();
|
|
pr_err("Failed to suspend lockup detectors, disabled\n");
|
|
watchdog_enabled = 0;
|
|
}
|
|
|
|
mutex_unlock(&watchdog_proc_mutex);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Resume the hard and soft lockup detector by unparking the watchdog threads.
|
|
*/
|
|
void lockup_detector_resume(void)
|
|
{
|
|
mutex_lock(&watchdog_proc_mutex);
|
|
|
|
watchdog_suspended--;
|
|
/*
|
|
* The watchdog threads are unparked if they were previously running
|
|
* and if there is no more active suspend request.
|
|
*/
|
|
if (watchdog_running && !watchdog_suspended)
|
|
watchdog_unpark_threads();
|
|
|
|
mutex_unlock(&watchdog_proc_mutex);
|
|
put_online_cpus();
|
|
}
|
|
|
|
/*
 * Park and immediately unpark all watchdog threads.  Returns the error
 * from watchdog_park_threads() (without unparking) if parking fails,
 * 0 otherwise.
 */
static int update_watchdog_all_cpus(void)
{
	int err = watchdog_park_threads();

	if (!err)
		watchdog_unpark_threads();

	return err;
}
|
|
|
|
static int watchdog_enable_all_cpus(void)
|
|
{
|
|
int err = 0;
|
|
|
|
if (!watchdog_running) {
|
|
err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
|
|
&watchdog_cpumask);
|
|
if (err)
|
|
pr_err("Failed to create watchdog threads, disabled\n");
|
|
else
|
|
watchdog_running = 1;
|
|
} else {
|
|
/*
|
|
* Enable/disable the lockup detectors or
|
|
* change the sample period 'on the fly'.
|
|
*/
|
|
err = update_watchdog_all_cpus();
|
|
|
|
if (err) {
|
|
watchdog_disable_all_cpus();
|
|
pr_err("Failed to update lockup detectors, disabled\n");
|
|
}
|
|
}
|
|
|
|
if (err)
|
|
watchdog_enabled = 0;
|
|
|
|
return err;
|
|
}
|
|
|
|
static void watchdog_disable_all_cpus(void)
|
|
{
|
|
if (watchdog_running) {
|
|
watchdog_running = 0;
|
|
smpboot_unregister_percpu_thread(&watchdog_threads);
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
|
|
/*
|
|
* Update the run state of the lockup detectors.
|
|
*/
|
|
static int proc_watchdog_update(void)
|
|
{
|
|
int err = 0;
|
|
|
|
/*
|
|
* Watchdog threads won't be started if they are already active.
|
|
* The 'watchdog_running' variable in watchdog_*_all_cpus() takes
|
|
* care of this. If those threads are already active, the sample
|
|
* period will be updated and the lockup detectors will be enabled
|
|
* or disabled 'on the fly'.
|
|
*/
|
|
if (watchdog_enabled && watchdog_thresh)
|
|
err = watchdog_enable_all_cpus();
|
|
else
|
|
watchdog_disable_all_cpus();
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
/*
|
|
* common function for watchdog, nmi_watchdog and soft_watchdog parameter
|
|
*
|
|
* caller | table->data points to | 'which' contains the flag(s)
|
|
* -------------------|-----------------------|-----------------------------
|
|
* proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
|
|
* | | with SOFT_WATCHDOG_ENABLED
|
|
* -------------------|-----------------------|-----------------------------
|
|
* proc_nmi_watchdog | nmi_watchdog_enabled | NMI_WATCHDOG_ENABLED
|
|
* -------------------|-----------------------|-----------------------------
|
|
* proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
|
|
*/
|
|
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
|
{
|
|
int err, old, new;
|
|
int *watchdog_param = (int *)table->data;
|
|
|
|
get_online_cpus();
|
|
mutex_lock(&watchdog_proc_mutex);
|
|
|
|
if (watchdog_suspended) {
|
|
/* no parameter changes allowed while watchdog is suspended */
|
|
err = -EAGAIN;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* If the parameter is being read return the state of the corresponding
|
|
* bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
|
|
* run state of the lockup detectors.
|
|
*/
|
|
if (!write) {
|
|
*watchdog_param = (watchdog_enabled & which) != 0;
|
|
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
|
} else {
|
|
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
|
if (err)
|
|
goto out;
|
|
|
|
/*
|
|
* There is a race window between fetching the current value
|
|
* from 'watchdog_enabled' and storing the new value. During
|
|
* this race window, watchdog_nmi_enable() can sneak in and
|
|
* clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
|
|
* The 'cmpxchg' detects this race and the loop retries.
|
|
*/
|
|
do {
|
|
old = watchdog_enabled;
|
|
/*
|
|
* If the parameter value is not zero set the
|
|
* corresponding bit(s), else clear it(them).
|
|
*/
|
|
if (*watchdog_param)
|
|
new = old | which;
|
|
else
|
|
new = old & ~which;
|
|
} while (cmpxchg(&watchdog_enabled, old, new) != old);
|
|
|
|
/*
|
|
* Update the run state of the lockup detectors. There is _no_
|
|
* need to check the value returned by proc_watchdog_update()
|
|
* and to restore the previous value of 'watchdog_enabled' as
|
|
* both lockup detectors are disabled if proc_watchdog_update()
|
|
* returns an error.
|
|
*/
|
|
if (old == new)
|
|
goto out;
|
|
|
|
err = proc_watchdog_update();
|
|
}
|
|
out:
|
|
mutex_unlock(&watchdog_proc_mutex);
|
|
put_online_cpus();
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* /proc/sys/kernel/watchdog
|
|
*/
|
|
int proc_watchdog(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
|
{
|
|
return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
|
|
table, write, buffer, lenp, ppos);
|
|
}
|
|
|
|
/*
|
|
* /proc/sys/kernel/nmi_watchdog
|
|
*/
|
|
int proc_nmi_watchdog(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
|
{
|
|
return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
|
|
table, write, buffer, lenp, ppos);
|
|
}
|
|
|
|
/*
|
|
* /proc/sys/kernel/soft_watchdog
|
|
*/
|
|
int proc_soft_watchdog(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
|
{
|
|
return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
|
|
table, write, buffer, lenp, ppos);
|
|
}
|
|
|
|
/*
|
|
* /proc/sys/kernel/watchdog_thresh
|
|
*/
|
|
int proc_watchdog_thresh(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
|
{
|
|
int err, old, new;
|
|
|
|
get_online_cpus();
|
|
mutex_lock(&watchdog_proc_mutex);
|
|
|
|
if (watchdog_suspended) {
|
|
/* no parameter changes allowed while watchdog is suspended */
|
|
err = -EAGAIN;
|
|
goto out;
|
|
}
|
|
|
|
old = ACCESS_ONCE(watchdog_thresh);
|
|
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
|
|
|
if (err || !write)
|
|
goto out;
|
|
|
|
/*
|
|
* Update the sample period. Restore on failure.
|
|
*/
|
|
new = ACCESS_ONCE(watchdog_thresh);
|
|
if (old == new)
|
|
goto out;
|
|
|
|
set_sample_period();
|
|
err = proc_watchdog_update();
|
|
if (err) {
|
|
watchdog_thresh = old;
|
|
set_sample_period();
|
|
}
|
|
out:
|
|
mutex_unlock(&watchdog_proc_mutex);
|
|
put_online_cpus();
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* The cpumask is the mask of possible cpus that the watchdog can run
|
|
* on, not the mask of cpus it is actually running on. This allows the
|
|
* user to specify a mask that will include cpus that have not yet
|
|
* been brought online, if desired.
|
|
*/
|
|
int proc_watchdog_cpumask(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
|
{
|
|
int err;
|
|
|
|
get_online_cpus();
|
|
mutex_lock(&watchdog_proc_mutex);
|
|
|
|
if (watchdog_suspended) {
|
|
/* no parameter changes allowed while watchdog is suspended */
|
|
err = -EAGAIN;
|
|
goto out;
|
|
}
|
|
|
|
err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
|
|
if (!err && write) {
|
|
/* Remove impossible cpus to keep sysctl output cleaner. */
|
|
cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
|
|
cpu_possible_mask);
|
|
|
|
if (watchdog_running) {
|
|
/*
|
|
* Failure would be due to being unable to allocate
|
|
* a temporary cpumask, so we are likely not in a
|
|
* position to do much else to make things better.
|
|
*/
|
|
if (smpboot_update_cpumask_percpu_thread(
|
|
&watchdog_threads, &watchdog_cpumask) != 0)
|
|
pr_err("cpumask update failed\n");
|
|
}
|
|
}
|
|
out:
|
|
mutex_unlock(&watchdog_proc_mutex);
|
|
put_online_cpus();
|
|
return err;
|
|
}
|
|
|
|
#endif /* CONFIG_SYSCTL */
|
|
|
|
/*
 * Boot-time initialisation: compute the sample period, pick the initial
 * watchdog cpumask (all possible cpus, or only the housekeeping cpus
 * when nohz_full is active) and start the watchdog threads if any
 * detector is enabled.
 */
void __init lockup_detector_init(void)
{
	const struct cpumask *initial;

	set_sample_period();

	initial = cpu_possible_mask;
#ifdef CONFIG_NO_HZ_FULL
	if (tick_nohz_full_enabled()) {
		pr_info("Disabling watchdog on nohz_full cores by default\n");
		initial = housekeeping_mask;
	}
#endif
	cpumask_copy(&watchdog_cpumask, initial);

	if (watchdog_enabled)
		watchdog_enable_all_cpus();
}
|