android_kernel_oneplus_msm8998/kernel/watchdog.c
Blagovest Kolenichev ab2a5c9e22 Merge branch 'android-4.4@8e53f7c' into branch 'msm-4.4'
This merge is done with the '-s ours' strategy, because the same
changes had already arrived through the merged Linaro LSK tags. The
reason for these duplicates is that at some point in 2016 Linaro
changed its policy and started taking changes from android-4.4 via
cherry-pick instead of merge. The only discarded QC change is:

  ffff87b kernel/watchdog.c: fix compilation warning on Kernel 4.4

* refs/heads/tmp-8e53f7c
  sched: tune: Fix lacking spinlock initialization
  UPSTREAM: trace: Update documentation for mono, mono_raw and boot clock
  UPSTREAM: trace: Add an option for boot clock as trace clock
  UPSTREAM: timekeeping: Add a fast and NMI safe boot clock
  ANDROID: goldfish_pipe: fix allmodconfig build
  ANDROID: goldfish: goldfish_pipe: fix locking errors
  ANDROID: video: goldfishfb: fix platform_no_drv_owner.cocci warnings
  ANDROID: goldfish_pipe: fix call_kern.cocci warnings
  arm64: rename ranchu defconfig to ranchu64
  ANDROID: arch: x86: disable pic for Android toolchain
  ANDROID: goldfish_pipe: An implementation of more parallel pipe
  ANDROID: goldfish_pipe: bugfixes and performance improvements.
  ANDROID: goldfish: Add goldfish sync driver
  ANDROID: goldfish: add ranchu defconfigs
  ANDROID: goldfish_audio: Clear audio read buffer status after each read
  ANDROID: goldfish_events: no extra EV_SYN; register goldfish
  ANDROID: goldfish_fb: Set pixclock = 0
  ANDROID: goldfish: Enable ACPI-based enumeration for goldfish audio
  ANDROID: goldfish: Enable ACPI-based enumeration for goldfish framebuffer
  ANDROID: video: goldfishfb: add devicetree bindings
  BACKPORT: staging: goldfish: audio: fix compiliation on arm
  BACKPORT: Input: goldfish_events - enable ACPI-based enumeration for goldfish events
  BACKPORT: goldfish: Enable ACPI-based enumeration for goldfish battery
  BACKPORT: drivers: tty: goldfish: Add device tree bindings
  BACKPORT: tty: goldfish: support platform_device with id -1
  BACKPORT: Input: goldfish_events - add devicetree bindings
  BACKPORT: power: goldfish_battery: add devicetree bindings
  BACKPORT: staging: goldfish: audio: add devicetree bindings
  ANDROID: usb: gadget: function: cleanup: Add blank line after declaration
  cpufreq: sched: Fix kernel crash on accessing sysfs file
  UPSTREAM: ring-buffer: Prevent overflow of size in ring_buffer_resize()
  usb: gadget: f_mtp: simplify ptp NULL pointer check
  ANDROID: video: adf: Avoid directly referencing user pointers
  ANDROID: usb: gadget: audio_source: fix comparison of distinct pointer types
  android: binder: support for file-descriptor arrays.
  android: binder: support for scatter-gather.
  android: binder: add extra size to allocator.
  android: binder: refactor binder_transact()
  android: binder: support multiple /dev instances.
  android: binder: deal with contexts in debugfs.
  android: binder: support multiple context managers.
  android: binder: split flat_binder_object.
  disable aio support in recommended configuration
  [RFC]cgroup: Change from CAP_SYS_NICE to CAP_SYS_RESOURCE for cgroup migration permissions
  UPSTREAM: cpu/hotplug: Handle unbalanced hotplug enable/disable
  UPSTREAM: arm64: kaslr: fix breakage with CONFIG_MODVERSIONS=y
  UPSTREAM: arm64: kaslr: keep modules close to the kernel when DYNAMIC_FTRACE=y
  cgroup: Remove leftover instances of allow_attach
  BACKPORT: lib: harden strncpy_from_user
  CHROMIUM: cgroups: relax permissions on moving tasks between cgroups
  CHROMIUM: remove Android's cgroup generic permissions checks
  UPSTREAM: arm64: relocatable: deal with physically misaligned kernel images
  UPSTREAM: arm64: account for sparsemem section alignment when choosing vmemmap offset
  UPSTREAM: percpu: fix synchronization between synchronous map extension and chunk destruction
  UPSTREAM: percpu: fix synchronization between chunk->map_extend_work and chunk destruction
  ANDROID: binder: Clear binder and cookie when setting handle in flat binder struct
  ANDROID: binder: Add strong ref checks
  UPSTREAM: staging/android/ion : fix a race condition in the ion driver
  ANDROID: android-base: CONFIG_HARDENED_USERCOPY=y
  UPSTREAM: fs/proc/kcore.c: Add bounce buffer for ktext data
  UPSTREAM: fs/proc/kcore.c: Make bounce buffer global for read
  BACKPORT: arm64: Correctly bounds check virt_addr_valid
  BACKPORT: arm64: mm: Mark .rodata as RO
  Fix a build breakage in IO latency hist code.
  UPSTREAM: efi: include asm/early_ioremap.h not asm/efi.h to get early_memremap
  UPSTREAM: ia64: split off early_ioremap() declarations into asm/early_ioremap.h
  FROMLIST: arm64: Enable CONFIG_ARM64_SW_TTBR0_PAN
  FROMLIST: arm64: xen: Enable user access before a privcmd hvc call
  FROMLIST: arm64: Handle faults caused by inadvertent user access with PAN enabled
  FROMLIST: arm64: Disable TTBR0_EL1 during normal kernel execution
  FROMLIST: arm64: Introduce uaccess_{disable,enable} functionality based on TTBR0_EL1
  FROMLIST: arm64: Factor out TTBR0_EL1 post-update workaround into a specific asm macro
  FROMLIST: arm64: Factor out PAN enabling/disabling into separate uaccess_* macros
  UPSTREAM: arm64: Handle el1 synchronous instruction aborts cleanly
  BACKPORT: arm64: kernel: Save and restore UAO and addr_limit on exception entry
  UPSTREAM: arm64: include alternative handling in dcache_by_line_op
  UPSTREAM: arm64: fix "dc cvau" cache operation on errata-affected core
  UPSTREAM: Revert "arm64: alternatives: add enable parameter to conditional asm macros"
  UPSTREAM: arm64: Add new asm macro copy_page
  UPSTREAM: arm64: kill ESR_LNX_EXEC
  UPSTREAM: arm64: add macro to extract ESR_ELx.EC
  UPSTREAM: arm64: mm: mark fault_info table const
  UPSTREAM: arm64: fix dump_instr when PAN and UAO are in use
  BACKPORT: arm64: Fold proc-macros.S into assembler.h
  UPSTREAM: arm64: introduce mov_q macro to move a constant into a 64-bit register
  UPSTREAM: arm64: Implement ptep_set_access_flags() for hardware AF/DBM
  UPSTREAM: arm64: choose memstart_addr based on minimum sparsemem section alignment
  UPSTREAM: arm64/mm: ensure memstart_addr remains sufficiently aligned
  UPSTREAM: arm64/kernel: fix incorrect EL0 check in inv_entry macro
  UPSTREAM: arm64: Add workaround for Cavium erratum 27456
  UPSTREAM: arm64: Add macros to read/write system registers
  UPSTREAM: arm64/efi: refactor EFI init and runtime code for reuse by 32-bit ARM
  UPSTREAM: arm64/efi: split off EFI init and runtime code for reuse by 32-bit ARM
  UPSTREAM: arm64/efi: mark UEFI reserved regions as MEMBLOCK_NOMAP
  BACKPORT: arm64: only consider memblocks with NOMAP cleared for linear mapping
  UPSTREAM: mm/memblock: add MEMBLOCK_NOMAP attribute to memblock memory table
  ANDROID: dm: android-verity: Remove fec_header location constraint
  BACKPORT: audit: consistently record PIDs with task_tgid_nr()
  android-base.cfg: Enable kernel ASLR
  UPSTREAM: vmlinux.lds.h: allow arch specific handling of ro_after_init data section
  UPSTREAM: ARM/vdso: Mark the vDSO code read-only after init
  UPSTREAM: x86/vdso: Mark the vDSO code read-only after init
  UPSTREAM: lkdtm: Verify that '__ro_after_init' works correctly
  UPSTREAM: arch: Introduce post-init read-only memory
  UPSTREAM: x86/mm: Always enable CONFIG_DEBUG_RODATA and remove the Kconfig option
  UPSTREAM: mm/init: Add 'rodata=off' boot cmdline parameter to disable read-only kernel mappings
  UPSTREAM: asm-generic: Consolidate mark_rodata_ro()
  UPSTREAM: arm64: spinlock: fix spin_unlock_wait for LSE atomics
  UPSTREAM: arm64: avoid TLB conflict with CONFIG_RANDOMIZE_BASE
  UPSTREAM: arm64: Only select ARM64_MODULE_PLTS if MODULES=y
  UPSTREAM: arm64: kasan: Use actual memory node when populating the kernel image shadow
  UPSTREAM: arm64: lse: deal with clobbered IP registers after branch via PLT
  UPSTREAM: arm64: mm: check at build time that PAGE_OFFSET divides the VA space evenly
  UPSTREAM: arm64: kasan: Fix zero shadow mapping overriding kernel image shadow
  UPSTREAM: arm64: consistently use p?d_set_huge
  UPSTREAM: arm64: fix KASLR boot-time I-cache maintenance
  UPSTREAM: arm64: hugetlb: partial revert of 66b3923a1a0f
  UPSTREAM: arm64: make irq_stack_ptr more robust
  UPSTREAM: arm64: efi: invoke EFI_RNG_PROTOCOL to supply KASLR randomness
  UPSTREAM: efi: stub: use high allocation for converted command line
  UPSTREAM: efi: stub: add implementation of efi_random_alloc()
  BACKPORT: efi: stub: implement efi_get_random_bytes() based on EFI_RNG_PROTOCOL
  BACKPORT: arm64: kaslr: randomize the linear region
  UPSTREAM: arm64: mm: treat memstart_addr as a signed quantity
  UPSTREAM: arm64: vmemmap: use virtual projection of linear region
  BACKPORT: arm64: add support for kernel ASLR
  UPSTREAM: arm64: add support for building vmlinux as a relocatable PIE binary
  UPSTREAM: arm64: switch to relative exception tables
  UPSTREAM: extable: add support for relative extables to search and sort routines
  UPSTREAM: scripts/sortextable: add support for ET_DYN binaries
  UPSTREAM: arm64: futex.h: Add missing PAN toggling
  UPSTREAM: arm64: make asm/elf.h available to asm files
  UPSTREAM: arm64: avoid dynamic relocations in early boot code
  UPSTREAM: arm64: avoid R_AARCH64_ABS64 relocations for Image header fields
  UPSTREAM: arm64: add support for module PLTs
  UPSTREAM: arm64: move brk immediate argument definitions to separate header
  UPSTREAM: arm64: mm: use bit ops rather than arithmetic in pa/va translations
  UPSTREAM: arm64: mm: only perform memstart_addr sanity check if DEBUG_VM
  UPSTREAM: arm64: User die() instead of panic() in do_page_fault()
  UPSTREAM: arm64: allow kernel Image to be loaded anywhere in physical memory
  UPSTREAM: arm64: defer __va translation of initrd_start and initrd_end
  UPSTREAM: arm64: move kernel image to base of vmalloc area
  BACKPORT: arm64: kvm: deal with kernel symbols outside of linear mapping
  UPSTREAM: arm64: decouple early fixmap init from linear mapping
  UPSTREAM: arm64: pgtable: implement static [pte|pmd|pud]_offset variants
  UPSTREAM: arm64: introduce KIMAGE_VADDR as the virtual base of the kernel region
  BACKPORT: arm64: add support for ioremap() block mappings
  BACKPORT: arm64: prevent potential circular header dependencies in asm/bug.h
  UPSTREAM: of/fdt: factor out assignment of initrd_start/initrd_end
  UPSTREAM: of/fdt: make memblock minimum physical address arch configurable
  UPSTREAM: arm64: Remove the get_thread_info() function
  BACKPORT: arm64: kernel: Don't toggle PAN on systems with UAO
  UPSTREAM: arm64: cpufeature: Test 'matches' pointer to find the end of the list
  UPSTREAM: arm64: kernel: Add support for User Access Override
  UPSTREAM: arm64: add ARMv8.2 id_aa64mmfr2 boiler plate
  UPSTREAM: arm64: cpufeature: Change read_cpuid() to use sysreg's mrs_s macro
  UPSTREAM: arm64: use local label prefixes for __reg_num symbols
  UPSTREAM: arm64: vdso: Mark vDSO code as read-only
  UPSTREAM: arm64: ubsan: select ARCH_HAS_UBSAN_SANITIZE_ALL
  UPSTREAM: arm64: ptdump: Indicate whether memory should be faulting
  UPSTREAM: arm64: Add support for ARCH_SUPPORTS_DEBUG_PAGEALLOC
  UPSTREAM: arm64: mm: avoid calling apply_to_page_range on empty range
  UPSTREAM: arm64: Drop alloc function from create_mapping
  UPSTREAM: arm64: prefetch: add missing #include for spin_lock_prefetch
  UPSTREAM: arm64: lib: patch in prfm for copy_page if requested
  UPSTREAM: arm64: lib: improve copy_page to deal with 128 bytes at a time
  UPSTREAM: arm64: prefetch: add alternative pattern for CPUs without a prefetcher
  UPSTREAM: arm64: prefetch: don't provide spin_lock_prefetch with LSE
  UPSTREAM: arm64: allow vmalloc regions to be set with set_memory_*
  BACKPORT: arm64: kernel: implement ACPI parking protocol
  sched: Add Kconfig option DEFAULT_USE_ENERGY_AWARE to set ENERGY_AWARE feature flag
  sched/fair: remove printk while schedule is in progress
  ANDROID: fs: FS tracepoints to track IO.
  sched/walt: Drop arch-specific timer access
  ANDROID: fiq_debugger: Pass task parameter to unwind_frame()
  eas/sched/fair: Fixing comments in find_best_target.
  input: keyreset: switch to orderly_reboot
  UPSTREAM: tun: fix transmit timestamp support
  BACKPORT: arm64: mm: create new fine-grained mappings at boot
  BACKPORT: arm64: ensure _stext and _etext are page-aligned
  UPSTREAM: arm64: mm: allow passing a pgdir to alloc_init_*
  UPSTREAM: arm64: mm: allocate pagetables anywhere
  UPSTREAM: arm64: mm: use fixmap when creating page tables
  UPSTREAM: arm64: mm: add functions to walk tables in fixmap
  UPSTREAM: arm64: mm: add __{pud,pgd}_populate
  UPSTREAM: arm64: mm: avoid redundant __pa(__va(x))
  UPSTREAM: arm64: mm: add functions to walk page tables by PA
  UPSTREAM: arm64: mm: move pte_* macros
  UPSTREAM: arm64: kasan: avoid TLB conflicts
  UPSTREAM: arm64: mm: add code to safely replace TTBR1_EL1
  UPSTREAM: arm64: add function to install the idmap
  UPSTREAM: arm64: unmap idmap earlier
  UPSTREAM: arm64: unify idmap removal
  UPSTREAM: arm64: mm: place empty_zero_page in bss
  UPSTREAM: arm64: mm: specialise pagetable allocators
  UPSTREAM: asm-generic: Fix local variable shadow in __set_fixmap_offset
  BACKPORT: Eliminate the .eh_frame sections from the aarch64 vmlinux and kernel modules
  UPSTREAM: arm64: Fix an enum typo in mm/dump.c
  UPSTREAM: arm64: kasan: ensure that the KASAN zero page is mapped read-only
  UPSTREAM: arch/arm/include/asm/pgtable-3level.h: add pmd_mkclean for THP
  UPSTREAM: arm64: hide __efistub_ aliases from kallsyms
  UPSTREAM: arm64: head.S: use memset to clear BSS
  UPSTREAM: efi: stub: define DISABLE_BRANCH_PROFILING for all architectures
  UPSTREAM: arm64: entry: remove pointless SPSR mode check
  UPSTREAM: arm64: mm: move pgd_cache initialisation to pgtable_cache_init
  UPSTREAM: arm64: traps: address fallout from printk -> pr_* conversion
  UPSTREAM: arm64: ftrace: fix a stack tracer's output under function graph tracer
  UPSTREAM: arm64: pass a task parameter to unwind_frame()
  UPSTREAM: arm64: ftrace: modify a stack frame in a safe way
  UPSTREAM: arm64: remove irq_count and do_softirq_own_stack()
  UPSTREAM: arm64: hugetlb: add support for PTE contiguous bit
  BACKPORT: arm64: Use PoU cache instr for I/D coherency
  BACKPORT: arm64: kernel: fix architected PMU registers unconditional access
  UPSTREAM: arm64: Defer dcache flush in __cpu_copy_user_page
  UPSTREAM: arm64: reduce stack use in irq_handler
  UPSTREAM: arm64: Documentation: add list of software workarounds for errata
  UPSTREAM: arm64: mm: place __cpu_setup in .text
  UPSTREAM: arm64: cmpxchg: Don't incldue linux/mmdebug.h
  UPSTREAM: arm64: mm: fold alternatives into .init
  BACKPORT: arm64: Remove redundant padding from linker script
  UPSTREAM: arm64: mm: remove pointless PAGE_MASKing
  net: inet: diag: expose the socket mark to privileged processes.
  net: diag: make udp_diag_destroy work for mapped addresses.
  net: diag: support SOCK_DESTROY for UDP sockets
  net: diag: allow socket bytecode filters to match socket marks
  net: diag: slightly refactor the inet_diag_bc_audit error checks.
  net: diag: Add support to filter on device index
  UPSTREAM: arm64: don't call C code with el0's fp register
  UPSTREAM: arm64: when walking onto the task stack, check sp & fp are in current->stack
  UPSTREAM: arm64: Add this_cpu_ptr() assembler macro for use in entry.S
  UPSTREAM: arm64: irq: fix walking from irq stack to task stack
  UPSTREAM: arm64: Add do_softirq_own_stack() and enable irq_stacks
  UPSTREAM: arm64: Modify stack trace and dump for use with irq_stack
  UPSTREAM: arm64: Store struct thread_info in sp_el0
  UPSTREAM: arm64: Add trace_hardirqs_off annotation in ret_to_user
  UPSTREAM: arm64: ftrace: fix the comments for ftrace_modify_code
  UPSTREAM: arm64: ftrace: stop using kstop_machine to enable/disable tracing
  UPSTREAM: arm64: spinlock: serialise spin_unlock_wait against concurrent lockers
  UPSTREAM: arm64: enable HAVE_IRQ_TIME_ACCOUNTING
  UPSTREAM: arm64: fix COMPAT_SHMLBA definition for large pages
  UPSTREAM: arm64: add __init/__initdata section marker to some functions/variables
  UPSTREAM: usb: gadget: f_fs: Fix use-after-free
  UPSTREAM: brcmfmac: avoid potential stack overflow in brcmf_cfg80211_start_ap()
  UPSTREAM: arm64: pgtable: implement pte_accessible()
  UPSTREAM: arm64: mm: allow sections for unaligned bases
  UPSTREAM: arm64: mm: detect bad __create_mapping uses
  UPSTREAM: x86: fix SMAP in 32-bit environments
  UPSTREAM: audit: fix a double fetch in audit_log_single_execve_arg()
  UPSTREAM: ARM: 8494/1: mm: Enable PXN when running non-LPAE kernel on LPAE processor
  FIXUP: sched/tune: update accouting before CPU capacity
  FIXUP: sched/tune: add fixes missing from a previous patch
  arm: Fix #if/#ifdef typo in topology.c
  arm: Fix build error "conflicting types for 'scale_cpu_capacity'"
  sched/walt: use do_div instead of division operator
  DEBUG: cpufreq: fix cpu_capacity tracing build for non-smp systems
  UPSTREAM: ALSA: usb-audio: Fix double-free in error paths after snd_usb_add_audio_stream() call
  BACKPORT: ALSA: usb-audio: Minor code cleanup in create_fixed_stream_quirk()
  sched/walt: include missing header for arm_timer_read_counter()
  cpufreq: Kconfig: Fixup incorrect selection by CPU_FREQ_DEFAULT_GOV_SCHED
  FROMLIST: pstore: drop pmsg bounce buffer
  UPSTREAM: usercopy: remove page-spanning test for now
  UPSTREAM: usercopy: force check_object_size() inline
  BACKPORT: usercopy: fold builtin_const check into inline function
  UPSTREAM: x86/uaccess: force copy_*_user() to be inlined
  UPSTREAM: HID: core: prevent out-of-bound readings
  UPSTREAM: block: fix use-after-free in sys_ioprio_get()
  Android: Fix build breakages.
  UPSTREAM: tty: Prevent ldisc drivers from re-using stale tty fields
  UPSTREAM: netfilter: nfnetlink: correctly validate length of batch messages
  cpuset: Make cpusets restore on hotplug
  UPSTREAM: mm/slub: support left redzone
  UPSTREAM: x86: reorganize SMAP handling in user space accesses
  UPSTREAM: Make the hardened user-copy code depend on having a hardened allocator
  Android: MMC/UFS IO Latency Histograms.
  UPSTREAM: usercopy: fix overlap check for kernel text
  UPSTREAM: usercopy: avoid potentially undefined behavior in pointer math
  UPSTREAM: mm: SLUB hardened usercopy support
  UPSTREAM: mm: SLAB hardened usercopy support
  BACKPORT: arm64/uaccess: Enable hardened usercopy
  BACKPORT: ARM: uaccess: Enable hardened usercopy
  BACKPORT: x86/uaccess: Enable hardened usercopy
  BACKPORT: mm: Hardened usercopy
  BACKPORT: mm: Implement stack frame object validation
  UPSTREAM: mm: Add is_migrate_cma_page
  UPSTREAM: unsafe_[get|put]_user: change interface to use a error target label
  BACKPORT: arm64: mm: fix location of _etext
  BACKPORT: ARM: 8583/1: mm: fix location of _etext
  UPSTREAM: Use the new batched user accesses in generic user string handling
  UPSTREAM: Add 'unsafe' user access functions for batched accesses
  BACKPORT: Don't show empty tag stats for unprivileged uids
  UPSTREAM: tcp: fix use after free in tcp_xmit_retransmit_queue()
  ANDROID: base-cfg: drop SECCOMP_FILTER config
  UPSTREAM: proc: prevent accessing /proc/<PID>/environ until it's ready
  UPSTREAM: [media] xc2028: unlock on error in xc2028_set_config()
  UPSTREAM: [media] xc2028: avoid use after free
  UPSTREAM: block: fix use-after-free in seq file
  UPSTREAM: assoc_array: don't call compare_object() on a node
  ANDROID: base-cfg: enable SECCOMP config
  ANDROID: rcu_sync: Export rcu_sync_lockdep_assert
  UPSTREAM: USB: cdc-acm: more sanity checking
  UPSTREAM: USB: iowarrior: fix oops with malicious USB descriptors
  UPSTREAM: USB: usb_driver_claim_interface: add sanity checking
  UPSTREAM: USB: mct_u232: add sanity checking in probe
  UPSTREAM: USB: cypress_m8: add endpoint sanity check
  UPSTREAM: Input: powermate - fix oops with malicious USB descriptors
  BACKPORT: tcp: enable per-socket rate limiting of all 'challenge acks'
  RFC: FROMLIST: cgroup: reduce read locked section of cgroup_threadgroup_rwsem during fork
  RFC: FROMLIST: cgroup: avoid synchronize_sched() in __cgroup_procs_write()
  RFC: FROMLIST: locking/percpu-rwsem: Optimize readers and reduce global impact
  net: ipv6: Fix ping to link-local addresses.
  ipv6: fix endianness error in icmpv6_err
  ANDROID: dm: android-verity: Allow android-verity to be compiled as an independent module
  Revert "Android: MMC/UFS IO Latency Histograms."
  Android: MMC/UFS IO Latency Histograms.
  UPSTREAM: af_unix: Guard against other == sk in unix_dgram_sendmsg
  UPSTREAM: ALSA: timer: Fix race among timer ioctls
  UPSTREAM: tcp: make challenge acks less predictable
  sched/fair: Avoid redundant idle_cpu() call in update_sg_lb_stats()
  FIXUP: sched: scheduler-driven cpu frequency selection
  UPSTREAM: usb: gadget: configfs: add mutex lock before unregister gadget
  ANDROID: dm-verity: adopt changes made to dm callbacks
  UPSTREAM: ecryptfs: fix handling of directory opening
  UPSTREAM: ecryptfs: don't allow mmap when the lower fs doesn't support it
  UPSTREAM: Revert "ecryptfs: forbid opening files without mmap handler"
  ANDROID: net: core: fix UID-based routing
  ANDROID: net: fib: remove duplicate assignment
  FROMLIST: proc: Fix timerslack_ns CAP_SYS_NICE check when adjusting self
  sched/rt: Add Kconfig option to enable panicking for RT throttling
  sched/rt: print RT tasks when RT throttling is activated
  UPSTREAM: sched: Fix a race between __kthread_bind() and sched_setaffinity()
  sched/fair: Favor higher cpus only for boosted tasks
  vmstat: make vmstat_updater deferrable again and shut down on idle
  sched/fair: call OPP update when going idle after migration
  sched/cpufreq_sched: fix thermal capping events
  sched/fair: Picking cpus with low OPPs for tasks that prefer idle CPUs
  FIXUP: sched/tune: do initialization as a postcore_initicall
  DEBUG: sched: add tracepoint for RD overutilized
  sched/tune: Introducing a new schedtune attribute prefer_idle
  sched: use util instead of capacity to select busy cpu
  arch_timer: add error handling when the MPM global timer is cleared
  FIXUP: sched: Fix double-release of spinlock in move_queued_task
  FIXUP: sched/fair: Fix hang during suspend in sched_group_energy
  FIXUP: sched: fix SchedFreq integration for both PELT and WALT
  sched: EAS: Avoid causing spikes to max-freq unnecessarily
  FIXUP: sched: fix set_cfs_cpu_capacity when WALT is in use
  sched/walt: Accounting for number of irqs pending on each core
  sched: Introduce Window Assisted Load Tracking (WALT)
  sched/tune: fix PB and PC cuts indexes definition
  sched/fair: optimize idle cpu selection for boosted tasks
  FIXUP: sched/tune: fix accounting for runnable tasks
  sched/tune: use a single initialisation function
  sched/{fair,tune}: simplify fair.c code
  FIXUP: sched/tune: fix payoff calculation for boost region
  sched/tune: Add support for negative boost values
  FIX: sched/tune: move schedtune_nornalize_energy into fair.c
  FIX: sched/tune: update usage of boosted task utilisation on CPU selection
  sched/fair: add tunable to set initial task load
  sched/fair: add tunable to force selection at cpu granularity
  sched: EAS: take cstate into account when selecting idle core
  sched/cpufreq_sched: Consolidated update
  FIXUP: sched: fix build for non-SMP target
  DEBUG: sched/tune: add tracepoint on P-E space filtering
  DEBUG: sched/tune: add tracepoint for energy_diff() values
  DEBUG: sched/tune: add tracepoint for task boost signal
  arm: topology: Define TC2 energy and provide it to the scheduler
  CHROMIUM: sched: update the average of nr_running
  ANDROID: dm verity fec: pack the fec_header structure
  ANDROID: dm: android-verity: Verify header before fetching table
  ANDROID: dm: allow adb disable-verity only in userdebug
  ANDROID: dm: mount as linear target if eng build
  ANDROID: dm: use default verity public key
  ANDROID: dm: fix signature verification flag
  ANDROID: dm: use name_to_dev_t
  ANDROID: dm: rename dm-linear methods for dm-android-verity
  ANDROID: dm: Minor cleanup
  ANDROID: dm: Mounting root as linear device when verity disabled
  ANDROID: dm-android-verity: Rebase on top of 4.1
  ANDROID: dm: Add android verity target
  ANDROID: dm: fix dm_substitute_devices()
  ANDROID: dm: Rebase on top of 4.1
  CHROMIUM: dm: boot time specification of dm=
  UPSTREAM: net: Fix use after free in the recvmmsg exit path
  Implement memory_state_time, used by qcom,cpubw
  Revert "panic: Add board ID to panic output"
  usb: gadget: f_accessory: remove duplicate endpoint alloc
  BACKPORT: brcmfmac: defer DPC processing during probe
  FROMLIST: proc: Add LSM hook checks to /proc/<tid>/timerslack_ns
  FROMLIST: proc: Relax /proc/<tid>/timerslack_ns capability requirements
  UPSTREAM: sched: panic on corrupted stack end
  UPSTREAM: ecryptfs: forbid opening files without mmap handler
  UPSTREAM: proc: prevent stacking filesystems on top
  UPSTREAM: ppp: defer netns reference release for ppp channel
  cpuset: Add allow_attach hook for cpusets on android.
  Revert "CHROMIUM: android: binder: Fix potential scheduling-while-atomic"
  CHROMIUM: android: binder: Fix potential scheduling-while-atomic
  UPSTREAM: netfilter: x_tables: make sure e->next_offset covers remaining blob size
  UPSTREAM: netfilter: x_tables: validate e->target_offset early
  UPSTREAM: KEYS: potential uninitialized variable
  UPSTREAM: KEYS: Fix ASN.1 indefinite length object parsing
  UPSTREAM: ppp: take reference on channels netns
  UPSTREAM: netfilter: x_tables: fix unconditional helper
  ANDROID: sdcardfs: fix itnull.cocci warnings
  android-recommended.cfg: enable fstack-protector-strong
  sdcardfs: Truncate packages_gid.list on overflow
  UPSTREAM: cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind
  BACKPORT: ptrace: use fsuid, fsgid, effective creds for fs access checks
  BACKPORT: proc: add /proc/<pid>/timerslack_ns interface
  BACKPORT: timer: convert timer_slack_ns from unsigned long to u64
  netfilter: xt_quota2: make quota2_log work well
  Revert "usb: gadget: prevent change of Host MAC address of 'usb0' interface"
  BACKPORT: PM / sleep: Go direct_complete if driver has no callbacks
  ANDROID: base-cfg: enable UID_CPUTIME
  UPSTREAM: USB: usbfs: fix potential infoleak in devio
  UPSTREAM: ALSA: timer: Fix leak in events via snd_timer_user_ccallback
  UPSTREAM: ALSA: timer: Fix leak in events via snd_timer_user_tinterrupt
  UPSTREAM: ALSA: timer: Fix leak in SNDRV_TIMER_IOCTL_PARAMS
  UPSTREAM: net: fix infoleak in rtnetlink
  ANDROID: configs: remove unused configs
  ANDROID: cpu: send KOBJ_ONLINE event when enabling cpus
  UPSTREAM: usbnet: cleanup after bind() in probe()
  ANDROID: dm verity fec: initialize recursion level
  ANDROID: dm verity fec: fix RS block calculation
  ANDROID: dm verity fec: add missing release from fec_ktype
  ANDROID: dm verity fec: limit error correction recursion
  ANDROID: restrict access to perf events
  FROMLIST: security,perf: Allow further restriction of perf_event_open
  BACKPORT: perf tools: Document the perf sysctls
  UPSTREAM: arm64: module: avoid undefined shift behavior in reloc_data()
  UPSTREAM: arm64: module: fix relocation of movz instruction with negative immediate
  Revert "armv6 dcc tty driver"
  Revert "arm: dcc_tty: fix armv6 dcc tty build failure"
  ARM64: Ignore Image-dtb from git point of view
  arm64: add option to build Image-dtb
  ANDROID: usb: gadget: f_midi: set fi->f to NULL when free f_midi function
  UPSTREAM: mac80211: fix "warning: ‘target_metric’ may be used uninitialized"
  UPSTREAM: tty: Fix unsafe ldisc reference via ioctl(TIOCGETD)
  Revert "drivers: power: use 'current' instead of 'get_current()'"
  cpufreq: interactive: drop cpufreq_{get,put}_global_kobject func calls
  Revert "cpufreq: interactive: build fixes for 4.4"
  xt_qtaguid: Fix panic caused by processing non-full socket.
  fiq_debugger: Add fiq_debugger.disable option
  UPSTREAM: procfs: fixes pthread cross-thread naming if !PR_DUMPABLE
  FROMLIST: wlcore: Disable filtering in AP role
  Revert "drivers: power: Add watchdog timer to catch drivers which lockup during suspend."
  DEBUG: schedtune: add tracepoint for schedtune_tasks_update() values
  DEBUG: schedtune: add tracepoint for CPU boost signal
  DEBUG: schedtune: add tracepoint for SchedTune configuration update
  DEBUG: sched: add energy procfs interface
  DEBUG: sched,cpufreq: add cpu_capacity change tracepoint
  DEBUG: sched: add tracepoint for CPU load/util signals
  DEBUG: sched: add tracepoint for task load/util signals
  DEBUG: sched: add tracepoint for cpu/freq scale invariance
  sched/fair: filter energy_diff() based on energy_payoff value
  sched/tune: add support to compute normalized energy
  sched/fair: keep track of energy/capacity variations
  sched/fair: add boosted task utilization
  sched/{fair,tune}: track RUNNABLE tasks impact on per CPU boost value
  sched/tune: compute and keep track of per CPU boost value
  sched/tune: add initial support for CGroups based boosting
  sched/fair: add boosted CPU usage
  sched/fair: add function to convert boost value into "margin"
  sched/tune: add sysctl interface to define a boost value
  sched/tune: add detailed documentation
  fixup! sched/fair: jump to max OPP when crossing UP threshold
  fixup! sched: scheduler-driven cpu frequency selection
  sched: rt scheduler sets capacity requirement
  sched: deadline: use deadline bandwidth in scale_rt_capacity
  sched: remove call of sched_avg_update from sched_rt_avg_update
  sched/cpufreq_sched: add trace events
  sched/fair: jump to max OPP when crossing UP threshold
  sched/fair: cpufreq_sched triggers for load balancing
  sched/{core,fair}: trigger OPP change request on fork()
  sched/fair: add triggers for OPP change requests
  sched: scheduler-driven cpu frequency selection
  cpufreq: introduce cpufreq_driver_is_slow
  sched: Consider misfit tasks when load-balancing
  sched: Add group_misfit_task load-balance type
  sched: Add per-cpu max capacity to sched_group_capacity
  sched: Do eas idle balance regardless of the rq avg idle value
  arm64: Enable max freq invariant scheduler load-tracking and capacity support
  arm: Enable max freq invariant scheduler load-tracking and capacity support
  sched: Update max cpu capacity in case of max frequency constraints
  cpufreq: Max freq invariant scheduler load-tracking and cpu capacity support
  arm64, topology: Updates to use DT bindings for EAS costing data
  sched: Support for extracting EAS energy costs from DT
  Documentation: DT bindings for energy model cost data required by EAS
  sched: Disable energy-unfriendly nohz kicks
  sched: Consider a not over-utilized energy-aware system as balanced
  sched: Energy-aware wake-up task placement
  sched: Determine the current sched_group idle-state
  sched, cpuidle: Track cpuidle state index in the scheduler
  sched: Add over-utilization/tipping point indicator
  sched: Estimate energy impact of scheduling decisions
  sched: Extend sched_group_energy to test load-balancing decisions
  sched: Calculate energy consumption of sched_group
  sched: Highest energy aware balancing sched_domain level pointer
  sched: Relocated cpu_util() and change return type
  sched: Compute cpu capacity available at current frequency
  arm64: Cpu invariant scheduler load-tracking and capacity support
  arm: Cpu invariant scheduler load-tracking and capacity support
  sched: Introduce SD_SHARE_CAP_STATES sched_domain flag
  sched: Initialize energy data structures
  sched: Introduce energy data structures
  sched: Make energy awareness a sched feature
  sched: Documentation for scheduler energy cost model
  sched: Prevent unnecessary active balance of single task in sched group
  sched: Enable idle balance to pull single task towards cpu with higher capacity
  sched: Consider spare cpu capacity at task wake-up
  sched: Add cpu capacity awareness to wakeup balancing
  sched: Store system-wide maximum cpu capacity in root domain
  arm: Update arch_scale_cpu_capacity() to reflect change to define
  arm64: Enable frequency invariant scheduler load-tracking support
  arm: Enable frequency invariant scheduler load-tracking support
  cpufreq: Frequency invariant scheduler load-tracking support
  sched/fair: Fix new task's load avg removed from source CPU in wake_up_new_task()
  fiq_debugger: Add option to apply uart overlay by FIQ_DEBUGGER_UART_OVERLAY
  Revert "Recreate asm/mach/mmc.h include file"
  Revert "ARM: Add 'card_present' state to mmc_platfrom_data"
  usb: dual-role: make stub functions inline
  Revert "mmc: Add status IRQ and status callback function to mmc platform data"
  quick selinux support for tracefs
  Revert "hid-multitouch: Filter collections by application usage."
  Revert "HID: steelseries: validate output report details"
  xt_qtaguid: Fix panic caused by synack processing
  Revert "mm: vmscan: Add a debug file for shrinkers"
  Revert "SELinux: Enable setting security contexts on rootfs inodes."
  Revert "SELinux: build fix for 4.1"
  fuse: Add support for d_canonical_path
  vfs: change d_canonical_path to take two paths
  android: recommended.cfg: remove CONFIG_UID_STAT
  netfilter: xt_qtaguid: seq_printf fixes
  Revert "misc: uidstat: Adding uid stat driver to collect network statistics."
  Revert "net: activity_stats: Add statistics for network transmission activity"
  Revert "net: activity_stats: Stop using obsolete create_proc_read_entry api"
  Revert "misc: uidstat: avoid create_stat() race and blockage."
  Revert "misc: uidstat: Remove use of obsolete create_proc_read_entry api"
  Revert "misc seq_printf fixes for 4.4"
  Revert "misc: uid_stat: Include linux/atomic.h instead of asm/atomic.h"
  Revert "net: socket ioctl to reset connections matching local address"
  Revert "net: fix iterating over hashtable in tcp_nuke_addr()"
  Revert "net: fix crash in tcp_nuke_addr()"
  Revert "Don't kill IPv4 sockets when killing IPv6 sockets was requested."
  Revert "tcp: Fix IPV6 module build errors"
  android: base-cfg: remove CONFIG_SWITCH
  Revert "switch: switch class and GPIO drivers."
  Revert "drivers: switch: remove S_IWUSR from dev_attr"
  ANDROID: base-cfg: enable CONFIG_IP_NF_NAT
  BACKPORT: selinux: restrict kernel module loading
  android: base-cfg: enable CONFIG_QUOTA
  ANDROID: mmc: Add CONFIG_MMC_SIMULATE_MAX_SPEED
  android: base-cfg: Add CONFIG_INET_DIAG_DESTROY
  cpufreq: interactive: only apply interactive boost when enabled
  cpufreq: interactive: fix policy locking
  ANDROID: dm verity fec: add sysfs attribute fec/corrected
  ANDROID: android: base-cfg: enable CONFIG_DM_VERITY_FEC
  UPSTREAM: dm verity: add ignore_zero_blocks feature
  UPSTREAM: dm verity: add support for forward error correction
  UPSTREAM: dm verity: factor out verity_for_bv_block()
  UPSTREAM: dm verity: factor out structures and functions useful to separate object
  UPSTREAM: dm verity: move dm-verity.c to dm-verity-target.c
  UPSTREAM: dm verity: separate function for parsing opt args
  UPSTREAM: dm verity: clean up duplicate hashing code
  UPSTREAM: dm: don't save and restore bi_private
  mm: Export do_munmap
  sdcardfs: remove unneeded __init and __exit
  sdcardfs: Remove unused code
  fs: Export d_absolute_path
  sdcardfs: remove effectless config option
  inotify: Fix erroneous update of bit count
  fs: sdcardfs: Declare LOOKUP_CASE_INSENSITIVE unconditionally
  trace: cpufreq: fix typo in min/max cpufreq
  sdcardfs: Add support for d_canonical_path
  vfs: add d_canonical_path for stacked filesystem support
  sdcardfs: Bring up to date with Android M permissions:
  Changed type-casting in packagelist management
  Port of sdcardfs to 4.4
  Included sdcardfs source code for kernel 3.0
  ANDROID: usb: gadget: Add support for MTP OS desc
  CHROMIUM: usb: gadget: f_accessory: add .raw_request callback
  CHROMIUM: usb: gadget: audio_source: add .free_func callback
  CHROMIUM: usb: gadget: f_mtp: fix usb_ss_ep_comp_descriptor
  CHROMIUM: usb: gadget: f_mtp: Add SuperSpeed support
  FROMLIST: mmc: block: fix ABI regression of mmc_blk_ioctl
  FROMLIST: mm: ASLR: use get_random_long()
  FROMLIST: drivers: char: random: add get_random_long()
  FROMLIST: pstore-ram: fix NULL reference when used with pdata
  usb: u_ether: Add missing rx_work init
  ANDROID: dm-crypt: run in a WQ_HIGHPRI workqueue
  Revert "ANDROID: dm-crypt: run in a WQ_HIGHPRI workqueue"
  UPSTREAM: ALSA: timer: Harden slave timer list handling
  ANDROID: dm-crypt: run in a WQ_HIGHPRI workqueue
  misc: uid_stat: Include linux/atomic.h instead of asm/atomic.h
  hid-sensor-hub.c: fix wrong do_div() usage
  power: Provide dummy log_suspend_abort_reason() if SUSPEND is disabled
  UPSTREAM: ARM: 8457/1: psci-smp is built only for SMP
  PM / suspend: Add dependency on RTC_LIB
  drivers: power: use 'current' instead of 'get_current()'
  video: adf: Set ADF_MEMBLOCK to boolean
  video: adf: Fix modular build
  net: ppp: Fix modular build for PPPOLAC and PPPOPNS
  net: pppolac/pppopns: Replace msg.msg_iov with iov_iter_kvec()
  ANDROID: mmc: sdio: Disable retuning in sdio_reset_comm()
  ANDROID: mmc: Move tracepoint creation and export symbols
  ANDROID: kernel/watchdog: fix unused variable warning
  ANDROID: usb: gadget: f_mtp: don't use le16 for u8 field
  ANDROID: lowmemorykiller: fix declaration order warnings
  ANDROID: net: fix 'const' warnings

Change-Id: I5765d6d5577c697c4a8990adb44cfac152f3f787
Signed-off-by: Blagovest Kolenichev <bkolenichev@codeaurora.org>
2017-04-10 16:46:42 -07:00


/*
* Detect hard and soft lockups on a system
*
* started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
*
* Note: Most of this code is borrowed heavily from the original softlockup
* detector, so thanks to Ingo for the initial implementation.
* Some chunks also taken from the old x86-specific nmi watchdog code, thanks
* to those contributors as well.
*/
#define pr_fmt(fmt) "NMI watchdog: " fmt
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <linux/sched/rt.h>
#include <linux/tick.h>
#include <linux/workqueue.h>
#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
#include <linux/perf_event.h>
#include <linux/kthread.h>
#include <soc/qcom/watchdog.h>
/*
* The run state of the lockup detectors is controlled by the content of the
* 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
* bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
*
* 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
* are variables that are only used as an 'interface' between the parameters
* in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
* 'watchdog_thresh' variable is handled differently because its value is not
* boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
* is equal to zero.
*/
#define NMI_WATCHDOG_ENABLED_BIT 0
#define SOFT_WATCHDOG_ENABLED_BIT 1
#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT)
#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
static DEFINE_MUTEX(watchdog_proc_mutex);
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
#else
static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
#endif
int __read_mostly nmi_watchdog_enabled;
int __read_mostly soft_watchdog_enabled;
int __read_mostly watchdog_user_enabled;
int __read_mostly watchdog_thresh = 10;
#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
#else
#define sysctl_softlockup_all_cpu_backtrace 0
#define sysctl_hardlockup_all_cpu_backtrace 0
#endif
static struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
/* Helper for online, unparked cpus. */
#define for_each_watchdog_cpu(cpu) \
for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
/*
* The 'watchdog_running' variable is set to 1 when the watchdog threads
* are registered/started and is set to 0 when the watchdog threads are
* unregistered/stopped, so it is an indicator whether the threads exist.
*/
static int __read_mostly watchdog_running;
/*
* If a subsystem has a need to deactivate the watchdog temporarily, it
* can use the suspend/resume interface to achieve this. The content of
* the 'watchdog_suspended' variable reflects this state. Existing threads
* are parked/unparked by the lockup_detector_{suspend|resume} functions
* (see comment blocks pertaining to those functions for further details).
*
* 'watchdog_suspended' also prevents threads from being registered/started
* or unregistered/stopped via parameters in /proc/sys/kernel, so the state
* of 'watchdog_running' cannot change while the watchdog is deactivated
* temporarily (see related code in 'proc' handlers).
*/
static int __read_mostly watchdog_suspended;
static u64 __read_mostly sample_period;
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(unsigned int, watchdog_en);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
#endif
#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
static cpumask_t __read_mostly watchdog_cpus;
#endif
#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
static unsigned long soft_lockup_nmi_warn;
/* boot commands */
/*
* Should we panic when a soft-lockup or hard-lockup occurs:
*/
#ifdef CONFIG_HARDLOCKUP_DETECTOR
unsigned int __read_mostly hardlockup_panic =
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
static unsigned long __maybe_unused hardlockup_allcpu_dumped;
/*
* We may not want to enable hard lockup detection by default in all cases,
* for example when running the kernel as a guest on a hypervisor. In these
* cases this function can be called to disable hard lockup detection. This
* function should only be executed once by the boot processor before the
* kernel command line parameters are parsed, because otherwise it is not
* possible to override this in hardlockup_panic_setup().
*/
void hardlockup_detector_disable(void)
{
watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
}
static int __init hardlockup_panic_setup(char *str)
{
if (!strncmp(str, "panic", 5))
hardlockup_panic = 1;
else if (!strncmp(str, "nopanic", 7))
hardlockup_panic = 0;
else if (!strncmp(str, "0", 1))
watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
else if (!strncmp(str, "1", 1))
watchdog_enabled |= NMI_WATCHDOG_ENABLED;
return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
#endif
unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
static int __init softlockup_panic_setup(char *str)
{
softlockup_panic = simple_strtoul(str, NULL, 0);
return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);
static int __init nowatchdog_setup(char *str)
{
watchdog_enabled = 0;
return 1;
}
__setup("nowatchdog", nowatchdog_setup);
static int __init nosoftlockup_setup(char *str)
{
watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;
return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);
#ifdef CONFIG_SMP
static int __init softlockup_all_cpu_backtrace_setup(char *str)
{
sysctl_softlockup_all_cpu_backtrace =
!!simple_strtol(str, NULL, 0);
return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
static int __init hardlockup_all_cpu_backtrace_setup(char *str)
{
sysctl_hardlockup_all_cpu_backtrace =
!!simple_strtol(str, NULL, 0);
return 1;
}
__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
#endif
/*
* Hard-lockup warnings should be triggered after just a few seconds. Soft
* lockups, however, can have false positives under extreme conditions, so we
* generally want a higher threshold for soft lockups than for hard lockups.
* We couple the thresholds with a factor: the soft-lockup threshold is twice
* the hard-lockup threshold.
*/
static int get_softlockup_thresh(void)
{
return watchdog_thresh * 2;
}
/*
* Returns seconds, approximately. We don't need nanosecond
* resolution, and we don't need to waste time with a big divide when
* 2^30ns == 1.074s.
*/
static unsigned long get_timestamp(void)
{
return running_clock() >> 30LL; /* 2^30 ~= 10^9 */
}
static void set_sample_period(void)
{
/*
* convert watchdog_thresh from seconds to ns
* the divide by 5 is to give hrtimer several chances (two
* or three with the current relation between the soft
* and hard thresholds) to increment before the
* hardlockup detector generates a warning
*/
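/*
* e.g. with the default watchdog_thresh of 10 seconds, the soft-lockup
* threshold is 20 seconds and the sample period works out to 4 seconds.
*/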
sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
}
/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{
__this_cpu_write(watchdog_touch_ts, get_timestamp());
}
/**
* touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
*
* Call when the scheduler may have stalled for legitimate reasons
* preventing the watchdog task from executing - e.g. the scheduler
* entering idle state. This should only be used for scheduler events.
* Use touch_softlockup_watchdog() for everything else.
*/
void touch_softlockup_watchdog_sched(void)
{
/*
* Preemption can be enabled. It doesn't matter which CPU's timestamp
* gets zeroed here, so use the raw_ operation.
*/
raw_cpu_write(watchdog_touch_ts, 0);
}
void touch_softlockup_watchdog(void)
{
touch_softlockup_watchdog_sched();
wq_watchdog_touch(raw_smp_processor_id());
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
void touch_all_softlockup_watchdogs(void)
{
int cpu;
/*
* This is done locklessly. Do we care if a 0 races with a timestamp?
* All it means is that the softlockup check starts one cycle later.
*/
for_each_watchdog_cpu(cpu)
per_cpu(watchdog_touch_ts, cpu) = 0;
wq_watchdog_touch(-1);
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR
void touch_nmi_watchdog(void)
{
/*
* Using __raw here because some code paths have
* preemption enabled. If preemption is enabled
* then interrupts should be enabled too, in which
* case we shouldn't have to worry about the watchdog
* going off.
*/
raw_cpu_write(watchdog_nmi_touch, true);
touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);
#endif
void touch_softlockup_watchdog_sync(void)
{
__this_cpu_write(softlockup_touch_sync, true);
__this_cpu_write(watchdog_touch_ts, 0);
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
/* watchdog detector functions */
static bool is_hardlockup(void)
{
unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
return true;
__this_cpu_write(hrtimer_interrupts_saved, hrint);
return false;
}
#endif
#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
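/*
* The online, watchdog-enabled cpus form a ring: each cpu checks the next
* cpu in 'watchdog_cpus' for hard lockups. Returns nr_cpu_ids when this is
* the only cpu in the mask and there is nothing to check.
*/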
static unsigned int watchdog_next_cpu(unsigned int cpu)
{
cpumask_t cpus = watchdog_cpus;
unsigned int next_cpu;
next_cpu = cpumask_next(cpu, &cpus);
if (next_cpu >= nr_cpu_ids)
next_cpu = cpumask_first(&cpus);
if (next_cpu == cpu)
return nr_cpu_ids;
return next_cpu;
}
static int is_hardlockup_other_cpu(unsigned int cpu)
{
unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
return 1;
per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
return 0;
}
static void watchdog_check_hardlockup_other_cpu(void)
{
unsigned int next_cpu;
/*
* Test for hardlockups every 3 samples. The sample period is
* watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over
* watchdog_thresh (over by 20%).
*/
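/*
* e.g. with the default 10s watchdog_thresh the sample period is 4s, so a
* cross-cpu check runs roughly every 12 seconds.
*/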
if (__this_cpu_read(hrtimer_interrupts) % 3 != 0)
return;
/* check for a hardlockup on the next cpu */
next_cpu = watchdog_next_cpu(smp_processor_id());
if (next_cpu >= nr_cpu_ids)
return;
smp_rmb();
if (per_cpu(watchdog_nmi_touch, next_cpu) == true) {
per_cpu(watchdog_nmi_touch, next_cpu) = false;
return;
}
if (is_hardlockup_other_cpu(next_cpu)) {
/* only warn once */
if (per_cpu(hard_watchdog_warn, next_cpu) == true)
return;
if (hardlockup_panic) {
pr_err("Watchdog detected hard LOCKUP on cpu %u",
next_cpu);
msm_trigger_wdog_bite();
}
else
WARN(1, "Watchdog detected hard LOCKUP on cpu %u", next_cpu);
per_cpu(hard_watchdog_warn, next_cpu) = true;
} else {
per_cpu(hard_watchdog_warn, next_cpu) = false;
}
}
#else
static inline void watchdog_check_hardlockup_other_cpu(void) { return; }
#endif
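/*
* Return how far (in seconds) the softlockup timestamp is past the
* threshold, or 0 if the delay is still acceptable or the soft lockup
* detector is disabled.
*/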
static int is_softlockup(unsigned long touch_ts)
{
unsigned long now = get_timestamp();
if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh) {
/* Warn about unreasonable delays. */
if (time_after(now, touch_ts + get_softlockup_thresh()))
return now - touch_ts;
}
return 0;
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.size = sizeof(struct perf_event_attr),
.pinned = 1,
.disabled = 1,
};
/* Callback function for perf event subsystem */
static void watchdog_overflow_callback(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
/* Ensure the watchdog never gets throttled */
event->hw.interrupts = 0;
if (__this_cpu_read(watchdog_nmi_touch) == true) {
__this_cpu_write(watchdog_nmi_touch, false);
return;
}
/* Check for a hardlockup.
* This is done by making sure our timer interrupt
* is incrementing. The timer interrupt should have
* fired multiple times before we overflowed. If it hasn't,
* then this is a good indication the cpu is stuck.
*/
if (is_hardlockup()) {
int this_cpu = smp_processor_id();
/* only print hardlockups once */
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
if (hardlockup_panic)
msm_trigger_wdog_bite();
print_modules();
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
/*
* Perform all-CPU dump only once to avoid multiple hardlockups
* generating interleaving traces
*/
if (sysctl_hardlockup_all_cpu_backtrace &&
!test_and_set_bit(0, &hardlockup_allcpu_dumped))
trigger_allbutself_cpu_backtrace();
if (hardlockup_panic)
panic("Hard LOCKUP");
__this_cpu_write(hard_watchdog_warn, true);
return;
}
__this_cpu_write(hard_watchdog_warn, false);
return;
}
#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
static void watchdog_interrupt_count(void)
{
__this_cpu_inc(hrtimer_interrupts);
}
static int watchdog_nmi_enable(unsigned int cpu);
static void watchdog_nmi_disable(unsigned int cpu);
static int watchdog_enable_all_cpus(void);
static void watchdog_disable_all_cpus(void);
/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
struct pt_regs *regs = get_irq_regs();
int duration;
int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
/* kick the hardlockup detector */
watchdog_interrupt_count();
/* test for hardlockups on the next cpu */
watchdog_check_hardlockup_other_cpu();
/* kick the softlockup detector */
wake_up_process(__this_cpu_read(softlockup_watchdog));
/* .. and repeat */
hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
if (touch_ts == 0) {
if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
/*
* If the time stamp was touched atomically
* make sure the scheduler tick is up to date.
*/
__this_cpu_write(softlockup_touch_sync, false);
sched_clock_tick();
}
/* Clear the guest paused flag on watchdog reset */
kvm_check_and_clear_guest_paused();
__touch_watchdog();
return HRTIMER_RESTART;
}
/* check for a softlockup
* This is done by making sure a high priority task is
* being scheduled. The task touches the watchdog to
* indicate it is getting cpu time. If it hasn't then
* this is a good indication some task is hogging the cpu
*/
duration = is_softlockup(touch_ts);
if (unlikely(duration)) {
/*
* If a virtual machine is stopped by the host it can look to
* the watchdog like a soft lockup, check to see if the host
* stopped the vm before we issue the warning
*/
if (kvm_check_and_clear_guest_paused())
return HRTIMER_RESTART;
/* only warn once */
if (__this_cpu_read(soft_watchdog_warn) == true) {
/*
* When multiple processes are causing softlockups the
* softlockup detector only warns on the first one
* because the code relies on a full quiet cycle to
* re-arm. The second process prevents the quiet cycle
* and never gets reported. Use task pointers to detect
* this.
*/
if (__this_cpu_read(softlockup_task_ptr_saved) !=
current) {
__this_cpu_write(soft_watchdog_warn, false);
__touch_watchdog();
}
return HRTIMER_RESTART;
}
if (softlockup_all_cpu_backtrace) {
/* Prevent multiple soft-lockup reports if one cpu is already
* engaged in dumping cpu back traces
*/
if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
/* Someone else will report us. Let's give up */
__this_cpu_write(soft_watchdog_warn, true);
return HRTIMER_RESTART;
}
}
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
if (softlockup_panic)
msm_trigger_wdog_bite();
__this_cpu_write(softlockup_task_ptr_saved, current);
print_modules();
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
if (softlockup_all_cpu_backtrace) {
/* Avoid generating two back traces for current
* given that one is already made above
*/
trigger_allbutself_cpu_backtrace();
clear_bit(0, &soft_lockup_nmi_warn);
/* Barrier to sync with other cpus */
smp_mb__after_atomic();
}
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
if (softlockup_panic)
panic("softlockup: hung tasks");
__this_cpu_write(soft_watchdog_warn, true);
} else
__this_cpu_write(soft_watchdog_warn, false);
return HRTIMER_RESTART;
}
static void watchdog_set_prio(unsigned int policy, unsigned int prio)
{
struct sched_param param = { .sched_priority = prio };
sched_setscheduler(current, policy, &param);
}
void watchdog_enable(unsigned int cpu)
{
struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
unsigned int *enabled = raw_cpu_ptr(&watchdog_en);
if (*enabled)
return;
/* kick off the timer for the hardlockup detector */
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = watchdog_timer_fn;
/* Enable the perf event */
watchdog_nmi_enable(cpu);
/* done here because hrtimer_start can only pin to smp_processor_id() */
hrtimer_start(hrtimer, ns_to_ktime(sample_period),
HRTIMER_MODE_REL_PINNED);
/* initialize timestamp */
watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
__touch_watchdog();
/*
* Need to ensure above operations are observed by other CPUs before
* indicating that timer is enabled. This is to synchronize core
* isolation and hotplug. Core isolation will wait for this flag to be
* set.
*/
mb();
*enabled = 1;
}
void watchdog_disable(unsigned int cpu)
{
struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
unsigned int *enabled = raw_cpu_ptr(&watchdog_en);
if (!*enabled)
return;
watchdog_set_prio(SCHED_NORMAL, 0);
hrtimer_cancel(hrtimer);
/* disable the perf event */
watchdog_nmi_disable(cpu);
/*
* No need for barrier here since disabling the watchdog is
* synchronized with hotplug lock
*/
*enabled = 0;
}
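/*
* Report whether the per-cpu watchdog has been fully enabled on @cpu.
* Presumably consumed by the hotplug/core isolation paths mentioned in
* watchdog_enable(); the callers are outside this file.
*/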
bool watchdog_configured(unsigned int cpu)
{
return *per_cpu_ptr(&watchdog_en, cpu);
}
static void watchdog_cleanup(unsigned int cpu, bool online)
{
watchdog_disable(cpu);
}
static int watchdog_should_run(unsigned int cpu)
{
return __this_cpu_read(hrtimer_interrupts) !=
__this_cpu_read(soft_lockup_hrtimer_cnt);
}
/*
* The watchdog thread function - touches the timestamp.
*
* It only runs once per sample period (4 seconds by
* default) to reset the softlockup timestamp. If this gets delayed
* for more than 2*watchdog_thresh seconds then the debug-printout
* triggers in watchdog_timer_fn().
*/
static void watchdog(unsigned int cpu)
{
__this_cpu_write(soft_lockup_hrtimer_cnt,
__this_cpu_read(hrtimer_interrupts));
__touch_watchdog();
/*
* watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
* failure path. Check for failures that can occur asynchronously -
* for example, when CPUs are on-lined - and shut down the hardware
* perf event on each CPU accordingly.
*
* The only non-obvious place this bit can be cleared is through
* watchdog_nmi_enable(), so a pr_info() is placed there. Placing a
* pr_info here would be too noisy as it would result in a message
* every few seconds if the hardlockup was disabled but the softlockup
* enabled.
*/
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
watchdog_nmi_disable(cpu);
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
/*
* People like the simple clean cpu node info on boot.
* Reduce the watchdog noise by only printing messages
* that are different from what cpu0 displayed.
*/
static unsigned long cpu0_err;
static int watchdog_nmi_enable(unsigned int cpu)
{
struct perf_event_attr *wd_attr;
struct perf_event *event = per_cpu(watchdog_ev, cpu);
/* nothing to do if the hard lockup detector is disabled */
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
goto out;
/* is it already setup and enabled? */
if (event && event->state > PERF_EVENT_STATE_OFF)
goto out;
/* it is setup but not enabled */
if (event != NULL)
goto out_enable;
wd_attr = &wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
/* Try to register using hardware perf events */
event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
/* save cpu0 error for future comparison */
if (cpu == 0 && IS_ERR(event))
cpu0_err = PTR_ERR(event);
if (!IS_ERR(event)) {
/* only print on cpu0, or on other CPUs if cpu0 reported an error */
if (cpu == 0 || cpu0_err)
pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
goto out_save;
}
/*
* Disable the hard lockup detector if _any_ CPU fails to set up
* the hardware perf event. The watchdog() function checks
* the NMI_WATCHDOG_ENABLED bit periodically.
*
* The barriers are for syncing up watchdog_enabled across all the
* cpus, as clear_bit() does not use barriers.
*/
smp_mb__before_atomic();
clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
smp_mb__after_atomic();
/* skip displaying the same error again */
if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
return PTR_ERR(event);
/* vary the KERN level based on the returned errno */
if (PTR_ERR(event) == -EOPNOTSUPP)
pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
else if (PTR_ERR(event) == -ENOENT)
pr_warn("disabled (cpu%i): hardware events not enabled\n",
cpu);
else
pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
cpu, PTR_ERR(event));
pr_info("Shutting down hard lockup detector on all cpus\n");
return PTR_ERR(event);
/* success path */
out_save:
per_cpu(watchdog_ev, cpu) = event;
out_enable:
perf_event_enable(per_cpu(watchdog_ev, cpu));
out:
return 0;
}
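
/* Disable and release this CPU's hardlockup perf event */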
static void watchdog_nmi_disable(unsigned int cpu)
{
struct perf_event *event = per_cpu(watchdog_ev, cpu);
if (event) {
perf_event_disable(event);
per_cpu(watchdog_ev, cpu) = NULL;
/* should be in cleanup, but blocks oprofile */
perf_event_release_kernel(event);
}
if (cpu == 0) {
/* watchdog_nmi_enable() expects this to be zero initially. */
cpu0_err = 0;
}
}
#else
#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
static int watchdog_nmi_enable(unsigned int cpu)
{
/*
* The new cpu will be marked online before the first hrtimer interrupt
* runs on it. If another cpu tests for a hardlockup on the new cpu
* before it has run its first hrtimer, it will get a false positive.
* Touch the watchdog on the new cpu to delay the first check for at
* least 3 sampling periods to guarantee one hrtimer has run on the new
* cpu.
*/
per_cpu(watchdog_nmi_touch, cpu) = true;
smp_wmb();
cpumask_set_cpu(cpu, &watchdog_cpus);
return 0;
}
static void watchdog_nmi_disable(unsigned int cpu)
{
unsigned int next_cpu = watchdog_next_cpu(cpu);
/*
* Offlining this cpu will cause the cpu before this one to start
* checking the one after this one. If this cpu just finished checking
* the next cpu and updating hrtimer_interrupts_saved, and then the
* previous cpu checks it within one sample period, it will trigger a
* false positive. Touch the watchdog on the next cpu to prevent it.
*/
if (next_cpu < nr_cpu_ids)
per_cpu(watchdog_nmi_touch, next_cpu) = true;
smp_wmb();
cpumask_clear_cpu(cpu, &watchdog_cpus);
}
#else
static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
static void watchdog_nmi_disable(unsigned int cpu) { return; }
#endif /* CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU */
#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
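
/*
* The per-cpu watchdog threads are managed by the smpboot infrastructure.
* Parking a thread ('.park') disables the detectors on that CPU and
* unparking ('.unpark') re-enables them.
*/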
static struct smp_hotplug_thread watchdog_threads = {
.store = &softlockup_watchdog,
.thread_should_run = watchdog_should_run,
.thread_fn = watchdog,
.thread_comm = "watchdog/%u",
.setup = watchdog_enable,
.cleanup = watchdog_cleanup,
.park = watchdog_disable,
.unpark = watchdog_enable,
};
/*
* park all watchdog threads that are specified in 'watchdog_cpumask'
*
* This function returns an error if kthread_park() of a watchdog thread
* fails. In this situation, the watchdog threads of some CPUs can already
* be parked and the watchdog threads of other CPUs can still be runnable.
* Callers are expected to handle this special condition as appropriate in
* their context.
*
* This function may only be called in a context that is protected against
* races with CPU hotplug - for example, via get_online_cpus().
*/
static int watchdog_park_threads(void)
{
int cpu, ret = 0;
for_each_watchdog_cpu(cpu) {
ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
if (ret)
break;
}
return ret;
}
/*
* unpark all watchdog threads that are specified in 'watchdog_cpumask'
*
* This function may only be called in a context that is protected against
* races with CPU hotplug - for example, via get_online_cpus().
*/
static void watchdog_unpark_threads(void)
{
int cpu;
for_each_watchdog_cpu(cpu)
kthread_unpark(per_cpu(softlockup_watchdog, cpu));
}
/*
* Suspend the hard and soft lockup detector by parking the watchdog threads.
*/
int lockup_detector_suspend(void)
{
int ret = 0;
get_online_cpus();
mutex_lock(&watchdog_proc_mutex);
/*
* Multiple suspend requests can be active in parallel (counted by
* the 'watchdog_suspended' variable). If the watchdog threads are
* running, the first caller takes care that they will be parked.
* The state of 'watchdog_running' cannot change while a suspend
* request is active (see related code in 'proc' handlers).
*/
if (watchdog_running && !watchdog_suspended)
ret = watchdog_park_threads();
if (ret == 0)
watchdog_suspended++;
else {
watchdog_disable_all_cpus();
pr_err("Failed to suspend lockup detectors, disabled\n");
watchdog_enabled = 0;
}
mutex_unlock(&watchdog_proc_mutex);
return ret;
}
/*
* Resume the hard and soft lockup detector by unparking the watchdog threads.
*/
void lockup_detector_resume(void)
{
mutex_lock(&watchdog_proc_mutex);
watchdog_suspended--;
/*
* The watchdog threads are unparked if they were previously running
* and if there is no more active suspend request.
*/
if (watchdog_running && !watchdog_suspended)
watchdog_unpark_threads();
mutex_unlock(&watchdog_proc_mutex);
put_online_cpus();
}
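
/*
* Park and immediately unpark all watchdog threads so they pick up the
* current parameters (sample period, enable bits) when they restart.
*/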
static int update_watchdog_all_cpus(void)
{
int ret;
ret = watchdog_park_threads();
if (ret)
return ret;
watchdog_unpark_threads();
return 0;
}
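
/*
* Start the watchdog threads if they are not running yet, otherwise make
* them re-read the current parameters; clear 'watchdog_enabled' on failure.
*/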
static int watchdog_enable_all_cpus(void)
{
int err = 0;
if (!watchdog_running) {
err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
&watchdog_cpumask);
if (err)
pr_err("Failed to create watchdog threads, disabled\n");
else
watchdog_running = 1;
} else {
/*
* Enable/disable the lockup detectors or
* change the sample period 'on the fly'.
*/
err = update_watchdog_all_cpus();
if (err) {
watchdog_disable_all_cpus();
pr_err("Failed to update lockup detectors, disabled\n");
}
}
if (err)
watchdog_enabled = 0;
return err;
}
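
/* Stop and unregister the watchdog threads on all CPUs */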
static void watchdog_disable_all_cpus(void)
{
if (watchdog_running) {
watchdog_running = 0;
smpboot_unregister_percpu_thread(&watchdog_threads);
}
}
#ifdef CONFIG_SYSCTL
/*
* Update the run state of the lockup detectors.
*/
static int proc_watchdog_update(void)
{
int err = 0;
/*
* Watchdog threads won't be started if they are already active.
* The 'watchdog_running' variable in watchdog_*_all_cpus() takes
* care of this. If those threads are already active, the sample
* period will be updated and the lockup detectors will be enabled
* or disabled 'on the fly'.
*/
if (watchdog_enabled && watchdog_thresh)
err = watchdog_enable_all_cpus();
else
watchdog_disable_all_cpus();
return err;
}
/*
* common function for watchdog, nmi_watchdog and soft_watchdog parameter
*
* caller | table->data points to | 'which' contains the flag(s)
* -------------------|-----------------------|-----------------------------
* proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
* | | with SOFT_WATCHDOG_ENABLED
* -------------------|-----------------------|-----------------------------
* proc_nmi_watchdog | nmi_watchdog_enabled | NMI_WATCHDOG_ENABLED
* -------------------|-----------------------|-----------------------------
* proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
*/
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int err, old, new;
int *watchdog_param = (int *)table->data;
get_online_cpus();
mutex_lock(&watchdog_proc_mutex);
if (watchdog_suspended) {
/* no parameter changes allowed while watchdog is suspended */
err = -EAGAIN;
goto out;
}
/*
* If the parameter is being read return the state of the corresponding
* bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
* run state of the lockup detectors.
*/
if (!write) {
*watchdog_param = (watchdog_enabled & which) != 0;
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
} else {
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (err)
goto out;
/*
* There is a race window between fetching the current value
* from 'watchdog_enabled' and storing the new value. During
* this race window, watchdog_nmi_enable() can sneak in and
* clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
* The 'cmpxchg' detects this race and the loop retries.
*/
do {
old = watchdog_enabled;
/*
* If the parameter value is not zero set the
* corresponding bit(s), else clear it(them).
*/
if (*watchdog_param)
new = old | which;
else
new = old & ~which;
} while (cmpxchg(&watchdog_enabled, old, new) != old);
/*
* Update the run state of the lockup detectors. There is _no_
* need to check the value returned by proc_watchdog_update()
* and to restore the previous value of 'watchdog_enabled' as
* both lockup detectors are disabled if proc_watchdog_update()
* returns an error.
*/
if (old == new)
goto out;
err = proc_watchdog_update();
}
out:
mutex_unlock(&watchdog_proc_mutex);
put_online_cpus();
return err;
}
/*
* /proc/sys/kernel/watchdog
*/
int proc_watchdog(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
table, write, buffer, lenp, ppos);
}
/*
* /proc/sys/kernel/nmi_watchdog
*/
int proc_nmi_watchdog(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
table, write, buffer, lenp, ppos);
}
/*
* /proc/sys/kernel/soft_watchdog
*/
int proc_soft_watchdog(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
table, write, buffer, lenp, ppos);
}
/*
* /proc/sys/kernel/watchdog_thresh
*/
int proc_watchdog_thresh(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int err, old, new;
get_online_cpus();
mutex_lock(&watchdog_proc_mutex);
if (watchdog_suspended) {
/* no parameter changes allowed while watchdog is suspended */
err = -EAGAIN;
goto out;
}
old = ACCESS_ONCE(watchdog_thresh);
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (err || !write)
goto out;
/*
* Update the sample period. Restore on failure.
*/
new = ACCESS_ONCE(watchdog_thresh);
if (old == new)
goto out;
set_sample_period();
err = proc_watchdog_update();
if (err) {
watchdog_thresh = old;
set_sample_period();
}
out:
mutex_unlock(&watchdog_proc_mutex);
put_online_cpus();
return err;
}
/*
* The cpumask is the mask of possible cpus that the watchdog can run
* on, not the mask of cpus it is actually running on. This allows the
* user to specify a mask that will include cpus that have not yet
* been brought online, if desired.
*/
int proc_watchdog_cpumask(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int err;
get_online_cpus();
mutex_lock(&watchdog_proc_mutex);
if (watchdog_suspended) {
/* no parameter changes allowed while watchdog is suspended */
err = -EAGAIN;
goto out;
}
err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
if (!err && write) {
/* Remove impossible cpus to keep sysctl output cleaner. */
cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
cpu_possible_mask);
if (watchdog_running) {
/*
* Failure would be due to being unable to allocate
* a temporary cpumask, so we are likely not in a
* position to do much else to make things better.
*/
if (smpboot_update_cpumask_percpu_thread(
&watchdog_threads, &watchdog_cpumask) != 0)
pr_err("cpumask update failed\n");
}
}
out:
mutex_unlock(&watchdog_proc_mutex);
put_online_cpus();
return err;
}
#endif /* CONFIG_SYSCTL */
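
/*
* Boot-time initialization: compute the sample period, restrict the
* watchdog cpumask to housekeeping CPUs when nohz_full is enabled, and
* start the detectors if requested.
*/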
void __init lockup_detector_init(void)
{
set_sample_period();
#ifdef CONFIG_NO_HZ_FULL
if (tick_nohz_full_enabled()) {
pr_info("Disabling watchdog on nohz_full cores by default\n");
cpumask_copy(&watchdog_cpumask, housekeeping_mask);
} else
cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#else
cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#endif
if (watchdog_enabled)
watchdog_enable_all_cpus();
}