android_kernel_oneplus_msm8998/kernel/time/tick-sched.c
Runmin Wang 750075feff Merge remote-tracking branch 'origin/tmp-917a9a9133a6' into lsk
* tmp-917a9:
  ARM/vdso: Mark the vDSO code read-only after init
  x86/vdso: Mark the vDSO code read-only after init
  lkdtm: Verify that '__ro_after_init' works correctly
  arch: Introduce post-init read-only memory
  x86/mm: Always enable CONFIG_DEBUG_RODATA and remove the Kconfig option
  mm/init: Add 'rodata=off' boot cmdline parameter to disable read-only kernel mappings
  asm-generic: Consolidate mark_rodata_ro()
  Linux 4.4.6
  ld-version: Fix awk regex compile failure
  target: Drop incorrect ABORT_TASK put for completed commands
  block: don't optimize for non-cloned bio in bio_get_last_bvec()
  MIPS: smp.c: Fix uninitialised temp_foreign_map
  MIPS: Fix build error when SMP is used without GIC
  ovl: fix getcwd() failure after unsuccessful rmdir
  ovl: copy new uid/gid into overlayfs runtime inode
  userfaultfd: don't block on the last VM updates at exit time
  powerpc/powernv: Fix OPAL_CONSOLE_FLUSH prototype and usages
  powerpc/powernv: Add a kmsg_dumper that flushes console output on panic
  powerpc: Fix dedotify for binutils >= 2.26
  Revert "drm/radeon/pm: adjust display configuration after powerstate"
  drm/radeon: Fix error handling in radeon_flip_work_func.
  drm/amdgpu: Fix error handling in amdgpu_flip_work_func.
  Revert "drm/radeon: call hpd_irq_event on resume"
  x86/mm: Fix slow_virt_to_phys() for X86_PAE again
  gpu: ipu-v3: Do not bail out on missing optional port nodes
  mac80211: Fix Public Action frame RX in AP mode
  mac80211: check PN correctly for GCMP-encrypted fragmented MPDUs
  mac80211: minstrel_ht: fix a logic error in RTS/CTS handling
  mac80211: minstrel_ht: set default tx aggregation timeout to 0
  mac80211: fix use of uninitialised values in RX aggregation
  mac80211: minstrel: Change expected throughput unit back to Kbps
  iwlwifi: mvm: inc pending frames counter also when txing non-sta
  can: gs_usb: fixed disconnect bug by removing erroneous use of kfree()
  cfg80211/wext: fix message ordering
  wext: fix message delay/ordering
  ovl: fix working on distributed fs as lower layer
  ovl: ignore lower entries when checking purity of non-directory entries
  ASoC: wm8958: Fix enum ctl accesses in a wrong type
  ASoC: wm8994: Fix enum ctl accesses in a wrong type
  ASoC: samsung: Use IRQ safe spin lock calls
  ASoC: dapm: Fix ctl value accesses in a wrong type
  ncpfs: fix a braino in OOM handling in ncp_fill_cache()
  jffs2: reduce the breakage on recovery from halfway failed rename()
  dmaengine: at_xdmac: fix residue computation
  tracing: Fix check for cpu online when event is disabled
  s390/dasd: fix diag 0x250 inline assembly
  s390/mm: four page table levels vs. fork
  KVM: MMU: fix reserved bit check for ept=0/CR0.WP=0/CR4.SMEP=1/EFER.NX=0
  KVM: MMU: fix ept=0/pte.u=1/pte.w=0/CR0.WP=0/CR4.SMEP=1/EFER.NX=0 combo
  KVM: PPC: Book3S HV: Sanitize special-purpose register values on guest exit
  KVM: s390: correct fprs on SIGP (STOP AND) STORE STATUS
  KVM: VMX: disable PEBS before a guest entry
  kvm: cap halt polling at exactly halt_poll_ns
  PCI: Allow a NULL "parent" pointer in pci_bus_assign_domain_nr()
  ARM: OMAP2+: hwmod: Introduce ti,no-idle dt property
  ARM: dts: dra7: do not gate cpsw clock due to errata i877
  ARM: mvebu: fix overlap of Crypto SRAM with PCIe memory window
  arm64: account for sparsemem section alignment when choosing vmemmap offset
  Linux 4.4.5
  drm/amdgpu: fix topaz/tonga gmc assignment in 4.4 stable
  modules: fix longstanding /proc/kallsyms vs module insertion race.
  drm/i915: refine qemu south bridge detection
  drm/i915: more virtual south bridge detection
  block: get the 1st and last bvec via helpers
  block: check virt boundary in bio_will_gap()
  drm/amdgpu: Use drm_calloc_large for VM page_tables array
  thermal: cpu_cooling: fix out of bounds access in time_in_idle
  i2c: brcmstb: allocate correct amount of memory for regmap
  ubi: Fix out of bounds write in volume update code
  cxl: Fix PSL timebase synchronization detection
  MIPS: traps: Fix SIGFPE information leak from `do_ov' and `do_trap_or_bp'
  MIPS: scache: Fix scache init with invalid line size.
  USB: serial: option: add support for Quectel UC20
  USB: serial: option: add support for Telit LE922 PID 0x1045
  USB: qcserial: add Sierra Wireless EM74xx device ID
  USB: qcserial: add Dell Wireless 5809e Gobi 4G HSPA+ (rev3)
  USB: cp210x: Add ID for Parrot NMEA GPS Flight Recorder
  usb: chipidea: otg: change workqueue ci_otg as freezable
  ALSA: timer: Fix broken compat timer user status ioctl
  ALSA: hdspm: Fix zero-division
  ALSA: hdsp: Fix wrong boolean ctl value accesses
  ALSA: hdspm: Fix wrong boolean ctl value accesses
  ALSA: seq: oss: Don't drain at closing a client
  ALSA: pcm: Fix ioctls for X32 ABI
  ALSA: timer: Fix ioctls for X32 ABI
  ALSA: rawmidi: Fix ioctls X32 ABI
  ALSA: hda - Fix mic issues on Acer Aspire E1-472
  ALSA: ctl: Fix ioctls for X32 ABI
  ALSA: usb-audio: Add a quirk for Plantronics DA45
  adv7604: fix tx 5v detect regression
  dmaengine: pxa_dma: fix cyclic transfers
  Fix directory hardlinks from deleted directories
  jffs2: Fix page lock / f->sem deadlock
  Revert "jffs2: Fix lock acquisition order bug in jffs2_write_begin"
  Btrfs: fix loading of orphan roots leading to BUG_ON
  pata-rb532-cf: get rid of the irq_to_gpio() call
  tracing: Do not have 'comm' filter override event 'comm' field
  ata: ahci: don't mark HotPlugCapable Ports as external/removable
  PM / sleep / x86: Fix crash on graph trace through x86 suspend
  arm64: vmemmap: use virtual projection of linear region
  Adding Intel Lewisburg device IDs for SATA
  writeback: flush inode cgroup wb switches instead of pinning super_block
  block: bio: introduce helpers to get the 1st and last bvec
  libata: Align ata_device's id on a cacheline
  libata: fix HDIO_GET_32BIT ioctl
  drm/amdgpu: return from atombios_dp_get_dpcd only when error
  drm/amdgpu/gfx8: specify which engine to wait before vm flush
  drm/amdgpu: apply gfx_v8 fixes to gfx_v7 as well
  drm/amdgpu/pm: update current crtc info after setting the powerstate
  drm/radeon/pm: update current crtc info after setting the powerstate
  drm/ast: Fix incorrect register check for DRAM width
  target: Fix WRITE_SAME/DISCARD conversion to linux 512b sectors
  iommu/vt-d: Use BUS_NOTIFY_REMOVED_DEVICE in hotplug path
  iommu/amd: Fix boot warning when device 00:00.0 is not iommu covered
  iommu/amd: Apply workaround for ATS write permission check
  arm/arm64: KVM: Fix ioctl error handling
  KVM: x86: fix root cause for missed hardware breakpoints
  vfio: fix ioctl error handling
  Fix cifs_uniqueid_to_ino_t() function for s390x
  CIFS: Fix SMB2+ interim response processing for read requests
  cifs: fix out-of-bounds access in lease parsing
  fbcon: set a default value to blink interval
  kvm: x86: Update tsc multiplier on change.
  mips/kvm: fix ioctl error handling
  parisc: Fix ptrace syscall number and return value modification
  PCI: keystone: Fix MSI code that retrieves struct pcie_port pointer
  block: Initialize max_dev_sectors to 0
  drm/amdgpu: mask out WC from BO on unsupported arches
  btrfs: async-thread: Fix a use-after-free error for trace
  btrfs: Fix no_space in write and rm loop
  Btrfs: fix deadlock running delayed iputs at transaction commit time
  drivers: sh: Restore legacy clock domain on SuperH platforms
  use ->d_seq to get coherency between ->d_inode and ->d_flags
  Linux 4.4.4
  iwlwifi: mvm: don't allow sched scans without matches to be started
  iwlwifi: update and fix 7265 series PCI IDs
  iwlwifi: pcie: properly configure the debug buffer size for 8000
  iwlwifi: dvm: fix WoWLAN
  security: let security modules use PTRACE_MODE_* with bitmasks
  IB/cma: Fix RDMA port validation for iWarp
  x86/irq: Plug vector cleanup race
  x86/irq: Call irq_force_move_complete with irq descriptor
  x86/irq: Remove outgoing CPU from vector cleanup mask
  x86/irq: Remove the cpumask allocation from send_cleanup_vector()
  x86/irq: Clear move_in_progress before sending cleanup IPI
  x86/irq: Remove offline cpus from vector cleanup
  x86/irq: Get rid of code duplication
  x86/irq: Copy vectormask instead of an AND operation
  x86/irq: Check vector allocation early
  x86/irq: Reorganize the search in assign_irq_vector
  x86/irq: Reorganize the return path in assign_irq_vector
  x86/irq: Do not use apic_chip_data.old_domain as temporary buffer
  x86/irq: Validate that irq descriptor is still active
  x86/irq: Fix a race in x86_vector_free_irqs()
  x86/irq: Call chip->irq_set_affinity in proper context
  x86/entry/compat: Add missing CLAC to entry_INT80_32
  x86/mpx: Fix off-by-one comparison with nr_registers
  hpfs: don't truncate the file when delete fails
  do_last(): ELOOP failure exit should be done after leaving RCU mode
  should_follow_link(): validate ->d_seq after having decided to follow
  xen/pcifront: Fix mysterious crashes when NUMA locality information was extracted.
  xen/pciback: Save the number of MSI-X entries to be copied later.
  xen/pciback: Check PF instead of VF for PCI_COMMAND_MEMORY
  xen/scsiback: correct frontend counting
  xen/arm: correctly handle DMA mapping of compound pages
  ARM: at91/dt: fix typo in sama5d2 pinmux descriptions
  ARM: OMAP2+: Fix onenand initialization to avoid filesystem corruption
  do_last(): don't let a bogus return value from ->open() et.al. to confuse us
  kernel/resource.c: fix muxed resource handling in __request_region()
  sunrpc/cache: fix off-by-one in qword_get()
  tracing: Fix showing function event in available_events
  powerpc/eeh: Fix partial hotplug criterion
  KVM: x86: MMU: fix ubsan index-out-of-range warning
  KVM: x86: fix conversion of addresses to linear in 32-bit protected mode
  KVM: x86: fix missed hardware breakpoints
  KVM: arm/arm64: vgic: Ensure bitmaps are long enough
  KVM: async_pf: do not warn on page allocation failures
  of/irq: Fix msi-map calculation for nonzero rid-base
  NFSv4: Fix a dentry leak on alias use
  nfs: fix nfs_size_to_loff_t
  block: fix use-after-free in dio_bio_complete
  bio: return EINTR if copying to user space got interrupted
  i2c: i801: Adding Intel Lewisburg support for iTCO
  phy: core: fix wrong err handle for phy_power_on
  writeback: keep superblock pinned during cgroup writeback association switches
  cgroup: make sure a parent css isn't offlined before its children
  cpuset: make mm migration asynchronous
  PCI/AER: Flush workqueue on device remove to avoid use-after-free
  ARCv2: SMP: Emulate IPI to self using software triggered interrupt
  ARCv2: STAR 9000950267: Handle return from intr to Delay Slot #2
  libata: fix sff host state machine locking while polling
  qla2xxx: Fix stale pointer access.
  spi: atmel: fix gpio chip-select in case of non-DT platform
  target: Fix race with SCF_SEND_DELAYED_TAS handling
  target: Fix remote-port TMR ABORT + se_cmd fabric stop
  target: Fix TAS handling for multi-session se_node_acls
  target: Fix LUN_RESET active TMR descriptor handling
  target: Fix LUN_RESET active I/O handling for ACK_KREF
  ALSA: hda - Fixing background noise on Dell Inspiron 3162
  ALSA: hda - Apply clock gate workaround to Skylake, too
  Revert "workqueue: make sure delayed work run in local cpu"
  workqueue: handle NUMA_NO_NODE for unbound pool_workqueue lookup
  mac80211: Requeue work after scan complete for all VIF types.
  rfkill: fix rfkill_fop_read wait_event usage
  tick/nohz: Set the correct expiry when switching to nohz/lowres mode
  perf stat: Do not clean event's private stats
  cdc-acm:exclude Samsung phone 04e8:685d
  Revert "Staging: panel: usleep_range is preferred over udelay"
  Staging: speakup: Fix getting port information
  sd: Optimal I/O size is in bytes, not sectors
  libceph: don't spam dmesg with stray reply warnings
  libceph: use the right footer size when skipping a message
  libceph: don't bail early from try_read() when skipping a message
  libceph: fix ceph_msg_revoke()
  seccomp: always propagate NO_NEW_PRIVS on tsync
  cpufreq: Fix NULL reference crash while accessing policy->governor_data
  cpufreq: pxa2xx: fix pxa_cpufreq_change_voltage prototype
  hwmon: (ads1015) Handle negative conversion values correctly
  hwmon: (gpio-fan) Remove un-necessary speed_index lookup for thermal hook
  hwmon: (dell-smm) Blacklist Dell Studio XPS 8000
  Thermal: do thermal zone update after a cooling device registered
  Thermal: handle thermal zone device properly during system sleep
  Thermal: initialize thermal zone device correctly
  IB/mlx5: Expose correct maximum number of CQE capacity
  IB/qib: Support creating qps with GFP_NOIO flag
  IB/qib: fix mcast detach when qp not attached
  IB/cm: Fix a recently introduced deadlock
  dmaengine: dw: disable BLOCK IRQs for non-cyclic xfer
  dmaengine: at_xdmac: fix resume for cyclic transfers
  dmaengine: dw: fix cyclic transfer callbacks
  dmaengine: dw: fix cyclic transfer setup
  nfit: fix multi-interface dimm handling, acpi6.1 compatibility
  ACPI / PCI / hotplug: unlock in error path in acpiphp_enable_slot()
  ACPI: Revert "ACPI / video: Add Dell Inspiron 5737 to the blacklist"
  ACPI / video: Add disable_backlight_sysfs_if quirk for the Toshiba Satellite R830
  ACPI / video: Add disable_backlight_sysfs_if quirk for the Toshiba Portege R700
  lib: sw842: select crc32
  uapi: update install list after nvme.h rename
  ideapad-laptop: Add Lenovo Yoga 700 to no_hw_rfkill dmi list
  ideapad-laptop: Add Lenovo ideapad Y700-17ISK to no_hw_rfkill dmi list
  toshiba_acpi: Fix blank screen at boot if transflective backlight is supported
  make sure that freeing shmem fast symlinks is RCU-delayed
  drm/radeon/pm: adjust display configuration after powerstate
  drm/radeon: Don't hang in radeon_flip_work_func on disabled crtc. (v2)
  drm: Fix treatment of drm_vblank_offdelay in drm_vblank_on() (v2)
  drm: Fix drm_vblank_pre/post_modeset regression from Linux 4.4
  drm: Prevent vblank counter bumps > 1 with active vblank clients. (v2)
  drm: No-Op redundant calls to drm_vblank_off() (v2)
  drm/radeon: use post-decrement in error handling
  drm/qxl: use kmalloc_array to alloc reloc_info in qxl_process_single_command
  drm/i915: fix error path in intel_setup_gmbus()
  drm/i915/dsi: don't pass arbitrary data to sideband
  drm/i915/dsi: defend gpio table against out of bounds access
  drm/i915/skl: Don't skip mst encoders in skl_ddi_pll_select()
  drm/i915: Don't reject primary plane windowing with color keying enabled on SKL+
  drm/i915/dp: fall back to 18 bpp when sink capability is unknown
  drm/i915: Make sure DC writes are coherent on flush.
  drm/i915: Init power domains early in driver load
  drm/i915: intel_hpd_init(): Fix suspend/resume reprobing
  drm/i915: Restore inhibiting the load of the default context
  drm: fix missing reference counting decrease
  drm/radeon: hold reference to fences in radeon_sa_bo_new
  drm/radeon: mask out WC from BO on unsupported arches
  drm: add helper to check for wc memory support
  drm/radeon: fix DP audio support for APU with DCE4.1 display engine
  drm/radeon: Add a common function for DFS handling
  drm/radeon: cleaned up VCO output settings for DP audio
  drm/radeon: properly byte swap vce firmware setup
  drm/radeon: clean up fujitsu quirks
  drm/radeon: Fix "slow" audio over DP on DCE8+
  drm/radeon: call hpd_irq_event on resume
  drm/radeon: Fix off-by-one errors in radeon_vm_bo_set_addr
  drm/dp/mst: deallocate payload on port destruction
  drm/dp/mst: Reverse order of MST enable and clearing VC payload table.
  drm/dp/mst: move GUID storage from mgr, port to only mst branch
  drm/dp/mst: Calculate MST PBN with 31.32 fixed point
  drm: Add drm_fixp_from_fraction and drm_fixp2int_ceil
  drm/dp/mst: fix in RAD element access
  drm/dp/mst: fix in MSTB RAD initialization
  drm/dp/mst: always send reply for UP request
  drm/dp/mst: process broadcast messages correctly
  drm/nouveau: platform: Fix deferred probe
  drm/nouveau/disp/dp: ensure sink is powered up before attempting link training
  drm/nouveau/display: Enable vblank irqs after display engine is on again.
  drm/nouveau/kms: take mode_config mutex in connector hotplug path
  drm/amdgpu/pm: adjust display configuration after powerstate
  drm/amdgpu: Don't hang in amdgpu_flip_work_func on disabled crtc.
  drm/amdgpu: use post-decrement in error handling
  drm/amdgpu: fix issue with overlapping userptrs
  drm/amdgpu: hold reference to fences in amdgpu_sa_bo_new (v2)
  drm/amdgpu: remove unnecessary forward declaration
  drm/amdgpu: fix s4 resume
  drm/amdgpu: remove exp hardware support from iceland
  drm/amdgpu: don't load MEC2 on topaz
  drm/amdgpu: drop topaz support from gmc8 module
  drm/amdgpu: pull topaz gmc bits into gmc_v7
  drm/amdgpu: The VI specific EXE bit should only apply to GMC v8.0 above
  drm/amdgpu: iceland use CI based MC IP
  drm/amdgpu: move gmc7 support out of CIK dependency
  drm/amdgpu: no need to load MC firmware on fiji
  drm/amdgpu: fix amdgpu_bo_pin_restricted VRAM placing v2
  drm/amdgpu: fix tonga smu resume
  drm/amdgpu: fix lost sync_to if scheduler is enabled.
  drm/amdgpu: call hpd_irq_event on resume
  drm/amdgpu: Fix off-by-one errors in amdgpu_vm_bo_map
  drm/vmwgfx: respect 'nomodeset'
  drm/vmwgfx: Fix a width / pitch mismatch on framebuffer updates
  drm/vmwgfx: Fix an incorrect lock check
  virtio_pci: fix use after free on release
  virtio_balloon: fix race between migration and ballooning
  virtio_balloon: fix race by fill and leak
  regulator: mt6311: MT6311_REGULATOR needs to select REGMAP_I2C
  regulator: axp20x: Fix GPIO LDO enable value for AXP22x
  clk: exynos: use irqsave version of spin_lock to avoid deadlock with irqs
  cxl: use correct operator when writing pcie config space values
  sparc64: fix incorrect sign extension in sys_sparc64_personality
  EDAC, mc_sysfs: Fix freeing bus' name
  EDAC: Robustify workqueues destruction
  MIPS: Fix buffer overflow in syscall_get_arguments()
  MIPS: Fix some missing CONFIG_CPU_MIPSR6 #ifdefs
  MIPS: hpet: Choose a safe value for the ETIME check
  MIPS: Loongson-3: Fix SMP_ASK_C0COUNT IPI handler
  Revert "MIPS: Fix PAGE_MASK definition"
  cputime: Prevent 32bit overflow in time[val|spec]_to_cputime()
  time: Avoid signed overflow in timekeeping_get_ns()
  Bluetooth: 6lowpan: Fix handling of uncompressed IPv6 packets
  Bluetooth: 6lowpan: Fix kernel NULL pointer dereferences
  Bluetooth: Fix incorrect removing of IRKs
  Bluetooth: Add support of Toshiba Broadcom based devices
  Bluetooth: Use continuous scanning when creating LE connections
  Drivers: hv: vmbus: Fix a Host signaling bug
  tools: hv: vss: fix the write()'s argument: error -> vss_msg
  mmc: sdhci: Allow override of get_cd() called from sdhci_request()
  mmc: sdhci: Allow override of mmc host operations
  mmc: sdhci-pci: Fix card detect race for Intel BXT/APL
  mmc: pxamci: fix again read-only gpio detection polarity
  mmc: sdhci-acpi: Fix card detect race for Intel BXT/APL
  mmc: mmci: fix an ages old detection error
  mmc: core: Enable tuning according to the actual timing
  mmc: sdhci: Fix sdhci_runtime_pm_bus_on/off()
  mmc: mmc: Fix incorrect use of driver strength switching HS200 and HS400
  mmc: sdio: Fix invalid vdd in voltage switch power cycle
  mmc: sdhci: Fix DMA descriptor with zero data length
  mmc: sdhci-pci: Do not default to 33 Ohm driver strength for Intel SPT
  mmc: usdhi6rol0: handle NULL data in timeout
  clockevents/tcb_clksrc: Prevent disabling an already disabled clock
  posix-clock: Fix return code on the poll method's error path
  irqchip/gic-v3-its: Fix double ICC_EOIR write for LPI in EOImode==1
  irqchip/atmel-aic: Fix wrong bit operation for IRQ priority
  irqchip/mxs: Add missing set_handle_irq()
  irqchip/omap-intc: Add support for spurious irq handling
  coresight: checking for NULL string in coresight_name_match()
  dm: fix dm_rq_target_io leak on faults with .request_fn DM w/ blk-mq paths
  dm snapshot: fix hung bios when copy error occurs
  dm space map metadata: remove unused variable in brb_pop()
  tda1004x: only update the frontend properties if locked
  vb2: fix a regression in poll() behavior for output,streams
  gspca: ov534/topro: prevent a division by 0
  si2157: return -EINVAL if firmware blob is too big
  media: dvb-core: Don't force CAN_INVERSION_AUTO in oneshot mode
  rc: sunxi-cir: Initialize the spinlock properly
  namei: ->d_inode of a pinned dentry is stable only for positives
  mei: validate request value in client notify request ioctl
  mei: fix fasync return value on error
  rtlwifi: rtl8723be: Fix module parameter initialization
  rtlwifi: rtl8188ee: Fix module parameter initialization
  rtlwifi: rtl8192se: Fix module parameter initialization
  rtlwifi: rtl8723ae: Fix initialization of module parameters
  rtlwifi: rtl8192de: Fix incorrect module parameter descriptions
  rtlwifi: rtl8192ce: Fix handling of module parameters
  rtlwifi: rtl8192cu: Add missing parameter setup
  rtlwifi: rtl_pci: Fix kernel panic
  locks: fix unlock when fcntl_setlk races with a close
  um: link with -lpthread
  uml: fix hostfs mknod()
  uml: flush stdout before forking
  s390/fpu: signals vs. floating point control register
  s390/compat: correct restore of high gprs on signal return
  s390/dasd: fix performance drop
  s390/dasd: fix refcount for PAV reassignment
  s390/dasd: prevent incorrect length error under z/VM after PAV changes
  s390: fix normalization bug in exception table sorting
  btrfs: initialize the seq counter in struct btrfs_device
  Btrfs: Initialize btrfs_root->highest_objectid when loading tree root and subvolume roots
  Btrfs: fix transaction handle leak on failure to create hard link
  Btrfs: fix number of transaction units required to create symlink
  Btrfs: send, don't BUG_ON() when an empty symlink is found
  btrfs: statfs: report zero available if metadata are exhausted
  Btrfs: igrab inode in writepage
  Btrfs: add missing brelse when superblock checksum fails
  KVM: s390: fix memory overwrites when vx is disabled
  s390/kvm: remove dependency on struct save_area definition
  clocksource/drivers/vt8500: Increase the minimum delta
  genirq: Validate action before dereferencing it in handle_irq_event_percpu()
  mm: numa: quickly fail allocations for NUMA balancing on full nodes
  mm: thp: fix SMP race condition between THP page fault and MADV_DONTNEED
  ocfs2: unlock inode if deleting inode from orphan fails
  drm/i915: shut up gen8+ SDE irq dmesg noise
  iw_cxgb3: Fix incorrectly returning error on success
  spi: omap2-mcspi: Prevent duplicate gpio_request
  drivers: android: correct the size of struct binder_uintptr_t for BC_DEAD_BINDER_DONE
  USB: option: add "4G LTE usb-modem U901"
  USB: option: add support for SIM7100E
  USB: cp210x: add IDs for GE B650V3 and B850V3 boards
  usb: dwc3: Fix assignment of EP transfer resources
  can: ems_usb: Fix possible tx overflow
  dm thin: fix race condition when destroying thin pool workqueue
  bcache: Change refill_dirty() to always scan entire disk if necessary
  bcache: prevent crash on changing writeback_running
  bcache: allows use of register in udev to avoid "device_busy" error.
  bcache: unregister reboot notifier if bcache fails to unregister device
  bcache: fix a leak in bch_cached_dev_run()
  bcache: clear BCACHE_DEV_UNLINK_DONE flag when attaching a backing device
  bcache: Add a cond_resched() call to gc
  bcache: fix a livelock when we cause a huge number of cache misses
  lib/ucs2_string: Correct ucs2 -> utf8 conversion
  efi: Add pstore variables to the deletion whitelist
  efi: Make efivarfs entries immutable by default
  efi: Make our variable validation list include the guid
  efi: Do variable name validation tests in utf8
  efi: Use ucs2_as_utf8 in efivarfs instead of open coding a bad version
  lib/ucs2_string: Add ucs2 -> utf8 helper functions
  ARM: 8457/1: psci-smp is built only for SMP
  drm/gma500: Use correct unref in the gem bo create function
  devm_memremap: Fix error value when memremap failed
  KVM: s390: fix guest fprs memory leak
  arm64: errata: Add -mpc-relative-literal-loads to build flags
  ARM: debug-ll: fix BCM63xx entry for multiplatform
  ext4: fix bh->b_state corruption
  sctp: Fix port hash table size computation
  unix_diag: fix incorrect sign extension in unix_lookup_by_ino
  tipc: unlock in error path
  rtnl: RTM_GETNETCONF: fix wrong return value
  IFF_NO_QUEUE: Fix for drivers not calling ether_setup()
  tcp/dccp: fix another race at listener dismantle
  route: check and remove route cache when we get route
  net_sched fix: reclassification needs to consider ether protocol changes
  pppoe: fix reference counting in PPPoE proxy
  l2tp: Fix error creating L2TP tunnels
  net/mlx4_en: Avoid changing dev->features directly in run-time
  net/mlx4_en: Choose time-stamping shift value according to HW frequency
  net/mlx4_en: Count HW buffer overrun only once
  qmi_wwan: add "4G LTE usb-modem U901"
  tcp: md5: release request socket instead of listener
  tipc: fix premature addition of node to lookup table
  af_unix: Guard against other == sk in unix_dgram_sendmsg
  af_unix: Don't set err in unix_stream_read_generic unless there was an error
  ipv4: fix memory leaks in ip_cmsg_send() callers
  bonding: Fix ARP monitor validation
  bpf: fix branch offset adjustment on backjumps after patching ctx expansion
  flow_dissector: Fix unaligned access in __skb_flow_dissector when used by eth_get_headlen
  net: Copy inner L3 and L4 headers as unaligned on GRE TEB
  sctp: translate network order to host order when users get a hmacid
  enic: increment devcmd2 result ring in case of timeout
  tg3: Fix for tg3 transmit queue 0 timed out when too many gso_segs
  net:Add sysctl_max_skb_frags
  tcp: do not drop syn_recv on all icmp reports
  unix: correctly track in-flight fds in sending process user_struct
  ipv6: fix a lockdep splat
  ipv6: addrconf: Fix recursive spin lock call
  ipv6/udp: use sticky pktinfo egress ifindex on connect()
  ipv6: enforce flowi6_oif usage in ip6_dst_lookup_tail()
  tcp: beware of alignments in tcp_get_info()
  switchdev: Require RTNL mutex to be held when sending FDB notifications
  inet: frag: Always orphan skbs inside ip_defrag()
  tipc: fix connection abort during subscription cancel
  net: dsa: fix mv88e6xxx switches
  sctp: allow setting SCTP_SACK_IMMEDIATELY by the application
  pptp: fix illegal memory access caused by multiple bind()s
  af_unix: fix struct pid memory leak
  tcp: fix NULL deref in tcp_v4_send_ack()
  lwt: fix rx checksum setting for lwt devices tunneling over ipv6
  tunnels: Allow IPv6 UDP checksums to be correctly controlled.
  net: dp83640: Fix tx timestamp overflow handling.
  gro: Make GRO aware of lightweight tunnels.
  af_iucv: Validate socket address length in iucv_sock_bind()

Conflicts:
	arch/arm64/Makefile
	arch/arm64/include/asm/cacheflush.h
	drivers/mmc/host/sdhci.c
	drivers/usb/dwc3/ep0.c
	drivers/usb/dwc3/gadget.c
	kernel/module.c
	sound/core/pcm_compat.c

CRs-Fixed: 1010239
Signed-off-by: Runmin Wang <runminw@codeaurora.org>
Change-Id: I41a28636fc9ad91f9d979b191784609476294cdf
2016-07-12 11:40:49 -07:00

1267 lines
31 KiB
C

/*
* linux/kernel/time/tick-sched.c
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
*
* No idle tick implementation for low and high resolution timers
*
* Started by: Thomas Gleixner and Ingo Molnar
*
* Distribute under GPLv2.
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
#include <linux/perf_event.h>
#include <linux/context_tracking.h>
#include <linux/rq_stats.h>
#include <asm/irq_regs.h>
#include "tick-internal.h"
#include <trace/events/timer.h>
struct rq_data rq_info;
struct workqueue_struct *rq_wq;
spinlock_t rq_lock;
/*
* Per cpu nohz control structure
*/
static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
/*
* The time, when the last jiffy update happened. Protected by jiffies_lock.
*/
static ktime_t last_jiffies_update;
u64 jiffy_to_ktime_ns(u64 *now, u64 *jiffy_ktime_ns)
{
u64 cur_jiffies;
unsigned long seq;
do {
seq = read_seqbegin(&jiffies_lock);
*now = ktime_get_ns();
*jiffy_ktime_ns = ktime_to_ns(last_jiffies_update);
cur_jiffies = get_jiffies_64();
} while (read_seqretry(&jiffies_lock, seq));
return cur_jiffies;
}
struct tick_sched *tick_get_tick_sched(int cpu)
{
return &per_cpu(tick_cpu_sched, cpu);
}
/*
* Must be called with interrupts disabled !
*/
static void tick_do_update_jiffies64(ktime_t now)
{
unsigned long ticks = 0;
ktime_t delta;
/*
* Do a quick check without holding jiffies_lock:
*/
delta = ktime_sub(now, last_jiffies_update);
if (delta.tv64 < tick_period.tv64)
return;
/* Reevalute with jiffies_lock held */
write_seqlock(&jiffies_lock);
delta = ktime_sub(now, last_jiffies_update);
if (delta.tv64 >= tick_period.tv64) {
delta = ktime_sub(delta, tick_period);
last_jiffies_update = ktime_add(last_jiffies_update,
tick_period);
/* Slow path for long timeouts */
if (unlikely(delta.tv64 >= tick_period.tv64)) {
s64 incr = ktime_to_ns(tick_period);
ticks = ktime_divns(delta, incr);
last_jiffies_update = ktime_add_ns(last_jiffies_update,
incr * ticks);
}
do_timer(++ticks);
/* Keep the tick_next_period variable up to date */
tick_next_period = ktime_add(last_jiffies_update, tick_period);
} else {
write_sequnlock(&jiffies_lock);
return;
}
write_sequnlock(&jiffies_lock);
update_wall_time();
}
/*
* Initialize and return retrieve the jiffies update.
*/
static ktime_t tick_init_jiffy_update(void)
{
ktime_t period;
write_seqlock(&jiffies_lock);
/* Did we start the jiffies update yet ? */
if (last_jiffies_update.tv64 == 0)
last_jiffies_update = tick_next_period;
period = last_jiffies_update;
write_sequnlock(&jiffies_lock);
return period;
}
static void tick_sched_do_timer(ktime_t now)
{
int cpu = smp_processor_id();
#ifdef CONFIG_NO_HZ_COMMON
/*
* Check if the do_timer duty was dropped. We don't care about
* concurrency: This happens only when the cpu in charge went
* into a long sleep. If two cpus happen to assign themself to
* this duty, then the jiffies update is still serialized by
* jiffies_lock.
*/
if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
&& !tick_nohz_full_cpu(cpu))
tick_do_timer_cpu = cpu;
#endif
/* Check, if the jiffies need an update */
if (tick_do_timer_cpu == cpu)
tick_do_update_jiffies64(now);
}
static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
{
#ifdef CONFIG_NO_HZ_COMMON
/*
* When we are idle and the tick is stopped, we have to touch
* the watchdog as we might not schedule for a really long
* time. This happens on complete idle SMP systems while
* waiting on the login prompt. We also increment the "start of
* idle" jiffy stamp so the idle accounting adjustment we do
* when we go busy again does not account too much ticks.
*/
if (ts->tick_stopped) {
touch_softlockup_watchdog_sched();
if (is_idle_task(current))
ts->idle_jiffies++;
}
#endif
update_process_times(user_mode(regs));
profile_tick(CPU_PROFILING);
}
#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
cpumask_var_t housekeeping_mask;
bool tick_nohz_full_running;
static bool can_stop_full_tick(void)
{
WARN_ON_ONCE(!irqs_disabled());
if (!sched_can_stop_tick()) {
trace_tick_stop(0, "more than 1 task in runqueue\n");
return false;
}
if (!posix_cpu_timers_can_stop_tick(current)) {
trace_tick_stop(0, "posix timers running\n");
return false;
}
if (!perf_event_can_stop_tick()) {
trace_tick_stop(0, "perf events running\n");
return false;
}
/* sched_clock_tick() needs us? */
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
* TODO: kick full dynticks CPUs when
* sched_clock_stable is set.
*/
if (!sched_clock_stable()) {
trace_tick_stop(0, "unstable sched clock\n");
/*
* Don't allow the user to think they can get
* full NO_HZ with this machine.
*/
WARN_ONCE(tick_nohz_full_running,
"NO_HZ FULL will not work with unstable sched clock");
return false;
}
#endif
return true;
}
static void nohz_full_kick_work_func(struct irq_work *work)
{
/* Empty, the tick restart happens on tick_nohz_irq_exit() */
}
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
.func = nohz_full_kick_work_func,
};
/*
* Kick this CPU if it's full dynticks in order to force it to
* re-evaluate its dependency on the tick and restart it if necessary.
* This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
* is NMI safe.
*/
void tick_nohz_full_kick(void)
{
if (!tick_nohz_full_cpu(smp_processor_id()))
return;
irq_work_queue(this_cpu_ptr(&nohz_full_kick_work));
}
/*
* Kick the CPU if it's full dynticks in order to force it to
* re-evaluate its dependency on the tick and restart it if necessary.
*/
void tick_nohz_full_kick_cpu(int cpu)
{
if (!tick_nohz_full_cpu(cpu))
return;
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
}
static void nohz_full_kick_ipi(void *info)
{
/* Empty, the tick restart happens on tick_nohz_irq_exit() */
}
/*
* Kick all full dynticks CPUs in order to force these to re-evaluate
* their dependency on the tick and restart it if necessary.
*/
void tick_nohz_full_kick_all(void)
{
if (!tick_nohz_full_running)
return;
preempt_disable();
smp_call_function_many(tick_nohz_full_mask,
nohz_full_kick_ipi, NULL, false);
tick_nohz_full_kick();
preempt_enable();
}
/*
* Re-evaluate the need for the tick as we switch the current task.
* It might need the tick due to per task/process properties:
* perf events, posix cpu timers, ...
*/
void __tick_nohz_task_switch(void)
{
unsigned long flags;
local_irq_save(flags);
if (!tick_nohz_full_cpu(smp_processor_id()))
goto out;
if (tick_nohz_tick_stopped() && !can_stop_full_tick())
tick_nohz_full_kick();
out:
local_irq_restore(flags);
}
/* Parse the boot-time nohz CPU list from the kernel parameters. */
static int __init tick_nohz_full_setup(char *str)
{
alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
free_bootmem_cpumask_var(tick_nohz_full_mask);
return 1;
}
tick_nohz_full_running = true;
return 1;
}
__setup("nohz_full=", tick_nohz_full_setup);
static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_DOWN_PREPARE:
/*
* The boot CPU handles housekeeping duty (unbound timers,
* workqueues, timekeeping, ...) on behalf of full dynticks
* CPUs. It must remain online when nohz full is enabled.
*/
if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
return NOTIFY_BAD;
break;
}
return NOTIFY_OK;
}
static int tick_nohz_init_all(void)
{
int err = -1;
#ifdef CONFIG_NO_HZ_FULL_ALL
if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
WARN(1, "NO_HZ: Can't allocate full dynticks cpumask\n");
return err;
}
err = 0;
cpumask_setall(tick_nohz_full_mask);
tick_nohz_full_running = true;
#endif
return err;
}
void __init tick_nohz_init(void)
{
int cpu;
if (!tick_nohz_full_running) {
if (tick_nohz_init_all() < 0)
return;
}
if (!alloc_cpumask_var(&housekeeping_mask, GFP_KERNEL)) {
WARN(1, "NO_HZ: Can't allocate not-full dynticks cpumask\n");
cpumask_clear(tick_nohz_full_mask);
tick_nohz_full_running = false;
return;
}
/*
* Full dynticks uses irq work to drive the tick rescheduling on safe
* locking contexts. But then we need irq work to raise its own
* interrupts to avoid circular dependency on the tick
*/
if (!arch_irq_work_has_interrupt()) {
pr_warning("NO_HZ: Can't run full dynticks because arch doesn't "
"support irq work self-IPIs\n");
cpumask_clear(tick_nohz_full_mask);
cpumask_copy(housekeeping_mask, cpu_possible_mask);
tick_nohz_full_running = false;
return;
}
cpu = smp_processor_id();
if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
cpumask_clear_cpu(cpu, tick_nohz_full_mask);
}
cpumask_andnot(housekeeping_mask,
cpu_possible_mask, tick_nohz_full_mask);
for_each_cpu(cpu, tick_nohz_full_mask)
context_tracking_cpu_set(cpu);
cpu_notifier(tick_nohz_cpu_down_callback, 0);
pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
cpumask_pr_args(tick_nohz_full_mask));
/*
* We need at least one CPU to handle housekeeping work such
* as timekeeping, unbound timers, workqueues, ...
*/
WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
}
#endif
/*
* NOHZ - aka dynamic tick functionality
*/
#ifdef CONFIG_NO_HZ_COMMON
/*
* NO HZ enabled ?
*/
static int tick_nohz_enabled __read_mostly = 1;
unsigned long tick_nohz_active __read_mostly;
/*
* Enable / Disable tickless mode
*/
static int __init setup_tick_nohz(char *str)
{
if (!strcmp(str, "off"))
tick_nohz_enabled = 0;
else if (!strcmp(str, "on"))
tick_nohz_enabled = 1;
else
return 0;
return 1;
}
__setup("nohz=", setup_tick_nohz);
int tick_nohz_tick_stopped(void)
{
return __this_cpu_read(tick_cpu_sched.tick_stopped);
}
/**
* tick_nohz_update_jiffies - update jiffies when idle was interrupted
*
* Called from interrupt entry when the CPU was idle
*
* In case the sched_tick was stopped on this CPU, we have to check if jiffies
* must be updated. Otherwise an interrupt handler could use a stale jiffy
* value. We do this unconditionally on any cpu, as we don't know whether the
* cpu, which has the update task assigned is in a long sleep.
*/
static void tick_nohz_update_jiffies(ktime_t now)
{
unsigned long flags;
__this_cpu_write(tick_cpu_sched.idle_waketime, now);
local_irq_save(flags);
tick_do_update_jiffies64(now);
local_irq_restore(flags);
touch_softlockup_watchdog_sched();
}
/*
* Updates the per cpu time idle statistics counters
*/
static void
update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
{
ktime_t delta;
if (ts->idle_active) {
delta = ktime_sub(now, ts->idle_entrytime);
if (nr_iowait_cpu(cpu) > 0)
ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
else
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
ts->idle_entrytime = now;
}
if (last_update_time)
*last_update_time = ktime_to_us(now);
}
static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
{
update_ts_time_stats(smp_processor_id(), ts, now, NULL);
ts->idle_active = 0;
sched_clock_idle_wakeup_event(0);
}
static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
{
ktime_t now = ktime_get();
ts->idle_entrytime = now;
ts->idle_active = 1;
sched_clock_idle_sleep_event();
return now;
}
/**
* get_cpu_idle_time_us - get the total idle time of a cpu
* @cpu: CPU number to query
* @last_update_time: variable to store update time in. Do not update
* counters if NULL.
*
* Return the cummulative idle time (since boot) for a given
* CPU, in microseconds.
*
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
* This function returns -1 if NOHZ is not enabled.
*/
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
ktime_t now, idle;
if (!tick_nohz_active)
return -1;
now = ktime_get();
if (last_update_time) {
update_ts_time_stats(cpu, ts, now, last_update_time);
idle = ts->idle_sleeptime;
} else {
if (ts->idle_active && !nr_iowait_cpu(cpu)) {
ktime_t delta = ktime_sub(now, ts->idle_entrytime);
idle = ktime_add(ts->idle_sleeptime, delta);
} else {
idle = ts->idle_sleeptime;
}
}
return ktime_to_us(idle);
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
/**
* get_cpu_iowait_time_us - get the total iowait time of a cpu
* @cpu: CPU number to query
* @last_update_time: variable to store update time in. Do not update
* counters if NULL.
*
* Return the cummulative iowait time (since boot) for a given
* CPU, in microseconds.
*
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
* This function returns -1 if NOHZ is not enabled.
*/
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
ktime_t now, iowait;
if (!tick_nohz_active)
return -1;
now = ktime_get();
if (last_update_time) {
update_ts_time_stats(cpu, ts, now, last_update_time);
iowait = ts->iowait_sleeptime;
} else {
if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
ktime_t delta = ktime_sub(now, ts->idle_entrytime);
iowait = ktime_add(ts->iowait_sleeptime, delta);
} else {
iowait = ts->iowait_sleeptime;
}
}
return ktime_to_us(iowait);
}
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
{
hrtimer_cancel(&ts->sched_timer);
hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
/* Forward the time to expire in the future */
hrtimer_forward(&ts->sched_timer, now, tick_period);
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
else
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}
static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
ktime_t now, int cpu)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
unsigned long seq, basejiff;
ktime_t tick;
/* Read jiffies and the time when jiffies were updated last */
do {
seq = read_seqbegin(&jiffies_lock);
basemono = last_jiffies_update.tv64;
basejiff = jiffies;
} while (read_seqretry(&jiffies_lock, seq));
ts->last_jiffies = basejiff;
if (rcu_needs_cpu(basemono, &next_rcu) ||
arch_needs_cpu() || irq_work_needs_cpu()) {
next_tick = basemono + TICK_NSEC;
} else {
/*
* Get the next pending timer. If high resolution
* timers are enabled this only takes the timer wheel
* timers into account. If high resolution timers are
* disabled this also looks at the next expiring
* hrtimer.
*/
next_tmr = get_next_timer_interrupt(basejiff, basemono);
ts->next_timer = next_tmr;
/* Take the next rcu event into account */
next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
}
/*
* If the tick is due in the next period, keep it ticking or
* restart it proper.
*/
delta = next_tick - basemono;
if (delta <= (u64)TICK_NSEC) {
tick.tv64 = 0;
if (!ts->tick_stopped)
goto out;
if (delta == 0) {
/* Tick is stopped, but required now. Enforce it */
tick_nohz_restart(ts, now);
goto out;
}
}
/*
* If this cpu is the one which updates jiffies, then give up
* the assignment and let it be taken by the cpu which runs
* the tick timer next, which might be this cpu as well. If we
* don't drop this here the jiffies might be stale and
* do_timer() never invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last. If this cpu
* is the one which had the do_timer() duty last, we limit the
* sleep time to the timekeeping max_deferement value.
* Otherwise we can sleep as long as we want.
*/
delta = timekeeping_max_deferment();
if (cpu == tick_do_timer_cpu) {
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
ts->do_timer_last = 1;
} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
delta = KTIME_MAX;
ts->do_timer_last = 0;
} else if (!ts->do_timer_last) {
delta = KTIME_MAX;
}
#ifdef CONFIG_NO_HZ_FULL
/* Limit the tick delta to the maximum scheduler deferment */
if (!ts->inidle)
delta = min(delta, scheduler_tick_max_deferment());
#endif
/* Calculate the next expiry time */
if (delta < (KTIME_MAX - basemono))
expires = basemono + delta;
else
expires = KTIME_MAX;
expires = min_t(u64, expires, next_tick);
tick.tv64 = expires;
/* Skip reprogram of event if its not changed */
if (ts->tick_stopped && (expires == dev->next_event.tv64))
goto out;
/*
* nohz_stop_sched_tick can be called several times before
* the nohz_restart_sched_tick is called. This happens when
* interrupts arrive which do not cause a reschedule. In the
* first call we save the current tick time, so we can restart
* the scheduler tick in nohz_restart_sched_tick.
*/
if (!ts->tick_stopped) {
nohz_balance_enter_idle(cpu);
calc_load_enter_idle();
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
ts->tick_stopped = 1;
trace_tick_stop(1, " ");
}
/*
* If the expiration time == KTIME_MAX, then we simply stop
* the tick timer.
*/
if (unlikely(expires == KTIME_MAX)) {
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
hrtimer_cancel(&ts->sched_timer);
goto out;
}
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
else
tick_program_event(tick, 1);
out:
/* Update the estimated sleep length */
ts->sleep_length = ktime_sub(dev->next_event, now);
return tick;
}
static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{
/* Update jiffies first */
tick_do_update_jiffies64(now);
update_cpu_load_nohz();
calc_load_exit_idle();
touch_softlockup_watchdog_sched();
/*
* Cancel the scheduled timer and restore the tick
*/
ts->tick_stopped = 0;
ts->idle_exittime = now;
tick_nohz_restart(ts, now);
}
static void tick_nohz_full_update_tick(struct tick_sched *ts)
{
#ifdef CONFIG_NO_HZ_FULL
int cpu = smp_processor_id();
if (!tick_nohz_full_cpu(cpu))
return;
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
return;
if (can_stop_full_tick())
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get());
#endif
}
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
/*
* If this cpu is offline and it is the one which updates
* jiffies, then give up the assignment and let it be taken by
* the cpu which runs the tick timer next. If we don't drop
* this here the jiffies might be stale and do_timer() never
* invoked.
*/
if (unlikely(!cpu_online(cpu))) {
if (cpu == tick_do_timer_cpu)
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
return false;
}
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) {
ts->sleep_length = (ktime_t) { .tv64 = NSEC_PER_SEC/HZ };
return false;
}
if (need_resched())
return false;
if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
static int ratelimit;
if (ratelimit < 10 &&
(local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
pr_warn("NOHZ: local_softirq_pending %02x\n",
(unsigned int) local_softirq_pending());
ratelimit++;
}
return false;
}
if (tick_nohz_full_enabled()) {
/*
* Keep the tick alive to guarantee timekeeping progression
* if there are full dynticks CPUs around
*/
if (tick_do_timer_cpu == cpu)
return false;
/*
* Boot safety: make sure the timekeeping duty has been
* assigned before entering dyntick-idle mode,
*/
if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
return false;
}
return true;
}
static void __tick_nohz_idle_enter(struct tick_sched *ts)
{
ktime_t now, expires;
int cpu = smp_processor_id();
now = tick_nohz_start_idle(ts);
if (can_stop_idle_tick(cpu, ts)) {
int was_stopped = ts->tick_stopped;
ts->idle_calls++;
expires = tick_nohz_stop_sched_tick(ts, now, cpu);
if (expires.tv64 > 0LL) {
ts->idle_sleeps++;
ts->idle_expires = expires;
}
if (!was_stopped && ts->tick_stopped)
ts->idle_jiffies = ts->last_jiffies;
}
}
/**
* tick_nohz_idle_enter - stop the idle tick from the idle task
*
* When the next event is more than a tick into the future, stop the idle tick
* Called when we start the idle loop.
*
* The arch is responsible of calling:
*
* - rcu_idle_enter() after its last use of RCU before the CPU is put
* to sleep.
* - rcu_idle_exit() before the first use of RCU after the CPU is woken up.
*/
void tick_nohz_idle_enter(void)
{
struct tick_sched *ts;
WARN_ON_ONCE(irqs_disabled());
/*
* Update the idle state in the scheduler domain hierarchy
* when tick_nohz_stop_sched_tick() is called from the idle loop.
* State will be updated to busy during the first busy tick after
* exiting idle.
*/
set_cpu_sd_state_idle();
local_irq_disable();
ts = this_cpu_ptr(&tick_cpu_sched);
ts->inidle = 1;
__tick_nohz_idle_enter(ts);
local_irq_enable();
}
/**
* tick_nohz_irq_exit - update next tick event from interrupt exit
*
* When an interrupt fires while we are idle and it doesn't cause
* a reschedule, it may still add, modify or delete a timer, enqueue
* an RCU callback, etc...
* So we need to re-calculate and reprogram the next tick event.
*/
void tick_nohz_irq_exit(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
if (ts->inidle)
__tick_nohz_idle_enter(ts);
else
tick_nohz_full_update_tick(ts);
}
/**
* tick_nohz_get_sleep_length - return the length of the current sleep
*
* Called from power state control code with interrupts disabled
*/
ktime_t tick_nohz_get_sleep_length(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
return ts->sleep_length;
}
static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
{
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
unsigned long ticks;
if (vtime_accounting_enabled())
return;
/*
* We stopped the tick in idle. Update process times would miss the
* time we slept as update_process_times does only a 1 tick
* accounting. Enforce that this is accounted to idle !
*/
ticks = jiffies - ts->idle_jiffies;
/*
* We might be one off. Do not randomly account a huge number of ticks!
*/
if (ticks && ticks < LONG_MAX)
account_idle_ticks(ticks);
#endif
}
/**
* tick_nohz_idle_exit - restart the idle tick from the idle task
*
* Restart the idle tick when the CPU is woken up from idle
* This also exit the RCU extended quiescent state. The CPU
* can use RCU again after this function is called.
*/
void tick_nohz_idle_exit(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
ktime_t now;
local_irq_disable();
WARN_ON_ONCE(!ts->inidle);
ts->inidle = 0;
if (ts->idle_active || ts->tick_stopped)
now = ktime_get();
if (ts->idle_active)
tick_nohz_stop_idle(ts, now);
if (ts->tick_stopped) {
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_ticks(ts);
}
local_irq_enable();
}
/*
* The nohz low res interrupt handler
*/
static void tick_nohz_handler(struct clock_event_device *dev)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
struct pt_regs *regs = get_irq_regs();
ktime_t now = ktime_get();
dev->next_event.tv64 = KTIME_MAX;
tick_sched_do_timer(now);
tick_sched_handle(ts, regs);
/* No need to reprogram if we are running tickless */
if (unlikely(ts->tick_stopped))
return;
hrtimer_forward(&ts->sched_timer, now, tick_period);
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}
static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
{
if (!tick_nohz_enabled)
return;
ts->nohz_mode = mode;
/* One update is enough */
if (!test_and_set_bit(0, &tick_nohz_active))
timers_update_migration(true);
}
/**
* tick_nohz_switch_to_nohz - switch to nohz mode
*/
static void tick_nohz_switch_to_nohz(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
ktime_t next;
if (!tick_nohz_enabled)
return;
if (tick_switch_to_oneshot(tick_nohz_handler))
return;
/*
* Recycle the hrtimer in ts, so we can share the
* hrtimer_forward with the highres code.
*/
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
/* Get the next period */
next = tick_init_jiffy_update();
hrtimer_set_expires(&ts->sched_timer, next);
hrtimer_forward_now(&ts->sched_timer, tick_period);
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
}
/*
* When NOHZ is enabled and the tick is stopped, we need to kick the
* tick timer from irq_enter() so that the jiffies update is kept
* alive during long running softirqs. That's ugly as hell, but
* correctness is key even if we need to fix the offending softirq in
* the first place.
*
* Note, this is different to tick_nohz_restart. We just kick the
* timer and do not touch the other magic bits which need to be done
* when idle is left.
*/
static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now)
{
#if 0
/* Switch back to 2.6.27 behaviour */
ktime_t delta;
/*
* Do not touch the tick device, when the next expiry is either
* already reached or less/equal than the tick period.
*/
delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
if (delta.tv64 <= tick_period.tv64)
return;
tick_nohz_restart(ts, now);
#endif
}
static inline void tick_nohz_irq_enter(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
ktime_t now;
if (!ts->idle_active && !ts->tick_stopped)
return;
now = ktime_get();
if (ts->idle_active)
tick_nohz_stop_idle(ts, now);
if (ts->tick_stopped) {
tick_nohz_update_jiffies(now);
tick_nohz_kick_tick(ts, now);
}
}
#else
static inline void tick_nohz_switch_to_nohz(void) { }
static inline void tick_nohz_irq_enter(void) { }
static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
#endif /* CONFIG_NO_HZ_COMMON */
/*
* Called from irq_enter to notify about the possible interruption of idle()
*/
void tick_irq_enter(void)
{
tick_check_oneshot_broadcast_this_cpu();
tick_nohz_irq_enter();
}
/*
* High resolution timer specific code
*/
#ifdef CONFIG_HIGH_RES_TIMERS
static void update_rq_stats(void)
{
unsigned long jiffy_gap = 0;
unsigned int rq_avg = 0;
unsigned long flags = 0;
jiffy_gap = jiffies - rq_info.rq_poll_last_jiffy;
if (jiffy_gap >= rq_info.rq_poll_jiffies) {
spin_lock_irqsave(&rq_lock, flags);
if (!rq_info.rq_avg)
rq_info.rq_poll_total_jiffies = 0;
rq_avg = nr_running() * 10;
if (rq_info.rq_poll_total_jiffies) {
rq_avg = (rq_avg * jiffy_gap) +
(rq_info.rq_avg *
rq_info.rq_poll_total_jiffies);
do_div(rq_avg,
rq_info.rq_poll_total_jiffies + jiffy_gap);
}
rq_info.rq_avg = rq_avg;
rq_info.rq_poll_total_jiffies += jiffy_gap;
rq_info.rq_poll_last_jiffy = jiffies;
spin_unlock_irqrestore(&rq_lock, flags);
}
}
static void wakeup_user(void)
{
unsigned long jiffy_gap;
jiffy_gap = jiffies - rq_info.def_timer_last_jiffy;
if (jiffy_gap >= rq_info.def_timer_jiffies) {
rq_info.def_timer_last_jiffy = jiffies;
queue_work(rq_wq, &rq_info.def_timer_work);
}
}
/*
* We rearm the timer until we get disabled by the idle code.
* Called with interrupts disabled.
*/
static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
{
struct tick_sched *ts =
container_of(timer, struct tick_sched, sched_timer);
struct pt_regs *regs = get_irq_regs();
ktime_t now = ktime_get();
tick_sched_do_timer(now);
/*
* Do not call, when we are not in irq context and have
* no valid regs pointer
*/
if (regs) {
tick_sched_handle(ts, regs);
if (rq_info.init == 1 &&
tick_do_timer_cpu == smp_processor_id()) {
/*
* update run queue statistics
*/
update_rq_stats();
/*
* wakeup user if needed
*/
wakeup_user();
}
}
/* No need to reprogram if we are in idle or full dynticks mode */
if (unlikely(ts->tick_stopped))
return HRTIMER_NORESTART;
hrtimer_forward(timer, now, tick_period);
return HRTIMER_RESTART;
}
static int sched_skew_tick;
static int __init skew_tick(char *str)
{
get_option(&str, &sched_skew_tick);
return 0;
}
early_param("skew_tick", skew_tick);
/**
* tick_setup_sched_timer - setup the tick emulation timer
*/
void tick_setup_sched_timer(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
ktime_t now = ktime_get();
/*
* Emulate tick processing via per-CPU hrtimers:
*/
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
ts->sched_timer.function = tick_sched_timer;
/* Get the next period (per cpu) */
hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
/* Offset the tick to avert jiffies_lock contention. */
if (sched_skew_tick) {
u64 offset = ktime_to_ns(tick_period) >> 1;
do_div(offset, num_possible_cpus());
offset *= smp_processor_id();
hrtimer_add_expires_ns(&ts->sched_timer, offset);
}
hrtimer_forward(&ts->sched_timer, now, tick_period);
hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
}
#endif /* HIGH_RES_TIMERS */
#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
void tick_cancel_sched_timer(int cpu)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
# ifdef CONFIG_HIGH_RES_TIMERS
if (ts->sched_timer.base)
hrtimer_cancel(&ts->sched_timer);
# endif
memset(ts, 0, sizeof(*ts));
}
#endif
/**
* Async notification about clocksource changes
*/
void tick_clock_notify(void)
{
int cpu;
for_each_possible_cpu(cpu)
set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
}
/*
* Async notification about clock event changes
*/
void tick_oneshot_notify(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
set_bit(0, &ts->check_clocks);
}
/**
* Check, if a change happened, which makes oneshot possible.
*
* Called cyclic from the hrtimer softirq (driven by the timer
* softirq) allow_nohz signals, that we can switch into low-res nohz
* mode, because high resolution timers are disabled (either compile
* or runtime). Called with interrupts disabled.
*/
int tick_check_oneshot_change(int allow_nohz)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
if (!test_and_clear_bit(0, &ts->check_clocks))
return 0;
if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
return 0;
if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
return 0;
if (!allow_nohz)
return 1;
tick_nohz_switch_to_nohz();
return 0;
}
ktime_t * get_next_event_cpu(unsigned int cpu)
{
return &(per_cpu(tick_cpu_device, cpu).evtdev->next_event);
}