android_kernel_oneplus_msm8998/kernel/sched/cputime.c
Srinivasarao P 81a6413ed7 Merge android-4.4.127 (d6bbe8b) into msm-4.4
* refs/heads/tmp-d6bbe8b
  Linux 4.4.127
  Revert "ip6_vti: adjust vti mtu according to mtu of lower device"
  net: cavium: liquidio: fix up "Avoid dma_unmap_single on uninitialized ndata"
  spi: davinci: fix up dma_mapping_error() incorrect patch
  Revert "mtip32xx: use runtime tag to initialize command header"
  Revert "cpufreq: Fix governor module removal race"
  Revert "ARM: dts: omap3-n900: Fix the audio CODEC's reset pin"
  Revert "ARM: dts: am335x-pepper: Fix the audio CODEC's reset pin"
  Revert "PCI/MSI: Stop disabling MSI/MSI-X in pci_device_shutdown()"
  nospec: Kill array_index_nospec_mask_check()
  nospec: Move array_index_nospec() parameter checking into separate macro
  net: hns: Fix ethtool private flags
  md/raid10: reset the 'first' at the end of loop
  ARM: dts: am57xx-beagle-x15-common: Add overide powerhold property
  ARM: dts: dra7: Add power hold and power controller properties to palmas
  Documentation: pinctrl: palmas: Add ti,palmas-powerhold-override property definition
  vt: change SGR 21 to follow the standards
  Input: i8042 - enable MUX on Sony VAIO VGN-CS series to fix touchpad
  Input: i8042 - add Lenovo ThinkPad L460 to i8042 reset list
  staging: comedi: ni_mio_common: ack ai fifo error interrupts.
  fs/proc: Stop trying to report thread stacks
  crypto: x86/cast5-avx - fix ECB encryption when long sg follows short one
  crypto: ahash - Fix early termination in hash walk
  parport_pc: Add support for WCH CH382L PCI-E single parallel port card.
  media: usbtv: prevent double free in error case
  mei: remove dev_err message on an unsupported ioctl
  USB: serial: cp210x: add ELDAT Easywave RX09 id
  USB: serial: ftdi_sio: add support for Harman FirmwareHubEmulator
  USB: serial: ftdi_sio: add RT Systems VX-8 cable
  usb: dwc2: Improve gadget state disconnection handling
  scsi: virtio_scsi: always read VPD pages for multiqueue too
  llist: clang: introduce member_address_is_nonnull()
  Bluetooth: Fix missing encryption refresh on Security Request
  netfilter: x_tables: add and use xt_check_proc_name
  netfilter: bridge: ebt_among: add more missing match size checks
  xfrm: Refuse to insert 32 bit userspace socket policies on 64 bit systems
  net: xfrm: use preempt-safe this_cpu_read() in ipcomp_alloc_tfms()
  RDMA/ucma: Introduce safer rdma_addr_size() variants
  RDMA/ucma: Don't allow join attempts for unsupported AF family
  RDMA/ucma: Check that device exists prior to accessing it
  RDMA/ucma: Check that device is connected prior to access it
  RDMA/ucma: Ensure that CM_ID exists prior to access it
  RDMA/ucma: Fix use-after-free access in ucma_close
  RDMA/ucma: Check AF family prior resolving address
  xfrm_user: uncoditionally validate esn replay attribute struct
  arm64: avoid overflow in VA_START and PAGE_OFFSET
  selinux: Remove redundant check for unknown labeling behavior
  netfilter: ctnetlink: Make some parameters integer to avoid enum mismatch
  tty: provide tty_name() even without CONFIG_TTY
  audit: add tty field to LOGIN event
  frv: declare jiffies to be located in the .data section
  jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp
  fs: compat: Remove warning from COMPATIBLE_IOCTL
  selinux: Remove unnecessary check of array base in selinux_set_mapping()
  cpumask: Add helper cpumask_available()
  genirq: Use cpumask_available() for check of cpumask variable
  netfilter: nf_nat_h323: fix logical-not-parentheses warning
  Input: mousedev - fix implicit conversion warning
  dm ioctl: remove double parentheses
  PCI: Make PCI_ROM_ADDRESS_MASK a 32-bit constant
  writeback: fix the wrong congested state variable definition
  ACPI, PCI, irq: remove redundant check for null string pointer
  kprobes/x86: Fix to set RWX bits correctly before releasing trampoline
  usb: gadget: f_hid: fix: Prevent accessing released memory
  usb: gadget: align buffer size when allocating for OUT endpoint
  usb: gadget: fix usb_ep_align_maybe endianness and new usb_ep_align
  usb: gadget: change len to size_t on alloc_ep_req()
  usb: gadget: define free_ep_req as universal function
  partitions/msdos: Unable to mount UFS 44bsd partitions
  perf/hwbp: Simplify the perf-hwbp code, fix documentation
  ALSA: pcm: potential uninitialized return values
  ALSA: pcm: Use dma_bytes as size parameter in dma_mmap_coherent()
  mtd: jedec_probe: Fix crash in jedec_read_mfr()
  Replace #define with enum for better compilation errors.
  Add missing include to drivers/tty/goldfish.c
  Fix whitespace in drivers/tty/goldfish.c
  ANDROID: fuse: Add null terminator to path in canonical path to avoid issue
  ANDROID: sdcardfs: Fix sdcardfs to stop creating cases-sensitive duplicate entries.
  ANDROID: add missing include to pdev_bus
  ANDROID: pdev_bus: replace writel with gf_write_ptr
  ANDROID: Cleanup type casting in goldfish.h
  ANDROID: Include missing headers in goldfish.h
  ANDROID: cpufreq: times: skip printing invalid frequencies
  ANDROID: xt_qtaguid: Remove unnecessary null checks to device's name
  ANDROID: xt_qtaguid: Remove unnecessary null checks to ifa_label
  ANDROID: cpufreq: times: allocate enough space for a uid_entry
  Linux 4.4.126
  net: systemport: Rewrite __bcm_sysport_tx_reclaim()
  net: fec: Fix unbalanced PM runtime calls
  ieee802154: 6lowpan: fix possible NULL deref in lowpan_device_event()
  s390/qeth: on channel error, reject further cmd requests
  s390/qeth: lock read device while queueing next buffer
  s390/qeth: when thread completes, wake up all waiters
  s390/qeth: free netdevice when removing a card
  team: Fix double free in error path
  skbuff: Fix not waking applications when errors are enqueued
  net: Only honor ifindex in IP_PKTINFO if non-0
  netlink: avoid a double skb free in genlmsg_mcast()
  net/iucv: Free memory obtained by kzalloc
  net: ethernet: ti: cpsw: add check for in-band mode setting with RGMII PHY interface
  net: ethernet: arc: Fix a potential memory leak if an optional regulator is deferred
  l2tp: do not accept arbitrary sockets
  ipv6: fix access to non-linear packet in ndisc_fill_redirect_hdr_option()
  dccp: check sk for closed state in dccp_sendmsg()
  net: Fix hlist corruptions in inet_evict_bucket()
  Revert "genirq: Use irqd_get_trigger_type to compare the trigger type for shared IRQs"
  scsi: sg: don't return bogus Sg_requests
  Revert "genirq: Use irqd_get_trigger_type to compare the trigger type for shared IRQs"
  UPSTREAM: drm: virtio-gpu: set atomic flag
  UPSTREAM: drm: virtio-gpu: transfer dumb buffers to host on plane update
  UPSTREAM: drm: virtio-gpu: ensure plane is flushed to host on atomic update
  UPSTREAM: drm: virtio-gpu: get the fb from the plane state for atomic updates
  Linux 4.4.125
  bpf, x64: increase number of passes
  bpf: skip unnecessary capability check
  kbuild: disable clang's default use of -fmerge-all-constants
  staging: lustre: ptlrpc: kfree used instead of kvfree
  perf/x86/intel: Don't accidentally clear high bits in bdw_limit_period()
  x86/entry/64: Don't use IST entry for #BP stack
  x86/boot/64: Verify alignment of the LOAD segment
  x86/build/64: Force the linker to use 2MB page size
  kvm/x86: fix icebp instruction handling
  tty: vt: fix up tabstops properly
  can: cc770: Fix use after free in cc770_tx_interrupt()
  can: cc770: Fix queue stall & dropped RTR reply
  can: cc770: Fix stalls on rt-linux, remove redundant IRQ ack
  staging: ncpfs: memory corruption in ncp_read_kernel()
  mtd: nand: fsl_ifc: Fix nand waitfunc return value
  tracing: probeevent: Fix to support minus offset from symbol
  rtlwifi: rtl8723be: Fix loss of signal
  brcmfmac: fix P2P_DEVICE ethernet address generation
  acpi, numa: fix pxm to online numa node associations
  drm: udl: Properly check framebuffer mmap offsets
  drm/radeon: Don't turn off DP sink when disconnected
  drm/vmwgfx: Fix a destoy-while-held mutex problem.
  x86/mm: implement free pmd/pte page interfaces
  mm/vmalloc: add interfaces to free unmapped page table
  libata: Modify quirks for MX100 to limit NCQ_TRIM quirk to MU01 version
  libata: Make Crucial BX100 500GB LPM quirk apply to all firmware versions
  libata: Apply NOLPM quirk to Crucial M500 480 and 960GB SSDs
  libata: Enable queued TRIM for Samsung SSD 860
  libata: disable LPM for Crucial BX100 SSD 500GB drive
  libata: Apply NOLPM quirk to Crucial MX100 512GB SSDs
  libata: remove WARN() for DMA or PIO command without data
  libata: fix length validation of ATAPI-relayed SCSI commands
  Bluetooth: btusb: Fix quirk for Atheros 1525/QCA6174
  clk: bcm2835: Protect sections updating shared registers
  ahci: Add PCI-id for the Highpoint Rocketraid 644L card
  PCI: Add function 1 DMA alias quirk for Highpoint RocketRAID 644L
  mmc: dw_mmc: fix falling from idmac to PIO mode when dw_mci_reset occurs
  ALSA: hda/realtek - Always immediately update mute LED with pin VREF
  ALSA: aloop: Fix access to not-yet-ready substream via cable
  ALSA: aloop: Sync stale timer before release
  ALSA: usb-audio: Fix parsing descriptor of UAC2 processing unit
  iio: st_pressure: st_accel: pass correct platform data to init
  MIPS: ralink: Remove ralink_halt()
  ANDROID: cpufreq: times: fix proc_time_in_state_show
  dtc: turn off dtc unit address warnings by default
  Linux 4.4.124
  RDMA/ucma: Fix access to non-initialized CM_ID object
  dmaengine: ti-dma-crossbar: Fix event mapping for TPCC_EVT_MUX_60_63
  clk: si5351: Rename internal plls to avoid name collisions
  nfsd4: permit layoutget of executable-only files
  RDMA/ocrdma: Fix permissions for OCRDMA_RESET_STATS
  ip6_vti: adjust vti mtu according to mtu of lower device
  iommu/vt-d: clean up pr_irq if request_threaded_irq fails
  pinctrl: Really force states during suspend/resume
  coresight: Fix disabling of CoreSight TPIU
  pty: cancel pty slave port buf's work in tty_release
  drm/omap: DMM: Check for DMM readiness after successful transaction commit
  vgacon: Set VGA struct resource types
  IB/umem: Fix use of npages/nmap fields
  RDMA/cma: Use correct size when writing netlink stats
  IB/ipoib: Avoid memory leak if the SA returns a different DGID
  mmc: avoid removing non-removable hosts during suspend
  platform/chrome: Use proper protocol transfer function
  cros_ec: fix nul-termination for firmware build info
  media: [RESEND] media: dvb-frontends: Add delay to Si2168 restart
  media: bt8xx: Fix err 'bt878_probe()'
  rtlwifi: rtl_pci: Fix the bug when inactiveps is enabled.
  RDMA/iwpm: Fix uninitialized error code in iwpm_send_mapinfo()
  drm/msm: fix leak in failed get_pages
  media: c8sectpfe: fix potential NULL pointer dereference in c8sectpfe_timer_interrupt
  Bluetooth: hci_qca: Avoid setup failure on missing rampatch
  perf tests kmod-path: Don't fail if compressed modules aren't supported
  rtc: ds1374: wdt: Fix stop/start ioctl always returning -EINVAL
  rtc: ds1374: wdt: Fix issue with timeout scaling from secs to wdt ticks
  cifs: small underflow in cnvrtDosUnixTm()
  net: hns: fix ethtool_get_strings overflow in hns driver
  sm501fb: don't return zero on failure path in sm501fb_start()
  video: fbdev: udlfb: Fix buffer on stack
  tcm_fileio: Prevent information leak for short reads
  ia64: fix module loading for gcc-5.4
  md/raid10: skip spare disk as 'first' disk
  Input: twl4030-pwrbutton - use correct device for irq request
  power: supply: pda_power: move from timer to delayed_work
  bnx2x: Align RX buffers
  drm/nouveau/kms: Increase max retries in scanout position queries.
  ACPI / PMIC: xpower: Fix power_table addresses
  ipmi/watchdog: fix wdog hang on panic waiting for ipmi response
  ARM: DRA7: clockdomain: Change the CLKTRCTRL of CM_PCIE_CLKSTCTRL to SW_WKUP
  mmc: sdhci-of-esdhc: limit SD clock for ls1012a/ls1046a
  staging: wilc1000: fix unchecked return value
  staging: unisys: visorhba: fix s-Par to boot with option CONFIG_VMAP_STACK set to y
  mtip32xx: use runtime tag to initialize command header
  mfd: palmas: Reset the POWERHOLD mux during power off
  mac80211: don't parse encrypted management frames in ieee80211_frame_acked
  Btrfs: send, fix file hole not being preserved due to inline extent
  rndis_wlan: add return value validation
  mt7601u: check return value of alloc_skb
  iio: st_pressure: st_accel: Initialise sensor platform data properly
  NFS: don't try to cross a mountpount when there isn't one there.
  infiniband/uverbs: Fix integer overflows
  scsi: mac_esp: Replace bogus memory barrier with spinlock
  qlcnic: fix unchecked return value
  wan: pc300too: abort path on failure
  mmc: host: omap_hsmmc: checking for NULL instead of IS_ERR()
  openvswitch: Delete conntrack entry clashing with an expectation.
  netfilter: xt_CT: fix refcnt leak on error path
  Fix driver usage of 128B WQEs when WQ_CREATE is V1.
  ASoC: Intel: Skylake: Uninitialized variable in probe_codec()
  IB/mlx4: Change vma from shared to private
  IB/mlx4: Take write semaphore when changing the vma struct
  HSI: ssi_protocol: double free in ssip_pn_xmit()
  IB/ipoib: Update broadcast object if PKey value was changed in index 0
  IB/ipoib: Fix deadlock between ipoib_stop and mcast join flow
  ALSA: hda - Fix headset microphone detection for ASUS N551 and N751
  e1000e: fix timing for 82579 Gigabit Ethernet controller
  tcp: remove poll() flakes with FastOpen
  NFS: Fix missing pg_cleanup after nfs_pageio_cond_complete()
  md/raid10: wait up frozen array in handle_write_completed
  iommu/omap: Register driver before setting IOMMU ops
  ARM: 8668/1: ftrace: Fix dynamic ftrace with DEBUG_RODATA and !FRAME_POINTER
  KVM: PPC: Book3S PR: Exit KVM on failed mapping
  scsi: virtio_scsi: Always try to read VPD pages
  clk: ns2: Correct SDIO bits
  ath: Fix updating radar flags for coutry code India
  spi: dw: Disable clock after unregistering the host
  media/dvb-core: Race condition when writing to CAM
  net: ipv6: send unsolicited NA on admin up
  i2c: i2c-scmi: add a MS HID
  genirq: Use irqd_get_trigger_type to compare the trigger type for shared IRQs
  cpufreq/sh: Replace racy task affinity logic
  ACPI/processor: Replace racy task affinity logic
  ACPI/processor: Fix error handling in __acpi_processor_start()
  time: Change posix clocks ops interfaces to use timespec64
  Input: ar1021_i2c - fix too long name in driver's device table
  rtc: cmos: Do not assume irq 8 for rtc when there are no legacy irqs
  x86: i8259: export legacy_pic symbol
  regulator: anatop: set default voltage selector for pcie
  platform/x86: asus-nb-wmi: Add wapf4 quirk for the X302UA
  staging: android: ashmem: Fix possible deadlock in ashmem_ioctl
  CIFS: Enable encryption during session setup phase
  SMB3: Validate negotiate request must always be signed
  tpm_tis: fix potential buffer overruns caused by bit glitches on the bus
  tpm: fix potential buffer overruns caused by bit glitches on the bus
  BACKPORT, FROMLIST: crypto: arm64/speck - add NEON-accelerated implementation of Speck-XTS
  Linux 4.4.123
  bpf: fix incorrect sign extension in check_alu_op()
  usb: gadget: bdc: 64-bit pointer capability check
  USB: gadget: udc: Add missing platform_device_put() on error in bdc_pci_probe()
  btrfs: Fix use-after-free when cleaning up fs_devs with a single stale device
  btrfs: alloc_chunk: fix DUP stripe size handling
  ARM: dts: LogicPD Torpedo: Fix I2C1 pinmux
  scsi: sg: only check for dxfer_len greater than 256M
  scsi: sg: fix static checker warning in sg_is_valid_dxfer
  scsi: sg: fix SG_DXFER_FROM_DEV transfers
  irqchip/gic-v3-its: Ensure nr_ites >= nr_lpis
  fs/aio: Use RCU accessors for kioctx_table->table[]
  fs/aio: Add explicit RCU grace period when freeing kioctx
  lock_parent() needs to recheck if dentry got __dentry_kill'ed under it
  fs: Teach path_connected to handle nfs filesystems with multiple roots.
  drm/amdgpu/dce: Don't turn off DP sink when disconnected
  ALSA: seq: Clear client entry before deleting else at closing
  ALSA: seq: Fix possible UAF in snd_seq_check_queue()
  ALSA: hda - Revert power_save option default value
  ALSA: pcm: Fix UAF in snd_pcm_oss_get_formats()
  x86/mm: Fix vmalloc_fault to use pXd_large
  x86/vm86/32: Fix POPF emulation
  selftests/x86/entry_from_vm86: Add test cases for POPF
  selftests/x86: Add tests for the STR and SLDT instructions
  selftests/x86: Add tests for User-Mode Instruction Prevention
  selftests/x86/entry_from_vm86: Exit with 1 if we fail
  ima: relax requiring a file signature for new files with zero length
  rcutorture/configinit: Fix build directory error message
  ipvlan: add L2 check for packets arriving via virtual devices
  ASoC: nuc900: Fix a loop timeout test
  mac80211: remove BUG() when interface type is invalid
  mac80211_hwsim: enforce PS_MANUAL_POLL to be set after PS_ENABLED
  agp/intel: Flush all chipset writes after updating the GGTT
  drm/amdkfd: Fix memory leaks in kfd topology
  veth: set peer GSO values
  media: cpia2: Fix a couple off by one bugs
  scsi: dh: add new rdac devices
  scsi: devinfo: apply to HP XP the same flags as Hitachi VSP
  scsi: core: scsi_get_device_flags_keyed(): Always return device flags
  spi: sun6i: disable/unprepare clocks on remove
  tools/usbip: fixes build with musl libc toolchain
  ath10k: fix invalid STS_CAP_OFFSET_MASK
  clk: qcom: msm8916: fix mnd_width for codec_digcodec
  cpufreq: Fix governor module removal race
  ath10k: update tdls teardown state to target
  ARM: dts: omap3-n900: Fix the audio CODEC's reset pin
  ARM: dts: am335x-pepper: Fix the audio CODEC's reset pin
  mtd: nand: fix interpretation of NAND_CMD_NONE in nand_command[_lp]()
  net: xfrm: allow clearing socket xfrm policies.
  test_firmware: fix setting old custom fw path back on exit
  sched: Stop resched_cpu() from sending IPIs to offline CPUs
  sched: Stop switched_to_rt() from sending IPIs to offline CPUs
  ARM: dts: exynos: Correct Trats2 panel reset line
  HID: elo: clear BTN_LEFT mapping
  video/hdmi: Allow "empty" HDMI infoframes
  drm/edid: set ELD connector type in drm_edid_to_eld()
  wil6210: fix memory access violation in wil_memcpy_from/toio_32
  pwm: tegra: Increase precision in PWM rate calculation
  kprobes/x86: Set kprobes pages read-only
  kprobes/x86: Fix kprobe-booster not to boost far call instructions
  scsi: sg: close race condition in sg_remove_sfp_usercontext()
  scsi: sg: check for valid direction before starting the request
  perf session: Don't rely on evlist in pipe mode
  perf inject: Copy events when reordering events in pipe mode
  drivers/perf: arm_pmu: handle no platform_device
  usb: gadget: dummy_hcd: Fix wrong power status bit clear/reset in dummy_hub_control()
  usb: dwc2: Make sure we disconnect the gadget state
  md/raid6: Fix anomily when recovering a single device in RAID6.
  regulator: isl9305: fix array size
  MIPS: r2-on-r6-emu: Clear BLTZALL and BGEZALL debugfs counters
  MIPS: r2-on-r6-emu: Fix BLEZL and BGTZL identification
  MIPS: BPF: Fix multiple problems in JIT skb access helpers.
  MIPS: BPF: Quit clobbering callee saved registers in JIT code.
  coresight: Fixes coresight DT parse to get correct output port ID.
  drm/amdgpu: Fail fb creation from imported dma-bufs. (v2)
  drm/radeon: Fail fb creation from imported dma-bufs.
  video: ARM CLCD: fix dma allocation size
  iommu/iova: Fix underflow bug in __alloc_and_insert_iova_range
  apparmor: Make path_max parameter readonly
  scsi: ses: don't get power status of SES device slot on probe
  fm10k: correctly check if interface is removed
  ALSA: firewire-digi00x: handle all MIDI messages on streaming packets
  reiserfs: Make cancel_old_flush() reliable
  ARM: dts: koelsch: Correct clock frequency of X2 DU clock input
  net/faraday: Add missing include of of.h
  powerpc: Avoid taking a data miss on every userspace instruction miss
  ARM: dts: r8a7791: Correct parent of SSI[0-9] clocks
  ARM: dts: r8a7790: Correct parent of SSI[0-9] clocks
  NFC: nfcmrvl: double free on error path
  NFC: nfcmrvl: Include unaligned.h instead of access_ok.h
  vxlan: vxlan dev should inherit lowerdev's gso_max_size
  drm/vmwgfx: Fixes to vmwgfx_fb
  braille-console: Fix value returned by _braille_console_setup
  bonding: refine bond_fold_stats() wrap detection
  f2fs: relax node version check for victim data in gc
  blk-throttle: make sure expire time isn't too big
  mm: Fix false-positive VM_BUG_ON() in page_cache_{get,add}_speculative()
  driver: (adm1275) set the m,b and R coefficients correctly for power
  dmaengine: imx-sdma: add 1ms delay to ensure SDMA channel is stopped
  tcp: sysctl: Fix a race to avoid unexpected 0 window from space
  spi: omap2-mcspi: poll OMAP2_MCSPI_CHSTAT_RXS for PIO transfer
  ASoC: rcar: ssi: don't set SSICR.CKDV = 000 with SSIWSR.CONT
  sched: act_csum: don't mangle TCP and UDP GSO packets
  Input: qt1070 - add OF device ID table
  sysrq: Reset the watchdog timers while displaying high-resolution timers
  timers, sched_clock: Update timeout for clock wrap
  media: i2c/soc_camera: fix ov6650 sensor getting wrong clock
  scsi: ipr: Fix missed EH wakeup
  solo6x10: release vb2 buffers in solo_stop_streaming()
  of: fix of_device_get_modalias returned length when truncating buffers
  batman-adv: handle race condition for claims between gateways
  ARM: dts: Adjust moxart IRQ controller and flags
  net/8021q: create device with all possible features in wanted_features
  HID: clamp input to logical range if no null state
  perf probe: Return errno when not hitting any event
  ath10k: disallow DFS simulation if DFS channel is not enabled
  drm: Defer disabling the vblank IRQ until the next interrupt (for instant-off)
  drivers: net: xgene: Fix hardware checksum setting
  perf tools: Make perf_event__synthesize_mmap_events() scale
  i40e: fix ethtool to get EEPROM data from X722 interface
  i40e: Acquire NVM lock before reads on all devices
  perf sort: Fix segfault with basic block 'cycles' sort dimension
  selinux: check for address length in selinux_socket_bind()
  PCI/MSI: Stop disabling MSI/MSI-X in pci_device_shutdown()
  ath10k: fix a warning during channel switch with multiple vaps
  drm: qxl: Don't alloc fbdev if emulation is not supported
  HID: reject input outside logical range only if null state is set
  staging: wilc1000: add check for kmalloc allocation failure.
  staging: speakup: Replace BUG_ON() with WARN_ON().
  Input: tsc2007 - check for presence and power down tsc2007 during probe
  blkcg: fix double free of new_blkg in blkcg_init_queue
  ANDROID: cpufreq: times: avoid prematurely freeing uid_entry
  ANDROID: Use standard logging functions in goldfish_pipe
  ANDROID: Fix whitespace in goldfish
  staging: android: ashmem: Fix possible deadlock in ashmem_ioctl
  llist: clang: introduce member_address_is_nonnull()
  Linux 4.4.122
  fixup: sctp: verify size of a new chunk in _sctp_make_chunk()
  serial: 8250_pci: Add Brainboxes UC-260 4 port serial device
  usb: gadget: f_fs: Fix use-after-free in ffs_fs_kill_sb()
  usb: usbmon: Read text within supplied buffer size
  USB: usbmon: remove assignment from IS_ERR argument
  usb: quirks: add control message delay for 1b1c:1b20
  USB: storage: Add JMicron bridge 152d:2567 to unusual_devs.h
  staging: android: ashmem: Fix lockdep issue during llseek
  staging: comedi: fix comedi_nsamples_left.
  uas: fix comparison for error code
  tty/serial: atmel: add new version check for usart
  serial: sh-sci: prevent lockup on full TTY buffers
  x86: Treat R_X86_64_PLT32 as R_X86_64_PC32
  x86/module: Detect and skip invalid relocations
  Revert "ARM: dts: LogicPD Torpedo: Fix I2C1 pinmux"
  NFS: Fix an incorrect type in struct nfs_direct_req
  scsi: qla2xxx: Replace fcport alloc with qla2x00_alloc_fcport
  ubi: Fix race condition between ubi volume creation and udev
  ext4: inplace xattr block update fails to deduplicate blocks
  netfilter: x_tables: pack percpu counter allocations
  netfilter: x_tables: pass xt_counters struct to counter allocator
  netfilter: x_tables: pass xt_counters struct instead of packet counter
  netfilter: use skb_to_full_sk in ip_route_me_harder
  netfilter: ipv6: fix use-after-free Write in nf_nat_ipv6_manip_pkt
  netfilter: bridge: ebt_among: add missing match size checks
  netfilter: ebtables: CONFIG_COMPAT: don't trust userland offsets
  netfilter: IDLETIMER: be syzkaller friendly
  netfilter: nat: cope with negative port range
  netfilter: x_tables: fix missing timer initialization in xt_LED
  netfilter: add back stackpointer size checks
  tc358743: fix register i2c_rd/wr function fix
  Input: tca8418_keypad - remove double read of key event register
  ARM: omap2: hide omap3_save_secure_ram on non-OMAP3 builds
  netfilter: nfnetlink_queue: fix timestamp attribute
  watchdog: hpwdt: fix unused variable warning
  watchdog: hpwdt: Check source of NMI
  watchdog: hpwdt: SMBIOS check
  nospec: Include <asm/barrier.h> dependency
  ALSA: hda: add dock and led support for HP ProBook 640 G2
  ALSA: hda: add dock and led support for HP EliteBook 820 G3
  ALSA: seq: More protection for concurrent write and ioctl races
  ALSA: seq: Don't allow resizing pool in use
  ALSA: hda/realtek - Fix dock line-out volume on Dell Precision 7520
  x86/MCE: Serialize sysfs changes
  bcache: don't attach backing with duplicate UUID
  kbuild: Handle builtin dtb file names containing hyphens
  loop: Fix lost writes caused by missing flag
  Input: matrix_keypad - fix race when disabling interrupts
  MIPS: OCTEON: irq: Check for null return on kzalloc allocation
  MIPS: ath25: Check for kzalloc allocation failure
  MIPS: BMIPS: Do not mask IPIs during suspend
  drm/amdgpu: fix KV harvesting
  drm/radeon: fix KV harvesting
  drm/amdgpu: Notify sbios device ready before send request
  drm/amdgpu: Fix deadlock on runtime suspend
  drm/radeon: Fix deadlock on runtime suspend
  drm/nouveau: Fix deadlock on runtime suspend
  drm: Allow determining if current task is output poll worker
  workqueue: Allow retrieval of current task's work struct
  scsi: qla2xxx: Fix NULL pointer crash due to active timer for ABTS
  RDMA/mlx5: Fix integer overflow while resizing CQ
  RDMA/ucma: Check that user doesn't overflow QP state
  RDMA/ucma: Limit possible option size
  ANDROID: ranchu: 32 bit framebuffer support
  ANDROID: Address checkpatch warnings in goldfishfb
  ANDROID: Address checkpatch.pl warnings in goldfish_pipe
  ANDROID: sdcardfs: fix lock issue on 32 bit/SMP architectures
  ANDROID: goldfish: Fix typo in goldfish_cmd_locked() call
  ANDROID: Address checkpatch.pl warnings in goldfish_pipe_v2
  FROMLIST: f2fs: don't put dentry page in pagecache into highmem
  Linux 4.4.121
  btrfs: preserve i_mode if __btrfs_set_acl() fails
  bpf, x64: implement retpoline for tail call
  dm io: fix duplicate bio completion due to missing ref count
  mpls, nospec: Sanitize array index in mpls_label_ok()
  net: mpls: Pull common label check into helper
  sctp: verify size of a new chunk in _sctp_make_chunk()
  s390/qeth: fix IPA command submission race
  s390/qeth: fix SETIP command handling
  sctp: fix dst refcnt leak in sctp_v6_get_dst()
  sctp: fix dst refcnt leak in sctp_v4_get_dst
  udplite: fix partial checksum initialization
  ppp: prevent unregistered channels from connecting to PPP units
  netlink: ensure to loop over all netns in genlmsg_multicast_allns()
  net: ipv4: don't allow setting net.ipv4.route.min_pmtu below 68
  net: fix race on decreasing number of TX queues
  ipv6 sit: work around bogus gcc-8 -Wrestrict warning
  hdlc_ppp: carrier detect ok, don't turn off negotiation
  fib_semantics: Don't match route with mismatching tclassid
  bridge: check brport attr show in brport_show
  Revert "led: core: Fix brightness setting when setting delay_off=0"
  x86/spectre: Fix an error message
  leds: do not overflow sysfs buffer in led_trigger_show
  x86/apic/vector: Handle legacy irq data correctly
  ARM: dts: LogicPD Torpedo: Fix I2C1 pinmux
  btrfs: Don't clear SGID when inheriting ACLs
  x86/syscall: Sanitize syscall table de-references under speculation fix
  KVM: mmu: Fix overlap between public and private memslots
  ARM: mvebu: Fix broken PL310_ERRATA_753970 selects
  nospec: Allow index argument to have const-qualified type
  media: m88ds3103: don't call a non-initalized function
  cpufreq: s3c24xx: Fix broken s3c_cpufreq_init()
  ALSA: hda: Add a power_save blacklist
  ALSA: usb-audio: Add a quirck for B&W PX headphones
  tpm_i2c_nuvoton: fix potential buffer overruns caused by bit glitches on the bus
  tpm_i2c_infineon: fix potential buffer overruns caused by bit glitches on the bus
  tpm: st33zp24: fix potential buffer overruns caused by bit glitches on the bus
  ANDROID: Delete the goldfish_nand driver.
  ANDROID: Add input support for Android Wear.
  ANDROID: proc: fix config & includes for /proc/uid
  FROMLIST: ARM: amba: Don't read past the end of sysfs "driver_override" buffer
  UPSTREAM: ANDROID: binder: remove WARN() for redundant txn error
  ANDROID: cpufreq: times: Add missing includes
  ANDROID: cpufreq: Add time_in_state to /proc/uid directories
  ANDROID: proc: Add /proc/uid directory
  ANDROID: cpufreq: times: track per-uid time in state
  ANDROID: cpufreq: track per-task time in state

Conflicts:
	drivers/gpu/drm/msm/msm_gem.c
	drivers/net/wireless/ath/regd.c
	kernel/sched/core.c

Change-Id: I9bb7b5a062415da6925a5a56a34e6eb066a53320
Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
2018-04-20 12:27:57 +05:30

902 lines
23 KiB
C

#include <linux/export.h>
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include <linux/cpufreq_times.h>
#include "sched.h"
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
* There are no locks covering percpu hardirq/softirq time.
* They are only modified in vtime_account, on corresponding CPU
* with interrupts disabled. So, writes are safe.
* They are read and saved off onto struct rq in update_rq_clock().
* This may result in other CPU reading this CPU's irq time and can
* race with irq/vtime_account on this CPU. We would either get old
* or new value with a side effect of accounting a slice of irq time to wrong
* task when irq is in progress while we read rq->clock. That is a worthy
* compromise in place of having locks on each irq in account_system_time.
*/
DEFINE_PER_CPU(u64, cpu_hardirq_time);
DEFINE_PER_CPU(u64, cpu_softirq_time);
static DEFINE_PER_CPU(u64, irq_start_time);
static int sched_clock_irqtime;
void enable_sched_clock_irqtime(void)
{
sched_clock_irqtime = 1;
}
void disable_sched_clock_irqtime(void)
{
sched_clock_irqtime = 0;
}
#ifndef CONFIG_64BIT
DEFINE_PER_CPU(seqcount_t, irq_time_seq);
#endif /* CONFIG_64BIT */
/*
* Called before incrementing preempt_count on {soft,}irq_enter
* and before decrementing preempt_count on {soft,}irq_exit.
*/
void irqtime_account_irq(struct task_struct *curr)
{
unsigned long flags;
s64 delta;
int cpu;
u64 wallclock;
bool account = true;
if (!sched_clock_irqtime)
return;
local_irq_save(flags);
cpu = smp_processor_id();
wallclock = sched_clock_cpu(cpu);
delta = wallclock - __this_cpu_read(irq_start_time);
__this_cpu_add(irq_start_time, delta);
irq_time_write_begin();
/*
* We do not account for softirq time from ksoftirqd here.
* We want to continue accounting softirq time to ksoftirqd thread
* in that case, so as not to confuse scheduler with a special task
* that do not consume any time, but still wants to run.
*/
if (hardirq_count())
__this_cpu_add(cpu_hardirq_time, delta);
else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
__this_cpu_add(cpu_softirq_time, delta);
else
account = false;
irq_time_write_end();
if (account)
sched_account_irqtime(cpu, curr, delta, wallclock);
else if (curr != this_cpu_ksoftirqd())
sched_account_irqstart(cpu, curr, wallclock);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);
static int irqtime_account_hi_update(void)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
unsigned long flags;
u64 latest_ns;
int ret = 0;
local_irq_save(flags);
latest_ns = this_cpu_read(cpu_hardirq_time);
if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
ret = 1;
local_irq_restore(flags);
return ret;
}
static int irqtime_account_si_update(void)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
unsigned long flags;
u64 latest_ns;
int ret = 0;
local_irq_save(flags);
latest_ns = this_cpu_read(cpu_softirq_time);
if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
ret = 1;
local_irq_restore(flags);
return ret;
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
#define sched_clock_irqtime (0)
#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
static inline void task_group_account_field(struct task_struct *p, int index,
u64 tmp)
{
/*
* Since all updates are sure to touch the root cgroup, we
* get ourselves ahead and touch it first. If the root cgroup
* is the only cgroup, then nothing else should be necessary.
*
*/
__this_cpu_add(kernel_cpustat.cpustat[index], tmp);
cpuacct_account_field(p, index, tmp);
}
/*
* Account user cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in user space since the last update
* @cputime_scaled: cputime scaled by cpu frequency
*/
void account_user_time(struct task_struct *p, cputime_t cputime,
cputime_t cputime_scaled)
{
int index;
/* Add user time to process. */
p->utime += cputime;
p->utimescaled += cputime_scaled;
account_group_user_time(p, cputime);
index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
/* Add user time to cpustat. */
task_group_account_field(p, index, (__force u64) cputime);
/* Account for user time used */
acct_account_cputime(p);
#ifdef CONFIG_CPU_FREQ_TIMES
/* Account power usage for user time */
cpufreq_acct_update_power(p, cputime);
#endif
}
/*
* Account guest cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in virtual machine since the last update
* @cputime_scaled: cputime scaled by cpu frequency
*/
static void account_guest_time(struct task_struct *p, cputime_t cputime,
cputime_t cputime_scaled)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
/* Add guest time to process. */
p->utime += cputime;
p->utimescaled += cputime_scaled;
account_group_user_time(p, cputime);
p->gtime += cputime;
/* Add guest time to cpustat. */
if (task_nice(p) > 0) {
cpustat[CPUTIME_NICE] += (__force u64) cputime;
cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
} else {
cpustat[CPUTIME_USER] += (__force u64) cputime;
cpustat[CPUTIME_GUEST] += (__force u64) cputime;
}
}
/*
* Account system cpu time to a process and desired cpustat field
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in kernel space since the last update
* @cputime_scaled: cputime scaled by cpu frequency
* @target_cputime64: pointer to cpustat field that has to be updated
*/
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime,
cputime_t cputime_scaled, int index)
{
/* Add system time to process. */
p->stime += cputime;
p->stimescaled += cputime_scaled;
account_group_system_time(p, cputime);
/* Add system time to cpustat. */
task_group_account_field(p, index, (__force u64) cputime);
/* Account for system time used */
acct_account_cputime(p);
#ifdef CONFIG_CPU_FREQ_TIMES
/* Account power usage for system time */
cpufreq_acct_update_power(p, cputime);
#endif
}
/*
* Account system cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @hardirq_offset: the offset to subtract from hardirq_count()
* @cputime: the cpu time spent in kernel space since the last update
* @cputime_scaled: cputime scaled by cpu frequency
*/
void account_system_time(struct task_struct *p, int hardirq_offset,
cputime_t cputime, cputime_t cputime_scaled)
{
int index;
if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
account_guest_time(p, cputime, cputime_scaled);
return;
}
if (hardirq_count() - hardirq_offset)
index = CPUTIME_IRQ;
else if (in_serving_softirq())
index = CPUTIME_SOFTIRQ;
else
index = CPUTIME_SYSTEM;
__account_system_time(p, cputime, cputime_scaled, index);
}
/*
* Account for involuntary wait time.
* @cputime: the cpu time spent in involuntary wait
*/
void account_steal_time(cputime_t cputime)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}
/*
* Account for idle time.
* @cputime: the cpu time spent in idle wait
*/
void account_idle_time(cputime_t cputime)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
struct rq *rq = this_rq();
if (atomic_read(&rq->nr_iowait) > 0)
cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
else
cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}
static __always_inline bool steal_account_process_tick(void)
{
#ifdef CONFIG_PARAVIRT
if (static_key_false(&paravirt_steal_enabled)) {
u64 steal;
unsigned long steal_jiffies;
steal = paravirt_steal_clock(smp_processor_id());
steal -= this_rq()->prev_steal_time;
/*
* steal is in nsecs but our caller is expecting steal
* time in jiffies. Lets cast the result to jiffies
* granularity and account the rest on the next rounds.
*/
steal_jiffies = nsecs_to_jiffies(steal);
this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);
account_steal_time(jiffies_to_cputime(steal_jiffies));
return steal_jiffies;
}
#endif
return false;
}
/*
* Accumulate raw cputime values of dead tasks (sig->[us]time) and live
* tasks (sum on group iteration) belonging to @tsk's group.
*/
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
struct signal_struct *sig = tsk->signal;
cputime_t utime, stime;
struct task_struct *t;
unsigned int seq, nextseq;
unsigned long flags;
rcu_read_lock();
/* Attempt a lockless read on the first round. */
nextseq = 0;
do {
seq = nextseq;
flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
times->utime = sig->utime;
times->stime = sig->stime;
times->sum_exec_runtime = sig->sum_sched_runtime;
for_each_thread(tsk, t) {
task_cputime(t, &utime, &stime);
times->utime += utime;
times->stime += stime;
times->sum_exec_runtime += task_sched_runtime(t);
}
/* If lockless access failed, take the lock. */
nextseq = 1;
} while (need_seqretry(&sig->stats_lock, seq));
done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
rcu_read_unlock();
}
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
* Account a tick to a process and cpustat
* @p: the process that the cpu time gets accounted to
* @user_tick: is the tick from userspace
* @rq: the pointer to rq
*
* Tick demultiplexing follows the order
* - pending hardirq update
* - pending softirq update
* - user_time
* - idle_time
* - system time
* - check for guest_time
* - else account as system_time
*
* Check for hardirq is done both for system and user time as there is
* no timer going off while we are on hardirq and hence we may never get an
* opportunity to update it solely in system time.
* p->stime and friends are only updated on system time and not on irq
* softirq as those do not count in task exec_runtime any more.
*/
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
struct rq *rq, int ticks)
{
cputime_t scaled = cputime_to_scaled(cputime_one_jiffy);
u64 cputime = (__force u64) cputime_one_jiffy;
u64 *cpustat = kcpustat_this_cpu->cpustat;
if (steal_account_process_tick())
return;
cputime *= ticks;
scaled *= ticks;
if (irqtime_account_hi_update()) {
cpustat[CPUTIME_IRQ] += cputime;
} else if (irqtime_account_si_update()) {
cpustat[CPUTIME_SOFTIRQ] += cputime;
} else if (this_cpu_ksoftirqd() == p) {
/*
* ksoftirqd time do not get accounted in cpu_softirq_time.
* So, we have to handle it separately here.
* Also, p->stime needs to be updated for ksoftirqd.
*/
__account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ);
} else if (user_tick) {
account_user_time(p, cputime, scaled);
} else if (p == rq->idle) {
account_idle_time(cputime);
} else if (p->flags & PF_VCPU) { /* System time or guest time */
account_guest_time(p, cputime, scaled);
} else {
__account_system_time(p, cputime, scaled, CPUTIME_SYSTEM);
}
}
static void irqtime_account_idle_ticks(int ticks)
{
struct rq *rq = this_rq();
irqtime_account_process_tick(current, 0, rq, ticks);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) {}
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
struct rq *rq, int nr_ticks) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
/*
* Use precise platform statistics if available:
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_common_task_switch(struct task_struct *prev)
{
if (is_idle_task(prev))
vtime_account_idle(prev);
else
vtime_account_system(prev);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
vtime_account_user(prev);
#endif
arch_vtime_task_switch(prev);
}
#endif
/*
* Archs that account the whole time spent in the idle task
* (outside irq) as idle time can rely on this and just implement
* vtime_account_system() and vtime_account_idle(). Archs that
* have other meaning of the idle time (s390 only includes the
* time spent by the CPU when it's in low power mode) must override
* vtime_account().
*/
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_common_account_irq_enter(struct task_struct *tsk)
{
if (!in_interrupt()) {
/*
* If we interrupted user, context_tracking_in_user()
* is 1 because the context tracking don't hook
* on irq entry/exit. This way we know if
* we need to flush user time on kernel entry.
*/
if (context_tracking_in_user()) {
vtime_account_user(tsk);
return;
}
if (is_idle_task(tsk)) {
vtime_account_idle(tsk);
return;
}
}
vtime_account_system(tsk);
}
EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
*ut = p->utime;
*st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);
void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
struct task_cputime cputime;
thread_group_cputime(p, &cputime);
*ut = cputime.utime;
*st = cputime.stime;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
* Account a single tick of cpu time.
* @p: the process that the cpu time gets accounted to
* @user_tick: indicates if the tick is a user or a system tick
*/
void account_process_tick(struct task_struct *p, int user_tick)
{
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
if (vtime_accounting_enabled())
return;
if (sched_clock_irqtime) {
irqtime_account_process_tick(p, user_tick, rq, 1);
return;
}
if (steal_account_process_tick())
return;
if (user_tick)
account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
one_jiffy_scaled);
else
account_idle_time(cputime_one_jiffy);
}
/*
* Account multiple ticks of steal time.
* @p: the process from which the cpu time has been stolen
* @ticks: number of stolen ticks
*/
void account_steal_ticks(unsigned long ticks)
{
account_steal_time(jiffies_to_cputime(ticks));
}
/*
* Account multiple ticks of idle time.
* @ticks: number of stolen ticks
*/
void account_idle_ticks(unsigned long ticks)
{
if (sched_clock_irqtime) {
irqtime_account_idle_ticks(ticks);
return;
}
account_idle_time(jiffies_to_cputime(ticks));
}
/*
* Perform (stime * rtime) / total, but avoid multiplication overflow by
* loosing precision when the numbers are big.
*/
static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
{
u64 scaled;
for (;;) {
/* Make sure "rtime" is the bigger of stime/rtime */
if (stime > rtime)
swap(rtime, stime);
/* Make sure 'total' fits in 32 bits */
if (total >> 32)
goto drop_precision;
/* Does rtime (and thus stime) fit in 32 bits? */
if (!(rtime >> 32))
break;
/* Can we just balance rtime/stime rather than dropping bits? */
if (stime >> 31)
goto drop_precision;
/* We can grow stime and shrink rtime and try to make them both fit */
stime <<= 1;
rtime >>= 1;
continue;
drop_precision:
/* We drop from rtime, it has more bits than stime */
rtime >>= 1;
total >>= 1;
}
/*
* Make sure gcc understands that this is a 32x32->64 multiply,
* followed by a 64/32->64 divide.
*/
scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
return (__force cputime_t) scaled;
}
/*
* Adjust tick based cputime random precision against scheduler runtime
* accounting.
*
* Tick based cputime accounting depend on random scheduling timeslices of a
* task to be interrupted or not by the timer. Depending on these
* circumstances, the number of these interrupts may be over or
* under-optimistic, matching the real user and system cputime with a variable
* precision.
*
* Fix this by scaling these tick based values against the total runtime
* accounted by the CFS scheduler.
*
* This code provides the following guarantees:
*
* stime + utime == rtime
* stime_i+1 >= stime_i, utime_i+1 >= utime_i
*
* Assuming that rtime_i+1 >= rtime_i.
*/
static void cputime_adjust(struct task_cputime *curr,
struct prev_cputime *prev,
cputime_t *ut, cputime_t *st)
{
cputime_t rtime, stime, utime;
unsigned long flags;
/* Serialize concurrent callers such that we can honour our guarantees */
raw_spin_lock_irqsave(&prev->lock, flags);
rtime = nsecs_to_cputime(curr->sum_exec_runtime);
/*
* This is possible under two circumstances:
* - rtime isn't monotonic after all (a bug);
* - we got reordered by the lock.
*
* In both cases this acts as a filter such that the rest of the code
* can assume it is monotonic regardless of anything else.
*/
if (prev->stime + prev->utime >= rtime)
goto out;
stime = curr->stime;
utime = curr->utime;
/*
* If either stime or both stime and utime are 0, assume all runtime is
* userspace. Once a task gets some ticks, the monotonicy code at
* 'update' will ensure things converge to the observed ratio.
*/
if (stime == 0) {
utime = rtime;
goto update;
}
if (utime == 0) {
stime = rtime;
goto update;
}
stime = scale_stime((__force u64)stime, (__force u64)rtime,
(__force u64)(stime + utime));
update:
/*
* Make sure stime doesn't go backwards; this preserves monotonicity
* for utime because rtime is monotonic.
*
* utime_i+1 = rtime_i+1 - stime_i
* = rtime_i+1 - (rtime_i - utime_i)
* = (rtime_i+1 - rtime_i) + utime_i
* >= utime_i
*/
if (stime < prev->stime)
stime = prev->stime;
utime = rtime - stime;
/*
* Make sure utime doesn't go backwards; this still preserves
* monotonicity for stime, analogous argument to above.
*/
if (utime < prev->utime) {
utime = prev->utime;
stime = rtime - utime;
}
prev->stime = stime;
prev->utime = utime;
out:
*ut = prev->utime;
*st = prev->stime;
raw_spin_unlock_irqrestore(&prev->lock, flags);
}
void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
struct task_cputime cputime = {
.sum_exec_runtime = p->se.sum_exec_runtime,
};
task_cputime(p, &cputime.utime, &cputime.stime);
cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);
void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
struct task_cputime cputime;
thread_group_cputime(p, &cputime);
cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static unsigned long long vtime_delta(struct task_struct *tsk)
{
unsigned long long clock;
clock = local_clock();
if (clock < tsk->vtime_snap)
return 0;
return clock - tsk->vtime_snap;
}
static cputime_t get_vtime_delta(struct task_struct *tsk)
{
unsigned long long delta = vtime_delta(tsk);
WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
tsk->vtime_snap += delta;
/* CHECKME: always safe to convert nsecs to cputime? */
return nsecs_to_cputime(delta);
}
static void __vtime_account_system(struct task_struct *tsk)
{
cputime_t delta_cpu = get_vtime_delta(tsk);
account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
}
void vtime_account_system(struct task_struct *tsk)
{
write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_gen_account_irq_exit(struct task_struct *tsk)
{
write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
if (context_tracking_in_user())
tsk->vtime_snap_whence = VTIME_USER;
write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_account_user(struct task_struct *tsk)
{
cputime_t delta_cpu;
write_seqlock(&tsk->vtime_seqlock);
delta_cpu = get_vtime_delta(tsk);
tsk->vtime_snap_whence = VTIME_SYS;
account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_user_enter(struct task_struct *tsk)
{
write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
tsk->vtime_snap_whence = VTIME_USER;
write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_guest_enter(struct task_struct *tsk)
{
/*
* The flags must be updated under the lock with
* the vtime_snap flush and update.
* That enforces a right ordering and update sequence
* synchronization against the reader (task_gtime())
* that can thus safely catch up with a tickless delta.
*/
write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
current->flags |= PF_VCPU;
write_sequnlock(&tsk->vtime_seqlock);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);
void vtime_guest_exit(struct task_struct *tsk)
{
write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
current->flags &= ~PF_VCPU;
write_sequnlock(&tsk->vtime_seqlock);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);
void vtime_account_idle(struct task_struct *tsk)
{
cputime_t delta_cpu = get_vtime_delta(tsk);
account_idle_time(delta_cpu);
}
void arch_vtime_task_switch(struct task_struct *prev)
{
write_seqlock(&prev->vtime_seqlock);
prev->vtime_snap_whence = VTIME_SLEEPING;
write_sequnlock(&prev->vtime_seqlock);
write_seqlock(&current->vtime_seqlock);
current->vtime_snap_whence = VTIME_SYS;
current->vtime_snap = sched_clock_cpu(smp_processor_id());
write_sequnlock(&current->vtime_seqlock);
}
void vtime_init_idle(struct task_struct *t, int cpu)
{
unsigned long flags;
write_seqlock_irqsave(&t->vtime_seqlock, flags);
t->vtime_snap_whence = VTIME_SYS;
t->vtime_snap = sched_clock_cpu(cpu);
write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
}
cputime_t task_gtime(struct task_struct *t)
{
unsigned int seq;
cputime_t gtime;
if (!context_tracking_is_enabled())
return t->gtime;
do {
seq = read_seqbegin(&t->vtime_seqlock);
gtime = t->gtime;
if (t->flags & PF_VCPU)
gtime += vtime_delta(t);
} while (read_seqretry(&t->vtime_seqlock, seq));
return gtime;
}
/*
* Fetch cputime raw values from fields of task_struct and
* add up the pending nohz execution time since the last
* cputime snapshot.
*/
static void
fetch_task_cputime(struct task_struct *t,
cputime_t *u_dst, cputime_t *s_dst,
cputime_t *u_src, cputime_t *s_src,
cputime_t *udelta, cputime_t *sdelta)
{
unsigned int seq;
unsigned long long delta;
do {
*udelta = 0;
*sdelta = 0;
seq = read_seqbegin(&t->vtime_seqlock);
if (u_dst)
*u_dst = *u_src;
if (s_dst)
*s_dst = *s_src;
/* Task is sleeping, nothing to add */
if (t->vtime_snap_whence == VTIME_SLEEPING ||
is_idle_task(t))
continue;
delta = vtime_delta(t);
/*
* Task runs either in user or kernel space, add pending nohz time to
* the right place.
*/
if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
*udelta = delta;
} else {
if (t->vtime_snap_whence == VTIME_SYS)
*sdelta = delta;
}
} while (read_seqretry(&t->vtime_seqlock, seq));
}
void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{
cputime_t udelta, sdelta;
fetch_task_cputime(t, utime, stime, &t->utime,
&t->stime, &udelta, &sdelta);
if (utime)
*utime += udelta;
if (stime)
*stime += sdelta;
}
void task_cputime_scaled(struct task_struct *t,
cputime_t *utimescaled, cputime_t *stimescaled)
{
cputime_t udelta, sdelta;
fetch_task_cputime(t, utimescaled, stimescaled,
&t->utimescaled, &t->stimescaled, &udelta, &sdelta);
if (utimescaled)
*utimescaled += cputime_to_scaled(udelta);
if (stimescaled)
*stimescaled += cputime_to_scaled(sdelta);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */