android_kernel_oneplus_msm8998/kernel/exit.c
Runmin Wang 9cc5c789d9 Merge remote-tracking branch 'msm4.4/tmp-da9a92f' into msm-4.4
* origin/tmp-da9a92f:
  arm64: kaslr: increase randomization granularity
  arm64: relocatable: deal with physically misaligned kernel images
  arm64: don't map TEXT_OFFSET bytes below the kernel if we can avoid it
  arm64: kernel: replace early 64-bit literal loads with move-immediates
  arm64: introduce mov_q macro to move a constant into a 64-bit register
  arm64: kernel: perform relocation processing from ID map
  arm64: kernel: use literal for relocated address of __secondary_switched
  arm64: kernel: don't export local symbols from head.S
  arm64: simplify kernel segment mapping granularity
  arm64: cover the .head.text section in the .text segment mapping
  arm64: move early boot code to the .init segment
  arm64: use 'segment' rather than 'chunk' to describe mapped kernel regions
  arm64: mm: Mark .rodata as RO
  Linux 4.4.16
  ovl: verify upper dentry before unlink and rename
  drm/i915: Revert DisplayPort fast link training feature
  tmpfs: fix regression hang in fallocate undo
  tmpfs: don't undo fallocate past its last page
  crypto: qat - make qat_asym_algs.o depend on asn1 headers
  xen/acpi: allow xen-acpi-processor driver to load on Xen 4.7
  File names with trailing period or space need special case conversion
  cifs: dynamic allocation of ntlmssp blob
  Fix reconnect to not defer smb3 session reconnect long after socket reconnect
  53c700: fix BUG on untagged commands
  s390: fix test_fp_ctl inline assembly contraints
  scsi: fix race between simultaneous decrements of ->host_failed
  ovl: verify upper dentry in ovl_remove_and_whiteout()
  ovl: Copy up underlying inode's ->i_mode to overlay inode
  ARM: mvebu: fix HW I/O coherency related deadlocks
  ARM: dts: armada-38x: fix MBUS_ID for crypto SRAM on Armada 385 Linksys
  ARM: sunxi/dt: make the CHIP inherit from allwinner,sun5i-a13
  ALSA: hda: add AMD Stoney PCI ID with proper driver caps
  ALSA: hda - fix use-after-free after module unload
  ALSA: ctl: Stop notification after disconnection
  ALSA: pcm: Free chmap at PCM free callback, too
  ALSA: hda/realtek - add new pin definition in alc225 pin quirk table
  ALSA: hda - fix read before array start
  ALSA: hda - Add PCI ID for Kabylake-H
  ALSA: hda/realtek: Add Lenovo L460 to docking unit fixup
  ALSA: timer: Fix negative queue usage by racy accesses
  ALSA: echoaudio: Fix memory allocation
  ALSA: au88x0: Fix calculation in vortex_wtdma_bufshift()
  ALSA: hda / realtek - add two more Thinkpad IDs (5050,5053) for tpt460 fixup
  ALSA: hda - Fix the headset mic jack detection on Dell machine
  ALSA: dummy: Fix a use-after-free at closing
  hwmon: (dell-smm) Cache fan_type() calls and change fan detection
  hwmon: (dell-smm) Disallow fan_type() calls on broken machines
  hwmon: (dell-smm) Restrict fan control and serial number to CAP_SYS_ADMIN by default
  tty/vt/keyboard: fix OOB access in do_compute_shiftstate()
  tty: vt: Fix soft lockup in fbcon cursor blink timer.
  iio:ad7266: Fix probe deferral for vref
  iio:ad7266: Fix support for optional regulators
  iio:ad7266: Fix broken regulator error handling
  iio: accel: kxsd9: fix the usage of spi_w8r8()
  staging: iio: accel: fix error check
  iio: hudmidity: hdc100x: fix incorrect shifting and scaling
  iio: humidity: hdc100x: fix IIO_TEMP channel reporting
  iio: humidity: hdc100x: correct humidity integration time mask
  iio: proximity: as3935: fix buffer stack trashing
  iio: proximity: as3935: remove triggered buffer processing
  iio: proximity: as3935: correct IIO_CHAN_INFO_RAW output
  iio: light apds9960: Add the missing dev.parent
  iio:st_pressure: fix sampling gains (bring inline with ABI)
  iio: Fix error handling in iio_trigger_attach_poll_func
  xen/balloon: Fix declared-but-not-defined warning
  perf/x86: Fix undefined shift on 32-bit kernels
  memory: omap-gpmc: Fix omap gpmc EXTRADELAY timing
  drm/vmwgfx: Fix error paths when mapping framebuffer
  drm/vmwgfx: Delay pinning fbdev framebuffer until after mode set
  drm/vmwgfx: Check pin count before attempting to move a buffer
  drm/vmwgfx: Work around mode set failure in 2D VMs
  drm/vmwgfx: Add an option to change assumed FB bpp
  drm/ttm: Make ttm_bo_mem_compat available
  drm: atmel-hlcdc: actually disable scaling when no scaling is required
  drm: make drm_atomic_set_mode_prop_for_crtc() more reliable
  drm: add missing drm_mode_set_crtcinfo call
  drm/i915: Update CDCLK_FREQ register on BDW after changing cdclk frequency
  drm/i915: Update ifdeffery for mutex->owner
  drm/i915: Refresh cached DP port register value on resume
  drm/i915/ilk: Don't disable SSC source if it's in use
  drm/nouveau/disp/sor/gf119: select correct sor when poking training pattern
  drm/nouveau: fix for disabled fbdev emulation
  drm/nouveau/fbcon: fix out-of-bounds memory accesses
  drm/nouveau/gr/gf100-: update sm error decoding from gk20a nvgpu headers
  drm/nouveau/disp/sor/gf119: both links use the same training register
  virtio_balloon: fix PFN format for virtio-1
  drm/dp/mst: Always clear proposed vcpi table for port.
  drm/amdkfd: destroy dbgmgr in notifier release
  drm/amdkfd: unbind only existing processes
  ubi: Make recover_peb power cut aware
  drm/amdgpu/gfx7: fix broken condition check
  drm/radeon: fix asic initialization for virtualized environments
  btrfs: account for non-CoW'd blocks in btrfs_abort_transaction
  percpu: fix synchronization between synchronous map extension and chunk destruction
  percpu: fix synchronization between chunk->map_extend_work and chunk destruction
  af_unix: fix hard linked sockets on overlay
  vfs: add d_real_inode() helper
  arm64: Rework valid_user_regs
  ipmi: Remove smi_msg from waiting_rcv_msgs list before handle_one_recv_msg()
  drm/mgag200: Black screen fix for G200e rev 4
  iommu/amd: Fix unity mapping initialization race
  iommu/vt-d: Enable QI on all IOMMUs before setting root entry
  iommu/arm-smmu: Wire up map_sg for arm-smmu-v3
  base: make module_create_drivers_dir race-free
  tracing: Handle NULL formats in hold_module_trace_bprintk_format()
  HID: multitouch: enable palm rejection for Windows Precision Touchpad
  HID: hiddev: validate num_values for HIDIOCGUSAGES, HIDIOCSUSAGES commands
  HID: elo: kill not flush the work
  KVM: nVMX: VMX instructions: fix segment checks when L1 is in long mode.
  kvm: Fix irq route entries exceeding KVM_MAX_IRQ_ROUTES
  KEYS: potential uninitialized variable
  ARCv2: LLSC: software backoff is NOT needed starting HS2.1c
  ARCv2: Check for LL-SC livelock only if LLSC is enabled
  ipv6: Fix mem leak in rt6i_pcpu
  cdc_ncm: workaround for EM7455 "silent" data interface
  net_sched: fix mirrored packets checksum
  packet: Use symmetric hash for PACKET_FANOUT_HASH.
  sched/fair: Fix cfs_rq avg tracking underflow
  UBIFS: Implement ->migratepage()
  mm: Export migrate_page_move_mapping and migrate_page_copy
  MIPS: KVM: Fix modular KVM under QEMU
  ARM: 8579/1: mm: Fix definition of pmd_mknotpresent
  ARM: 8578/1: mm: ensure pmd_present only checks the valid bit
  ARM: imx6ul: Fix Micrel PHY mask
  NFS: Fix another OPEN_DOWNGRADE bug
  make nfs_atomic_open() call d_drop() on all ->open_context() errors.
  nfsd: check permissions when setting ACLs
  posix_acl: Add set_posix_acl
  nfsd: Extend the mutex holding region around in nfsd4_process_open2()
  nfsd: Always lock state exclusively.
  nfsd4/rpc: move backchannel create logic into rpc code
  writeback: use higher precision calculation in domain_dirty_limits()
  thermal: cpu_cooling: fix improper order during initialization
  uvc: Forward compat ioctls to their handlers directly
  Revert "gpiolib: Split GPIO flags parsing and GPIO configuration"
  x86/amd_nb: Fix boot crash on non-AMD systems
  kprobes/x86: Clear TF bit in fault on single-stepping
  x86, build: copy ldlinux.c32 to image.iso
  locking/static_key: Fix concurrent static_key_slow_inc()
  locking/qspinlock: Fix spin_unlock_wait() some more
  locking/ww_mutex: Report recursive ww_mutex locking early
  of: irq: fix of_irq_get[_byname]() kernel-doc
  of: fix autoloading due to broken modalias with no 'compatible'
  mnt: If fs_fully_visible fails call put_filesystem.
  mnt: Account for MS_RDONLY in fs_fully_visible
  mnt: fs_fully_visible test the proper mount for MNT_LOCKED
  usb: common: otg-fsm: add license to usb-otg-fsm
  USB: EHCI: declare hostpc register as zero-length array
  usb: dwc2: fix regression on big-endian PowerPC/ARM systems
  powerpc/tm: Always reclaim in start_thread() for exec() class syscalls
  powerpc/pseries: Fix IBM_ARCH_VEC_NRCORES_OFFSET since POWER8NVL was added
  powerpc/pseries: Fix PCI config address for DDW
  powerpc/iommu: Remove the dependency on EEH struct in DDW mechanism
  IB/mlx4: Properly initialize GRH TClass and FlowLabel in AHs
  IB/cm: Fix a recently introduced locking bug
  EDAC, sb_edac: Fix rank lookup on Broadwell
  mac80211: Fix mesh estab_plinks counting in STA removal case
  mac80211_hwsim: Add missing check for HWSIM_ATTR_SIGNAL
  mac80211: mesh: flush mesh paths unconditionally
  mac80211: fix fast_tx header alignment
  Linux 4.4.15
  usb: dwc3: exynos: Fix deferred probing storm.
  usb: host: ehci-tegra: Grab the correct UTMI pads reset
  usb: gadget: fix spinlock dead lock in gadgetfs
  USB: mos7720: delete parport
  xhci: Fix handling timeouted commands on hosts in weird states.
  USB: xhci: Add broken streams quirk for Frescologic device id 1009
  usb: xhci-plat: properly handle probe deferral for devm_clk_get()
  xhci: Cleanup only when releasing primary hcd
  usb: musb: host: correct cppi dma channel for isoch transfer
  usb: musb: Ensure rx reinit occurs for shared_fifo endpoints
  usb: musb: Stop bulk endpoint while queue is rotated
  usb: musb: only restore devctl when session was set in backup
  usb: quirks: Add no-lpm quirk for Acer C120 LED Projector
  usb: quirks: Fix sorting
  USB: uas: Fix slave queue_depth not being set
  crypto: user - re-add size check for CRYPTO_MSG_GETALG
  crypto: ux500 - memmove the right size
  crypto: vmx - Increase priority of aes-cbc cipher
  AX.25: Close socket connection on session completion
  bpf: try harder on clones when writing into skb
  net: alx: Work around the DMA RX overflow issue
  net: macb: fix default configuration for GMAC on AT91
  neigh: Explicitly declare RCU-bh read side critical section in neigh_xmit()
  bpf, perf: delay release of BPF prog after grace period
  sock_diag: do not broadcast raw socket destruction
  Bridge: Fix ipv6 mc snooping if bridge has no ipv6 address
  ipmr/ip6mr: Initialize the last assert time of mfc entries.
  netem: fix a use after free
  esp: Fix ESN generation under UDP encapsulation
  sit: correct IP protocol used in ipip6_err
  net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG
  net_sched: fix pfifo_head_drop behavior vs backlog
  sdcardfs: Truncate packages_gid.list on overflow
  UPSTREAM: cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind
  BACKPORT: proc: add /proc/<pid>/timerslack_ns interface
  BACKPORT: timer: convert timer_slack_ns from unsigned long to u64
  netfilter: xt_quota2: make quota2_log work well
  Revert "usb: gadget: prevent change of Host MAC address of 'usb0' interface"
  BACKPORT: PM / sleep: Go direct_complete if driver has no callbacks
  ANDROID: base-cfg: enable UID_CPUTIME
  UPSTREAM: USB: usbfs: fix potential infoleak in devio
  UPSTREAM: ALSA: timer: Fix leak in events via snd_timer_user_ccallback
  UPSTREAM: ALSA: timer: Fix leak in events via snd_timer_user_tinterrupt
  UPSTREAM: ALSA: timer: Fix leak in SNDRV_TIMER_IOCTL_PARAMS
  ANDROID: configs: remove unused configs
  ANDROID: cpu: send KOBJ_ONLINE event when enabling cpus
  ANDROID: dm verity fec: initialize recursion level
  ANDROID: dm verity fec: fix RS block calculation
  Linux 4.4.14
  netfilter: x_tables: introduce and use xt_copy_counters_from_user
  netfilter: x_tables: do compat validation via translate_table
  netfilter: x_tables: xt_compat_match_from_user doesn't need a retval
  netfilter: ip6_tables: simplify translate_compat_table args
  netfilter: ip_tables: simplify translate_compat_table args
  netfilter: arp_tables: simplify translate_compat_table args
  netfilter: x_tables: don't reject valid target size on some architectures
  netfilter: x_tables: validate all offsets and sizes in a rule
  netfilter: x_tables: check for bogus target offset
  netfilter: x_tables: check standard target size too
  netfilter: x_tables: add compat version of xt_check_entry_offsets
  netfilter: x_tables: assert minimum target size
  netfilter: x_tables: kill check_entry helper
  netfilter: x_tables: add and use xt_check_entry_offsets
  netfilter: x_tables: validate targets of jumps
  netfilter: x_tables: don't move to non-existent next rule
  drm/core: Do not preserve framebuffer on rmfb, v4.
  crypto: qat - fix adf_ctl_drv.c:undefined reference to adf_init_pf_wq
  netfilter: x_tables: fix unconditional helper
  netfilter: x_tables: make sure e->next_offset covers remaining blob size
  netfilter: x_tables: validate e->target_offset early
  MIPS: Fix 64k page support for 32 bit kernels.
  sparc64: Fix return from trap window fill crashes.
  sparc: Harden signal return frame checks.
  sparc64: Take ctx_alloc_lock properly in hugetlb_setup().
  sparc64: Reduce TLB flushes during hugepte changes
  sparc/PCI: Fix for panic while enabling SR-IOV
  sparc64: Fix sparc64_set_context stack handling.
  sparc64: Fix numa node distance initialization
  sparc64: Fix bootup regressions on some Kconfig combinations.
  sparc: Fix system call tracing register handling.
  fix d_walk()/non-delayed __d_free() race
  sched: panic on corrupted stack end
  proc: prevent stacking filesystems on top
  x86/entry/traps: Don't force in_interrupt() to return true in IST handlers
  wext: Fix 32 bit iwpriv compatibility issue with 64 bit Kernel
  ecryptfs: forbid opening files without mmap handler
  memcg: add RCU locking around css_for_each_descendant_pre() in memcg_offline_kmem()
  parisc: Fix pagefault crash in unaligned __get_user() call
  pinctrl: mediatek: fix dual-edge code defect
  powerpc/pseries: Add POWER8NVL support to ibm,client-architecture-support call
  powerpc: Use privileged SPR number for MMCR2
  powerpc: Fix definition of SIAR and SDAR registers
  powerpc/pseries/eeh: Handle RTAS delay requests in configure_bridge
  arm64: mm: always take dirty state from new pte in ptep_set_access_flags
  arm64: Provide "model name" in /proc/cpuinfo for PER_LINUX32 tasks
  crypto: ccp - Fix AES XTS error for request sizes above 4096
  crypto: public_key: select CRYPTO_AKCIPHER
  irqchip/gic-v3: Fix ICC_SGI1R_EL1.INTID decoding mask
  s390/bpf: reduce maximum program size to 64 KB
  s390/bpf: fix recache skb->data/hlen for skb_vlan_push/pop
  gpio: bcm-kona: fix bcm_kona_gpio_reset() warnings
  ARM: fix PTRACE_SETVFPREGS on SMP systems
  ALSA: hda/realtek: Add T560 docking unit fixup
  ALSA: hda/realtek - Add support for new codecs ALC700/ALC701/ALC703
  ALSA: hda/realtek - ALC256 speaker noise issue
  ALSA: hda - Fix headset mic detection problem for Dell machine
  ALSA: hda - Add PCI ID for Kabylake
  KVM: irqfd: fix NULL pointer dereference in kvm_irq_map_gsi
  KVM: x86: fix OOPS after invalid KVM_SET_DEBUGREGS
  vxlan, gre, geneve: Set a large MTU on ovs-created tunnel devices
  geneve: Relax MTU constraints
  vxlan: Relax MTU constraints
  ipv6: Skip XFRM lookup if dst_entry in socket cache is valid
  l2tp: fix configuration passed to setup_udp_tunnel_sock()
  bridge: Don't insert unnecessary local fdb entry on changing mac address
  tcp: record TLP and ER timer stats in v6 stats
  vxlan: Accept user specified MTU value when create new vxlan link
  team: don't call netdev_change_features under team->lock
  sfc: on MC reset, clear PIO buffer linkage in TXQs
  bpf, inode: disallow userns mounts
  uapi glibc compat: fix compilation when !__USE_MISC in glibc
  udp: prevent skbs lingering in tunnel socket queues
  bpf: Use mount_nodev not mount_ns to mount the bpf filesystem
  tuntap: correctly wake up process during uninit
  switchdev: pass pointer to fib_info instead of copy
  tipc: fix nametable publication field in nl compat
  netlink: Fix dump skb leak/double free
  tipc: check nl sock before parsing nested attributes
  scsi: Add QEMU CD-ROM to VPD Inquiry Blacklist
  scsi_lib: correctly retry failed zero length REQ_TYPE_FS commands
  cs-etm: associating output packet with CPU they executed on
  cs-etm: removing unecessary structure field
  cs-etm: account for each trace buffer in the queue
  cs-etm: avoid casting variable
  perf tools: fixing Makefile problems
  perf tools: new naming convention for openCSD
  perf scripts: Add python scripts for CoreSight traces
  perf tools: decoding capailitity for CoreSight traces
  perf symbols: Check before overwriting build_id
  perf tools: pushing driver configuration down to the kernel
  perf tools: add infrastructure for PMU specific configuration
  coresight: etm-perf: incorporating sink definition from the cmd line
  coresight: adding sink parameter to function coresight_build_path()
  perf: passing struct perf_event to function setup_aux()
  perf/core: adding PMU driver specific configuration
  perf tools: adding coresight etm PMU record capabilities
  perf tools: making coresight PMU listable
  coresight: tmc: implementing TMC-ETR AUX space API
  coresight: Add support for Juno platform
  coresight: Handle build path error
  coresight: Fix erroneous memset in tmc_read_unprepare_etr
  coresight: Fix tmc_read_unprepare_etr
  coresight: Fix NULL pointer dereference in _coresight_build_path
  ANDROID: dm verity fec: add missing release from fec_ktype
  ANDROID: dm verity fec: limit error correction recursion
  ANDROID: restrict access to perf events
  FROMLIST: security,perf: Allow further restriction of perf_event_open
  BACKPORT: perf tools: Document the perf sysctls
  Revert "armv6 dcc tty driver"
  Revert "arm: dcc_tty: fix armv6 dcc tty build failure"
  ARM64: Ignore Image-dtb from git point of view
  arm64: add option to build Image-dtb
  ANDROID: usb: gadget: f_midi: set fi->f to NULL when free f_midi function
  Linux 4.4.13
  xfs: handle dquot buffer readahead in log recovery correctly
  xfs: print name of verifier if it fails
  xfs: skip stale inodes in xfs_iflush_cluster
  xfs: fix inode validity check in xfs_iflush_cluster
  xfs: xfs_iflush_cluster fails to abort on error
  xfs: Don't wrap growfs AGFL indexes
  xfs: disallow rw remount on fs with unknown ro-compat features
  gcov: disable tree-loop-im to reduce stack usage
  scripts/package/Makefile: rpmbuild add support of RPMOPTS
  dma-debug: avoid spinlock recursion when disabling dma-debug
  PM / sleep: Handle failures in device_suspend_late() consistently
  ext4: silence UBSAN in ext4_mb_init()
  ext4: address UBSAN warning in mb_find_order_for_block()
  ext4: fix oops on corrupted filesystem
  ext4: clean up error handling when orphan list is corrupted
  ext4: fix hang when processing corrupted orphaned inode list
  drm/imx: Match imx-ipuv3-crtc components using device node in platform data
  drm/i915: Don't leave old junk in ilk active watermarks on readout
  drm/atomic: Verify connector->funcs != NULL when clearing states
  drm/fb_helper: Fix references to dev->mode_config.num_connector
  drm/i915/fbdev: Fix num_connector references in intel_fb_initial_config()
  drm/amdgpu: Fix hdmi deep color support.
  drm/amdgpu: use drm_mode_vrefresh() rather than mode->vrefresh
  drm/vmwgfx: Fix order of operation
  drm/vmwgfx: use vmw_cmd_dx_cid_check for query commands.
  drm/vmwgfx: Enable SVGA_3D_CMD_DX_SET_PREDICATION
  drm/gma500: Fix possible out of bounds read
  sunrpc: fix stripping of padded MIC tokens
  xen: use same main loop for counting and remapping pages
  xen/events: Don't move disabled irqs
  powerpc/eeh: Restore initial state in eeh_pe_reset_and_recover()
  Revert "powerpc/eeh: Fix crash in eeh_add_device_early() on Cell"
  powerpc/eeh: Don't report error in eeh_pe_reset_and_recover()
  powerpc/book3s64: Fix branching to OOL handlers in relocatable kernel
  pipe: limit the per-user amount of pages allocated in pipes
  QE-UART: add "fsl,t1040-ucc-uart" to of_device_id
  wait/ptrace: assume __WALL if the child is traced
  mm: use phys_addr_t for reserve_bootmem_region() arguments
  media: v4l2-compat-ioctl32: fix missing reserved field copy in put_v4l2_create32
  PCI: Disable all BAR sizing for devices with non-compliant BARs
  pinctrl: exynos5440: Use off-stack memory for pinctrl_gpio_range
  clk: bcm2835: divider value has to be 1 or more
  clk: bcm2835: pll_off should only update CM_PLL_ANARST
  clk: at91: fix check of clk_register() returned value
  clk: bcm2835: Fix PLL poweron
  cpuidle: Fix cpuidle_state_is_coupled() argument in cpuidle_enter()
  cpuidle: Indicate when a device has been unregistered
  PM / Runtime: Fix error path in pm_runtime_force_resume()
  mfd: intel_soc_pmic_core: Terminate panel control GPIO lookup table correctly
  mfd: intel-lpss: Save register context on suspend
  hwmon: (ads7828) Enable internal reference
  aacraid: Fix for KDUMP driver hang
  aacraid: Fix for aac_command_thread hang
  aacraid: Relinquish CPU during timeout wait
  rtlwifi: pci: use dev_kfree_skb_irq instead of kfree_skb in rtl_pci_reset_trx_ring
  rtlwifi: Fix logic error in enter/exit power-save mode
  rtlwifi: btcoexist: Implement antenna selection
  rtlwifi: rtl8723be: Add antenna select module parameter
  hwrng: exynos - Fix unbalanced PM runtime put on timeout error path
  ath5k: Change led pin configuration for compaq c700 laptop
  ath10k: fix kernel panic, move arvifs list head init before htt init
  ath10k: fix rx_channel during hw reconfigure
  ath10k: fix firmware assert in monitor mode
  ath10k: fix debugfs pktlog_filter write
  ath9k: Fix LED polarity for some Mini PCI AR9220 MB92 cards.
  ath9k: Add a module parameter to invert LED polarity.
  ARM: dts: imx35: restore existing used clock enumeration
  ARM: dts: exynos: Add interrupt line to MAX8997 PMIC on exynos4210-trats
  ARM: dts: at91: fix typo in sama5d2 PIN_PD24 description
  ARM: mvebu: fix GPIO config on the Linksys boards
  Input: uinput - handle compat ioctl for UI_SET_PHYS
  ASoC: ak4642: Enable cache usage to fix crashes on resume
  affs: fix remount failure when there are no options changed
  MIPS: VDSO: Build with `-fno-strict-aliasing'
  MIPS: lib: Mark intrinsics notrace
  MIPS: Build microMIPS VDSO for microMIPS kernels
  MIPS: Fix sigreturn via VDSO on microMIPS kernel
  MIPS: ptrace: Prevent writes to read-only FCSR bits
  MIPS: ptrace: Fix FP context restoration FCSR regression
  MIPS: Disable preemption during prctl(PR_SET_FP_MODE, ...)
  MIPS: Prevent "restoration" of MSA context in non-MSA kernels
  MIPS: Fix MSA ld_*/st_* asm macros to use PTR_ADDU
  MIPS: Use copy_s.fmt rather than copy_u.fmt
  MIPS: Loongson-3: Reserve 32MB for RS780E integrated GPU
  MIPS: Reserve nosave data for hibernation
  MIPS: ath79: make bootconsole wait for both THRE and TEMT
  MIPS: Sync icache & dcache in set_pte_at
  MIPS: Handle highmem pages in __update_cache
  MIPS: Flush highmem pages in __flush_dcache_page
  MIPS: Fix watchpoint restoration
  MIPS: Fix uapi include in exported asm/siginfo.h
  MIPS: Fix siginfo.h to use strict posix types
  MIPS: Avoid using unwind_stack() with usermode
  MIPS: Don't unwind to user mode with EVA
  MIPS: MSA: Fix a link error on `_init_msa_upper' with older GCC
  MIPS: math-emu: Fix jalr emulation when rd == $0
  MIPS64: R6: R2 emulation bugfix
  coresight: etb10: adjust read pointer only when needed
  coresight: configuring ETF in FIFO mode when acting as link
  coresight: tmc: implementing TMC-ETF AUX space API
  coresight: moving struct cs_buffers to header file
  coresight: tmc: keep track of memory width
  coresight: tmc: make sysFS and Perf mode mutually exclusive
  coresight: tmc: dump system memory content only when needed
  coresight: tmc: adding mode of operation for link/sinks
  coresight: tmc: getting rid of multiple read access
  coresight: tmc: allocating memory when needed
  coresight: tmc: making prepare/unprepare functions generic
  coresight: tmc: splitting driver in ETB/ETF and ETR components
  coresight: tmc: cleaning up header file
  coresight: tmc: introducing new header file
  coresight: tmc: clearly define number of transfers per burst
  coresight: tmc: re-implementing tmc_read_prepare/unprepare() functions
  coresight: tmc: waiting for TMCReady bit before programming
  coresight: tmc: modifying naming convention
  coresight: tmc: adding sysFS management entries
  coresight: etm4x: add tracer ID for A72 Maia processor.
  coresight: etb10: fixing the right amount of words to read
  coresight: stm: adding driver for CoreSight STM component
  coresight: adding path for STM device
  coresight: etm4x: modify q_support type
  coresight: no need to do the forced type conversion
  coresight: removing gratuitous boot time log messages
  coresight: etb10: splitting sysFS "status" entry
  coresight: moving coresight_simple_func() to header file
  coresight: etm4x: implementing the perf PMU API
  coresight: etm4x: implementing user/kernel mode tracing
  coresight: etm4x: moving etm_drvdata::enable to atomic field
  coresight: etm4x: unlocking tracers in default arch init
  coresight: etm4x: splitting etmv4 default configuration
  coresight: etm4x: splitting struct etmv4_drvdata
  coresight: etm4x: adding config and traceid registers
  coresight: etm4x: moving sysFS entries to a dedicated file
  stm class: Support devices that override software assigned masters
  stm class: Remove unnecessary pointer increment
  stm class: Fix stm device initialization order
  stm class: Do not leak the chrdev in error path
  stm class: Remove a pointless line
  stm class: stm_heartbeat: Make nr_devs parameter read-only
  stm class: dummy_stm: Make nr_dummies parameter read-only
  MAINTAINERS: Add a git tree for the stm class
  perf/ring_buffer: Document AUX API usage
  perf/core: Free AUX pages in unmap path
  perf/ring_buffer: Refuse to begin AUX transaction after rb->aux_mmap_count drops
  perf auxtrace: Add perf_evlist pointer to *info_priv_size()
  perf session: Simplify tool stubs
  perf inject: Hit all DSOs for AUX data in JIT and other cases
  perf tools: tracepoint_error() can receive e=NULL, robustify it
  perf evlist: Make perf_evlist__open() open evsels with their cpus and threads (like perf record does)
  perf evsel: Introduce disable() method
  perf cpumap: Auto initialize cpu__max_{node,cpu}
  drivers/hwtracing: make coresight-etm-perf.c explicitly non-modular
  drivers/hwtracing: make coresight-* explicitly non-modular
  coresight: introducing a global trace ID function
  coresight: etm-perf: new PMU driver for ETM tracers
  coresight: etb10: implementing AUX API
  coresight: etb10: adding operation mode for sink->enable()
  coresight: etb10: moving to local atomic operations
  coresight: etm3x: implementing perf_enable/disable() API
  coresight: etm3x: implementing user/kernel mode tracing
  coresight: etm3x: consolidating initial config
  coresight: etm3x: changing default trace configuration
  coresight: etm3x: set progbit to stop trace collection
  coresight: etm3x: adding operation mode for etm_enable()
  coresight: etm3x: splitting struct etm_drvdata
  coresight: etm3x: unlocking tracers in default arch init
  coresight: etm3x: moving sysFS entries to dedicated file
  coresight: etm3x: moving etm_readl/writel to header file
  coresight: moving PM runtime operations to core framework
  coresight: add API to get sink from path
  coresight: associating path with session rather than tracer
  coresight: etm4x: Check every parameter used by dma_xx_coherent.
  coresight: "DEVICE_ATTR_RO" should defined as static.
  coresight: implementing 'cpu_id()' API
  coresight: removing bind/unbind options from sysfs
  coresight: remove csdev's link from topology
  coresight: release reference taken by 'bus_find_device()'
  coresight: coresight_unregister() function cleanup
  coresight: fixing lockdep error
  coresight: fixing indentation problem
  coresight: Fix a typo in Kconfig
  coresight: checking for NULL string in coresight_name_match()
  perf/core: Disable the event on a truncated AUX record
  perf/core: Don't leak event in the syscall error path
  perf/core: Fix perf_sched_count derailment
  stm class: dummy_stm: Add link callback for fault injection
  stm class: Plug stm device's unlink callback
  stm class: Fix a race in unlinking
  stm class: Fix unbalanced module/device refcounting
  stm class: Guard output assignment against concurrency
  stm class: Fix unlocking braino in the error path
  stm class: Add heartbeat stm source device
  stm class: dummy_stm: Create multiple devices
  stm class: Support devices with multiple instances
  stm class: Use driver's packet callback return value
  stm class: Prevent user-controllable allocations
  stm class: Fix link list locking
  stm class: Fix locking in unbinding policy path
  stm class: Select CONFIG_SRCU
  stm class: Hide STM-specific options if STM is disabled
  perf: Synchronously free aux pages in case of allocation failure
  Linux 4.4.12
  kbuild: move -Wunused-const-variable to W=1 warning level
  Revert "scsi: fix soft lockup in scsi_remove_target() on module removal"
  scsi: Add intermediate STARGET_REMOVE state to scsi_target_state
  hpfs: implement the show_options method
  hpfs: fix remount failure when there are no options changed
  UBI: Fix static volume checks when Fastmap is used
  SIGNAL: Move generic copy_siginfo() to signal.h
  thunderbolt: Fix double free of drom buffer
  IB/srp: Fix a debug kernel crash
  ALSA: hda - Fix headset mic detection problem for one Dell machine
  ALSA: hda/realtek - Add support for ALC295/ALC3254
  ALSA: hda - Fix headphone noise on Dell XPS 13 9360
  ALSA: hda/realtek - New codecs support for ALC234/ALC274/ALC294
  mcb: Fixed bar number assignment for the gdd
  clk: bcm2835: add locking to pll*_on/off methods
  locking,qspinlock: Fix spin_is_locked() and spin_unlock_wait()
  serial: samsung: Reorder the sequence of clock control when call s3c24xx_serial_set_termios()
  serial: 8250_mid: recognize interrupt source in handler
  serial: 8250_mid: use proper bar for DNV platform
  serial: 8250_pci: fix divide error bug if baud rate is 0
  Fix OpenSSH pty regression on close
  tty/serial: atmel: fix hardware handshake selection
  TTY: n_gsm, fix false positive WARN_ON
  tty: vt, return error when con_startup fails
  xen/x86: actually allocate legacy interrupts on PV guests
  KVM: x86: mask CPUID(0xD,0x1).EAX against host value
  MIPS: KVM: Fix timer IRQ race when writing CP0_Compare
  MIPS: KVM: Fix timer IRQ race when freezing timer
  KVM: x86: fix ordering of cr0 initialization code in vmx_cpu_reset
  KVM: MTRR: remove MSR 0x2f8
  staging: comedi: das1800: fix possible NULL dereference
  usb: gadget: udc: core: Fix argument of dev_err() in usb_gadget_map_request()
  USB: leave LPM alone if possible when binding/unbinding interface drivers
  usb: misc: usbtest: fix pattern tests for scatterlists.
  usb: f_mass_storage: test whether thread is running before starting another
  usb: gadget: f_fs: Fix EFAULT generation for async read operations
  USB: serial: option: add even more ZTE device ids
  USB: serial: option: add more ZTE device ids
  USB: serial: option: add support for Cinterion PH8 and AHxx
  USB: serial: io_edgeport: fix memory leaks in probe error path
  USB: serial: io_edgeport: fix memory leaks in attach error path
  USB: serial: quatech2: fix use-after-free in probe error path
  USB: serial: keyspan: fix use-after-free in probe error path
  USB: serial: mxuport: fix use-after-free in probe error path
  mei: bus: call mei_cl_read_start under device lock
  mei: amthif: discard not read messages
  mei: fix NULL dereferencing during FW initiated disconnection
  Bluetooth: vhci: Fix race at creating hci device
  Bluetooth: vhci: purge unhandled skbs
  Bluetooth: vhci: fix open_timeout vs. hdev race
  mmc: sdhci-pci: Remove MMC_CAP_BUS_WIDTH_TEST for Intel controllers
  mmc: longer timeout for long read time quirk
  dell-rbtn: Ignore ACPI notifications if device is suspended
  ACPI / osi: Fix an issue that acpi_osi=!* cannot disable ACPICA internal strings
  mmc: sdhci-acpi: Remove MMC_CAP_BUS_WIDTH_TEST for Intel controllers
  mmc: mmc: Fix partition switch timeout for some eMMCs
  can: fix handling of unmodifiable configuration options
  irqchip/gic-v3: Configure all interrupts as non-secure Group-1
  irqchip/gic: Ensure ordering between read of INTACK and shared data
  Input: pwm-beeper - fix - scheduling while atomic
  mfd: omap-usb-tll: Fix scheduling while atomic BUG
  sched/loadavg: Fix loadavg artifacts on fully idle and on fully loaded systems
  clk: qcom: msm8916: Fix crypto clock flags
  crypto: sun4i-ss - Replace spinlock_bh by spin_lock_irq{save|restore}
  crypto: talitos - fix ahash algorithms registration
  crypto: caam - fix caam_jr_alloc() ret code
  ring-buffer: Prevent overflow of size in ring_buffer_resize()
  ring-buffer: Use long for nr_pages to avoid overflow failures
  asix: Fix offset calculation in asix_rx_fixup() causing slow transmissions
  fs/cifs: correctly to anonymous authentication for the NTLM(v2) authentication
  fs/cifs: correctly to anonymous authentication for the NTLM(v1) authentication
  fs/cifs: correctly to anonymous authentication for the LANMAN authentication
  fs/cifs: correctly to anonymous authentication via NTLMSSP
  remove directory incorrectly tries to set delete on close on non-empty directories
  kvm: arm64: Fix EC field in inject_abt64
  arm/arm64: KVM: Enforce Break-Before-Make on Stage-2 page tables
  arm64: cpuinfo: Missing NULL terminator in compat_hwcap_str
  arm64: Implement pmdp_set_access_flags() for hardware AF/DBM
  arm64: Implement ptep_set_access_flags() for hardware AF/DBM
  arm64: Ensure pmd_present() returns false after pmd_mknotpresent()
  arm64: Fix typo in the pmdp_huge_get_and_clear() definition
  ext4: iterate over buffer heads correctly in move_extent_per_page()
  perf test: Fix build of BPF and LLVM on older glibc libraries
  perf/core: Fix perf_event_open() vs. execve() race
  perf/x86/intel/pt: Generate PMI in the STOP region as well
  Btrfs: don't use src fd for printk
  UPSTREAM: mac80211: fix "warning: ‘target_metric’ may be used uninitialized"
  Revert "drivers: power: use 'current' instead of 'get_current()'"
  cpufreq: interactive: drop cpufreq_{get,put}_global_kobject func calls
  Revert "cpufreq: interactive: build fixes for 4.4"
  xt_qtaguid: Fix panic caused by processing non-full socket.
  fiq_debugger: Add fiq_debugger.disable option
  UPSTREAM: procfs: fixes pthread cross-thread naming if !PR_DUMPABLE
  FROMLIST: wlcore: Disable filtering in AP role
  Revert "drivers: power: Add watchdog timer to catch drivers which lockup during suspend."
  fiq_debugger: Add option to apply uart overlay by FIQ_DEBUGGER_UART_OVERLAY
  Revert "Recreate asm/mach/mmc.h include file"
  Revert "ARM: Add 'card_present' state to mmc_platfrom_data"
  usb: dual-role: make stub functions inline
  Revert "mmc: Add status IRQ and status callback function to mmc platform data"
  quick selinux support for tracefs
  Revert "hid-multitouch: Filter collections by application usage."
  Revert "HID: steelseries: validate output report details"
  xt_qtaguid: Fix panic caused by synack processing
  Revert "mm: vmscan: Add a debug file for shrinkers"
  Revert "SELinux: Enable setting security contexts on rootfs inodes."
  Revert "SELinux: build fix for 4.1"
  fuse: Add support for d_canonical_path
  vfs: change d_canonical_path to take two paths
  android: recommended.cfg: remove CONFIG_UID_STAT
  netfilter: xt_qtaguid: seq_printf fixes
  Revert "misc: uidstat: Adding uid stat driver to collect network statistics."
  Revert "net: activity_stats: Add statistics for network transmission activity"
  Revert "net: activity_stats: Stop using obsolete create_proc_read_entry api"
  Revert "misc: uidstat: avoid create_stat() race and blockage."
  Revert "misc: uidstat: Remove use of obsolete create_proc_read_entry api"
  Revert "misc seq_printf fixes for 4.4"
  Revert "misc: uid_stat: Include linux/atomic.h instead of asm/atomic.h"
  Revert "net: socket ioctl to reset connections matching local address"
  Revert "net: fix iterating over hashtable in tcp_nuke_addr()"
  Revert "net: fix crash in tcp_nuke_addr()"
  Revert "Don't kill IPv4 sockets when killing IPv6 sockets was requested."
  Revert "tcp: Fix IPV6 module build errors"
  android: base-cfg: remove CONFIG_SWITCH
  Revert "switch: switch class and GPIO drivers."
  Revert "drivers: switch: remove S_IWUSR from dev_attr"
  ANDROID: base-cfg: enable CONFIG_IP_NF_NAT
  BACKPORT: selinux: restrict kernel module loading
  android: base-cfg: enable CONFIG_QUOTA

Conflicts:
	Documentation/sysctl/kernel.txt
	drivers/cpufreq/cpufreq_interactive.c
	drivers/hwtracing/coresight/Kconfig
	drivers/hwtracing/coresight/Makefile
	drivers/hwtracing/coresight/coresight-etm4x.c
	drivers/hwtracing/coresight/coresight-etm4x.h
	drivers/hwtracing/coresight/coresight-priv.h
	drivers/hwtracing/coresight/coresight-stm.c
	drivers/hwtracing/coresight/coresight-tmc.c
	drivers/mmc/core/core.c
	include/linux/coresight-stm.h
	include/linux/coresight.h
	include/linux/msm_mdp.h
	include/uapi/linux/coresight-stm.h
	kernel/events/core.c
	kernel/sched/fair.c
	net/Makefile
	net/ipv4/netfilter/arp_tables.c
	net/ipv4/netfilter/ip_tables.c
	net/ipv4/tcp.c
	net/ipv6/netfilter/ip6_tables.c
	net/netfilter/xt_quota2.c
	sound/core/pcm.c

Change-Id: I17aa0002815014e9bddc47e67769a53c15768a99
Signed-off-by: Runmin Wang <runminw@codeaurora.org>
2016-10-28 10:48:35 -07:00

1660 lines
42 KiB
C

/*
* linux/kernel/exit.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/capability.h>
#include <linux/completion.h>
#include <linux/personality.h>
#include <linux/tty.h>
#include <linux/iocontext.h>
#include <linux/key.h>
#include <linux/security.h>
#include <linux/cpu.h>
#include <linux/acct.h>
#include <linux/tsacct_kern.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/freezer.h>
#include <linux/binfmts.h>
#include <linux/nsproxy.h>
#include <linux/pid_namespace.h>
#include <linux/ptrace.h>
#include <linux/profile.h>
#include <linux/mount.h>
#include <linux/proc_fs.h>
#include <linux/kthread.h>
#include <linux/mempolicy.h>
#include <linux/taskstats_kern.h>
#include <linux/delayacct.h>
#include <linux/cgroup.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
#include <linux/posix-timers.h>
#include <linux/cn_proc.h>
#include <linux/mutex.h>
#include <linux/futex.h>
#include <linux/pipe_fs_i.h>
#include <linux/audit.h> /* for audit_free() */
#include <linux/resource.h>
#include <linux/blkdev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/tracehook.h>
#include <linux/fs_struct.h>
#include <linux/init_task.h>
#include <linux/perf_event.h>
#include <trace/events/sched.h>
#include <linux/hw_breakpoint.h>
#include <linux/oom.h>
#include <linux/writeback.h>
#include <linux/shm.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
static void exit_mm(struct task_struct *tsk);
static void __unhash_process(struct task_struct *p, bool group_dead)
{
nr_threads--;
detach_pid(p, PIDTYPE_PID);
if (group_dead) {
detach_pid(p, PIDTYPE_PGID);
detach_pid(p, PIDTYPE_SID);
list_del_rcu(&p->tasks);
list_del_init(&p->sibling);
__this_cpu_dec(process_counts);
}
list_del_rcu(&p->thread_group);
list_del_rcu(&p->thread_node);
}
/*
* This function expects the tasklist_lock write-locked.
*/
static void __exit_signal(struct task_struct *tsk)
{
struct signal_struct *sig = tsk->signal;
bool group_dead = thread_group_leader(tsk);
struct sighand_struct *sighand;
struct tty_struct *uninitialized_var(tty);
cputime_t utime, stime;
sighand = rcu_dereference_check(tsk->sighand,
lockdep_tasklist_lock_is_held());
spin_lock(&sighand->siglock);
posix_cpu_timers_exit(tsk);
if (group_dead) {
posix_cpu_timers_exit_group(tsk);
tty = sig->tty;
sig->tty = NULL;
} else {
/*
* This can only happen if the caller is de_thread().
* FIXME: this is the temporary hack, we should teach
* posix-cpu-timers to handle this case correctly.
*/
if (unlikely(has_group_leader_pid(tsk)))
posix_cpu_timers_exit_group(tsk);
/*
* If there is any task waiting for the group exit
* then notify it:
*/
if (sig->notify_count > 0 && !--sig->notify_count)
wake_up_process(sig->group_exit_task);
if (tsk == sig->curr_target)
sig->curr_target = next_thread(tsk);
}
/*
* Accumulate here the counters for all threads as they die. We could
* skip the group leader because it is the last user of signal_struct,
* but we want to avoid the race with thread_group_cputime() which can
* see the empty ->thread_head list.
*/
task_cputime(tsk, &utime, &stime);
write_seqlock(&sig->stats_lock);
sig->utime += utime;
sig->stime += stime;
sig->gtime += task_gtime(tsk);
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
sig->nivcsw += tsk->nivcsw;
sig->inblock += task_io_get_inblock(tsk);
sig->oublock += task_io_get_oublock(tsk);
task_io_accounting_add(&sig->ioac, &tsk->ioac);
sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
sig->nr_threads--;
__unhash_process(tsk, group_dead);
write_sequnlock(&sig->stats_lock);
/*
* Do this under ->siglock, we can race with another thread
* doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
*/
flush_sigqueue(&tsk->pending);
tsk->sighand = NULL;
spin_unlock(&sighand->siglock);
__cleanup_sighand(sighand);
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
if (group_dead) {
flush_sigqueue(&sig->shared_pending);
tty_kref_put(tty);
}
}
static void delayed_put_task_struct(struct rcu_head *rhp)
{
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
perf_event_delayed_put(tsk);
trace_sched_process_free(tsk);
put_task_struct(tsk);
}
void release_task(struct task_struct *p)
{
struct task_struct *leader;
int zap_leader;
repeat:
/* don't need to get the RCU readlock here - the process is dead and
* can't be modifying its own credentials. But shut RCU-lockdep up */
rcu_read_lock();
atomic_dec(&__task_cred(p)->user->processes);
rcu_read_unlock();
proc_flush_task(p);
write_lock_irq(&tasklist_lock);
ptrace_release_task(p);
__exit_signal(p);
/*
* If we are the last non-leader member of the thread
* group, and the leader is zombie, then notify the
* group leader's parent process. (if it wants notification.)
*/
zap_leader = 0;
leader = p->group_leader;
if (leader != p && thread_group_empty(leader)
&& leader->exit_state == EXIT_ZOMBIE) {
/*
* If we were the last child thread and the leader has
* exited already, and the leader's parent ignores SIGCHLD,
* then we are the one who should release the leader.
*/
zap_leader = do_notify_parent(leader, leader->exit_signal);
if (zap_leader)
leader->exit_state = EXIT_DEAD;
}
write_unlock_irq(&tasklist_lock);
release_thread(p);
call_rcu(&p->rcu, delayed_put_task_struct);
p = leader;
if (unlikely(zap_leader))
goto repeat;
}
/*
* Determine if a process group is "orphaned", according to the POSIX
* definition in 2.2.2.52. Orphaned process groups are not to be affected
* by terminal-generated stop signals. Newly orphaned process groups are
* to receive a SIGHUP and a SIGCONT.
*
* "I ask you, have you ever known what it is to be an orphan?"
*/
static int will_become_orphaned_pgrp(struct pid *pgrp,
struct task_struct *ignored_task)
{
struct task_struct *p;
do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
if ((p == ignored_task) ||
(p->exit_state && thread_group_empty(p)) ||
is_global_init(p->real_parent))
continue;
if (task_pgrp(p->real_parent) != pgrp &&
task_session(p->real_parent) == task_session(p))
return 0;
} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
return 1;
}
int is_current_pgrp_orphaned(void)
{
int retval;
read_lock(&tasklist_lock);
retval = will_become_orphaned_pgrp(task_pgrp(current), NULL);
read_unlock(&tasklist_lock);
return retval;
}
static bool has_stopped_jobs(struct pid *pgrp)
{
struct task_struct *p;
do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
if (p->signal->flags & SIGNAL_STOP_STOPPED)
return true;
} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
return false;
}
/*
* Check to see if any process groups have become orphaned as
* a result of our exiting, and if they have any stopped jobs,
* send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
*/
static void
kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
{
struct pid *pgrp = task_pgrp(tsk);
struct task_struct *ignored_task = tsk;
if (!parent)
/* exit: our father is in a different pgrp than
* we are and we were the only connection outside.
*/
parent = tsk->real_parent;
else
/* reparent: our child is in a different pgrp than
* we are, and it was the only connection outside.
*/
ignored_task = NULL;
if (task_pgrp(parent) != pgrp &&
task_session(parent) == task_session(tsk) &&
will_become_orphaned_pgrp(pgrp, ignored_task) &&
has_stopped_jobs(pgrp)) {
__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
}
}
#ifdef CONFIG_MEMCG
/*
* A task is exiting. If it owned this mm, find a new owner for the mm.
*/
void mm_update_next_owner(struct mm_struct *mm)
{
struct task_struct *c, *g, *p = current;
retry:
/*
* If the exiting or execing task is not the owner, it's
* someone else's problem.
*/
if (mm->owner != p)
return;
/*
* The current owner is exiting/execing and there are no other
* candidates. Do not leave the mm pointing to a possibly
* freed task structure.
*/
if (atomic_read(&mm->mm_users) <= 1) {
mm->owner = NULL;
return;
}
read_lock(&tasklist_lock);
/*
* Search in the children
*/
list_for_each_entry(c, &p->children, sibling) {
if (c->mm == mm)
goto assign_new_owner;
}
/*
* Search in the siblings
*/
list_for_each_entry(c, &p->real_parent->children, sibling) {
if (c->mm == mm)
goto assign_new_owner;
}
/*
* Search through everything else, we should not get here often.
*/
for_each_process(g) {
if (g->flags & PF_KTHREAD)
continue;
for_each_thread(g, c) {
if (c->mm == mm)
goto assign_new_owner;
if (c->mm)
break;
}
}
read_unlock(&tasklist_lock);
/*
* We found no owner yet mm_users > 1: this implies that we are
* most likely racing with swapoff (try_to_unuse()) or /proc or
* ptrace or page migration (get_task_mm()). Mark owner as NULL.
*/
mm->owner = NULL;
return;
assign_new_owner:
BUG_ON(c == p);
get_task_struct(c);
/*
* The task_lock protects c->mm from changing.
* We always want mm->owner->mm == mm
*/
task_lock(c);
/*
* Delay read_unlock() till we have the task_lock()
* to ensure that c does not slip away underneath us
*/
read_unlock(&tasklist_lock);
if (c->mm != mm) {
task_unlock(c);
put_task_struct(c);
goto retry;
}
mm->owner = c;
task_unlock(c);
put_task_struct(c);
}
#endif /* CONFIG_MEMCG */
/*
* Turn us into a lazy TLB process if we
* aren't already..
*/
static void exit_mm(struct task_struct *tsk)
{
struct mm_struct *mm = tsk->mm;
struct core_state *core_state;
int mm_released;
mm_release(tsk, mm);
if (!mm)
return;
sync_mm_rss(mm);
/*
* Serialize with any possible pending coredump.
* We must hold mmap_sem around checking core_state
* and clearing tsk->mm. The core-inducing thread
* will increment ->nr_threads for each thread in the
* group with ->mm != NULL.
*/
down_read(&mm->mmap_sem);
core_state = mm->core_state;
if (core_state) {
struct core_thread self;
up_read(&mm->mmap_sem);
self.task = tsk;
self.next = xchg(&core_state->dumper.next, &self);
/*
* Implies mb(), the result of xchg() must be visible
* to core_state->dumper.
*/
if (atomic_dec_and_test(&core_state->nr_threads))
complete(&core_state->startup);
for (;;) {
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (!self.task) /* see coredump_finish() */
break;
freezable_schedule();
}
__set_task_state(tsk, TASK_RUNNING);
down_read(&mm->mmap_sem);
}
atomic_inc(&mm->mm_count);
BUG_ON(mm != tsk->active_mm);
/* more a memory barrier than a real lock */
task_lock(tsk);
tsk->mm = NULL;
up_read(&mm->mmap_sem);
enter_lazy_tlb(mm, current);
task_unlock(tsk);
mm_update_next_owner(mm);
mm_released = mmput(mm);
if (test_thread_flag(TIF_MEMDIE))
exit_oom_victim();
if (mm_released)
set_tsk_thread_flag(tsk, TIF_MM_RELEASED);
}
static struct task_struct *find_alive_thread(struct task_struct *p)
{
struct task_struct *t;
for_each_thread(p, t) {
if (!(t->flags & PF_EXITING))
return t;
}
return NULL;
}
static struct task_struct *find_child_reaper(struct task_struct *father)
__releases(&tasklist_lock)
__acquires(&tasklist_lock)
{
struct pid_namespace *pid_ns = task_active_pid_ns(father);
struct task_struct *reaper = pid_ns->child_reaper;
if (likely(reaper != father))
return reaper;
reaper = find_alive_thread(father);
if (reaper) {
pid_ns->child_reaper = reaper;
return reaper;
}
write_unlock_irq(&tasklist_lock);
if (unlikely(pid_ns == &init_pid_ns)) {
panic("Attempted to kill init! exitcode=0x%08x\n",
father->signal->group_exit_code ?: father->exit_code);
}
zap_pid_ns_processes(pid_ns);
write_lock_irq(&tasklist_lock);
return father;
}
/*
* When we die, we re-parent all our children, and try to:
* 1. give them to another thread in our thread group, if such a member exists
* 2. give it to the first ancestor process which prctl'd itself as a
* child_subreaper for its children (like a service manager)
* 3. give it to the init process (PID 1) in our pid namespace
*/
static struct task_struct *find_new_reaper(struct task_struct *father,
struct task_struct *child_reaper)
{
struct task_struct *thread, *reaper;
thread = find_alive_thread(father);
if (thread)
return thread;
if (father->signal->has_child_subreaper) {
/*
* Find the first ->is_child_subreaper ancestor in our pid_ns.
* We start from father to ensure we can not look into another
* namespace, this is safe because all its threads are dead.
*/
for (reaper = father;
!same_thread_group(reaper, child_reaper);
reaper = reaper->real_parent) {
/* call_usermodehelper() descendants need this check */
if (reaper == &init_task)
break;
if (!reaper->signal->is_child_subreaper)
continue;
thread = find_alive_thread(reaper);
if (thread)
return thread;
}
}
return child_reaper;
}
/*
* Any that need to be release_task'd are put on the @dead list.
*/
static void reparent_leader(struct task_struct *father, struct task_struct *p,
struct list_head *dead)
{
if (unlikely(p->exit_state == EXIT_DEAD))
return;
/* We don't want people slaying init. */
p->exit_signal = SIGCHLD;
/* If it has exited notify the new parent about this child's death. */
if (!p->ptrace &&
p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
if (do_notify_parent(p, p->exit_signal)) {
p->exit_state = EXIT_DEAD;
list_add(&p->ptrace_entry, dead);
}
}
kill_orphaned_pgrp(p, father);
}
/*
* This does two things:
*
* A. Make init inherit all the child processes
* B. Check to see if any process groups have become orphaned
* as a result of our exiting, and if they have any stopped
* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
*/
static void forget_original_parent(struct task_struct *father,
struct list_head *dead)
{
struct task_struct *p, *t, *reaper;
if (unlikely(!list_empty(&father->ptraced)))
exit_ptrace(father, dead);
/* Can drop and reacquire tasklist_lock */
reaper = find_child_reaper(father);
if (list_empty(&father->children))
return;
reaper = find_new_reaper(father, reaper);
list_for_each_entry(p, &father->children, sibling) {
for_each_thread(p, t) {
t->real_parent = reaper;
BUG_ON((!t->ptrace) != (t->parent == father));
if (likely(!t->ptrace))
t->parent = t->real_parent;
if (t->pdeath_signal)
group_send_sig_info(t->pdeath_signal,
SEND_SIG_NOINFO, t);
}
/*
* If this is a threaded reparent there is no need to
* notify anyone anything has happened.
*/
if (!same_thread_group(reaper, father))
reparent_leader(father, p, dead);
}
list_splice_tail_init(&father->children, &reaper->children);
}
/*
* Send signals to all our closest relatives so that they know
* to properly mourn us..
*/
static void exit_notify(struct task_struct *tsk, int group_dead)
{
bool autoreap;
struct task_struct *p, *n;
LIST_HEAD(dead);
write_lock_irq(&tasklist_lock);
forget_original_parent(tsk, &dead);
if (group_dead)
kill_orphaned_pgrp(tsk->group_leader, NULL);
if (unlikely(tsk->ptrace)) {
int sig = thread_group_leader(tsk) &&
thread_group_empty(tsk) &&
!ptrace_reparented(tsk) ?
tsk->exit_signal : SIGCHLD;
autoreap = do_notify_parent(tsk, sig);
} else if (thread_group_leader(tsk)) {
autoreap = thread_group_empty(tsk) &&
do_notify_parent(tsk, tsk->exit_signal);
} else {
autoreap = true;
}
tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;
if (tsk->exit_state == EXIT_DEAD)
list_add(&tsk->ptrace_entry, &dead);
/* mt-exec, de_thread() is waiting for group leader */
if (unlikely(tsk->signal->notify_count < 0))
wake_up_process(tsk->signal->group_exit_task);
write_unlock_irq(&tasklist_lock);
list_for_each_entry_safe(p, n, &dead, ptrace_entry) {
list_del_init(&p->ptrace_entry);
release_task(p);
}
}
#ifdef CONFIG_DEBUG_STACK_USAGE
static void check_stack_usage(void)
{
static DEFINE_SPINLOCK(low_water_lock);
static int lowest_to_date = THREAD_SIZE;
unsigned long free;
int islower = false;
free = stack_not_used(current);
if (free >= lowest_to_date)
return;
spin_lock(&low_water_lock);
if (free < lowest_to_date) {
lowest_to_date = free;
islower = true;
}
spin_unlock(&low_water_lock);
if (islower) {
printk(KERN_WARNING "%s (%d) used greatest stack depth: "
"%lu bytes left\n",
current->comm, task_pid_nr(current), free);
}
}
#else
static inline void check_stack_usage(void) {}
#endif
void do_exit(long code)
{
struct task_struct *tsk = current;
int group_dead;
TASKS_RCU(int tasks_rcu_i);
profile_task_exit(tsk);
WARN_ON(blk_needs_flush_plug(tsk));
if (unlikely(in_interrupt()))
panic("Aiee, killing interrupt handler!");
if (unlikely(!tsk->pid))
panic("Attempted to kill the idle task!");
/*
* If do_exit is called because this processes oopsed, it's possible
* that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
* continuing. Amongst other possible reasons, this is to prevent
* mm_release()->clear_child_tid() from writing to a user-controlled
* kernel address.
*/
set_fs(USER_DS);
ptrace_event(PTRACE_EVENT_EXIT, code);
validate_creds_for_do_exit(tsk);
/*
* We're taking recursive faults here in do_exit. Safest is to just
* leave this task alone and wait for reboot.
*/
if (unlikely(tsk->flags & PF_EXITING)) {
pr_alert("Fixing recursive fault but reboot is needed!\n");
/*
* We can do this unlocked here. The futex code uses
* this flag just to verify whether the pi state
* cleanup has been done or not. In the worst case it
* loops once more. We pretend that the cleanup was
* done as there is no way to return. Either the
* OWNER_DIED bit is set by now or we push the blocked
* task into the wait for ever nirwana as well.
*/
tsk->flags |= PF_EXITPIDONE;
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
}
exit_signals(tsk); /* sets PF_EXITING */
sched_exit(tsk);
/*
* tsk->flags are checked in the futex code to protect against
* an exiting task cleaning up the robust pi futexes.
*/
smp_mb();
raw_spin_unlock_wait(&tsk->pi_lock);
if (unlikely(in_atomic())) {
pr_info("note: %s[%d] exited with preempt_count %d\n",
current->comm, task_pid_nr(current),
preempt_count());
preempt_count_set(PREEMPT_ENABLED);
}
/* sync mm's RSS info before statistics gathering */
if (tsk->mm)
sync_mm_rss(tsk->mm);
acct_update_integrals(tsk);
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
hrtimer_cancel(&tsk->signal->real_timer);
exit_itimers(tsk->signal);
if (tsk->mm)
setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
}
acct_collect(code, group_dead);
if (group_dead)
tty_audit_exit();
audit_free(tsk);
tsk->exit_code = code;
taskstats_exit(tsk, group_dead);
exit_mm(tsk);
if (group_dead)
acct_process();
trace_sched_process_exit(tsk);
exit_sem(tsk);
exit_shm(tsk);
exit_files(tsk);
exit_fs(tsk);
if (group_dead)
disassociate_ctty(1);
exit_task_namespaces(tsk);
exit_task_work(tsk);
exit_thread();
/*
* Flush inherited counters to the parent - before the parent
* gets woken up by child-exit notifications.
*
* because of cgroup mode, must be called before cgroup_exit()
*/
perf_event_exit_task(tsk);
cgroup_exit(tsk);
/*
* FIXME: do that only when needed, using sched_exit tracepoint
*/
flush_ptrace_hw_breakpoint(tsk);
TASKS_RCU(preempt_disable());
TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
TASKS_RCU(preempt_enable());
exit_notify(tsk, group_dead);
proc_exit_connector(tsk);
#ifdef CONFIG_NUMA
task_lock(tsk);
mpol_put(tsk->mempolicy);
tsk->mempolicy = NULL;
task_unlock(tsk);
#endif
#ifdef CONFIG_FUTEX
if (unlikely(current->pi_state_cache))
kfree(current->pi_state_cache);
#endif
/*
* Make sure we are holding no locks:
*/
debug_check_no_locks_held();
/*
* We can do this unlocked here. The futex code uses this flag
* just to verify whether the pi state cleanup has been done
* or not. In the worst case it loops once more.
*/
tsk->flags |= PF_EXITPIDONE;
if (tsk->io_context)
exit_io_context(tsk);
if (tsk->splice_pipe)
free_pipe_info(tsk->splice_pipe);
if (tsk->task_frag.page)
put_page(tsk->task_frag.page);
validate_creds_for_do_exit(tsk);
check_stack_usage();
preempt_disable();
if (tsk->nr_dirtied)
__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
exit_rcu();
TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
/*
* The setting of TASK_RUNNING by try_to_wake_up() may be delayed
* when the following two conditions become true.
* - There is race condition of mmap_sem (It is acquired by
* exit_mm()), and
* - SMI occurs before setting TASK_RUNINNG.
* (or hypervisor of virtual machine switches to other guest)
* As a result, we may become TASK_RUNNING after becoming TASK_DEAD
*
* To avoid it, we have to wait for releasing tsk->pi_lock which
* is held by try_to_wake_up()
*/
smp_mb();
raw_spin_unlock_wait(&tsk->pi_lock);
/* causes final put_task_struct in finish_task_switch(). */
tsk->state = TASK_DEAD;
tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
schedule();
BUG();
/* Avoid "noreturn function does return". */
for (;;)
cpu_relax(); /* For when BUG is null */
}
EXPORT_SYMBOL_GPL(do_exit);
void complete_and_exit(struct completion *comp, long code)
{
if (comp)
complete(comp);
do_exit(code);
}
EXPORT_SYMBOL(complete_and_exit);
SYSCALL_DEFINE1(exit, int, error_code)
{
do_exit((error_code&0xff)<<8);
}
/*
* Take down every thread in the group. This is called by fatal signals
* as well as by sys_exit_group (below).
*/
void
do_group_exit(int exit_code)
{
struct signal_struct *sig = current->signal;
BUG_ON(exit_code & 0x80); /* core dumps don't get here */
if (signal_group_exit(sig))
exit_code = sig->group_exit_code;
else if (!thread_group_empty(current)) {
struct sighand_struct *const sighand = current->sighand;
spin_lock_irq(&sighand->siglock);
if (signal_group_exit(sig))
/* Another thread got here before we took the lock. */
exit_code = sig->group_exit_code;
else {
sig->group_exit_code = exit_code;
sig->flags = SIGNAL_GROUP_EXIT;
zap_other_threads(current);
}
spin_unlock_irq(&sighand->siglock);
}
do_exit(exit_code);
/* NOTREACHED */
}
/*
* this kills every thread in the thread group. Note that any externally
* wait4()-ing process will get the correct exit code - even if this
* thread is not the thread group leader.
*/
SYSCALL_DEFINE1(exit_group, int, error_code)
{
do_group_exit((error_code & 0xff) << 8);
/* NOTREACHED */
return 0;
}
struct wait_opts {
enum pid_type wo_type;
int wo_flags;
struct pid *wo_pid;
struct siginfo __user *wo_info;
int __user *wo_stat;
struct rusage __user *wo_rusage;
wait_queue_t child_wait;
int notask_error;
};
static inline
struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
{
if (type != PIDTYPE_PID)
task = task->group_leader;
return task->pids[type].pid;
}
static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
{
return wo->wo_type == PIDTYPE_MAX ||
task_pid_type(p, wo->wo_type) == wo->wo_pid;
}
static int
eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p)
{
if (!eligible_pid(wo, p))
return 0;
/*
* Wait for all children (clone and not) if __WALL is set or
* if it is traced by us.
*/
if (ptrace || (wo->wo_flags & __WALL))
return 1;
/*
* Otherwise, wait for clone children *only* if __WCLONE is set;
* otherwise, wait for non-clone children *only*.
*
* Note: a "clone" child here is one that reports to its parent
* using a signal other than SIGCHLD, or a non-leader thread which
* we can only see if it is traced by us.
*/
if ((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
return 0;
return 1;
}
static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
pid_t pid, uid_t uid, int why, int status)
{
struct siginfo __user *infop;
int retval = wo->wo_rusage
? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
put_task_struct(p);
infop = wo->wo_info;
if (infop) {
if (!retval)
retval = put_user(SIGCHLD, &infop->si_signo);
if (!retval)
retval = put_user(0, &infop->si_errno);
if (!retval)
retval = put_user((short)why, &infop->si_code);
if (!retval)
retval = put_user(pid, &infop->si_pid);
if (!retval)
retval = put_user(uid, &infop->si_uid);
if (!retval)
retval = put_user(status, &infop->si_status);
}
if (!retval)
retval = pid;
return retval;
}
/*
* Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold
* read_lock(&tasklist_lock) on entry. If we return zero, we still hold
* the lock and this task is uninteresting. If we return nonzero, we have
* released the lock and the system call should return.
*/
static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
{
int state, retval, status;
pid_t pid = task_pid_vnr(p);
uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
struct siginfo __user *infop;
if (!likely(wo->wo_flags & WEXITED))
return 0;
if (unlikely(wo->wo_flags & WNOWAIT)) {
int exit_code = p->exit_code;
int why;
get_task_struct(p);
read_unlock(&tasklist_lock);
sched_annotate_sleep();
if ((exit_code & 0x7f) == 0) {
why = CLD_EXITED;
status = exit_code >> 8;
} else {
why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
status = exit_code & 0x7f;
}
return wait_noreap_copyout(wo, p, pid, uid, why, status);
}
/*
* Move the task's state to DEAD/TRACE, only one thread can do this.
*/
state = (ptrace_reparented(p) && thread_group_leader(p)) ?
EXIT_TRACE : EXIT_DEAD;
if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE)
return 0;
/*
* We own this thread, nobody else can reap it.
*/
read_unlock(&tasklist_lock);
sched_annotate_sleep();
/*
* Check thread_group_leader() to exclude the traced sub-threads.
*/
if (state == EXIT_DEAD && thread_group_leader(p)) {
struct signal_struct *sig = p->signal;
struct signal_struct *psig = current->signal;
unsigned long maxrss;
cputime_t tgutime, tgstime;
/*
* The resource counters for the group leader are in its
* own task_struct. Those for dead threads in the group
* are in its signal_struct, as are those for the child
* processes it has previously reaped. All these
* accumulate in the parent's signal_struct c* fields.
*
* We don't bother to take a lock here to protect these
* p->signal fields because the whole thread group is dead
* and nobody can change them.
*
* psig->stats_lock also protects us from our sub-theads
* which can reap other children at the same time. Until
* we change k_getrusage()-like users to rely on this lock
* we have to take ->siglock as well.
*
* We use thread_group_cputime_adjusted() to get times for
* the thread group, which consolidates times for all threads
* in the group including the group leader.
*/
thread_group_cputime_adjusted(p, &tgutime, &tgstime);
spin_lock_irq(&current->sighand->siglock);
write_seqlock(&psig->stats_lock);
psig->cutime += tgutime + sig->cutime;
psig->cstime += tgstime + sig->cstime;
psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
psig->cmin_flt +=
p->min_flt + sig->min_flt + sig->cmin_flt;
psig->cmaj_flt +=
p->maj_flt + sig->maj_flt + sig->cmaj_flt;
psig->cnvcsw +=
p->nvcsw + sig->nvcsw + sig->cnvcsw;
psig->cnivcsw +=
p->nivcsw + sig->nivcsw + sig->cnivcsw;
psig->cinblock +=
task_io_get_inblock(p) +
sig->inblock + sig->cinblock;
psig->coublock +=
task_io_get_oublock(p) +
sig->oublock + sig->coublock;
maxrss = max(sig->maxrss, sig->cmaxrss);
if (psig->cmaxrss < maxrss)
psig->cmaxrss = maxrss;
task_io_accounting_add(&psig->ioac, &p->ioac);
task_io_accounting_add(&psig->ioac, &sig->ioac);
write_sequnlock(&psig->stats_lock);
spin_unlock_irq(&current->sighand->siglock);
}
retval = wo->wo_rusage
? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
status = (p->signal->flags & SIGNAL_GROUP_EXIT)
? p->signal->group_exit_code : p->exit_code;
if (!retval && wo->wo_stat)
retval = put_user(status, wo->wo_stat);
infop = wo->wo_info;
if (!retval && infop)
retval = put_user(SIGCHLD, &infop->si_signo);
if (!retval && infop)
retval = put_user(0, &infop->si_errno);
if (!retval && infop) {
int why;
if ((status & 0x7f) == 0) {
why = CLD_EXITED;
status >>= 8;
} else {
why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
status &= 0x7f;
}
retval = put_user((short)why, &infop->si_code);
if (!retval)
retval = put_user(status, &infop->si_status);
}
if (!retval && infop)
retval = put_user(pid, &infop->si_pid);
if (!retval && infop)
retval = put_user(uid, &infop->si_uid);
if (!retval)
retval = pid;
if (state == EXIT_TRACE) {
write_lock_irq(&tasklist_lock);
/* We dropped tasklist, ptracer could die and untrace */
ptrace_unlink(p);
/* If parent wants a zombie, don't release it now */
state = EXIT_ZOMBIE;
if (do_notify_parent(p, p->exit_signal))
state = EXIT_DEAD;
p->exit_state = state;
write_unlock_irq(&tasklist_lock);
}
if (state == EXIT_DEAD)
release_task(p);
return retval;
}
static int *task_stopped_code(struct task_struct *p, bool ptrace)
{
if (ptrace) {
if (task_is_stopped_or_traced(p) &&
!(p->jobctl & JOBCTL_LISTENING))
return &p->exit_code;
} else {
if (p->signal->flags & SIGNAL_STOP_STOPPED)
return &p->signal->group_exit_code;
}
return NULL;
}
/**
* wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED
* @wo: wait options
* @ptrace: is the wait for ptrace
* @p: task to wait for
*
* Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.
*
* CONTEXT:
* read_lock(&tasklist_lock), which is released if return value is
* non-zero. Also, grabs and releases @p->sighand->siglock.
*
* RETURNS:
* 0 if wait condition didn't exist and search for other wait conditions
* should continue. Non-zero return, -errno on failure and @p's pid on
* success, implies that tasklist_lock is released and wait condition
* search should terminate.
*/
static int wait_task_stopped(struct wait_opts *wo,
int ptrace, struct task_struct *p)
{
struct siginfo __user *infop;
int retval, exit_code, *p_code, why;
uid_t uid = 0; /* unneeded, required by compiler */
pid_t pid;
/*
* Traditionally we see ptrace'd stopped tasks regardless of options.
*/
if (!ptrace && !(wo->wo_flags & WUNTRACED))
return 0;
if (!task_stopped_code(p, ptrace))
return 0;
exit_code = 0;
spin_lock_irq(&p->sighand->siglock);
p_code = task_stopped_code(p, ptrace);
if (unlikely(!p_code))
goto unlock_sig;
exit_code = *p_code;
if (!exit_code)
goto unlock_sig;
if (!unlikely(wo->wo_flags & WNOWAIT))
*p_code = 0;
uid = from_kuid_munged(current_user_ns(), task_uid(p));
unlock_sig:
spin_unlock_irq(&p->sighand->siglock);
if (!exit_code)
return 0;
/*
* Now we are pretty sure this task is interesting.
* Make sure it doesn't get reaped out from under us while we
* give up the lock and then examine it below. We don't want to
* keep holding onto the tasklist_lock while we call getrusage and
* possibly take page faults for user memory.
*/
get_task_struct(p);
pid = task_pid_vnr(p);
why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
read_unlock(&tasklist_lock);
sched_annotate_sleep();
if (unlikely(wo->wo_flags & WNOWAIT))
return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
retval = wo->wo_rusage
? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
if (!retval && wo->wo_stat)
retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);
infop = wo->wo_info;
if (!retval && infop)
retval = put_user(SIGCHLD, &infop->si_signo);
if (!retval && infop)
retval = put_user(0, &infop->si_errno);
if (!retval && infop)
retval = put_user((short)why, &infop->si_code);
if (!retval && infop)
retval = put_user(exit_code, &infop->si_status);
if (!retval && infop)
retval = put_user(pid, &infop->si_pid);
if (!retval && infop)
retval = put_user(uid, &infop->si_uid);
if (!retval)
retval = pid;
put_task_struct(p);
BUG_ON(!retval);
return retval;
}
/*
* Handle do_wait work for one task in a live, non-stopped state.
* read_lock(&tasklist_lock) on entry. If we return zero, we still hold
* the lock and this task is uninteresting. If we return nonzero, we have
* released the lock and the system call should return.
*/
static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
{
int retval;
pid_t pid;
uid_t uid;
if (!unlikely(wo->wo_flags & WCONTINUED))
return 0;
if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
return 0;
spin_lock_irq(&p->sighand->siglock);
/* Re-check with the lock held. */
if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
spin_unlock_irq(&p->sighand->siglock);
return 0;
}
if (!unlikely(wo->wo_flags & WNOWAIT))
p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
uid = from_kuid_munged(current_user_ns(), task_uid(p));
spin_unlock_irq(&p->sighand->siglock);
pid = task_pid_vnr(p);
get_task_struct(p);
read_unlock(&tasklist_lock);
sched_annotate_sleep();
if (!wo->wo_info) {
retval = wo->wo_rusage
? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
put_task_struct(p);
if (!retval && wo->wo_stat)
retval = put_user(0xffff, wo->wo_stat);
if (!retval)
retval = pid;
} else {
retval = wait_noreap_copyout(wo, p, pid, uid,
CLD_CONTINUED, SIGCONT);
BUG_ON(retval == 0);
}
return retval;
}
/*
* Consider @p for a wait by @parent.
*
* -ECHILD should be in ->notask_error before the first call.
* Returns nonzero for a final return, when we have unlocked tasklist_lock.
* Returns zero if the search for a child should continue;
* then ->notask_error is 0 if @p is an eligible child,
* or another error from security_task_wait(), or still -ECHILD.
*/
static int wait_consider_task(struct wait_opts *wo, int ptrace,
struct task_struct *p)
{
/*
* We can race with wait_task_zombie() from another thread.
* Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition
* can't confuse the checks below.
*/
int exit_state = ACCESS_ONCE(p->exit_state);
int ret;
if (unlikely(exit_state == EXIT_DEAD))
return 0;
ret = eligible_child(wo, ptrace, p);
if (!ret)
return ret;
ret = security_task_wait(p);
if (unlikely(ret < 0)) {
/*
* If we have not yet seen any eligible child,
* then let this error code replace -ECHILD.
* A permission error will give the user a clue
* to look for security policy problems, rather
* than for mysterious wait bugs.
*/
if (wo->notask_error)
wo->notask_error = ret;
return 0;
}
if (unlikely(exit_state == EXIT_TRACE)) {
/*
* ptrace == 0 means we are the natural parent. In this case
* we should clear notask_error, debugger will notify us.
*/
if (likely(!ptrace))
wo->notask_error = 0;
return 0;
}
if (likely(!ptrace) && unlikely(p->ptrace)) {
/*
* If it is traced by its real parent's group, just pretend
* the caller is ptrace_do_wait() and reap this child if it
* is zombie.
*
* This also hides group stop state from real parent; otherwise
* a single stop can be reported twice as group and ptrace stop.
* If a ptracer wants to distinguish these two events for its
* own children it should create a separate process which takes
* the role of real parent.
*/
if (!ptrace_reparented(p))
ptrace = 1;
}
/* slay zombie? */
if (exit_state == EXIT_ZOMBIE) {
/* we don't reap group leaders with subthreads */
if (!delay_group_leader(p)) {
/*
* A zombie ptracee is only visible to its ptracer.
* Notification and reaping will be cascaded to the
* real parent when the ptracer detaches.
*/
if (unlikely(ptrace) || likely(!p->ptrace))
return wait_task_zombie(wo, p);
}
/*
* Allow access to stopped/continued state via zombie by
* falling through. Clearing of notask_error is complex.
*
* When !@ptrace:
*
* If WEXITED is set, notask_error should naturally be
* cleared. If not, subset of WSTOPPED|WCONTINUED is set,
* so, if there are live subthreads, there are events to
* wait for. If all subthreads are dead, it's still safe
* to clear - this function will be called again in finite
* amount time once all the subthreads are released and
* will then return without clearing.
*
* When @ptrace:
*
* Stopped state is per-task and thus can't change once the
* target task dies. Only continued and exited can happen.
* Clear notask_error if WCONTINUED | WEXITED.
*/
if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))
wo->notask_error = 0;
} else {
/*
* @p is alive and it's gonna stop, continue or exit, so
* there always is something to wait for.
*/
wo->notask_error = 0;
}
/*
* Wait for stopped. Depending on @ptrace, different stopped state
* is used and the two don't interact with each other.
*/
ret = wait_task_stopped(wo, ptrace, p);
if (ret)
return ret;
/*
* Wait for continued. There's only one continued state and the
* ptracer can consume it which can confuse the real parent. Don't
* use WCONTINUED from ptracer. You don't need or want it.
*/
return wait_task_continued(wo, p);
}
/*
* Do the work of do_wait() for one thread in the group, @tsk.
*
* -ECHILD should be in ->notask_error before the first call.
* Returns nonzero for a final return, when we have unlocked tasklist_lock.
* Returns zero if the search for a child should continue; then
* ->notask_error is 0 if there were any eligible children,
* or another error from security_task_wait(), or still -ECHILD.
*/
static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
{
struct task_struct *p;
list_for_each_entry(p, &tsk->children, sibling) {
int ret = wait_consider_task(wo, 0, p);
if (ret)
return ret;
}
return 0;
}
static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
{
struct task_struct *p;
list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
int ret = wait_consider_task(wo, 1, p);
if (ret)
return ret;
}
return 0;
}
static int child_wait_callback(wait_queue_t *wait, unsigned mode,
int sync, void *key)
{
struct wait_opts *wo = container_of(wait, struct wait_opts,
child_wait);
struct task_struct *p = key;
if (!eligible_pid(wo, p))
return 0;
if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)
return 0;
return default_wake_function(wait, mode, sync, key);
}
void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
{
__wake_up_sync_key(&parent->signal->wait_chldexit,
TASK_INTERRUPTIBLE, 1, p);
}
static long do_wait(struct wait_opts *wo)
{
struct task_struct *tsk;
int retval;
trace_sched_process_wait(wo->wo_pid);
init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
wo->child_wait.private = current;
add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
repeat:
/*
* If there is nothing that can match our criteria, just get out.
* We will clear ->notask_error to zero if we see any child that
* might later match our criteria, even if we are not able to reap
* it yet.
*/
wo->notask_error = -ECHILD;
if ((wo->wo_type < PIDTYPE_MAX) &&
(!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
goto notask;
set_current_state(TASK_INTERRUPTIBLE);
read_lock(&tasklist_lock);
tsk = current;
do {
retval = do_wait_thread(wo, tsk);
if (retval)
goto end;
retval = ptrace_do_wait(wo, tsk);
if (retval)
goto end;
if (wo->wo_flags & __WNOTHREAD)
break;
} while_each_thread(current, tsk);
read_unlock(&tasklist_lock);
notask:
retval = wo->notask_error;
if (!retval && !(wo->wo_flags & WNOHANG)) {
retval = -ERESTARTSYS;
if (!signal_pending(current)) {
schedule();
goto repeat;
}
}
end:
__set_current_state(TASK_RUNNING);
remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
return retval;
}
SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
infop, int, options, struct rusage __user *, ru)
{
struct wait_opts wo;
struct pid *pid = NULL;
enum pid_type type;
long ret;
if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED))
return -EINVAL;
if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
return -EINVAL;
switch (which) {
case P_ALL:
type = PIDTYPE_MAX;
break;
case P_PID:
type = PIDTYPE_PID;
if (upid <= 0)
return -EINVAL;
break;
case P_PGID:
type = PIDTYPE_PGID;
if (upid <= 0)
return -EINVAL;
break;
default:
return -EINVAL;
}
if (type < PIDTYPE_MAX)
pid = find_get_pid(upid);
wo.wo_type = type;
wo.wo_pid = pid;
wo.wo_flags = options;
wo.wo_info = infop;
wo.wo_stat = NULL;
wo.wo_rusage = ru;
ret = do_wait(&wo);
if (ret > 0) {
ret = 0;
} else if (infop) {
/*
* For a WNOHANG return, clear out all the fields
* we would set so the user can easily tell the
* difference.
*/
if (!ret)
ret = put_user(0, &infop->si_signo);
if (!ret)
ret = put_user(0, &infop->si_errno);
if (!ret)
ret = put_user(0, &infop->si_code);
if (!ret)
ret = put_user(0, &infop->si_pid);
if (!ret)
ret = put_user(0, &infop->si_uid);
if (!ret)
ret = put_user(0, &infop->si_status);
}
put_pid(pid);
return ret;
}
SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
int, options, struct rusage __user *, ru)
{
struct wait_opts wo;
struct pid *pid = NULL;
enum pid_type type;
long ret;
if (options & ~(WNOHANG|WUNTRACED|WCONTINUED|
__WNOTHREAD|__WCLONE|__WALL))
return -EINVAL;
if (upid == -1)
type = PIDTYPE_MAX;
else if (upid < 0) {
type = PIDTYPE_PGID;
pid = find_get_pid(-upid);
} else if (upid == 0) {
type = PIDTYPE_PGID;
pid = get_task_pid(current, PIDTYPE_PGID);
} else /* upid > 0 */ {
type = PIDTYPE_PID;
pid = find_get_pid(upid);
}
wo.wo_type = type;
wo.wo_pid = pid;
wo.wo_flags = options | WEXITED;
wo.wo_info = NULL;
wo.wo_stat = stat_addr;
wo.wo_rusage = ru;
ret = do_wait(&wo);
put_pid(pid);
return ret;
}
#ifdef __ARCH_WANT_SYS_WAITPID
/*
* sys_waitpid() remains for compatibility. waitpid() should be
* implemented by calling sys_wait4() from libc.a.
*/
SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)
{
return sys_wait4(pid, stat_addr, options, NULL);
}
#endif