* lsk-44/linux-linaro-lsk-v4.4: Linux 4.4.3 modules: fix modparam async_probe request module: wrapper for symbol name. itimers: Handle relative timers with CONFIG_TIME_LOW_RES proper posix-timers: Handle relative timers with CONFIG_TIME_LOW_RES proper timerfd: Handle relative timers with CONFIG_TIME_LOW_RES proper prctl: take mmap sem for writing to protect against others xfs: log mount failures don't wait for buffers to be released Revert "xfs: clear PF_NOFREEZE for xfsaild kthread" xfs: inode recovery readahead can race with inode buffer creation libxfs: pack the agfl header structure so XFS_AGFL_SIZE is correct ovl: setattr: check permissions before copy-up ovl: root: copy attr ovl: check dentry positiveness in ovl_cleanup_whiteouts() ovl: use a minimal buffer in ovl_copy_xattr ovl: allow zero size xattr futex: Drop refcount if requeue_pi() acquired the rtmutex devm_memremap_release(): fix memremap'd addr handling ipc/shm: handle removed segments gracefully in shm_mmap() intel_scu_ipcutil: underflow in scu_reg_access() mm,thp: khugepaged: call pte flush at the time of collapse dump_stack: avoid potential deadlocks radix-tree: fix oops after radix_tree_iter_retry drivers/hwspinlock: fix race between radix tree insertion and lookup radix-tree: fix race in gang lookup MAINTAINERS: return arch/sh to maintained state, with new maintainers memcg: only free spare array when readers are done numa: fix /proc/<pid>/numa_maps for hugetlbfs on s390 fs/hugetlbfs/inode.c: fix bugs in hugetlb_vmtruncate_list() scripts/bloat-o-meter: fix python3 syntax error dma-debug: switch check from _text to _stext m32r: fix m32104ut_defconfig build fail xhci: Fix list corruption in urb dequeue at host removal Revert "xhci: don't finish a TD if we get a short-transfer event mid TD" iommu/vt-d: Clear PPR bit to ensure we get more page request interrupts iommu/vt-d: Fix 64-bit accesses to 32-bit DMAR_GSTS_REG iommu/vt-d: Fix mm refcounting to hold mm_count not mm_users iommu/amd: Correct the wrong setting of alias DTE in do_attach iommu/vt-d: Don't skip PCI devices when disabling IOTLB Input: vmmouse - fix absolute device registration string_helpers: fix precision loss for some inputs Input: i8042 - add Fujitsu Lifebook U745 to the nomux list Input: elantech - mark protocols v2 and v3 as semi-mt mm: fix regression in remap_file_pages() emulation mm: replace vma_lock_anon_vma with anon_vma_lock_read/write mm: fix mlock accouting libnvdimm: fix namespace object confusion in is_uuid_busy() mm: soft-offline: check return value in second __get_any_page() call perf kvm record/report: 'unprocessable sample' error while recording/reporting guest data KVM: PPC: Fix ONE_REG AltiVec support KVM: PPC: Fix emulation of H_SET_DABR/X on POWER8 KVM: arm/arm64: Fix reference to uninitialised VGIC arm64: dma-mapping: fix handling of devices registered before arch_initcall ARM: OMAP2+: Fix ppa_zero_params and ppa_por_params for rodata ARM: OMAP2+: Fix save_secure_ram_context for rodata ARM: OMAP2+: Fix l2dis_3630 for rodata ARM: OMAP2+: Fix l2_inv_api_params for rodata ARM: OMAP2+: Fix wait_dll_lock_timed for rodata ARM: dts: at91: sama5d4ek: add phy address and IRQ for macb0 ARM: dts: at91: sama5d4 xplained: fix phy0 IRQ type ARM: dts: at91: sama5d4: fix instance id of DBGU ARM: dts: at91: sama5d4 xplained: properly mux phy interrupt ARM: dts: omap5-board-common: enable rtc and charging of backup battery ARM: dts: Fix omap5 PMIC control lines for RTC writes ARM: dts: Fix wl12xx missing clocks that cause hangs ARM: nomadik: fix up SD/MMC DT settings ARM: 8517/1: ICST: avoid arithmetic overflow in icst_hz() ARM: 8519/1: ICST: try other dividends than 1 arm64: mm: avoid calling apply_to_page_range on empty range ARM: mvebu: remove duplicated regulator definition in Armada 388 GP powerpc/ioda: Set "read" permission when "write" is set powerpc/powernv: Fix stale PE primary bus powerpc/eeh: Fix stale cached primary bus powerpc/eeh: Fix PE location code SUNRPC: Fixup socket wait for memory udf: Check output buffer length when converting name to CS0 udf: Prevent buffer overrun with multi-byte characters udf: limit the maximum number of indirect extents in a row pNFS/flexfiles: Fix an XDR encoding bug in layoutreturn nfs: Fix race in __update_open_stateid() pNFS/flexfiles: Fix an Oopsable typo in ff_mirror_match_fh() NFS: Fix attribute cache revalidation cifs: fix erroneous return value cifs_dbg() outputs an uninitialized buffer in cifs_readdir() cifs: fix race between call_async() and reconnect() cifs: Ratelimit kernel log messages iio: inkern: fix a NULL dereference on error iio: pressure: mpl115: fix temperature offset sign iio: light: acpi-als: Report data as processed iio: dac: mcp4725: set iio name property in sysfs iio: add IIO_TRIGGER dependency to STK8BA50 iio: add HAS_IOMEM dependency to VF610_ADC iio-light: Use a signed return type for ltr501_match_samp_freq() iio:adc:ti_am335x_adc Fix buffered mode by identifying as software buffer. iio: adis_buffer: Fix out-of-bounds memory access scsi: fix soft lockup in scsi_remove_target() on module removal SCSI: Add Marvell Console to VPD blacklist scsi_dh_rdac: always retry MODE SELECT on command lock violation drivers/scsi/sg.c: mark VMA as VM_IO to prevent migration SCSI: fix crashes in sd and sr runtime PM iscsi-target: Fix potential dead-lock during node acl delete scsi: add Synology to 1024 sector blacklist klist: fix starting point removed bug in klist iterators tracepoints: Do not trace when cpu is offline tracing: Fix freak link error caused by branch tracer perf tools: tracepoint_error() can receive e=NULL, robustify it tools lib traceevent: Fix output of %llu for 64 bit values read on 32 bit machines ptrace: use fsuid, fsgid, effective creds for fs access checks Btrfs: fix direct IO requests not reporting IO error to user space Btrfs: fix hang on extent buffer lock caused by the inode_paths ioctl Btrfs: fix page reading in extent_same ioctl leading to csum errors Btrfs: fix invalid page accesses in extent_same (dedup) ioctl btrfs: properly set the termination value of ctx->pos in readdir Revert "btrfs: clear PF_NOFREEZE in cleaner_kthread()" Btrfs: fix fitrim discarding device area reserved for boot loader's use btrfs: handle invalid num_stripes in sys_array ext4: don't read blocks from disk after extents being swapped ext4: fix potential integer overflow ext4: fix scheduling in atomic on group checksum failure serial: omap: Prevent DoS using unprivileged ioctl(TIOCSRS485) serial: 8250_pci: Add Intel Broadwell ports tty: Add support for PCIe WCH382 2S multi-IO card pty: make sure super_block is still valid in final /dev/tty close pty: fix possible use after free of tty->driver_data staging/speakup: Use tty_ldisc_ref() for paste kworker phy: twl4030-usb: Fix unbalanced pm_runtime_enable on module reload phy: twl4030-usb: Relase usb phy on unload ALSA: seq: Fix double port list deletion ALSA: seq: Fix leak of pool buffer at concurrent writes ALSA: pcm: Fix rwsem deadlock for non-atomic PCM stream ALSA: hda - Cancel probe work instead of flush at remove x86/mm: Fix vmalloc_fault() to handle large pages properly x86/uaccess/64: Handle the caching of 4-byte nocache copies properly in __copy_user_nocache() x86/uaccess/64: Make the __copy_user_nocache() assembly code more readable x86/mm/pat: Avoid truncation when converting cpa->numpages to address x86/mm: Fix types used in pgprot cacheability flags translations Linux 4.4.2 HID: multitouch: fix input mode switching on some Elan panels mm, vmstat: fix wrong WQ sleep when memory reclaim doesn't make any progress zsmalloc: fix migrate_zspage-zs_free race condition zram: don't call idr_remove() from zram_remove() zram: try vmalloc() after kmalloc() zram/zcomp: use GFP_NOIO to allocate streams rtlwifi: rtl8821ae: Fix 5G failure when EEPROM is incorrectly encoded rtlwifi: rtl8821ae: Fix errors in parameter initialization crypto: marvell/cesa - fix test in mv_cesa_dev_dma_init() crypto: atmel-sha - remove calls of clk_prepare() from atomic contexts crypto: atmel-sha - fix atmel_sha_remove() crypto: algif_skcipher - Do not set MAY_BACKLOG on the async path crypto: algif_skcipher - Do not dereference ctx without socket lock crypto: algif_skcipher - Do not assume that req is unchanged crypto: user - lock crypto_alg_list on alg dump EVM: Use crypto_memneq() for digest comparisons crypto: algif_hash - wait for crypto_ahash_init() to complete crypto: shash - Fix has_key setting crypto: chacha20-ssse3 - Align stack pointer to 64 bytes crypto: caam - make write transactions bufferable on PPC platforms crypto: algif_skcipher - sendmsg SG marking is off by one crypto: algif_skcipher - Load TX SG list after waiting crypto: crc32c - Fix crc32c soft dependency crypto: algif_skcipher - Fix race condition in skcipher_check_key crypto: algif_hash - Fix race condition in hash_check_key crypto: af_alg - Forbid bind(2) when nokey child sockets are present crypto: algif_skcipher - Remove custom release parent function crypto: algif_hash - Remove custom release parent function crypto: af_alg - Allow af_af_alg_release_parent to be called on nokey path ahci: Intel DNV device IDs SATA libata: disable forced PORTS_IMPL for >= AHCI 1.3 crypto: algif_skcipher - Add key check exception for cipher_null crypto: skcipher - Add crypto_skcipher_has_setkey crypto: algif_hash - Require setkey before accept(2) crypto: hash - Add crypto_ahash_has_setkey crypto: algif_skcipher - Add nokey compatibility path crypto: af_alg - Add nokey compatibility path crypto: af_alg - Fix socket double-free when accept fails crypto: af_alg - Disallow bind/setkey/... after accept(2) crypto: algif_skcipher - Require setkey before accept(2) sched: Fix crash in sched_init_numa() ext4 crypto: add missing locking for keyring_key access iommu/io-pgtable-arm: Ensure we free the final level on teardown tty: Fix unsafe ldisc reference via ioctl(TIOCGETD) tty: Retry failed reopen if tty teardown in-progress tty: Wait interruptibly for tty lock on reopen n_tty: Fix unsafe reference to "other" ldisc usb: xhci: apply XHCI_PME_STUCK_QUIRK to Intel Broxton-M platforms usb: xhci: handle both SSIC ports in PME stuck quirk usb: phy: msm: fix error handling in probe. usb: cdc-acm: send zero packet for intel 7260 modem usb: cdc-acm: handle unlinked urb in acm read callback USB: option: fix Cinterion AHxx enumeration USB: serial: option: Adding support for Telit LE922 USB: cp210x: add ID for IAI USB to RS485 adaptor USB: serial: ftdi_sio: add support for Yaesu SCU-18 cable usb: hub: do not clear BOS field during reset device USB: visor: fix null-deref at probe USB: serial: visor: fix crash on detecting device without write_urbs ASoC: rt5645: fix the shift bit of IN1 boost saa7134-alsa: Only frees registered sound cards ALSA: dummy: Implement timer backend switching more safely ALSA: hda - Fix bad dereference of jack object ALSA: hda - Fix speaker output from VAIO AiO machines Revert "ALSA: hda - Fix noise on Gigabyte Z170X mobo" ALSA: hda - Fix static checker warning in patch_hdmi.c ALSA: hda - Add fixup for Mac Mini 7,1 model ALSA: timer: Fix race between stop and interrupt ALSA: timer: Fix wrong instance passed to slave callbacks ALSA: timer: Fix race at concurrent reads ALSA: timer: Fix link corruption due to double start or stop ALSA: timer: Fix leftover link at closing ALSA: timer: Code cleanup ALSA: seq: Fix lockdep warnings due to double mutex locks ALSA: seq: Fix race at closing in virmidi driver ALSA: seq: Fix yet another races among ALSA timer accesses ASoC: dpcm: fix the BE state on hw_free ALSA: pcm: Fix potential deadlock in OSS emulation ALSA: hda/realtek - Support Dell headset mode for ALC225 ALSA: hda/realtek - Support headset mode for ALC225 ALSA: hda/realtek - New codec support of ALC225 ALSA: rawmidi: Fix race at copying & updating the position ALSA: rawmidi: Remove kernel WARNING for NULL user-space buffer check ALSA: rawmidi: Make snd_rawmidi_transmit() race-free ALSA: seq: Degrade the error message for too many opens ALSA: seq: Fix incorrect sanity check at snd_seq_oss_synth_cleanup() ALSA: dummy: Disable switching timer backend via sysfs ALSA: compress: Disable GET_CODEC_CAPS ioctl for some architectures ALSA: hda - disable dynamic clock gating on Broxton before reset ALSA: Add missing dependency on CONFIG_SND_TIMER ALSA: bebob: Use a signed return type for get_formation_index ALSA: usb-audio: avoid freeing umidi object twice ALSA: usb-audio: Add native DSD support for PS Audio NuWave DAC ALSA: usb-audio: Fix OPPO HA-1 vendor ID ALSA: usb-audio: Add quirk for Microsoft LifeCam HD-6000 ALSA: usb-audio: Fix TEAC UD-501/UD-503/NT-503 usb delay hrtimer: Handle remaining time proper for TIME_LOW_RES md/raid: only permit hot-add of compatible integrity profiles media: i2c: Don't export ir-kbd-i2c module alias parisc: Fix __ARCH_SI_PREAMBLE_SIZE parisc: Protect huge page pte changes with spinlocks printk: do cond_resched() between lines while outputting to consoles tracing/stacktrace: Show entire trace if passed in function not found tracing: Fix stacktrace skip depth in trace_buffer_unlock_commit_regs() PCI: Fix minimum allocation address overwrite PCI: host: Mark PCIe/PCI (MSI) IRQ cascade handlers as IRQF_NO_THREAD mtd: nand: assign reasonable default name for NAND drivers wlcore/wl12xx: spi: fix NULL pointer dereference (Oops) wlcore/wl12xx: spi: fix oops on firmware load ocfs2/dlm: clear refmap bit of recovery lock while doing local recovery cleanup ocfs2/dlm: ignore cleaning the migration mle that is inuse ALSA: hda - Implement loopback control switch for Realtek and other codecs block: fix bio splitting on max sectors base/platform: Fix platform drivers with no probe callback HID: usbhid: fix recursive deadlock ocfs2: NFS hangs in __ocfs2_cluster_lock due to race with ocfs2_unblock_lock block: split bios to max possible length NFSv4.1/pnfs: Fixup an lo->plh_block_lgets imbalance in layoutreturn crypto: sun4i-ss - add missing statesize Linux 4.4.1 arm64: kernel: fix architected PMU registers unconditional access arm64: kernel: enforce pmuserenr_el0 initialization and restore arm64: mm: ensure that the zero page is visible to the page table walker arm64: Clear out any singlestep state on a ptrace detach operation powerpc/module: Handle R_PPC64_ENTRY relocations scripts/recordmcount.pl: support data in text section on powerpc powerpc: Make {cmp}xchg* and their atomic_ versions fully ordered powerpc: Make value-returning atomics fully ordered powerpc/tm: Check for already reclaimed tasks batman-adv: Drop immediate orig_node free function batman-adv: Drop immediate batadv_hard_iface free function batman-adv: Drop immediate neigh_ifinfo free function batman-adv: Drop immediate batadv_neigh_node free function batman-adv: Drop immediate batadv_orig_ifinfo free function batman-adv: Avoid recursive call_rcu for batadv_nc_node batman-adv: Avoid recursive call_rcu for batadv_bla_claim team: Replace rcu_read_lock with a mutex in team_vlan_rx_kill_vid net/mlx5_core: Fix trimming down IRQ number bridge: fix lockdep addr_list_lock false positive splat ipv6: update skb->csum when CE mark is propagated net: bpf: reject invalid shifts phonet: properly unshare skbs in phonet_rcv() dwc_eth_qos: Fix dma address for multi-fragment skbs bonding: Prevent IPv6 link local address on enslaved devices net: preserve IP control block during GSO segmentation udp: disallow UFO for sockets with SO_NO_CHECK option net: pktgen: fix null ptr deref in skb allocation sched,cls_flower: set key address type when present tcp_yeah: don't set ssthresh below 2 ipv6: tcp: add rcu locking in tcp_v6_send_synack() net: sctp: prevent writes to cookie_hmac_alg from accessing invalid memory vxlan: fix test which detect duplicate vxlan iface unix: properly account for FDs passed over unix sockets xhci: refuse loading if nousb is used usb: core: lpm: fix usb3_hardware_lpm sysfs node USB: cp210x: add ID for ELV Marble Sound Board 1 rtlwifi: fix memory leak for USB device ASoC: compress: Fix compress device direction check ASoC: wm5110: Fix PGA clear when disabling DRE ALSA: timer: Handle disconnection more safely ALSA: hda - Flush the pending probe work at remove ALSA: hda - Fix missing module loading with model=generic option ALSA: hda - Fix bass pin fixup for ASUS N550JX ALSA: control: Avoid kernel warnings from tlv ioctl with numid 0 ALSA: hrtimer: Fix stall by hrtimer_cancel() ALSA: pcm: Fix snd_pcm_hw_params struct copy in compat mode ALSA: seq: Fix snd_seq_call_port_info_ioctl in compat mode ALSA: hda - Add fixup for Dell Latitidue E6540 ALSA: timer: Fix double unlink of active_list ALSA: timer: Fix race among timer ioctls ALSA: hda - fix the headset mic detection problem for a Dell laptop ALSA: timer: Harden slave timer list handling ALSA: usb-audio: Fix mixer ctl regression of Native Instrument devices ALSA: hda - Fix white noise on Dell Latitude E5550 ALSA: seq: Fix race at timer setup and close ALSA: usb-audio: Avoid calling usb_autopm_put_interface() at disconnect ALSA: seq: Fix missing NULL check at remove_events ioctl ALSA: hda - Fixup inverted internal mic for Lenovo E50-80 ALSA: usb: Add native DSD support for Oppo HA-1 x86/mm: Improve switch_mm() barrier comments x86/mm: Add barriers and document switch_mm()-vs-flush synchronization x86/boot: Double BOOT_HEAP_SIZE to 64KB x86/reboot/quirks: Add iMac10,1 to pci_reboot_dmi_table[] kvm: x86: Fix vmwrite to SECONDARY_VM_EXEC_CONTROL KVM: x86: correctly print #AC in traces KVM: x86: expose MSR_TSC_AUX to userspace x86/xen: don't reset vcpu_info on a cancelled suspend KEYS: Fix keyring ref leak in join_session_keyring() Conflicts: arch/arm64/kernel/perf_event.c drivers/scsi/sd.c sound/core/compress_offload.c Change-Id: I9f77fe42aaae249c24cd6e170202110ab1426878 Signed-off-by: Trilok Soni <tsoni@codeaurora.org>
795 lines
21 KiB
C
795 lines
21 KiB
C
/*
|
|
* linux/mm/mlock.c
|
|
*
|
|
* (C) Copyright 1995 Linus Torvalds
|
|
* (C) Copyright 2002 Christoph Hellwig
|
|
*/
|
|
|
|
#include <linux/capability.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/swapops.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/pagevec.h>
|
|
#include <linux/mempolicy.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/export.h>
|
|
#include <linux/rmap.h>
|
|
#include <linux/mmzone.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/memcontrol.h>
|
|
#include <linux/mm_inline.h>
|
|
|
|
#include "internal.h"
|
|
|
|
int can_do_mlock(void)
|
|
{
|
|
if (rlimit(RLIMIT_MEMLOCK) != 0)
|
|
return 1;
|
|
if (capable(CAP_IPC_LOCK))
|
|
return 1;
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(can_do_mlock);
|
|
|
|
/*
|
|
* Mlocked pages are marked with PageMlocked() flag for efficient testing
|
|
* in vmscan and, possibly, the fault path; and to support semi-accurate
|
|
* statistics.
|
|
*
|
|
* An mlocked page [PageMlocked(page)] is unevictable. As such, it will
|
|
* be placed on the LRU "unevictable" list, rather than the [in]active lists.
|
|
* The unevictable list is an LRU sibling list to the [in]active lists.
|
|
* PageUnevictable is set to indicate the unevictable state.
|
|
*
|
|
* When lazy mlocking via vmscan, it is important to ensure that the
|
|
* vma's VM_LOCKED status is not concurrently being modified, otherwise we
|
|
* may have mlocked a page that is being munlocked. So lazy mlock must take
|
|
* the mmap_sem for read, and verify that the vma really is locked
|
|
* (see mm/rmap.c).
|
|
*/
|
|
|
|
/*
|
|
* LRU accounting for clear_page_mlock()
|
|
*/
|
|
void clear_page_mlock(struct page *page)
|
|
{
|
|
if (!TestClearPageMlocked(page))
|
|
return;
|
|
|
|
mod_zone_page_state(page_zone(page), NR_MLOCK,
|
|
-hpage_nr_pages(page));
|
|
count_vm_event(UNEVICTABLE_PGCLEARED);
|
|
if (!isolate_lru_page(page)) {
|
|
putback_lru_page(page);
|
|
} else {
|
|
/*
|
|
* We lost the race. the page already moved to evictable list.
|
|
*/
|
|
if (PageUnevictable(page))
|
|
count_vm_event(UNEVICTABLE_PGSTRANDED);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Mark page as mlocked if not already.
|
|
* If page on LRU, isolate and putback to move to unevictable list.
|
|
*/
|
|
void mlock_vma_page(struct page *page)
|
|
{
|
|
/* Serialize with page migration */
|
|
BUG_ON(!PageLocked(page));
|
|
|
|
if (!TestSetPageMlocked(page)) {
|
|
mod_zone_page_state(page_zone(page), NR_MLOCK,
|
|
hpage_nr_pages(page));
|
|
count_vm_event(UNEVICTABLE_PGMLOCKED);
|
|
if (!isolate_lru_page(page))
|
|
putback_lru_page(page);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Isolate a page from LRU with optional get_page() pin.
|
|
* Assumes lru_lock already held and page already pinned.
|
|
*/
|
|
static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
|
|
{
|
|
if (PageLRU(page)) {
|
|
struct lruvec *lruvec;
|
|
|
|
lruvec = mem_cgroup_page_lruvec(page, page_zone(page));
|
|
if (getpage)
|
|
get_page(page);
|
|
ClearPageLRU(page);
|
|
del_page_from_lru_list(page, lruvec, page_lru(page));
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Finish munlock after successful page isolation
|
|
*
|
|
* Page must be locked. This is a wrapper for try_to_munlock()
|
|
* and putback_lru_page() with munlock accounting.
|
|
*/
|
|
static void __munlock_isolated_page(struct page *page)
|
|
{
|
|
int ret = SWAP_AGAIN;
|
|
|
|
/*
|
|
* Optimization: if the page was mapped just once, that's our mapping
|
|
* and we don't need to check all the other vmas.
|
|
*/
|
|
if (page_mapcount(page) > 1)
|
|
ret = try_to_munlock(page);
|
|
|
|
/* Did try_to_unlock() succeed or punt? */
|
|
if (ret != SWAP_MLOCK)
|
|
count_vm_event(UNEVICTABLE_PGMUNLOCKED);
|
|
|
|
putback_lru_page(page);
|
|
}
|
|
|
|
/*
|
|
* Accounting for page isolation fail during munlock
|
|
*
|
|
* Performs accounting when page isolation fails in munlock. There is nothing
|
|
* else to do because it means some other task has already removed the page
|
|
* from the LRU. putback_lru_page() will take care of removing the page from
|
|
* the unevictable list, if necessary. vmscan [page_referenced()] will move
|
|
* the page back to the unevictable list if some other vma has it mlocked.
|
|
*/
|
|
static void __munlock_isolation_failed(struct page *page)
|
|
{
|
|
if (PageUnevictable(page))
|
|
__count_vm_event(UNEVICTABLE_PGSTRANDED);
|
|
else
|
|
__count_vm_event(UNEVICTABLE_PGMUNLOCKED);
|
|
}
|
|
|
|
/**
|
|
* munlock_vma_page - munlock a vma page
|
|
* @page - page to be unlocked, either a normal page or THP page head
|
|
*
|
|
* returns the size of the page as a page mask (0 for normal page,
|
|
* HPAGE_PMD_NR - 1 for THP head page)
|
|
*
|
|
* called from munlock()/munmap() path with page supposedly on the LRU.
|
|
* When we munlock a page, because the vma where we found the page is being
|
|
* munlock()ed or munmap()ed, we want to check whether other vmas hold the
|
|
* page locked so that we can leave it on the unevictable lru list and not
|
|
* bother vmscan with it. However, to walk the page's rmap list in
|
|
* try_to_munlock() we must isolate the page from the LRU. If some other
|
|
* task has removed the page from the LRU, we won't be able to do that.
|
|
* So we clear the PageMlocked as we might not get another chance. If we
|
|
* can't isolate the page, we leave it for putback_lru_page() and vmscan
|
|
* [page_referenced()/try_to_unmap()] to deal with.
|
|
*/
|
|
unsigned int munlock_vma_page(struct page *page)
|
|
{
|
|
int nr_pages;
|
|
struct zone *zone = page_zone(page);
|
|
|
|
/* For try_to_munlock() and to serialize with page migration */
|
|
BUG_ON(!PageLocked(page));
|
|
|
|
/*
|
|
* Serialize with any parallel __split_huge_page_refcount() which
|
|
* might otherwise copy PageMlocked to part of the tail pages before
|
|
* we clear it in the head page. It also stabilizes hpage_nr_pages().
|
|
*/
|
|
spin_lock_irq(&zone->lru_lock);
|
|
|
|
nr_pages = hpage_nr_pages(page);
|
|
if (!TestClearPageMlocked(page))
|
|
goto unlock_out;
|
|
|
|
__mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
|
|
|
|
if (__munlock_isolate_lru_page(page, true)) {
|
|
spin_unlock_irq(&zone->lru_lock);
|
|
__munlock_isolated_page(page);
|
|
goto out;
|
|
}
|
|
__munlock_isolation_failed(page);
|
|
|
|
unlock_out:
|
|
spin_unlock_irq(&zone->lru_lock);
|
|
|
|
out:
|
|
return nr_pages - 1;
|
|
}
|
|
|
|
/*
|
|
* convert get_user_pages() return value to posix mlock() error
|
|
*/
|
|
static int __mlock_posix_error_return(long retval)
|
|
{
|
|
if (retval == -EFAULT)
|
|
retval = -ENOMEM;
|
|
else if (retval == -ENOMEM)
|
|
retval = -EAGAIN;
|
|
return retval;
|
|
}
|
|
|
|
/*
|
|
* Prepare page for fast batched LRU putback via putback_lru_evictable_pagevec()
|
|
*
|
|
* The fast path is available only for evictable pages with single mapping.
|
|
* Then we can bypass the per-cpu pvec and get better performance.
|
|
* when mapcount > 1 we need try_to_munlock() which can fail.
|
|
* when !page_evictable(), we need the full redo logic of putback_lru_page to
|
|
* avoid leaving evictable page in unevictable list.
|
|
*
|
|
* In case of success, @page is added to @pvec and @pgrescued is incremented
|
|
* in case that the page was previously unevictable. @page is also unlocked.
|
|
*/
|
|
static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
|
|
int *pgrescued)
|
|
{
|
|
VM_BUG_ON_PAGE(PageLRU(page), page);
|
|
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
|
|
|
if (page_mapcount(page) <= 1 && page_evictable(page)) {
|
|
pagevec_add(pvec, page);
|
|
if (TestClearPageUnevictable(page))
|
|
(*pgrescued)++;
|
|
unlock_page(page);
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Putback multiple evictable pages to the LRU
|
|
*
|
|
* Batched putback of evictable pages that bypasses the per-cpu pvec. Some of
|
|
* the pages might have meanwhile become unevictable but that is OK.
|
|
*/
|
|
static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
|
|
{
|
|
count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec));
|
|
/*
|
|
*__pagevec_lru_add() calls release_pages() so we don't call
|
|
* put_page() explicitly
|
|
*/
|
|
__pagevec_lru_add(pvec);
|
|
count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
|
|
}
|
|
|
|
/*
|
|
* Munlock a batch of pages from the same zone
|
|
*
|
|
* The work is split to two main phases. First phase clears the Mlocked flag
|
|
* and attempts to isolate the pages, all under a single zone lru lock.
|
|
* The second phase finishes the munlock only for pages where isolation
|
|
* succeeded.
|
|
*
|
|
* Note that the pagevec may be modified during the process.
|
|
*/
|
|
static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
|
|
{
|
|
int i;
|
|
int nr = pagevec_count(pvec);
|
|
int delta_munlocked;
|
|
struct pagevec pvec_putback;
|
|
int pgrescued = 0;
|
|
|
|
pagevec_init(&pvec_putback, 0);
|
|
|
|
/* Phase 1: page isolation */
|
|
spin_lock_irq(&zone->lru_lock);
|
|
for (i = 0; i < nr; i++) {
|
|
struct page *page = pvec->pages[i];
|
|
|
|
if (TestClearPageMlocked(page)) {
|
|
/*
|
|
* We already have pin from follow_page_mask()
|
|
* so we can spare the get_page() here.
|
|
*/
|
|
if (__munlock_isolate_lru_page(page, false))
|
|
continue;
|
|
else
|
|
__munlock_isolation_failed(page);
|
|
}
|
|
|
|
/*
|
|
* We won't be munlocking this page in the next phase
|
|
* but we still need to release the follow_page_mask()
|
|
* pin. We cannot do it under lru_lock however. If it's
|
|
* the last pin, __page_cache_release() would deadlock.
|
|
*/
|
|
pagevec_add(&pvec_putback, pvec->pages[i]);
|
|
pvec->pages[i] = NULL;
|
|
}
|
|
delta_munlocked = -nr + pagevec_count(&pvec_putback);
|
|
__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
|
|
spin_unlock_irq(&zone->lru_lock);
|
|
|
|
/* Now we can release pins of pages that we are not munlocking */
|
|
pagevec_release(&pvec_putback);
|
|
|
|
/* Phase 2: page munlock */
|
|
for (i = 0; i < nr; i++) {
|
|
struct page *page = pvec->pages[i];
|
|
|
|
if (page) {
|
|
lock_page(page);
|
|
if (!__putback_lru_fast_prepare(page, &pvec_putback,
|
|
&pgrescued)) {
|
|
/*
|
|
* Slow path. We don't want to lose the last
|
|
* pin before unlock_page()
|
|
*/
|
|
get_page(page); /* for putback_lru_page() */
|
|
__munlock_isolated_page(page);
|
|
unlock_page(page);
|
|
put_page(page); /* from follow_page_mask() */
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Phase 3: page putback for pages that qualified for the fast path
|
|
* This will also call put_page() to return pin from follow_page_mask()
|
|
*/
|
|
if (pagevec_count(&pvec_putback))
|
|
__putback_lru_fast(&pvec_putback, pgrescued);
|
|
}
|
|
|
|
/*
|
|
* Fill up pagevec for __munlock_pagevec using pte walk
|
|
*
|
|
* The function expects that the struct page corresponding to @start address is
|
|
* a non-TPH page already pinned and in the @pvec, and that it belongs to @zone.
|
|
*
|
|
* The rest of @pvec is filled by subsequent pages within the same pmd and same
|
|
* zone, as long as the pte's are present and vm_normal_page() succeeds. These
|
|
* pages also get pinned.
|
|
*
|
|
* Returns the address of the next page that should be scanned. This equals
|
|
* @start + PAGE_SIZE when no page could be added by the pte walk.
|
|
*/
|
|
static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
|
|
struct vm_area_struct *vma, int zoneid, unsigned long start,
|
|
unsigned long end)
|
|
{
|
|
pte_t *pte;
|
|
spinlock_t *ptl;
|
|
|
|
/*
|
|
* Initialize pte walk starting at the already pinned page where we
|
|
* are sure that there is a pte, as it was pinned under the same
|
|
* mmap_sem write op.
|
|
*/
|
|
pte = get_locked_pte(vma->vm_mm, start, &ptl);
|
|
/* Make sure we do not cross the page table boundary */
|
|
end = pgd_addr_end(start, end);
|
|
end = pud_addr_end(start, end);
|
|
end = pmd_addr_end(start, end);
|
|
|
|
/* The page next to the pinned page is the first we will try to get */
|
|
start += PAGE_SIZE;
|
|
while (start < end) {
|
|
struct page *page = NULL;
|
|
pte++;
|
|
if (pte_present(*pte))
|
|
page = vm_normal_page(vma, start, *pte);
|
|
/*
|
|
* Break if page could not be obtained or the page's node+zone does not
|
|
* match
|
|
*/
|
|
if (!page || page_zone_id(page) != zoneid)
|
|
break;
|
|
|
|
get_page(page);
|
|
/*
|
|
* Increase the address that will be returned *before* the
|
|
* eventual break due to pvec becoming full by adding the page
|
|
*/
|
|
start += PAGE_SIZE;
|
|
if (pagevec_add(pvec, page) == 0)
|
|
break;
|
|
}
|
|
pte_unmap_unlock(pte, ptl);
|
|
return start;
|
|
}
|
|
|
|
/*
|
|
* munlock_vma_pages_range() - munlock all pages in the vma range.'
|
|
* @vma - vma containing range to be munlock()ed.
|
|
* @start - start address in @vma of the range
|
|
* @end - end of range in @vma.
|
|
*
|
|
* For mremap(), munmap() and exit().
|
|
*
|
|
* Called with @vma VM_LOCKED.
|
|
*
|
|
* Returns with VM_LOCKED cleared. Callers must be prepared to
|
|
* deal with this.
|
|
*
|
|
* We don't save and restore VM_LOCKED here because pages are
|
|
* still on lru. In unmap path, pages might be scanned by reclaim
|
|
* and re-mlocked by try_to_{munlock|unmap} before we unmap and
|
|
* free them. This will result in freeing mlocked pages.
|
|
*/
|
|
void munlock_vma_pages_range(struct vm_area_struct *vma,
|
|
unsigned long start, unsigned long end)
|
|
{
|
|
vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
|
|
|
|
while (start < end) {
|
|
struct page *page = NULL;
|
|
unsigned int page_mask;
|
|
unsigned long page_increm;
|
|
struct pagevec pvec;
|
|
struct zone *zone;
|
|
int zoneid;
|
|
|
|
pagevec_init(&pvec, 0);
|
|
/*
|
|
* Although FOLL_DUMP is intended for get_dump_page(),
|
|
* it just so happens that its special treatment of the
|
|
* ZERO_PAGE (returning an error instead of doing get_page)
|
|
* suits munlock very well (and if somehow an abnormal page
|
|
* has sneaked into the range, we won't oops here: great).
|
|
*/
|
|
page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
|
|
&page_mask);
|
|
|
|
if (page && !IS_ERR(page)) {
|
|
if (PageTransHuge(page)) {
|
|
lock_page(page);
|
|
/*
|
|
* Any THP page found by follow_page_mask() may
|
|
* have gotten split before reaching
|
|
* munlock_vma_page(), so we need to recompute
|
|
* the page_mask here.
|
|
*/
|
|
page_mask = munlock_vma_page(page);
|
|
unlock_page(page);
|
|
put_page(page); /* follow_page_mask() */
|
|
} else {
|
|
/*
|
|
* Non-huge pages are handled in batches via
|
|
* pagevec. The pin from follow_page_mask()
|
|
* prevents them from collapsing by THP.
|
|
*/
|
|
pagevec_add(&pvec, page);
|
|
zone = page_zone(page);
|
|
zoneid = page_zone_id(page);
|
|
|
|
/*
|
|
* Try to fill the rest of pagevec using fast
|
|
* pte walk. This will also update start to
|
|
* the next page to process. Then munlock the
|
|
* pagevec.
|
|
*/
|
|
start = __munlock_pagevec_fill(&pvec, vma,
|
|
zoneid, start, end);
|
|
__munlock_pagevec(&pvec, zone);
|
|
goto next;
|
|
}
|
|
}
|
|
/* It's a bug to munlock in the middle of a THP page */
|
|
VM_BUG_ON((start >> PAGE_SHIFT) & page_mask);
|
|
page_increm = 1 + page_mask;
|
|
start += page_increm * PAGE_SIZE;
|
|
next:
|
|
cond_resched();
|
|
}
|
|
}
|
|
|
|
/*
|
|
* mlock_fixup - handle mlock[all]/munlock[all] requests.
|
|
*
|
|
* Filters out "special" vmas -- VM_LOCKED never gets set for these, and
|
|
* munlock is a no-op. However, for some special vmas, we go ahead and
|
|
* populate the ptes.
|
|
*
|
|
* For vmas that pass the filters, merge/split as appropriate.
|
|
*/
|
|
static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
|
|
unsigned long start, unsigned long end, vm_flags_t newflags)
|
|
{
|
|
struct mm_struct *mm = vma->vm_mm;
|
|
pgoff_t pgoff;
|
|
int nr_pages;
|
|
int ret = 0;
|
|
int lock = !!(newflags & VM_LOCKED);
|
|
|
|
if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
|
|
is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))
|
|
/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
|
|
goto out;
|
|
|
|
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
|
|
*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
|
|
vma->vm_file, pgoff, vma_policy(vma),
|
|
vma->vm_userfaultfd_ctx, vma_get_anon_name(vma));
|
|
if (*prev) {
|
|
vma = *prev;
|
|
goto success;
|
|
}
|
|
|
|
if (start != vma->vm_start) {
|
|
ret = split_vma(mm, vma, start, 1);
|
|
if (ret)
|
|
goto out;
|
|
}
|
|
|
|
if (end != vma->vm_end) {
|
|
ret = split_vma(mm, vma, end, 0);
|
|
if (ret)
|
|
goto out;
|
|
}
|
|
|
|
success:
|
|
/*
|
|
* Keep track of amount of locked VM.
|
|
*/
|
|
nr_pages = (end - start) >> PAGE_SHIFT;
|
|
if (!lock)
|
|
nr_pages = -nr_pages;
|
|
mm->locked_vm += nr_pages;
|
|
|
|
/*
|
|
* vm_flags is protected by the mmap_sem held in write mode.
|
|
* It's okay if try_to_unmap_one unmaps a page just after we
|
|
* set VM_LOCKED, populate_vma_page_range will bring it back.
|
|
*/
|
|
|
|
if (lock)
|
|
vma->vm_flags = newflags;
|
|
else
|
|
munlock_vma_pages_range(vma, start, end);
|
|
|
|
out:
|
|
*prev = vma;
|
|
return ret;
|
|
}
|
|
|
|
static int apply_vma_lock_flags(unsigned long start, size_t len,
|
|
vm_flags_t flags)
|
|
{
|
|
unsigned long nstart, end, tmp;
|
|
struct vm_area_struct * vma, * prev;
|
|
int error;
|
|
|
|
VM_BUG_ON(offset_in_page(start));
|
|
VM_BUG_ON(len != PAGE_ALIGN(len));
|
|
end = start + len;
|
|
if (end < start)
|
|
return -EINVAL;
|
|
if (end == start)
|
|
return 0;
|
|
vma = find_vma(current->mm, start);
|
|
if (!vma || vma->vm_start > start)
|
|
return -ENOMEM;
|
|
|
|
prev = vma->vm_prev;
|
|
if (start > vma->vm_start)
|
|
prev = vma;
|
|
|
|
for (nstart = start ; ; ) {
|
|
vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
|
|
|
|
newflags |= flags;
|
|
|
|
/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
|
|
tmp = vma->vm_end;
|
|
if (tmp > end)
|
|
tmp = end;
|
|
error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
|
|
if (error)
|
|
break;
|
|
nstart = tmp;
|
|
if (nstart < prev->vm_end)
|
|
nstart = prev->vm_end;
|
|
if (nstart >= end)
|
|
break;
|
|
|
|
vma = prev->vm_next;
|
|
if (!vma || vma->vm_start != nstart) {
|
|
error = -ENOMEM;
|
|
break;
|
|
}
|
|
}
|
|
return error;
|
|
}
|
|
|
|
static int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
|
|
{
|
|
unsigned long locked;
|
|
unsigned long lock_limit;
|
|
int error = -ENOMEM;
|
|
|
|
if (!can_do_mlock())
|
|
return -EPERM;
|
|
|
|
lru_add_drain_all(); /* flush pagevec */
|
|
|
|
len = PAGE_ALIGN(len + (offset_in_page(start)));
|
|
start &= PAGE_MASK;
|
|
|
|
lock_limit = rlimit(RLIMIT_MEMLOCK);
|
|
lock_limit >>= PAGE_SHIFT;
|
|
locked = len >> PAGE_SHIFT;
|
|
|
|
down_write(¤t->mm->mmap_sem);
|
|
|
|
locked += current->mm->locked_vm;
|
|
|
|
/* check against resource limits */
|
|
if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
|
|
error = apply_vma_lock_flags(start, len, flags);
|
|
|
|
up_write(¤t->mm->mmap_sem);
|
|
if (error)
|
|
return error;
|
|
|
|
error = __mm_populate(start, len, 0);
|
|
if (error)
|
|
return __mlock_posix_error_return(error);
|
|
return 0;
|
|
}
|
|
|
|
SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
|
|
{
|
|
return do_mlock(start, len, VM_LOCKED);
|
|
}
|
|
|
|
SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags)
|
|
{
|
|
vm_flags_t vm_flags = VM_LOCKED;
|
|
|
|
if (flags & ~MLOCK_ONFAULT)
|
|
return -EINVAL;
|
|
|
|
if (flags & MLOCK_ONFAULT)
|
|
vm_flags |= VM_LOCKONFAULT;
|
|
|
|
return do_mlock(start, len, vm_flags);
|
|
}
|
|
|
|
SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
|
|
{
|
|
int ret;
|
|
|
|
len = PAGE_ALIGN(len + (offset_in_page(start)));
|
|
start &= PAGE_MASK;
|
|
|
|
down_write(¤t->mm->mmap_sem);
|
|
ret = apply_vma_lock_flags(start, len, 0);
|
|
up_write(¤t->mm->mmap_sem);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Take the MCL_* flags passed into mlockall (or 0 if called from munlockall)
|
|
* and translate into the appropriate modifications to mm->def_flags and/or the
|
|
* flags for all current VMAs.
|
|
*
|
|
* There are a couple of subtleties with this. If mlockall() is called multiple
|
|
* times with different flags, the values do not necessarily stack. If mlockall
|
|
* is called once including the MCL_FUTURE flag and then a second time without
|
|
* it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags.
|
|
*/
|
|
static int apply_mlockall_flags(int flags)
|
|
{
|
|
struct vm_area_struct * vma, * prev = NULL;
|
|
vm_flags_t to_add = 0;
|
|
|
|
current->mm->def_flags &= VM_LOCKED_CLEAR_MASK;
|
|
if (flags & MCL_FUTURE) {
|
|
current->mm->def_flags |= VM_LOCKED;
|
|
|
|
if (flags & MCL_ONFAULT)
|
|
current->mm->def_flags |= VM_LOCKONFAULT;
|
|
|
|
if (!(flags & MCL_CURRENT))
|
|
goto out;
|
|
}
|
|
|
|
if (flags & MCL_CURRENT) {
|
|
to_add |= VM_LOCKED;
|
|
if (flags & MCL_ONFAULT)
|
|
to_add |= VM_LOCKONFAULT;
|
|
}
|
|
|
|
for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
|
|
vm_flags_t newflags;
|
|
|
|
newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
|
|
newflags |= to_add;
|
|
|
|
/* Ignore errors */
|
|
mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
|
|
cond_resched_rcu_qs();
|
|
}
|
|
out:
|
|
return 0;
|
|
}
|
|
|
|
SYSCALL_DEFINE1(mlockall, int, flags)
|
|
{
|
|
unsigned long lock_limit;
|
|
int ret;
|
|
|
|
if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)))
|
|
return -EINVAL;
|
|
|
|
if (!can_do_mlock())
|
|
return -EPERM;
|
|
|
|
if (flags & MCL_CURRENT)
|
|
lru_add_drain_all(); /* flush pagevec */
|
|
|
|
lock_limit = rlimit(RLIMIT_MEMLOCK);
|
|
lock_limit >>= PAGE_SHIFT;
|
|
|
|
ret = -ENOMEM;
|
|
down_write(¤t->mm->mmap_sem);
|
|
|
|
if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
|
|
capable(CAP_IPC_LOCK))
|
|
ret = apply_mlockall_flags(flags);
|
|
up_write(¤t->mm->mmap_sem);
|
|
if (!ret && (flags & MCL_CURRENT))
|
|
mm_populate(0, TASK_SIZE);
|
|
|
|
return ret;
|
|
}
|
|
|
|
SYSCALL_DEFINE0(munlockall)
|
|
{
|
|
int ret;
|
|
|
|
down_write(¤t->mm->mmap_sem);
|
|
ret = apply_mlockall_flags(0);
|
|
up_write(¤t->mm->mmap_sem);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
|
|
* shm segments) get accounted against the user_struct instead.
|
|
*/
|
|
static DEFINE_SPINLOCK(shmlock_user_lock);
|
|
|
|
int user_shm_lock(size_t size, struct user_struct *user)
|
|
{
|
|
unsigned long lock_limit, locked;
|
|
int allowed = 0;
|
|
|
|
locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
lock_limit = rlimit(RLIMIT_MEMLOCK);
|
|
if (lock_limit == RLIM_INFINITY)
|
|
allowed = 1;
|
|
lock_limit >>= PAGE_SHIFT;
|
|
spin_lock(&shmlock_user_lock);
|
|
if (!allowed &&
|
|
locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
|
|
goto out;
|
|
get_uid(user);
|
|
user->locked_shm += locked;
|
|
allowed = 1;
|
|
out:
|
|
spin_unlock(&shmlock_user_lock);
|
|
return allowed;
|
|
}
|
|
|
|
void user_shm_unlock(size_t size, struct user_struct *user)
|
|
{
|
|
spin_lock(&shmlock_user_lock);
|
|
user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
spin_unlock(&shmlock_user_lock);
|
|
free_uid(user);
|
|
}
|