android_kernel_oneplus_msm8998/mm/backing-dev.c
Srinivasarao P 81a6413ed7 Merge android-4.4.127 (d6bbe8b) into msm-4.4
* refs/heads/tmp-d6bbe8b
  Linux 4.4.127
  Revert "ip6_vti: adjust vti mtu according to mtu of lower device"
  net: cavium: liquidio: fix up "Avoid dma_unmap_single on uninitialized ndata"
  spi: davinci: fix up dma_mapping_error() incorrect patch
  Revert "mtip32xx: use runtime tag to initialize command header"
  Revert "cpufreq: Fix governor module removal race"
  Revert "ARM: dts: omap3-n900: Fix the audio CODEC's reset pin"
  Revert "ARM: dts: am335x-pepper: Fix the audio CODEC's reset pin"
  Revert "PCI/MSI: Stop disabling MSI/MSI-X in pci_device_shutdown()"
  nospec: Kill array_index_nospec_mask_check()
  nospec: Move array_index_nospec() parameter checking into separate macro
  net: hns: Fix ethtool private flags
  md/raid10: reset the 'first' at the end of loop
  ARM: dts: am57xx-beagle-x15-common: Add overide powerhold property
  ARM: dts: dra7: Add power hold and power controller properties to palmas
  Documentation: pinctrl: palmas: Add ti,palmas-powerhold-override property definition
  vt: change SGR 21 to follow the standards
  Input: i8042 - enable MUX on Sony VAIO VGN-CS series to fix touchpad
  Input: i8042 - add Lenovo ThinkPad L460 to i8042 reset list
  staging: comedi: ni_mio_common: ack ai fifo error interrupts.
  fs/proc: Stop trying to report thread stacks
  crypto: x86/cast5-avx - fix ECB encryption when long sg follows short one
  crypto: ahash - Fix early termination in hash walk
  parport_pc: Add support for WCH CH382L PCI-E single parallel port card.
  media: usbtv: prevent double free in error case
  mei: remove dev_err message on an unsupported ioctl
  USB: serial: cp210x: add ELDAT Easywave RX09 id
  USB: serial: ftdi_sio: add support for Harman FirmwareHubEmulator
  USB: serial: ftdi_sio: add RT Systems VX-8 cable
  usb: dwc2: Improve gadget state disconnection handling
  scsi: virtio_scsi: always read VPD pages for multiqueue too
  llist: clang: introduce member_address_is_nonnull()
  Bluetooth: Fix missing encryption refresh on Security Request
  netfilter: x_tables: add and use xt_check_proc_name
  netfilter: bridge: ebt_among: add more missing match size checks
  xfrm: Refuse to insert 32 bit userspace socket policies on 64 bit systems
  net: xfrm: use preempt-safe this_cpu_read() in ipcomp_alloc_tfms()
  RDMA/ucma: Introduce safer rdma_addr_size() variants
  RDMA/ucma: Don't allow join attempts for unsupported AF family
  RDMA/ucma: Check that device exists prior to accessing it
  RDMA/ucma: Check that device is connected prior to access it
  RDMA/ucma: Ensure that CM_ID exists prior to access it
  RDMA/ucma: Fix use-after-free access in ucma_close
  RDMA/ucma: Check AF family prior resolving address
  xfrm_user: uncoditionally validate esn replay attribute struct
  arm64: avoid overflow in VA_START and PAGE_OFFSET
  selinux: Remove redundant check for unknown labeling behavior
  netfilter: ctnetlink: Make some parameters integer to avoid enum mismatch
  tty: provide tty_name() even without CONFIG_TTY
  audit: add tty field to LOGIN event
  frv: declare jiffies to be located in the .data section
  jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp
  fs: compat: Remove warning from COMPATIBLE_IOCTL
  selinux: Remove unnecessary check of array base in selinux_set_mapping()
  cpumask: Add helper cpumask_available()
  genirq: Use cpumask_available() for check of cpumask variable
  netfilter: nf_nat_h323: fix logical-not-parentheses warning
  Input: mousedev - fix implicit conversion warning
  dm ioctl: remove double parentheses
  PCI: Make PCI_ROM_ADDRESS_MASK a 32-bit constant
  writeback: fix the wrong congested state variable definition
  ACPI, PCI, irq: remove redundant check for null string pointer
  kprobes/x86: Fix to set RWX bits correctly before releasing trampoline
  usb: gadget: f_hid: fix: Prevent accessing released memory
  usb: gadget: align buffer size when allocating for OUT endpoint
  usb: gadget: fix usb_ep_align_maybe endianness and new usb_ep_align
  usb: gadget: change len to size_t on alloc_ep_req()
  usb: gadget: define free_ep_req as universal function
  partitions/msdos: Unable to mount UFS 44bsd partitions
  perf/hwbp: Simplify the perf-hwbp code, fix documentation
  ALSA: pcm: potential uninitialized return values
  ALSA: pcm: Use dma_bytes as size parameter in dma_mmap_coherent()
  mtd: jedec_probe: Fix crash in jedec_read_mfr()
  Replace #define with enum for better compilation errors.
  Add missing include to drivers/tty/goldfish.c
  Fix whitespace in drivers/tty/goldfish.c
  ANDROID: fuse: Add null terminator to path in canonical path to avoid issue
  ANDROID: sdcardfs: Fix sdcardfs to stop creating cases-sensitive duplicate entries.
  ANDROID: add missing include to pdev_bus
  ANDROID: pdev_bus: replace writel with gf_write_ptr
  ANDROID: Cleanup type casting in goldfish.h
  ANDROID: Include missing headers in goldfish.h
  ANDROID: cpufreq: times: skip printing invalid frequencies
  ANDROID: xt_qtaguid: Remove unnecessary null checks to device's name
  ANDROID: xt_qtaguid: Remove unnecessary null checks to ifa_label
  ANDROID: cpufreq: times: allocate enough space for a uid_entry
  Linux 4.4.126
  net: systemport: Rewrite __bcm_sysport_tx_reclaim()
  net: fec: Fix unbalanced PM runtime calls
  ieee802154: 6lowpan: fix possible NULL deref in lowpan_device_event()
  s390/qeth: on channel error, reject further cmd requests
  s390/qeth: lock read device while queueing next buffer
  s390/qeth: when thread completes, wake up all waiters
  s390/qeth: free netdevice when removing a card
  team: Fix double free in error path
  skbuff: Fix not waking applications when errors are enqueued
  net: Only honor ifindex in IP_PKTINFO if non-0
  netlink: avoid a double skb free in genlmsg_mcast()
  net/iucv: Free memory obtained by kzalloc
  net: ethernet: ti: cpsw: add check for in-band mode setting with RGMII PHY interface
  net: ethernet: arc: Fix a potential memory leak if an optional regulator is deferred
  l2tp: do not accept arbitrary sockets
  ipv6: fix access to non-linear packet in ndisc_fill_redirect_hdr_option()
  dccp: check sk for closed state in dccp_sendmsg()
  net: Fix hlist corruptions in inet_evict_bucket()
  Revert "genirq: Use irqd_get_trigger_type to compare the trigger type for shared IRQs"
  scsi: sg: don't return bogus Sg_requests
  Revert "genirq: Use irqd_get_trigger_type to compare the trigger type for shared IRQs"
  UPSTREAM: drm: virtio-gpu: set atomic flag
  UPSTREAM: drm: virtio-gpu: transfer dumb buffers to host on plane update
  UPSTREAM: drm: virtio-gpu: ensure plane is flushed to host on atomic update
  UPSTREAM: drm: virtio-gpu: get the fb from the plane state for atomic updates
  Linux 4.4.125
  bpf, x64: increase number of passes
  bpf: skip unnecessary capability check
  kbuild: disable clang's default use of -fmerge-all-constants
  staging: lustre: ptlrpc: kfree used instead of kvfree
  perf/x86/intel: Don't accidentally clear high bits in bdw_limit_period()
  x86/entry/64: Don't use IST entry for #BP stack
  x86/boot/64: Verify alignment of the LOAD segment
  x86/build/64: Force the linker to use 2MB page size
  kvm/x86: fix icebp instruction handling
  tty: vt: fix up tabstops properly
  can: cc770: Fix use after free in cc770_tx_interrupt()
  can: cc770: Fix queue stall & dropped RTR reply
  can: cc770: Fix stalls on rt-linux, remove redundant IRQ ack
  staging: ncpfs: memory corruption in ncp_read_kernel()
  mtd: nand: fsl_ifc: Fix nand waitfunc return value
  tracing: probeevent: Fix to support minus offset from symbol
  rtlwifi: rtl8723be: Fix loss of signal
  brcmfmac: fix P2P_DEVICE ethernet address generation
  acpi, numa: fix pxm to online numa node associations
  drm: udl: Properly check framebuffer mmap offsets
  drm/radeon: Don't turn off DP sink when disconnected
  drm/vmwgfx: Fix a destoy-while-held mutex problem.
  x86/mm: implement free pmd/pte page interfaces
  mm/vmalloc: add interfaces to free unmapped page table
  libata: Modify quirks for MX100 to limit NCQ_TRIM quirk to MU01 version
  libata: Make Crucial BX100 500GB LPM quirk apply to all firmware versions
  libata: Apply NOLPM quirk to Crucial M500 480 and 960GB SSDs
  libata: Enable queued TRIM for Samsung SSD 860
  libata: disable LPM for Crucial BX100 SSD 500GB drive
  libata: Apply NOLPM quirk to Crucial MX100 512GB SSDs
  libata: remove WARN() for DMA or PIO command without data
  libata: fix length validation of ATAPI-relayed SCSI commands
  Bluetooth: btusb: Fix quirk for Atheros 1525/QCA6174
  clk: bcm2835: Protect sections updating shared registers
  ahci: Add PCI-id for the Highpoint Rocketraid 644L card
  PCI: Add function 1 DMA alias quirk for Highpoint RocketRAID 644L
  mmc: dw_mmc: fix falling from idmac to PIO mode when dw_mci_reset occurs
  ALSA: hda/realtek - Always immediately update mute LED with pin VREF
  ALSA: aloop: Fix access to not-yet-ready substream via cable
  ALSA: aloop: Sync stale timer before release
  ALSA: usb-audio: Fix parsing descriptor of UAC2 processing unit
  iio: st_pressure: st_accel: pass correct platform data to init
  MIPS: ralink: Remove ralink_halt()
  ANDROID: cpufreq: times: fix proc_time_in_state_show
  dtc: turn off dtc unit address warnings by default
  Linux 4.4.124
  RDMA/ucma: Fix access to non-initialized CM_ID object
  dmaengine: ti-dma-crossbar: Fix event mapping for TPCC_EVT_MUX_60_63
  clk: si5351: Rename internal plls to avoid name collisions
  nfsd4: permit layoutget of executable-only files
  RDMA/ocrdma: Fix permissions for OCRDMA_RESET_STATS
  ip6_vti: adjust vti mtu according to mtu of lower device
  iommu/vt-d: clean up pr_irq if request_threaded_irq fails
  pinctrl: Really force states during suspend/resume
  coresight: Fix disabling of CoreSight TPIU
  pty: cancel pty slave port buf's work in tty_release
  drm/omap: DMM: Check for DMM readiness after successful transaction commit
  vgacon: Set VGA struct resource types
  IB/umem: Fix use of npages/nmap fields
  RDMA/cma: Use correct size when writing netlink stats
  IB/ipoib: Avoid memory leak if the SA returns a different DGID
  mmc: avoid removing non-removable hosts during suspend
  platform/chrome: Use proper protocol transfer function
  cros_ec: fix nul-termination for firmware build info
  media: [RESEND] media: dvb-frontends: Add delay to Si2168 restart
  media: bt8xx: Fix err 'bt878_probe()'
  rtlwifi: rtl_pci: Fix the bug when inactiveps is enabled.
  RDMA/iwpm: Fix uninitialized error code in iwpm_send_mapinfo()
  drm/msm: fix leak in failed get_pages
  media: c8sectpfe: fix potential NULL pointer dereference in c8sectpfe_timer_interrupt
  Bluetooth: hci_qca: Avoid setup failure on missing rampatch
  perf tests kmod-path: Don't fail if compressed modules aren't supported
  rtc: ds1374: wdt: Fix stop/start ioctl always returning -EINVAL
  rtc: ds1374: wdt: Fix issue with timeout scaling from secs to wdt ticks
  cifs: small underflow in cnvrtDosUnixTm()
  net: hns: fix ethtool_get_strings overflow in hns driver
  sm501fb: don't return zero on failure path in sm501fb_start()
  video: fbdev: udlfb: Fix buffer on stack
  tcm_fileio: Prevent information leak for short reads
  ia64: fix module loading for gcc-5.4
  md/raid10: skip spare disk as 'first' disk
  Input: twl4030-pwrbutton - use correct device for irq request
  power: supply: pda_power: move from timer to delayed_work
  bnx2x: Align RX buffers
  drm/nouveau/kms: Increase max retries in scanout position queries.
  ACPI / PMIC: xpower: Fix power_table addresses
  ipmi/watchdog: fix wdog hang on panic waiting for ipmi response
  ARM: DRA7: clockdomain: Change the CLKTRCTRL of CM_PCIE_CLKSTCTRL to SW_WKUP
  mmc: sdhci-of-esdhc: limit SD clock for ls1012a/ls1046a
  staging: wilc1000: fix unchecked return value
  staging: unisys: visorhba: fix s-Par to boot with option CONFIG_VMAP_STACK set to y
  mtip32xx: use runtime tag to initialize command header
  mfd: palmas: Reset the POWERHOLD mux during power off
  mac80211: don't parse encrypted management frames in ieee80211_frame_acked
  Btrfs: send, fix file hole not being preserved due to inline extent
  rndis_wlan: add return value validation
  mt7601u: check return value of alloc_skb
  iio: st_pressure: st_accel: Initialise sensor platform data properly
  NFS: don't try to cross a mountpount when there isn't one there.
  infiniband/uverbs: Fix integer overflows
  scsi: mac_esp: Replace bogus memory barrier with spinlock
  qlcnic: fix unchecked return value
  wan: pc300too: abort path on failure
  mmc: host: omap_hsmmc: checking for NULL instead of IS_ERR()
  openvswitch: Delete conntrack entry clashing with an expectation.
  netfilter: xt_CT: fix refcnt leak on error path
  Fix driver usage of 128B WQEs when WQ_CREATE is V1.
  ASoC: Intel: Skylake: Uninitialized variable in probe_codec()
  IB/mlx4: Change vma from shared to private
  IB/mlx4: Take write semaphore when changing the vma struct
  HSI: ssi_protocol: double free in ssip_pn_xmit()
  IB/ipoib: Update broadcast object if PKey value was changed in index 0
  IB/ipoib: Fix deadlock between ipoib_stop and mcast join flow
  ALSA: hda - Fix headset microphone detection for ASUS N551 and N751
  e1000e: fix timing for 82579 Gigabit Ethernet controller
  tcp: remove poll() flakes with FastOpen
  NFS: Fix missing pg_cleanup after nfs_pageio_cond_complete()
  md/raid10: wait up frozen array in handle_write_completed
  iommu/omap: Register driver before setting IOMMU ops
  ARM: 8668/1: ftrace: Fix dynamic ftrace with DEBUG_RODATA and !FRAME_POINTER
  KVM: PPC: Book3S PR: Exit KVM on failed mapping
  scsi: virtio_scsi: Always try to read VPD pages
  clk: ns2: Correct SDIO bits
  ath: Fix updating radar flags for coutry code India
  spi: dw: Disable clock after unregistering the host
  media/dvb-core: Race condition when writing to CAM
  net: ipv6: send unsolicited NA on admin up
  i2c: i2c-scmi: add a MS HID
  genirq: Use irqd_get_trigger_type to compare the trigger type for shared IRQs
  cpufreq/sh: Replace racy task affinity logic
  ACPI/processor: Replace racy task affinity logic
  ACPI/processor: Fix error handling in __acpi_processor_start()
  time: Change posix clocks ops interfaces to use timespec64
  Input: ar1021_i2c - fix too long name in driver's device table
  rtc: cmos: Do not assume irq 8 for rtc when there are no legacy irqs
  x86: i8259: export legacy_pic symbol
  regulator: anatop: set default voltage selector for pcie
  platform/x86: asus-nb-wmi: Add wapf4 quirk for the X302UA
  staging: android: ashmem: Fix possible deadlock in ashmem_ioctl
  CIFS: Enable encryption during session setup phase
  SMB3: Validate negotiate request must always be signed
  tpm_tis: fix potential buffer overruns caused by bit glitches on the bus
  tpm: fix potential buffer overruns caused by bit glitches on the bus
  BACKPORT, FROMLIST: crypto: arm64/speck - add NEON-accelerated implementation of Speck-XTS
  Linux 4.4.123
  bpf: fix incorrect sign extension in check_alu_op()
  usb: gadget: bdc: 64-bit pointer capability check
  USB: gadget: udc: Add missing platform_device_put() on error in bdc_pci_probe()
  btrfs: Fix use-after-free when cleaning up fs_devs with a single stale device
  btrfs: alloc_chunk: fix DUP stripe size handling
  ARM: dts: LogicPD Torpedo: Fix I2C1 pinmux
  scsi: sg: only check for dxfer_len greater than 256M
  scsi: sg: fix static checker warning in sg_is_valid_dxfer
  scsi: sg: fix SG_DXFER_FROM_DEV transfers
  irqchip/gic-v3-its: Ensure nr_ites >= nr_lpis
  fs/aio: Use RCU accessors for kioctx_table->table[]
  fs/aio: Add explicit RCU grace period when freeing kioctx
  lock_parent() needs to recheck if dentry got __dentry_kill'ed under it
  fs: Teach path_connected to handle nfs filesystems with multiple roots.
  drm/amdgpu/dce: Don't turn off DP sink when disconnected
  ALSA: seq: Clear client entry before deleting else at closing
  ALSA: seq: Fix possible UAF in snd_seq_check_queue()
  ALSA: hda - Revert power_save option default value
  ALSA: pcm: Fix UAF in snd_pcm_oss_get_formats()
  x86/mm: Fix vmalloc_fault to use pXd_large
  x86/vm86/32: Fix POPF emulation
  selftests/x86/entry_from_vm86: Add test cases for POPF
  selftests/x86: Add tests for the STR and SLDT instructions
  selftests/x86: Add tests for User-Mode Instruction Prevention
  selftests/x86/entry_from_vm86: Exit with 1 if we fail
  ima: relax requiring a file signature for new files with zero length
  rcutorture/configinit: Fix build directory error message
  ipvlan: add L2 check for packets arriving via virtual devices
  ASoC: nuc900: Fix a loop timeout test
  mac80211: remove BUG() when interface type is invalid
  mac80211_hwsim: enforce PS_MANUAL_POLL to be set after PS_ENABLED
  agp/intel: Flush all chipset writes after updating the GGTT
  drm/amdkfd: Fix memory leaks in kfd topology
  veth: set peer GSO values
  media: cpia2: Fix a couple off by one bugs
  scsi: dh: add new rdac devices
  scsi: devinfo: apply to HP XP the same flags as Hitachi VSP
  scsi: core: scsi_get_device_flags_keyed(): Always return device flags
  spi: sun6i: disable/unprepare clocks on remove
  tools/usbip: fixes build with musl libc toolchain
  ath10k: fix invalid STS_CAP_OFFSET_MASK
  clk: qcom: msm8916: fix mnd_width for codec_digcodec
  cpufreq: Fix governor module removal race
  ath10k: update tdls teardown state to target
  ARM: dts: omap3-n900: Fix the audio CODEC's reset pin
  ARM: dts: am335x-pepper: Fix the audio CODEC's reset pin
  mtd: nand: fix interpretation of NAND_CMD_NONE in nand_command[_lp]()
  net: xfrm: allow clearing socket xfrm policies.
  test_firmware: fix setting old custom fw path back on exit
  sched: Stop resched_cpu() from sending IPIs to offline CPUs
  sched: Stop switched_to_rt() from sending IPIs to offline CPUs
  ARM: dts: exynos: Correct Trats2 panel reset line
  HID: elo: clear BTN_LEFT mapping
  video/hdmi: Allow "empty" HDMI infoframes
  drm/edid: set ELD connector type in drm_edid_to_eld()
  wil6210: fix memory access violation in wil_memcpy_from/toio_32
  pwm: tegra: Increase precision in PWM rate calculation
  kprobes/x86: Set kprobes pages read-only
  kprobes/x86: Fix kprobe-booster not to boost far call instructions
  scsi: sg: close race condition in sg_remove_sfp_usercontext()
  scsi: sg: check for valid direction before starting the request
  perf session: Don't rely on evlist in pipe mode
  perf inject: Copy events when reordering events in pipe mode
  drivers/perf: arm_pmu: handle no platform_device
  usb: gadget: dummy_hcd: Fix wrong power status bit clear/reset in dummy_hub_control()
  usb: dwc2: Make sure we disconnect the gadget state
  md/raid6: Fix anomily when recovering a single device in RAID6.
  regulator: isl9305: fix array size
  MIPS: r2-on-r6-emu: Clear BLTZALL and BGEZALL debugfs counters
  MIPS: r2-on-r6-emu: Fix BLEZL and BGTZL identification
  MIPS: BPF: Fix multiple problems in JIT skb access helpers.
  MIPS: BPF: Quit clobbering callee saved registers in JIT code.
  coresight: Fixes coresight DT parse to get correct output port ID.
  drm/amdgpu: Fail fb creation from imported dma-bufs. (v2)
  drm/radeon: Fail fb creation from imported dma-bufs.
  video: ARM CLCD: fix dma allocation size
  iommu/iova: Fix underflow bug in __alloc_and_insert_iova_range
  apparmor: Make path_max parameter readonly
  scsi: ses: don't get power status of SES device slot on probe
  fm10k: correctly check if interface is removed
  ALSA: firewire-digi00x: handle all MIDI messages on streaming packets
  reiserfs: Make cancel_old_flush() reliable
  ARM: dts: koelsch: Correct clock frequency of X2 DU clock input
  net/faraday: Add missing include of of.h
  powerpc: Avoid taking a data miss on every userspace instruction miss
  ARM: dts: r8a7791: Correct parent of SSI[0-9] clocks
  ARM: dts: r8a7790: Correct parent of SSI[0-9] clocks
  NFC: nfcmrvl: double free on error path
  NFC: nfcmrvl: Include unaligned.h instead of access_ok.h
  vxlan: vxlan dev should inherit lowerdev's gso_max_size
  drm/vmwgfx: Fixes to vmwgfx_fb
  braille-console: Fix value returned by _braille_console_setup
  bonding: refine bond_fold_stats() wrap detection
  f2fs: relax node version check for victim data in gc
  blk-throttle: make sure expire time isn't too big
  mm: Fix false-positive VM_BUG_ON() in page_cache_{get,add}_speculative()
  driver: (adm1275) set the m,b and R coefficients correctly for power
  dmaengine: imx-sdma: add 1ms delay to ensure SDMA channel is stopped
  tcp: sysctl: Fix a race to avoid unexpected 0 window from space
  spi: omap2-mcspi: poll OMAP2_MCSPI_CHSTAT_RXS for PIO transfer
  ASoC: rcar: ssi: don't set SSICR.CKDV = 000 with SSIWSR.CONT
  sched: act_csum: don't mangle TCP and UDP GSO packets
  Input: qt1070 - add OF device ID table
  sysrq: Reset the watchdog timers while displaying high-resolution timers
  timers, sched_clock: Update timeout for clock wrap
  media: i2c/soc_camera: fix ov6650 sensor getting wrong clock
  scsi: ipr: Fix missed EH wakeup
  solo6x10: release vb2 buffers in solo_stop_streaming()
  of: fix of_device_get_modalias returned length when truncating buffers
  batman-adv: handle race condition for claims between gateways
  ARM: dts: Adjust moxart IRQ controller and flags
  net/8021q: create device with all possible features in wanted_features
  HID: clamp input to logical range if no null state
  perf probe: Return errno when not hitting any event
  ath10k: disallow DFS simulation if DFS channel is not enabled
  drm: Defer disabling the vblank IRQ until the next interrupt (for instant-off)
  drivers: net: xgene: Fix hardware checksum setting
  perf tools: Make perf_event__synthesize_mmap_events() scale
  i40e: fix ethtool to get EEPROM data from X722 interface
  i40e: Acquire NVM lock before reads on all devices
  perf sort: Fix segfault with basic block 'cycles' sort dimension
  selinux: check for address length in selinux_socket_bind()
  PCI/MSI: Stop disabling MSI/MSI-X in pci_device_shutdown()
  ath10k: fix a warning during channel switch with multiple vaps
  drm: qxl: Don't alloc fbdev if emulation is not supported
  HID: reject input outside logical range only if null state is set
  staging: wilc1000: add check for kmalloc allocation failure.
  staging: speakup: Replace BUG_ON() with WARN_ON().
  Input: tsc2007 - check for presence and power down tsc2007 during probe
  blkcg: fix double free of new_blkg in blkcg_init_queue
  ANDROID: cpufreq: times: avoid prematurely freeing uid_entry
  ANDROID: Use standard logging functions in goldfish_pipe
  ANDROID: Fix whitespace in goldfish
  staging: android: ashmem: Fix possible deadlock in ashmem_ioctl
  llist: clang: introduce member_address_is_nonnull()
  Linux 4.4.122
  fixup: sctp: verify size of a new chunk in _sctp_make_chunk()
  serial: 8250_pci: Add Brainboxes UC-260 4 port serial device
  usb: gadget: f_fs: Fix use-after-free in ffs_fs_kill_sb()
  usb: usbmon: Read text within supplied buffer size
  USB: usbmon: remove assignment from IS_ERR argument
  usb: quirks: add control message delay for 1b1c:1b20
  USB: storage: Add JMicron bridge 152d:2567 to unusual_devs.h
  staging: android: ashmem: Fix lockdep issue during llseek
  staging: comedi: fix comedi_nsamples_left.
  uas: fix comparison for error code
  tty/serial: atmel: add new version check for usart
  serial: sh-sci: prevent lockup on full TTY buffers
  x86: Treat R_X86_64_PLT32 as R_X86_64_PC32
  x86/module: Detect and skip invalid relocations
  Revert "ARM: dts: LogicPD Torpedo: Fix I2C1 pinmux"
  NFS: Fix an incorrect type in struct nfs_direct_req
  scsi: qla2xxx: Replace fcport alloc with qla2x00_alloc_fcport
  ubi: Fix race condition between ubi volume creation and udev
  ext4: inplace xattr block update fails to deduplicate blocks
  netfilter: x_tables: pack percpu counter allocations
  netfilter: x_tables: pass xt_counters struct to counter allocator
  netfilter: x_tables: pass xt_counters struct instead of packet counter
  netfilter: use skb_to_full_sk in ip_route_me_harder
  netfilter: ipv6: fix use-after-free Write in nf_nat_ipv6_manip_pkt
  netfilter: bridge: ebt_among: add missing match size checks
  netfilter: ebtables: CONFIG_COMPAT: don't trust userland offsets
  netfilter: IDLETIMER: be syzkaller friendly
  netfilter: nat: cope with negative port range
  netfilter: x_tables: fix missing timer initialization in xt_LED
  netfilter: add back stackpointer size checks
  tc358743: fix register i2c_rd/wr function fix
  Input: tca8418_keypad - remove double read of key event register
  ARM: omap2: hide omap3_save_secure_ram on non-OMAP3 builds
  netfilter: nfnetlink_queue: fix timestamp attribute
  watchdog: hpwdt: fix unused variable warning
  watchdog: hpwdt: Check source of NMI
  watchdog: hpwdt: SMBIOS check
  nospec: Include <asm/barrier.h> dependency
  ALSA: hda: add dock and led support for HP ProBook 640 G2
  ALSA: hda: add dock and led support for HP EliteBook 820 G3
  ALSA: seq: More protection for concurrent write and ioctl races
  ALSA: seq: Don't allow resizing pool in use
  ALSA: hda/realtek - Fix dock line-out volume on Dell Precision 7520
  x86/MCE: Serialize sysfs changes
  bcache: don't attach backing with duplicate UUID
  kbuild: Handle builtin dtb file names containing hyphens
  loop: Fix lost writes caused by missing flag
  Input: matrix_keypad - fix race when disabling interrupts
  MIPS: OCTEON: irq: Check for null return on kzalloc allocation
  MIPS: ath25: Check for kzalloc allocation failure
  MIPS: BMIPS: Do not mask IPIs during suspend
  drm/amdgpu: fix KV harvesting
  drm/radeon: fix KV harvesting
  drm/amdgpu: Notify sbios device ready before send request
  drm/amdgpu: Fix deadlock on runtime suspend
  drm/radeon: Fix deadlock on runtime suspend
  drm/nouveau: Fix deadlock on runtime suspend
  drm: Allow determining if current task is output poll worker
  workqueue: Allow retrieval of current task's work struct
  scsi: qla2xxx: Fix NULL pointer crash due to active timer for ABTS
  RDMA/mlx5: Fix integer overflow while resizing CQ
  RDMA/ucma: Check that user doesn't overflow QP state
  RDMA/ucma: Limit possible option size
  ANDROID: ranchu: 32 bit framebuffer support
  ANDROID: Address checkpatch warnings in goldfishfb
  ANDROID: Address checkpatch.pl warnings in goldfish_pipe
  ANDROID: sdcardfs: fix lock issue on 32 bit/SMP architectures
  ANDROID: goldfish: Fix typo in goldfish_cmd_locked() call
  ANDROID: Address checkpatch.pl warnings in goldfish_pipe_v2
  FROMLIST: f2fs: don't put dentry page in pagecache into highmem
  Linux 4.4.121
  btrfs: preserve i_mode if __btrfs_set_acl() fails
  bpf, x64: implement retpoline for tail call
  dm io: fix duplicate bio completion due to missing ref count
  mpls, nospec: Sanitize array index in mpls_label_ok()
  net: mpls: Pull common label check into helper
  sctp: verify size of a new chunk in _sctp_make_chunk()
  s390/qeth: fix IPA command submission race
  s390/qeth: fix SETIP command handling
  sctp: fix dst refcnt leak in sctp_v6_get_dst()
  sctp: fix dst refcnt leak in sctp_v4_get_dst
  udplite: fix partial checksum initialization
  ppp: prevent unregistered channels from connecting to PPP units
  netlink: ensure to loop over all netns in genlmsg_multicast_allns()
  net: ipv4: don't allow setting net.ipv4.route.min_pmtu below 68
  net: fix race on decreasing number of TX queues
  ipv6 sit: work around bogus gcc-8 -Wrestrict warning
  hdlc_ppp: carrier detect ok, don't turn off negotiation
  fib_semantics: Don't match route with mismatching tclassid
  bridge: check brport attr show in brport_show
  Revert "led: core: Fix brightness setting when setting delay_off=0"
  x86/spectre: Fix an error message
  leds: do not overflow sysfs buffer in led_trigger_show
  x86/apic/vector: Handle legacy irq data correctly
  ARM: dts: LogicPD Torpedo: Fix I2C1 pinmux
  btrfs: Don't clear SGID when inheriting ACLs
  x86/syscall: Sanitize syscall table de-references under speculation fix
  KVM: mmu: Fix overlap between public and private memslots
  ARM: mvebu: Fix broken PL310_ERRATA_753970 selects
  nospec: Allow index argument to have const-qualified type
  media: m88ds3103: don't call a non-initalized function
  cpufreq: s3c24xx: Fix broken s3c_cpufreq_init()
  ALSA: hda: Add a power_save blacklist
  ALSA: usb-audio: Add a quirck for B&W PX headphones
  tpm_i2c_nuvoton: fix potential buffer overruns caused by bit glitches on the bus
  tpm_i2c_infineon: fix potential buffer overruns caused by bit glitches on the bus
  tpm: st33zp24: fix potential buffer overruns caused by bit glitches on the bus
  ANDROID: Delete the goldfish_nand driver.
  ANDROID: Add input support for Android Wear.
  ANDROID: proc: fix config & includes for /proc/uid
  FROMLIST: ARM: amba: Don't read past the end of sysfs "driver_override" buffer
  UPSTREAM: ANDROID: binder: remove WARN() for redundant txn error
  ANDROID: cpufreq: times: Add missing includes
  ANDROID: cpufreq: Add time_in_state to /proc/uid directories
  ANDROID: proc: Add /proc/uid directory
  ANDROID: cpufreq: times: track per-uid time in state
  ANDROID: cpufreq: track per-task time in state

Conflicts:
	drivers/gpu/drm/msm/msm_gem.c
	drivers/net/wireless/ath/regd.c
	kernel/sched/core.c

Change-Id: I9bb7b5a062415da6925a5a56a34e6eb066a53320
Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
2018-04-20 12:27:57 +05:30

1091 lines
27 KiB
C

#include <linux/wait.h>
#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/writeback.h>
#include <linux/device.h>
#include <trace/events/writeback.h>
static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
struct backing_dev_info noop_backing_dev_info = {
.name = "noop",
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
EXPORT_SYMBOL_GPL(noop_backing_dev_info);
static struct class *bdi_class;
/*
* bdi_lock protects updates to bdi_list. bdi_list has RCU reader side
* locking.
*/
DEFINE_SPINLOCK(bdi_lock);
LIST_HEAD(bdi_list);
/* bdi_wq serves all asynchronous writeback tasks */
struct workqueue_struct *bdi_wq;
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#include <linux/seq_file.h>
static struct dentry *bdi_debug_root;
static void bdi_debug_init(void)
{
bdi_debug_root = debugfs_create_dir("bdi", NULL);
}
static int bdi_debug_stats_show(struct seq_file *m, void *v)
{
struct backing_dev_info *bdi = m->private;
struct bdi_writeback *wb = &bdi->wb;
unsigned long background_thresh;
unsigned long dirty_thresh;
unsigned long wb_thresh;
unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
struct inode *inode;
nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
spin_lock(&wb->list_lock);
list_for_each_entry(inode, &wb->b_dirty, i_io_list)
nr_dirty++;
list_for_each_entry(inode, &wb->b_io, i_io_list)
nr_io++;
list_for_each_entry(inode, &wb->b_more_io, i_io_list)
nr_more_io++;
list_for_each_entry(inode, &wb->b_dirty_time, i_io_list)
if (inode->i_state & I_DIRTY_TIME)
nr_dirty_time++;
spin_unlock(&wb->list_lock);
global_dirty_limits(&background_thresh, &dirty_thresh);
wb_thresh = wb_calc_thresh(wb, dirty_thresh);
#define K(x) ((x) << (PAGE_SHIFT - 10))
seq_printf(m,
"BdiWriteback: %10lu kB\n"
"BdiReclaimable: %10lu kB\n"
"BdiDirtyThresh: %10lu kB\n"
"DirtyThresh: %10lu kB\n"
"BackgroundThresh: %10lu kB\n"
"BdiDirtied: %10lu kB\n"
"BdiWritten: %10lu kB\n"
"BdiWriteBandwidth: %10lu kBps\n"
"b_dirty: %10lu\n"
"b_io: %10lu\n"
"b_more_io: %10lu\n"
"b_dirty_time: %10lu\n"
"bdi_list: %10u\n"
"state: %10lx\n",
(unsigned long) K(wb_stat(wb, WB_WRITEBACK)),
(unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)),
K(wb_thresh),
K(dirty_thresh),
K(background_thresh),
(unsigned long) K(wb_stat(wb, WB_DIRTIED)),
(unsigned long) K(wb_stat(wb, WB_WRITTEN)),
(unsigned long) K(wb->write_bandwidth),
nr_dirty,
nr_io,
nr_more_io,
nr_dirty_time,
!list_empty(&bdi->bdi_list), bdi->wb.state);
#undef K
return 0;
}
static int bdi_debug_stats_open(struct inode *inode, struct file *file)
{
return single_open(file, bdi_debug_stats_show, inode->i_private);
}
static const struct file_operations bdi_debug_stats_fops = {
.open = bdi_debug_stats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
{
bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,
bdi, &bdi_debug_stats_fops);
}
static void bdi_debug_unregister(struct backing_dev_info *bdi)
{
debugfs_remove(bdi->debug_stats);
debugfs_remove(bdi->debug_dir);
}
#else
static inline void bdi_debug_init(void)
{
}
static inline void bdi_debug_register(struct backing_dev_info *bdi,
const char *name)
{
}
static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
{
}
#endif
static ssize_t read_ahead_kb_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct backing_dev_info *bdi = dev_get_drvdata(dev);
unsigned long read_ahead_kb;
ssize_t ret;
ret = kstrtoul(buf, 10, &read_ahead_kb);
if (ret < 0)
return ret;
bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);
return count;
}
#define K(pages) ((pages) << (PAGE_SHIFT - 10))
#define BDI_SHOW(name, expr) \
static ssize_t name##_show(struct device *dev, \
struct device_attribute *attr, char *page) \
{ \
struct backing_dev_info *bdi = dev_get_drvdata(dev); \
\
return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr); \
} \
static DEVICE_ATTR_RW(name);
BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))
static ssize_t min_ratio_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct backing_dev_info *bdi = dev_get_drvdata(dev);
unsigned int ratio;
ssize_t ret;
ret = kstrtouint(buf, 10, &ratio);
if (ret < 0)
return ret;
ret = bdi_set_min_ratio(bdi, ratio);
if (!ret)
ret = count;
return ret;
}
BDI_SHOW(min_ratio, bdi->min_ratio)
static ssize_t max_ratio_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct backing_dev_info *bdi = dev_get_drvdata(dev);
unsigned int ratio;
ssize_t ret;
ret = kstrtouint(buf, 10, &ratio);
if (ret < 0)
return ret;
ret = bdi_set_max_ratio(bdi, ratio);
if (!ret)
ret = count;
return ret;
}
BDI_SHOW(max_ratio, bdi->max_ratio)
static ssize_t stable_pages_required_show(struct device *dev,
struct device_attribute *attr,
char *page)
{
struct backing_dev_info *bdi = dev_get_drvdata(dev);
return snprintf(page, PAGE_SIZE-1, "%d\n",
bdi_cap_stable_pages_required(bdi) ? 1 : 0);
}
static DEVICE_ATTR_RO(stable_pages_required);
static struct attribute *bdi_dev_attrs[] = {
&dev_attr_read_ahead_kb.attr,
&dev_attr_min_ratio.attr,
&dev_attr_max_ratio.attr,
&dev_attr_stable_pages_required.attr,
NULL,
};
ATTRIBUTE_GROUPS(bdi_dev);
static __init int bdi_class_init(void)
{
bdi_class = class_create(THIS_MODULE, "bdi");
if (IS_ERR(bdi_class))
return PTR_ERR(bdi_class);
bdi_class->dev_groups = bdi_dev_groups;
bdi_debug_init();
return 0;
}
postcore_initcall(bdi_class_init);
static int __init default_bdi_init(void)
{
int err;
bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
WQ_UNBOUND | WQ_SYSFS, 0);
if (!bdi_wq)
return -ENOMEM;
err = bdi_init(&noop_backing_dev_info);
return err;
}
subsys_initcall(default_bdi_init);
/*
* This function is used when the first inode for this wb is marked dirty. It
* wakes-up the corresponding bdi thread which should then take care of the
* periodic background write-out of dirty inodes. Since the write-out would
* starts only 'dirty_writeback_interval' centisecs from now anyway, we just
* set up a timer which wakes the bdi thread up later.
*
* Note, we wouldn't bother setting up the timer, but this function is on the
* fast-path (used by '__mark_inode_dirty()'), so we save few context switches
* by delaying the wake-up.
*
* We have to be careful not to postpone flush work if it is scheduled for
* earlier. Thus we use queue_delayed_work().
*/
void wb_wakeup_delayed(struct bdi_writeback *wb)
{
unsigned long timeout;
timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
spin_lock_bh(&wb->work_lock);
if (test_bit(WB_registered, &wb->state))
queue_delayed_work(bdi_wq, &wb->dwork, timeout);
spin_unlock_bh(&wb->work_lock);
}
/*
* Initial write bandwidth: 100 MB/s
*/
#define INIT_BW (100 << (20 - PAGE_SHIFT))
static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
int blkcg_id, gfp_t gfp)
{
int i, err;
memset(wb, 0, sizeof(*wb));
wb->bdi = bdi;
wb->last_old_flush = jiffies;
INIT_LIST_HEAD(&wb->b_dirty);
INIT_LIST_HEAD(&wb->b_io);
INIT_LIST_HEAD(&wb->b_more_io);
INIT_LIST_HEAD(&wb->b_dirty_time);
spin_lock_init(&wb->list_lock);
wb->bw_time_stamp = jiffies;
wb->balanced_dirty_ratelimit = INIT_BW;
wb->dirty_ratelimit = INIT_BW;
wb->write_bandwidth = INIT_BW;
wb->avg_write_bandwidth = INIT_BW;
spin_lock_init(&wb->work_lock);
INIT_LIST_HEAD(&wb->work_list);
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
if (!wb->congested)
return -ENOMEM;
err = fprop_local_init_percpu(&wb->completions, gfp);
if (err)
goto out_put_cong;
for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
err = percpu_counter_init(&wb->stat[i], 0, gfp);
if (err)
goto out_destroy_stat;
}
return 0;
out_destroy_stat:
while (--i)
percpu_counter_destroy(&wb->stat[i]);
fprop_local_destroy_percpu(&wb->completions);
out_put_cong:
wb_congested_put(wb->congested);
return err;
}
/*
* Remove bdi from the global list and shutdown any threads we have running
*/
static void wb_shutdown(struct bdi_writeback *wb)
{
/* Make sure nobody queues further work */
spin_lock_bh(&wb->work_lock);
if (!test_and_clear_bit(WB_registered, &wb->state)) {
spin_unlock_bh(&wb->work_lock);
return;
}
spin_unlock_bh(&wb->work_lock);
/*
* Drain work list and shutdown the delayed_work. !WB_registered
* tells wb_workfn() that @wb is dying and its work_list needs to
* be drained no matter what.
*/
mod_delayed_work(bdi_wq, &wb->dwork, 0);
flush_delayed_work(&wb->dwork);
WARN_ON(!list_empty(&wb->work_list));
}
static void wb_exit(struct bdi_writeback *wb)
{
int i;
WARN_ON(delayed_work_pending(&wb->dwork));
for (i = 0; i < NR_WB_STAT_ITEMS; i++)
percpu_counter_destroy(&wb->stat[i]);
fprop_local_destroy_percpu(&wb->completions);
wb_congested_put(wb->congested);
}
#ifdef CONFIG_CGROUP_WRITEBACK
#include <linux/memcontrol.h>
/*
* cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
* blkcg->cgwb_list, and memcg->cgwb_list. bdi->cgwb_tree is also RCU
* protected. cgwb_release_wait is used to wait for the completion of cgwb
* releases from bdi destruction path.
*/
static DEFINE_SPINLOCK(cgwb_lock);
static DECLARE_WAIT_QUEUE_HEAD(cgwb_release_wait);
/**
* wb_congested_get_create - get or create a wb_congested
* @bdi: associated bdi
* @blkcg_id: ID of the associated blkcg
* @gfp: allocation mask
*
* Look up the wb_congested for @blkcg_id on @bdi. If missing, create one.
* The returned wb_congested has its reference count incremented. Returns
* NULL on failure.
*/
struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
struct bdi_writeback_congested *new_congested = NULL, *congested;
struct rb_node **node, *parent;
unsigned long flags;
retry:
spin_lock_irqsave(&cgwb_lock, flags);
node = &bdi->cgwb_congested_tree.rb_node;
parent = NULL;
while (*node != NULL) {
parent = *node;
congested = container_of(parent, struct bdi_writeback_congested,
rb_node);
if (congested->blkcg_id < blkcg_id)
node = &parent->rb_left;
else if (congested->blkcg_id > blkcg_id)
node = &parent->rb_right;
else
goto found;
}
if (new_congested) {
/* !found and storage for new one already allocated, insert */
congested = new_congested;
new_congested = NULL;
rb_link_node(&congested->rb_node, parent, node);
rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
goto found;
}
spin_unlock_irqrestore(&cgwb_lock, flags);
/* allocate storage for new one and retry */
new_congested = kzalloc(sizeof(*new_congested), gfp);
if (!new_congested)
return NULL;
atomic_set(&new_congested->refcnt, 0);
new_congested->bdi = bdi;
new_congested->blkcg_id = blkcg_id;
goto retry;
found:
atomic_inc(&congested->refcnt);
spin_unlock_irqrestore(&cgwb_lock, flags);
kfree(new_congested);
return congested;
}
/**
* wb_congested_put - put a wb_congested
* @congested: wb_congested to put
*
* Put @congested and destroy it if the refcnt reaches zero.
*/
void wb_congested_put(struct bdi_writeback_congested *congested)
{
unsigned long flags;
local_irq_save(flags);
if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
local_irq_restore(flags);
return;
}
/* bdi might already have been destroyed leaving @congested unlinked */
if (congested->bdi) {
rb_erase(&congested->rb_node,
&congested->bdi->cgwb_congested_tree);
congested->bdi = NULL;
}
spin_unlock_irqrestore(&cgwb_lock, flags);
kfree(congested);
}
static void cgwb_release_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
release_work);
struct backing_dev_info *bdi = wb->bdi;
spin_lock_irq(&cgwb_lock);
list_del_rcu(&wb->bdi_node);
spin_unlock_irq(&cgwb_lock);
wb_shutdown(wb);
css_put(wb->memcg_css);
css_put(wb->blkcg_css);
fprop_local_destroy_percpu(&wb->memcg_completions);
percpu_ref_exit(&wb->refcnt);
wb_exit(wb);
kfree_rcu(wb, rcu);
if (atomic_dec_and_test(&bdi->usage_cnt))
wake_up_all(&cgwb_release_wait);
}
static void cgwb_release(struct percpu_ref *refcnt)
{
struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback,
refcnt);
schedule_work(&wb->release_work);
}
static void cgwb_kill(struct bdi_writeback *wb)
{
lockdep_assert_held(&cgwb_lock);
WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id));
list_del(&wb->memcg_node);
list_del(&wb->blkcg_node);
percpu_ref_kill(&wb->refcnt);
}
static int cgwb_create(struct backing_dev_info *bdi,
struct cgroup_subsys_state *memcg_css, gfp_t gfp)
{
struct mem_cgroup *memcg;
struct cgroup_subsys_state *blkcg_css;
struct blkcg *blkcg;
struct list_head *memcg_cgwb_list, *blkcg_cgwb_list;
struct bdi_writeback *wb;
unsigned long flags;
int ret = 0;
memcg = mem_cgroup_from_css(memcg_css);
blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
blkcg = css_to_blkcg(blkcg_css);
memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
blkcg_cgwb_list = &blkcg->cgwb_list;
/* look up again under lock and discard on blkcg mismatch */
spin_lock_irqsave(&cgwb_lock, flags);
wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
if (wb && wb->blkcg_css != blkcg_css) {
cgwb_kill(wb);
wb = NULL;
}
spin_unlock_irqrestore(&cgwb_lock, flags);
if (wb)
goto out_put;
/* need to create a new one */
wb = kmalloc(sizeof(*wb), gfp);
if (!wb)
return -ENOMEM;
ret = wb_init(wb, bdi, blkcg_css->id, gfp);
if (ret)
goto err_free;
ret = percpu_ref_init(&wb->refcnt, cgwb_release, 0, gfp);
if (ret)
goto err_wb_exit;
ret = fprop_local_init_percpu(&wb->memcg_completions, gfp);
if (ret)
goto err_ref_exit;
wb->memcg_css = memcg_css;
wb->blkcg_css = blkcg_css;
INIT_WORK(&wb->release_work, cgwb_release_workfn);
set_bit(WB_registered, &wb->state);
/*
* The root wb determines the registered state of the whole bdi and
* memcg_cgwb_list and blkcg_cgwb_list's next pointers indicate
* whether they're still online. Don't link @wb if any is dead.
* See wb_memcg_offline() and wb_blkcg_offline().
*/
ret = -ENODEV;
spin_lock_irqsave(&cgwb_lock, flags);
if (test_bit(WB_registered, &bdi->wb.state) &&
blkcg_cgwb_list->next && memcg_cgwb_list->next) {
/* we might have raced another instance of this function */
ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
if (!ret) {
atomic_inc(&bdi->usage_cnt);
list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
list_add(&wb->memcg_node, memcg_cgwb_list);
list_add(&wb->blkcg_node, blkcg_cgwb_list);
css_get(memcg_css);
css_get(blkcg_css);
}
}
spin_unlock_irqrestore(&cgwb_lock, flags);
if (ret) {
if (ret == -EEXIST)
ret = 0;
goto err_fprop_exit;
}
goto out_put;
err_fprop_exit:
fprop_local_destroy_percpu(&wb->memcg_completions);
err_ref_exit:
percpu_ref_exit(&wb->refcnt);
err_wb_exit:
wb_exit(wb);
err_free:
kfree(wb);
out_put:
css_put(blkcg_css);
return ret;
}
/**
* wb_get_create - get wb for a given memcg, create if necessary
* @bdi: target bdi
* @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref)
* @gfp: allocation mask to use
*
* Try to get the wb for @memcg_css on @bdi. If it doesn't exist, try to
* create one. The returned wb has its refcount incremented.
*
* This function uses css_get() on @memcg_css and thus expects its refcnt
* to be positive on invocation. IOW, rcu_read_lock() protection on
* @memcg_css isn't enough. try_get it before calling this function.
*
* A wb is keyed by its associated memcg. As blkcg implicitly enables
* memcg on the default hierarchy, memcg association is guaranteed to be
* more specific (equal or descendant to the associated blkcg) and thus can
* identify both the memcg and blkcg associations.
*
* Because the blkcg associated with a memcg may change as blkcg is enabled
* and disabled closer to root in the hierarchy, each wb keeps track of
* both the memcg and blkcg associated with it and verifies the blkcg on
* each lookup. On mismatch, the existing wb is discarded and a new one is
* created.
*/
struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
struct cgroup_subsys_state *memcg_css,
gfp_t gfp)
{
struct bdi_writeback *wb;
might_sleep_if(gfpflags_allow_blocking(gfp));
if (!memcg_css->parent)
return &bdi->wb;
do {
rcu_read_lock();
wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
if (wb) {
struct cgroup_subsys_state *blkcg_css;
/* see whether the blkcg association has changed */
blkcg_css = cgroup_get_e_css(memcg_css->cgroup,
&io_cgrp_subsys);
if (unlikely(wb->blkcg_css != blkcg_css ||
!wb_tryget(wb)))
wb = NULL;
css_put(blkcg_css);
}
rcu_read_unlock();
} while (!wb && !cgwb_create(bdi, memcg_css, gfp));
return wb;
}
static int cgwb_bdi_init(struct backing_dev_info *bdi)
{
int ret;
INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
bdi->cgwb_congested_tree = RB_ROOT;
atomic_set(&bdi->usage_cnt, 1);
ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
if (!ret) {
bdi->wb.memcg_css = mem_cgroup_root_css;
bdi->wb.blkcg_css = blkcg_root_css;
}
return ret;
}
static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
{
struct radix_tree_iter iter;
struct rb_node *rbn;
void **slot;
WARN_ON(test_bit(WB_registered, &bdi->wb.state));
spin_lock_irq(&cgwb_lock);
radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
cgwb_kill(*slot);
while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
struct bdi_writeback_congested *congested =
rb_entry(rbn, struct bdi_writeback_congested, rb_node);
rb_erase(rbn, &bdi->cgwb_congested_tree);
congested->bdi = NULL; /* mark @congested unlinked */
}
spin_unlock_irq(&cgwb_lock);
/*
* All cgwb's and their congested states must be shutdown and
* released before returning. Drain the usage counter to wait for
* all cgwb's and cgwb_congested's ever created on @bdi.
*/
atomic_dec(&bdi->usage_cnt);
wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt));
}
/**
* wb_memcg_offline - kill all wb's associated with a memcg being offlined
* @memcg: memcg being offlined
*
* Also prevents creation of any new wb's associated with @memcg.
*/
void wb_memcg_offline(struct mem_cgroup *memcg)
{
LIST_HEAD(to_destroy);
struct list_head *memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
struct bdi_writeback *wb, *next;
spin_lock_irq(&cgwb_lock);
list_for_each_entry_safe(wb, next, memcg_cgwb_list, memcg_node)
cgwb_kill(wb);
memcg_cgwb_list->next = NULL; /* prevent new wb's */
spin_unlock_irq(&cgwb_lock);
}
/**
* wb_blkcg_offline - kill all wb's associated with a blkcg being offlined
* @blkcg: blkcg being offlined
*
* Also prevents creation of any new wb's associated with @blkcg.
*/
void wb_blkcg_offline(struct blkcg *blkcg)
{
LIST_HEAD(to_destroy);
struct bdi_writeback *wb, *next;
spin_lock_irq(&cgwb_lock);
list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node)
cgwb_kill(wb);
blkcg->cgwb_list.next = NULL; /* prevent new wb's */
spin_unlock_irq(&cgwb_lock);
}
#else /* CONFIG_CGROUP_WRITEBACK */
static int cgwb_bdi_init(struct backing_dev_info *bdi)
{
int err;
bdi->wb_congested = kzalloc(sizeof(*bdi->wb_congested), GFP_KERNEL);
if (!bdi->wb_congested)
return -ENOMEM;
atomic_set(&bdi->wb_congested->refcnt, 1);
err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
if (err) {
wb_congested_put(bdi->wb_congested);
return err;
}
return 0;
}
static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
{
wb_congested_put(bdi->wb_congested);
}
#endif /* CONFIG_CGROUP_WRITEBACK */
int bdi_init(struct backing_dev_info *bdi)
{
int ret;
bdi->dev = NULL;
kref_init(&bdi->refcnt);
bdi->min_ratio = 0;
bdi->max_ratio = 100;
bdi->max_prop_frac = FPROP_FRAC_BASE;
INIT_LIST_HEAD(&bdi->bdi_list);
INIT_LIST_HEAD(&bdi->wb_list);
init_waitqueue_head(&bdi->wb_waitq);
ret = cgwb_bdi_init(bdi);
list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
return ret;
}
EXPORT_SYMBOL(bdi_init);
struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
{
struct backing_dev_info *bdi;
bdi = kmalloc_node(sizeof(struct backing_dev_info),
gfp_mask | __GFP_ZERO, node_id);
if (!bdi)
return NULL;
if (bdi_init(bdi)) {
kfree(bdi);
return NULL;
}
return bdi;
}
int bdi_register(struct backing_dev_info *bdi, struct device *parent,
const char *fmt, ...)
{
va_list args;
struct device *dev;
if (bdi->dev) /* The driver needs to use separate queues per device */
return 0;
va_start(args, fmt);
dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
va_end(args);
if (IS_ERR(dev))
return PTR_ERR(dev);
bdi->dev = dev;
bdi_debug_register(bdi, dev_name(dev));
set_bit(WB_registered, &bdi->wb.state);
spin_lock_bh(&bdi_lock);
list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
spin_unlock_bh(&bdi_lock);
trace_writeback_bdi_register(bdi);
return 0;
}
EXPORT_SYMBOL(bdi_register);
int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
{
return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
}
EXPORT_SYMBOL(bdi_register_dev);
int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner)
{
int rc;
rc = bdi_register(bdi, NULL, "%u:%u", MAJOR(owner->devt),
MINOR(owner->devt));
if (rc)
return rc;
bdi->owner = owner;
get_device(owner);
return 0;
}
EXPORT_SYMBOL(bdi_register_owner);
/*
* Remove bdi from bdi_list, and ensure that it is no longer visible
*/
static void bdi_remove_from_list(struct backing_dev_info *bdi)
{
spin_lock_bh(&bdi_lock);
list_del_rcu(&bdi->bdi_list);
spin_unlock_bh(&bdi_lock);
synchronize_rcu_expedited();
}
void bdi_unregister(struct backing_dev_info *bdi)
{
/* make sure nobody finds us on the bdi_list anymore */
bdi_remove_from_list(bdi);
wb_shutdown(&bdi->wb);
cgwb_bdi_destroy(bdi);
if (bdi->dev) {
bdi_debug_unregister(bdi);
device_unregister(bdi->dev);
bdi->dev = NULL;
}
if (bdi->owner) {
put_device(bdi->owner);
bdi->owner = NULL;
}
}
static void bdi_exit(struct backing_dev_info *bdi)
{
WARN_ON_ONCE(bdi->dev);
wb_exit(&bdi->wb);
}
static void release_bdi(struct kref *ref)
{
struct backing_dev_info *bdi =
container_of(ref, struct backing_dev_info, refcnt);
bdi_exit(bdi);
kfree(bdi);
}
void bdi_put(struct backing_dev_info *bdi)
{
kref_put(&bdi->refcnt, release_bdi);
}
void bdi_destroy(struct backing_dev_info *bdi)
{
bdi_unregister(bdi);
bdi_exit(bdi);
}
EXPORT_SYMBOL(bdi_destroy);
/*
* For use from filesystems to quickly init and register a bdi associated
* with dirty writeback
*/
int bdi_setup_and_register(struct backing_dev_info *bdi, char *name)
{
int err;
bdi->name = name;
bdi->capabilities = 0;
err = bdi_init(bdi);
if (err)
return err;
err = bdi_register(bdi, NULL, "%.28s-%ld", name,
atomic_long_inc_return(&bdi_seq));
if (err) {
bdi_destroy(bdi);
return err;
}
return 0;
}
EXPORT_SYMBOL(bdi_setup_and_register);
static wait_queue_head_t congestion_wqh[2] = {
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
};
static atomic_t nr_wb_congested[2];
void clear_wb_congested(struct bdi_writeback_congested *congested, int sync)
{
wait_queue_head_t *wqh = &congestion_wqh[sync];
enum wb_congested_state bit;
bit = sync ? WB_sync_congested : WB_async_congested;
if (test_and_clear_bit(bit, &congested->state))
atomic_dec(&nr_wb_congested[sync]);
smp_mb__after_atomic();
if (waitqueue_active(wqh))
wake_up(wqh);
}
EXPORT_SYMBOL(clear_wb_congested);
void set_wb_congested(struct bdi_writeback_congested *congested, int sync)
{
enum wb_congested_state bit;
bit = sync ? WB_sync_congested : WB_async_congested;
if (!test_and_set_bit(bit, &congested->state))
atomic_inc(&nr_wb_congested[sync]);
}
EXPORT_SYMBOL(set_wb_congested);
/**
* congestion_wait - wait for a backing_dev to become uncongested
* @sync: SYNC or ASYNC IO
* @timeout: timeout in jiffies
*
* Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
* write congestion. If no backing_devs are congested then just wait for the
* next write to be completed.
*/
long congestion_wait(int sync, long timeout)
{
long ret;
unsigned long start = jiffies;
DEFINE_WAIT(wait);
wait_queue_head_t *wqh = &congestion_wqh[sync];
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
ret = io_schedule_timeout(timeout);
finish_wait(wqh, &wait);
trace_writeback_congestion_wait(jiffies_to_usecs(timeout),
jiffies_to_usecs(jiffies - start));
return ret;
}
EXPORT_SYMBOL(congestion_wait);
/**
* wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a zone to complete writes
* @zone: A zone to check if it is heavily congested
* @sync: SYNC or ASYNC IO
* @timeout: timeout in jiffies
*
* In the event of a congested backing_dev (any backing_dev) and the given
* @zone has experienced recent congestion, this waits for up to @timeout
* jiffies for either a BDI to exit congestion of the given @sync queue
* or a write to complete.
*
* In the absence of zone congestion, a short sleep or a cond_resched is
* performed to yield the processor and to allow other subsystems to make
* a forward progress.
*
* The return value is 0 if the sleep is for the full timeout. Otherwise,
* it is the number of jiffies that were still remaining when the function
* returned. return_value == timeout implies the function did not sleep.
*/
long wait_iff_congested(struct zone *zone, int sync, long timeout)
{
long ret;
unsigned long start = jiffies;
DEFINE_WAIT(wait);
wait_queue_head_t *wqh = &congestion_wqh[sync];
/*
* If there is no congestion, or heavy congestion is not being
* encountered in the current zone, yield if necessary instead
* of sleeping on the congestion queue
*/
if (atomic_read(&nr_wb_congested[sync]) == 0 ||
!test_bit(ZONE_CONGESTED, &zone->flags)) {
/*
* Memory allocation/reclaim might be called from a WQ
* context and the current implementation of the WQ
* concurrency control doesn't recognize that a particular
* WQ is congested if the worker thread is looping without
* ever sleeping. Therefore we have to do a short sleep
* here rather than calling cond_resched().
*/
if (current->flags & PF_WQ_WORKER)
schedule_timeout_uninterruptible(1);
else
cond_resched();
/* In case we scheduled, work out time remaining */
ret = timeout - (jiffies - start);
if (ret < 0)
ret = 0;
goto out;
}
/* Sleep until uncongested or a write happens */
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
ret = io_schedule_timeout(timeout);
finish_wait(wqh, &wait);
out:
trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
jiffies_to_usecs(jiffies - start));
return ret;
}
EXPORT_SYMBOL(wait_iff_congested);
int pdflush_proc_obsolete(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char kbuf[] = "0\n";
if (*ppos || *lenp < sizeof(kbuf)) {
*lenp = 0;
return 0;
}
if (copy_to_user(buffer, kbuf, sizeof(kbuf)))
return -EFAULT;
printk_once(KERN_WARNING "%s exported in /proc is scheduled for removal\n",
table->procname);
*lenp = 2;
*ppos += *lenp;
return 2;
}