* refs/heads/tmp-9bc4622: Linux 4.4.70 drivers: char: mem: Check for address space wraparound with mmap() nfsd: encoders mustn't use unitialized values in error cases drm/edid: Add 10 bpc quirk for LGD 764 panel in HP zBook 17 G2 PCI: Freeze PME scan before suspending devices PCI: Fix pci_mmap_fits() for HAVE_PCI_RESOURCE_TO_USER platforms tracing/kprobes: Enforce kprobes teardown after testing osf_wait4(): fix infoleak genirq: Fix chained interrupt data ordering uwb: fix device quirk on big-endian hosts metag/uaccess: Check access_ok in strncpy_from_user metag/uaccess: Fix access_ok() iommu/vt-d: Flush the IOTLB to get rid of the initial kdump mappings staging: rtl8192e: rtl92e_get_eeprom_size Fix read size of EPROM_CMD. staging: rtl8192e: fix 2 byte alignment of register BSSIDR. mm/huge_memory.c: respect FOLL_FORCE/FOLL_COW for thp xc2028: Fix use-after-free bug properly arm64: documentation: document tagged pointer stack constraints arm64: uaccess: ensure extension of access_ok() addr arm64: xchg: hazard against entire exchange variable ARM: dts: at91: sama5d3_xplained: not all ADC channels are available ARM: dts: at91: sama5d3_xplained: fix ADC vref powerpc/64e: Fix hang when debugging programs with relocated kernel powerpc/pseries: Fix of_node_put() underflow during DLPAR remove powerpc/book3s/mce: Move add_taint() later in virtual mode cx231xx-cards: fix NULL-deref at probe cx231xx-audio: fix NULL-deref at probe cx231xx-audio: fix init error path dvb-frontends/cxd2841er: define symbol_rate_min/max in T/C fe-ops zr364xx: enforce minimum size when reading header dib0700: fix NULL-deref at probe s5p-mfc: Fix unbalanced call to clock management gspca: konica: add missing endpoint sanity check ceph: fix recursion between ceph_set_acl() and __ceph_setattr() iio: proximity: as3935: fix as3935_write ipx: call ipxitf_put() in ioctl error path USB: hub: fix non-SS hub-descriptor handling USB: hub: fix SS hub-descriptor handling USB: serial: io_ti: fix div-by-zero in 
set_termios USB: serial: mct_u232: fix big-endian baud-rate handling USB: serial: qcserial: add more Lenovo EM74xx device IDs usb: serial: option: add Telit ME910 support USB: iowarrior: fix info ioctl on big-endian hosts usb: musb: tusb6010_omap: Do not reset the other direction's packet size ttusb2: limit messages to buffer size mceusb: fix NULL-deref at probe usbvision: fix NULL-deref at probe net: irda: irda-usb: fix firmware name on big-endian hosts usb: host: xhci-mem: allocate zeroed Scratchpad Buffer xhci: apply PME_STUCK_QUIRK and MISSING_CAS quirk for Denverton usb: host: xhci-plat: propagate return value of platform_get_irq() sched/fair: Initialize throttle_count for new task-groups lazily sched/fair: Do not announce throttled next buddy in dequeue_task_fair() fscrypt: avoid collisions when presenting long encrypted filenames f2fs: check entire encrypted bigname when finding a dentry fscrypt: fix context consistency check when key(s) unavailable net: qmi_wwan: Add SIMCom 7230E ext4 crypto: fix some error handling ext4 crypto: don't let data integrity writebacks fail with ENOMEM USB: serial: ftdi_sio: add Olimex ARM-USB-TINY(H) PIDs USB: serial: ftdi_sio: fix setting latency for unprivileged users pid_ns: Fix race between setns'ed fork() and zap_pid_ns_processes() pid_ns: Sleep in TASK_INTERRUPTIBLE in zap_pid_ns_processes iio: dac: ad7303: fix channel description of: fix sparse warning in of_pci_range_parser_one proc: Fix unbalanced hard link numbers cdc-acm: fix possible invalid access when processing notification drm/nouveau/tmr: handle races with hw when updating the next alarm time drm/nouveau/tmr: avoid processing completed alarms when adding a new one drm/nouveau/tmr: fix corruption of the pending list when rescheduling an alarm drm/nouveau/tmr: ack interrupt before processing alarms drm/nouveau/therm: remove ineffective workarounds for alarm bugs drm/amdgpu: Make display watermark calculations more accurate drm/amdgpu: Avoid 
overflows/divide-by-zero in latency_watermark calculations. ath9k_htc: fix NULL-deref at probe ath9k_htc: Add support of AirTies 1eda:2315 AR9271 device s390/cputime: fix incorrect system time s390/kdump: Add final note regulator: tps65023: Fix inverted core enable logic. KVM: X86: Fix read out-of-bounds vulnerability in kvm pio emulation KVM: x86: Fix load damaged SSEx MXCSR register ima: accept previously set IMA_NEW_FILE mwifiex: pcie: fix cmd_buf use-after-free in remove/reset rtlwifi: rtl8821ae: setup 8812ae RFE according to device type md: update slab_cache before releasing new stripes when stripes resizing dm space map disk: fix some book keeping in the disk space map dm thin metadata: call precommit before saving the roots dm bufio: make the parameter "retain_bytes" unsigned long dm cache metadata: fail operations if fail_io mode has been established dm bufio: check new buffer allocation watermark every 30 seconds dm bufio: avoid a possible ABBA deadlock dm raid: select the Kconfig option CONFIG_MD_RAID0 dm btree: fix for dm_btree_find_lowest_key() infiniband: call ipv6 route lookup via the stub interface tpm_crb: check for bad response size ARM: tegra: paz00: Mark panel regulator as enabled on boot USB: core: replace %p with %pK char: lp: fix possible integer overflow in lp_setup() watchdog: pcwd_usb: fix NULL-deref at probe USB: ene_usb6250: fix DMA to the stack usb: misc: legousbtower: Fix memory leak usb: misc: legousbtower: Fix buffers on stack ANDROID: uid_sys_stats: defer io stats calulation for dead tasks ANDROID: AVB: Fix linter errors. ANDROID: AVB: Fix invalidate_vbmeta_submit(). 
ANDROID: sdcardfs: Check for NULL in revalidate Linux 4.4.69 ipmi: Fix kernel panic at ipmi_ssif_thread() wlcore: Add RX_BA_WIN_SIZE_CHANGE_EVENT event wlcore: Pass win_size taken from ieee80211_sta to FW mac80211: RX BA support for sta max_rx_aggregation_subframes mac80211: pass block ack session timeout to to driver mac80211: pass RX aggregation window size to driver Bluetooth: hci_intel: add missing tty-device sanity check Bluetooth: hci_bcm: add missing tty-device sanity check Bluetooth: Fix user channel for 32bit userspace on 64bit kernel tty: pty: Fix ldisc flush after userspace become aware of the data already serial: omap: suspend device on probe errors serial: omap: fix runtime-pm handling on unbind serial: samsung: Use right device for DMA-mapping calls arm64: KVM: Fix decoding of Rt/Rt2 when trapping AArch32 CP accesses padata: free correct variable CIFS: add misssing SFM mapping for doublequote cifs: fix CIFS_IOC_GET_MNT_INFO oops CIFS: fix mapping of SFM_SPACE and SFM_PERIOD SMB3: Work around mount failure when using SMB3 dialect to Macs Set unicode flag on cifs echo request to avoid Mac error fs/block_dev: always invalidate cleancache in invalidate_bdev() ceph: fix memory leak in __ceph_setxattr() fs/xattr.c: zero out memory copied to userspace in getxattr ext4: evict inline data when writing to memory map IB/mlx4: Reduce SRIOV multicast cleanup warning message to debug level IB/mlx4: Fix ib device initialization error flow IB/IPoIB: ibX: failed to create mcg debug file IB/core: Fix sysfs registration error flow vfio/type1: Remove locked page accounting workqueue dm era: save spacemap metadata root after the pre-commit crypto: algif_aead - Require setkey before accept(2) block: fix blk_integrity_register to use template's interval_exp if not 0 KVM: arm/arm64: fix races in kvm_psci_vcpu_on KVM: x86: fix user triggerable warning in kvm_apic_accept_events() um: Fix PTRACE_POKEUSER on x86_64 x86, pmem: Fix cache flushing for iovec write < 8 bytes 
selftests/x86/ldt_gdt_32: Work around a glibc sigaction() bug x86/boot: Fix BSS corruption/overwrite bug in early x86 kernel startup usb: hub: Do not attempt to autosuspend disconnected devices usb: hub: Fix error loop seen after hub communication errors usb: Make sure usb/phy/of gets built-in usb: misc: add missing continue in switch staging: comedi: jr3_pci: cope with jiffies wraparound staging: comedi: jr3_pci: fix possible null pointer dereference staging: gdm724x: gdm_mux: fix use-after-free on module unload staging: vt6656: use off stack for out buffer USB transfers. staging: vt6656: use off stack for in buffer USB transfers. USB: Proper handling of Race Condition when two USB class drivers try to call init_usb_class simultaneously USB: serial: ftdi_sio: add device ID for Microsemi/Arrow SF2PLUS Dev Kit usb: host: xhci: print correct command ring address iscsi-target: Set session_fall_back_to_erl0 when forcing reinstatement target: Convert ACL change queue_depth se_session reference usage target/fileio: Fix zero-length READ and WRITE handling target: Fix compare_and_write_callback handling for non GOOD status xen: adjust early dom0 p2m handling to xen hypervisor behavior ANDROID: AVB: Only invalidate vbmeta when told to do so. ANDROID: sdcardfs: Move top to its own struct ANDROID: lowmemorykiller: account for unevictable pages ANDROID: usb: gadget: fix NULL pointer issue in mtp_read() ANDROID: usb: f_mtp: return error code if transfer error in receive_file_work function Signed-off-by: Blagovest Kolenichev <bkolenichev@codeaurora.org> Conflicts: drivers/usb/gadget/function/f_mtp.c fs/ext4/page-io.c net/mac80211/agg-rx.c Change-Id: Id65e75bf3bcee4114eb5d00730a9ef2444ad58eb Signed-off-by: Blagovest Kolenichev <bkolenichev@codeaurora.org>
548 lines
14 KiB
C
548 lines
14 KiB
C
/*
|
|
* linux/fs/ext4/page-io.c
|
|
*
|
|
* This contains the new page_io functions for ext4
|
|
*
|
|
* Written by Theodore Ts'o, 2010.
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
#include <linux/time.h>
|
|
#include <linux/highuid.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/quotaops.h>
|
|
#include <linux/string.h>
|
|
#include <linux/buffer_head.h>
|
|
#include <linux/writeback.h>
|
|
#include <linux/pagevec.h>
|
|
#include <linux/mpage.h>
|
|
#include <linux/namei.h>
|
|
#include <linux/uio.h>
|
|
#include <linux/bio.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/backing-dev.h>
|
|
|
|
#include "ext4_jbd2.h"
|
|
#include "xattr.h"
|
|
#include "acl.h"
|
|
#include "ext4_ice.h"
|
|
|
|
/* Slab cache that ext4_io_end objects in this file are allocated from and freed to. */
static struct kmem_cache *io_end_cachep;
|
|
|
|
/*
 * Create the slab cache used for ext4_io_end objects.
 *
 * Returns 0 on success, or -ENOMEM when the cache could not be created.
 */
int __init ext4_init_pageio(void)
{
	io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);

	return io_end_cachep ? 0 : -ENOMEM;
}
|
|
|
|
void ext4_exit_pageio(void)
|
|
{
|
|
kmem_cache_destroy(io_end_cachep);
|
|
}
|
|
|
|
/*
|
|
* Print an buffer I/O error compatible with the fs/buffer.c. This
|
|
* provides compatibility with dmesg scrapers that look for a specific
|
|
* buffer I/O error message. We really need a unified error reporting
|
|
* structure to userspace ala Digital Unix's uerf system, but it's
|
|
* probably not going to happen in my lifetime, due to LKML politics...
|
|
*/
|
|
static void buffer_io_error(struct buffer_head *bh)
|
|
{
|
|
char b[BDEVNAME_SIZE];
|
|
printk_ratelimited(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
|
|
bdevname(bh->b_bdev, b),
|
|
(unsigned long long)bh->b_blocknr);
|
|
}
|
|
|
|
static void ext4_finish_bio(struct bio *bio)
|
|
{
|
|
int i;
|
|
struct bio_vec *bvec;
|
|
|
|
bio_for_each_segment_all(bvec, bio, i) {
|
|
struct page *page = bvec->bv_page;
|
|
#ifdef CONFIG_EXT4_FS_ENCRYPTION
|
|
struct page *data_page = NULL;
|
|
struct ext4_crypto_ctx *ctx = NULL;
|
|
#endif
|
|
struct buffer_head *bh, *head;
|
|
unsigned bio_start = bvec->bv_offset;
|
|
unsigned bio_end = bio_start + bvec->bv_len;
|
|
unsigned under_io = 0;
|
|
unsigned long flags;
|
|
|
|
if (!page)
|
|
continue;
|
|
|
|
#ifdef CONFIG_EXT4_FS_ENCRYPTION
|
|
if (!page->mapping) {
|
|
/* The bounce data pages are unmapped. */
|
|
data_page = page;
|
|
ctx = (struct ext4_crypto_ctx *)page_private(data_page);
|
|
page = ctx->w.control_page;
|
|
}
|
|
#endif
|
|
|
|
if (bio->bi_error) {
|
|
SetPageError(page);
|
|
set_bit(AS_EIO, &page->mapping->flags);
|
|
}
|
|
bh = head = page_buffers(page);
|
|
/*
|
|
* We check all buffers in the page under BH_Uptodate_Lock
|
|
* to avoid races with other end io clearing async_write flags
|
|
*/
|
|
local_irq_save(flags);
|
|
bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
|
|
do {
|
|
if (bh_offset(bh) < bio_start ||
|
|
bh_offset(bh) + bh->b_size > bio_end) {
|
|
if (buffer_async_write(bh))
|
|
under_io++;
|
|
continue;
|
|
}
|
|
clear_buffer_async_write(bh);
|
|
if (bio->bi_error)
|
|
buffer_io_error(bh);
|
|
} while ((bh = bh->b_this_page) != head);
|
|
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
|
|
local_irq_restore(flags);
|
|
if (!under_io) {
|
|
#ifdef CONFIG_EXT4_FS_ENCRYPTION
|
|
if (ctx)
|
|
ext4_restore_control_page(data_page);
|
|
#endif
|
|
end_page_writeback(page);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Free an io_end once nobody references it any more.
 *
 * The io_end must be off every list, must no longer carry the
 * EXT4_IO_END_UNWRITTEN flag, and is expected to have no handle attached.
 * The inode's i_ioend_count is dropped (waking waiters when it reaches
 * zero), every bio chained through bi_private is finished and released,
 * and the io_end goes back to the slab cache.
 */
static void ext4_release_io_end(ext4_io_end_t *io_end)
{
	struct bio *cur, *following;

	BUG_ON(!list_empty(&io_end->list));
	BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
	WARN_ON(io_end->handle);

	if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
		wake_up_all(ext4_ioend_wq(io_end->inode));

	cur = io_end->bio;
	while (cur) {
		/* Fetch the link first: bio_put() may free the bio. */
		following = cur->bi_private;
		ext4_finish_bio(cur);
		bio_put(cur);
		cur = following;
	}

	kmem_cache_free(io_end_cachep, io_end);
}
|
|
|
|
/*
 * Drop EXT4_IO_END_UNWRITTEN from @io_end and decrement the inode's
 * i_unwritten counter, waking everybody on the inode's ioend wait queue
 * when the counter reaches zero.
 */
static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
{
	struct inode *inode = io_end->inode;
	struct ext4_inode_info *ei = EXT4_I(inode);

	io_end->flag &= ~EXT4_IO_END_UNWRITTEN;

	/* Last pending conversion gone: wake up anyone waiting for it. */
	if (atomic_dec_and_test(&ei->i_unwritten))
		wake_up_all(ext4_ioend_wq(inode));
}
|
|
|
|
/*
|
|
* Check a range of space and convert unwritten extents to written. Note that
|
|
* we are protected from truncate touching same part of extent tree by the
|
|
* fact that truncate code waits for all DIO to finish (thus exclusion from
|
|
* direct IO is achieved) and also waits for PageWriteback bits. Thus we
|
|
* cannot get to ext4_ext_truncate() before all IOs overlapping that range are
|
|
* completed (happens from ext4_free_ioend()).
|
|
*/
|
|
static int ext4_end_io(ext4_io_end_t *io)
|
|
{
|
|
struct inode *inode = io->inode;
|
|
loff_t offset = io->offset;
|
|
ssize_t size = io->size;
|
|
handle_t *handle = io->handle;
|
|
int ret = 0;
|
|
|
|
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
|
|
"list->prev 0x%p\n",
|
|
io, inode->i_ino, io->list.next, io->list.prev);
|
|
|
|
io->handle = NULL; /* Following call will use up the handle */
|
|
ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
|
|
if (ret < 0) {
|
|
ext4_msg(inode->i_sb, KERN_EMERG,
|
|
"failed to convert unwritten extents to written "
|
|
"extents -- potential data loss! "
|
|
"(inode %lu, offset %llu, size %zd, error %d)",
|
|
inode->i_ino, offset, size, ret);
|
|
}
|
|
ext4_clear_io_unwritten_flag(io);
|
|
ext4_release_io_end(io);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Debug helper: print every io_end on @head together with its list
 * neighbours.  Compiles to an empty function unless EXT4FS_DEBUG is
 * defined.
 */
static void dump_completed_IO(struct inode *inode, struct list_head *head)
{
#ifdef EXT4FS_DEBUG
	ext4_io_end_t *io;

	if (list_empty(head))
		return;

	ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
	list_for_each_entry(io, head, list) {
		ext4_io_end_t *prev_io =
			container_of(io->list.prev, ext4_io_end_t, list);
		ext4_io_end_t *next_io =
			container_of(io->list.next, ext4_io_end_t, list);

		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
			   io, inode->i_ino, prev_io, next_io);
	}
#endif
}
|
|
|
|
/* Add the io_end to per-inode completed end_io list. */
|
|
static void ext4_add_complete_io(ext4_io_end_t *io_end)
|
|
{
|
|
struct ext4_inode_info *ei = EXT4_I(io_end->inode);
|
|
struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb);
|
|
struct workqueue_struct *wq;
|
|
unsigned long flags;
|
|
|
|
/* Only reserved conversions from writeback should enter here */
|
|
WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
|
|
WARN_ON(!io_end->handle && sbi->s_journal);
|
|
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
|
wq = sbi->rsv_conversion_wq;
|
|
if (list_empty(&ei->i_rsv_conversion_list))
|
|
queue_work(wq, &ei->i_rsv_conversion_work);
|
|
list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
|
|
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
|
}
|
|
|
|
static int ext4_do_flush_completed_IO(struct inode *inode,
|
|
struct list_head *head)
|
|
{
|
|
ext4_io_end_t *io;
|
|
struct list_head unwritten;
|
|
unsigned long flags;
|
|
struct ext4_inode_info *ei = EXT4_I(inode);
|
|
int err, ret = 0;
|
|
|
|
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
|
dump_completed_IO(inode, head);
|
|
list_replace_init(head, &unwritten);
|
|
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
|
|
|
while (!list_empty(&unwritten)) {
|
|
io = list_entry(unwritten.next, ext4_io_end_t, list);
|
|
BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
|
|
list_del_init(&io->list);
|
|
|
|
err = ext4_end_io(io);
|
|
if (unlikely(!ret && err))
|
|
ret = err;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* work on completed IO, to convert unwritten extents to extents
|
|
*/
|
|
void ext4_end_io_rsv_work(struct work_struct *work)
|
|
{
|
|
struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,
|
|
i_rsv_conversion_work);
|
|
ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list);
|
|
}
|
|
|
|
/*
 * Allocate a zeroed io_end for @inode with a reference count of one and
 * account it in the inode's i_ioend_count.
 *
 * @flags: allocation flags passed to the slab allocator.
 *
 * Returns the new io_end, or NULL if the allocation failed.
 */
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
{
	ext4_io_end_t *io_end = kmem_cache_zalloc(io_end_cachep, flags);

	if (!io_end)
		return NULL;

	atomic_inc(&EXT4_I(inode)->i_ioend_count);
	io_end->inode = inode;
	INIT_LIST_HEAD(&io_end->list);
	atomic_set(&io_end->count, 1);

	return io_end;
}
|
|
|
|
/*
 * Drop one reference on @io_end.  When it was the last one, an io_end
 * that is flagged unwritten and has a non-zero size is queued for
 * deferred conversion; any other io_end is released immediately.
 */
void ext4_put_io_end_defer(ext4_io_end_t *io_end)
{
	if (!atomic_dec_and_test(&io_end->count))
		return;

	if ((io_end->flag & EXT4_IO_END_UNWRITTEN) && io_end->size)
		ext4_add_complete_io(io_end);
	else
		ext4_release_io_end(io_end);
}
|
|
|
|
/*
 * Drop one reference on @io_end.  When it was the last one, any pending
 * unwritten-extent conversion is performed right here before the io_end
 * is released.
 *
 * Returns the result of ext4_convert_unwritten_extents() if a conversion
 * was performed, otherwise 0.
 */
int ext4_put_io_end(ext4_io_end_t *io_end)
{
	int err = 0;

	if (!atomic_dec_and_test(&io_end->count))
		return err;

	if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
		err = ext4_convert_unwritten_extents(io_end->handle,
						     io_end->inode,
						     io_end->offset,
						     io_end->size);
		/* The handle was passed to the conversion call above. */
		io_end->handle = NULL;
		ext4_clear_io_unwritten_flag(io_end);
	}
	ext4_release_io_end(io_end);

	return err;
}
|
|
|
|
/*
 * Take an additional reference on @io_end and hand the same pointer back
 * so the call can be used inline in an assignment.
 */
ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
{
	atomic_inc(&io_end->count);

	return io_end;
}
|
|
|
|
/* BIO completion function for page writeback */
|
|
static void ext4_end_bio(struct bio *bio)
|
|
{
|
|
ext4_io_end_t *io_end = bio->bi_private;
|
|
sector_t bi_sector = bio->bi_iter.bi_sector;
|
|
|
|
BUG_ON(!io_end);
|
|
bio->bi_end_io = NULL;
|
|
|
|
if (bio->bi_error) {
|
|
struct inode *inode = io_end->inode;
|
|
|
|
ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
|
|
"(offset %llu size %ld starting block %llu)",
|
|
bio->bi_error, inode->i_ino,
|
|
(unsigned long long) io_end->offset,
|
|
(long) io_end->size,
|
|
(unsigned long long)
|
|
bi_sector >> (inode->i_blkbits - 9));
|
|
mapping_set_error(inode->i_mapping, bio->bi_error);
|
|
}
|
|
|
|
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
|
|
/*
|
|
* Link bio into list hanging from io_end. We have to do it
|
|
* atomically as bio completions can be racing against each
|
|
* other.
|
|
*/
|
|
bio->bi_private = xchg(&io_end->bio, bio);
|
|
ext4_put_io_end_defer(io_end);
|
|
} else {
|
|
/*
|
|
* Drop io_end reference early. Inode can get freed once
|
|
* we finish the bio.
|
|
*/
|
|
ext4_put_io_end_defer(io_end);
|
|
ext4_finish_bio(bio);
|
|
bio_put(bio);
|
|
}
|
|
}
|
|
|
|
void ext4_io_submit(struct ext4_io_submit *io)
|
|
{
|
|
struct bio *bio = io->io_bio;
|
|
|
|
if (bio) {
|
|
int io_op = io->io_wbc->sync_mode == WB_SYNC_ALL ?
|
|
WRITE_SYNC : WRITE;
|
|
bio_get(io->io_bio);
|
|
submit_bio(io_op, io->io_bio);
|
|
bio_put(io->io_bio);
|
|
}
|
|
io->io_bio = NULL;
|
|
}
|
|
|
|
void ext4_io_submit_init(struct ext4_io_submit *io,
|
|
struct writeback_control *wbc)
|
|
{
|
|
io->io_wbc = wbc;
|
|
io->io_bio = NULL;
|
|
io->io_end = NULL;
|
|
}
|
|
|
|
static int io_submit_init_bio(struct ext4_io_submit *io,
|
|
struct buffer_head *bh)
|
|
{
|
|
struct bio *bio;
|
|
|
|
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
|
|
if (!bio)
|
|
return -ENOMEM;
|
|
wbc_init_bio(io->io_wbc, bio);
|
|
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
|
|
bio->bi_bdev = bh->b_bdev;
|
|
bio->bi_end_io = ext4_end_bio;
|
|
bio->bi_private = ext4_get_io_end(io->io_end);
|
|
io->io_bio = bio;
|
|
io->io_next_block = bh->b_blocknr;
|
|
return 0;
|
|
}
|
|
|
|
static int io_submit_add_bh(struct ext4_io_submit *io,
|
|
struct inode *inode,
|
|
struct page *page,
|
|
struct buffer_head *bh)
|
|
{
|
|
int ret;
|
|
|
|
if (io->io_bio && bh->b_blocknr != io->io_next_block) {
|
|
submit_and_retry:
|
|
ext4_io_submit(io);
|
|
}
|
|
if (io->io_bio == NULL) {
|
|
ret = io_submit_init_bio(io, bh);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
|
|
if (ret != bh->b_size)
|
|
goto submit_and_retry;
|
|
wbc_account_io(io->io_wbc, page, bh->b_size);
|
|
io->io_next_block++;
|
|
return 0;
|
|
}
|
|
|
|
int ext4_bio_write_page(struct ext4_io_submit *io,
|
|
struct page *page,
|
|
int len,
|
|
struct writeback_control *wbc,
|
|
bool keep_towrite)
|
|
{
|
|
struct page *data_page = NULL;
|
|
struct inode *inode = page->mapping->host;
|
|
unsigned block_start, blocksize;
|
|
struct buffer_head *bh, *head;
|
|
int ret = 0;
|
|
int nr_submitted = 0;
|
|
int nr_to_submit = 0;
|
|
|
|
blocksize = 1 << inode->i_blkbits;
|
|
|
|
BUG_ON(!PageLocked(page));
|
|
BUG_ON(PageWriteback(page));
|
|
|
|
if (keep_towrite)
|
|
set_page_writeback_keepwrite(page);
|
|
else
|
|
set_page_writeback(page);
|
|
ClearPageError(page);
|
|
|
|
/*
|
|
* Comments copied from block_write_full_page:
|
|
*
|
|
* The page straddles i_size. It must be zeroed out on each and every
|
|
* writepage invocation because it may be mmapped. "A file is mapped
|
|
* in multiples of the page size. For a file that is not a multiple of
|
|
* the page size, the remaining memory is zeroed when mapped, and
|
|
* writes to that region are not written out to the file."
|
|
*/
|
|
if (len < PAGE_CACHE_SIZE)
|
|
zero_user_segment(page, len, PAGE_CACHE_SIZE);
|
|
/*
|
|
* In the first loop we prepare and mark buffers to submit. We have to
|
|
* mark all buffers in the page before submitting so that
|
|
* end_page_writeback() cannot be called from ext4_bio_end_io() when IO
|
|
* on the first buffer finishes and we are still working on submitting
|
|
* the second buffer.
|
|
*/
|
|
bh = head = page_buffers(page);
|
|
do {
|
|
block_start = bh_offset(bh);
|
|
if (block_start >= len) {
|
|
clear_buffer_dirty(bh);
|
|
set_buffer_uptodate(bh);
|
|
continue;
|
|
}
|
|
if (!buffer_dirty(bh) || buffer_delay(bh) ||
|
|
!buffer_mapped(bh) || buffer_unwritten(bh)) {
|
|
/* A hole? We can safely clear the dirty bit */
|
|
if (!buffer_mapped(bh))
|
|
clear_buffer_dirty(bh);
|
|
if (io->io_bio)
|
|
ext4_io_submit(io);
|
|
continue;
|
|
}
|
|
if (buffer_new(bh)) {
|
|
clear_buffer_new(bh);
|
|
unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
|
|
}
|
|
set_buffer_async_write(bh);
|
|
nr_to_submit++;
|
|
} while ((bh = bh->b_this_page) != head);
|
|
|
|
bh = head = page_buffers(page);
|
|
|
|
if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) &&
|
|
nr_to_submit) {
|
|
gfp_t gfp_flags = GFP_NOFS;
|
|
|
|
retry_encrypt:
|
|
|
|
if (!ext4_using_hardware_encryption(inode))
|
|
data_page = ext4_encrypt(inode, page, gfp_flags);
|
|
|
|
|
|
if (IS_ERR(data_page)) {
|
|
ret = PTR_ERR(data_page);
|
|
if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
|
|
if (io->io_bio) {
|
|
ext4_io_submit(io);
|
|
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
|
}
|
|
gfp_flags |= __GFP_NOFAIL;
|
|
goto retry_encrypt;
|
|
}
|
|
data_page = NULL;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
/* Now submit buffers to write */
|
|
do {
|
|
if (!buffer_async_write(bh))
|
|
continue;
|
|
ret = io_submit_add_bh(io, inode,
|
|
data_page ? data_page : page, bh);
|
|
if (ret) {
|
|
/*
|
|
* We only get here on ENOMEM. Not much else
|
|
* we can do but mark the page as dirty, and
|
|
* better luck next time.
|
|
*/
|
|
break;
|
|
}
|
|
nr_submitted++;
|
|
clear_buffer_dirty(bh);
|
|
} while ((bh = bh->b_this_page) != head);
|
|
|
|
/* Error stopped previous loop? Clean up buffers... */
|
|
if (ret) {
|
|
out:
|
|
if (data_page)
|
|
ext4_restore_control_page(data_page);
|
|
printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
|
|
redirty_page_for_writepage(wbc, page);
|
|
do {
|
|
clear_buffer_async_write(bh);
|
|
bh = bh->b_this_page;
|
|
} while (bh != head);
|
|
}
|
|
unlock_page(page);
|
|
/* Nothing submitted - we have to end page writeback */
|
|
if (!nr_submitted)
|
|
end_page_writeback(page);
|
|
return ret;
|
|
}
|