From 393737a28b63836b3f12613b1c031e6f32cc3476 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Feb 2017 15:56:49 +0100 Subject: [PATCH 1/5] block: Unhash block device inodes on gendisk destruction Currently, block device inodes stay around after corresponding gendisk hash died until memory reclaim finds them and frees them. Since we will make block device inode pin the bdi, we want to free the block device inode as soon as the device goes away so that bdi does not stay around unnecessarily. Furthermore we need to avoid issues when new device with the same major,minor pair gets created since reusing the bdi structure would be rather difficult in this case. Unhashing block device inode on gendisk destruction nicely deals with these problems. Once last block device inode reference is dropped (which may be directly in del_gendisk()), the inode gets evicted. Furthermore if the major,minor pair gets reallocated, we are guaranteed to get new block device inode even if old block device inode is not yet evicted and thus we avoid issues with possible reuse of bdi. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Change-Id: I41087d3bc818c4e58e0b4e20876e136ec1cbb07b Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git Git-commit: f44f1ab5a2dcd4e16eab850fd08e40ff2d0c28d4 [riteshh@codeaurora.org: resolved merge conflicts] Signed-off-by: Ritesh Harjani --- block/genhd.c | 2 ++ fs/block_dev.c | 15 +++++++++++++++ include/linux/fs.h | 1 + 3 files changed, 18 insertions(+) diff --git a/block/genhd.c b/block/genhd.c index de2a6162e3f3..214165c9e52a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -646,6 +646,8 @@ void del_gendisk(struct gendisk *disk) disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); while ((part = disk_part_iter_next(&piter))) { + bdev_unhash_inode(MKDEV(disk->major, + disk->first_minor + part->partno)); invalidate_partition(disk, part->partno); delete_partition(disk, part->partno); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 26bbaaefdff4..d180449f07ce 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -623,6 +623,21 @@ static int bdev_set(struct inode *inode, void *data) static LIST_HEAD(all_bdevs); +/* + * If there is a bdev inode for this device, unhash it so that it gets evicted + * as soon as last inode reference is dropped. + */ +void bdev_unhash_inode(dev_t dev) +{ + struct inode *inode; + + inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev); + if (inode) { + remove_inode_hash(inode); + iput(inode); + } +} + struct block_device *bdget(dev_t dev) { struct block_device *bdev; diff --git a/include/linux/fs.h b/include/linux/fs.h index c6de40668e08..82c5da84efe4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2331,6 +2331,7 @@ extern struct kmem_cache *names_cachep; #ifdef CONFIG_BLOCK extern int register_blkdev(unsigned int, const char *); extern void unregister_blkdev(unsigned int, const char *); +extern void bdev_unhash_inode(dev_t dev); extern struct block_device *bdget(dev_t); extern struct block_device *bdgrab(struct block_device *bdev); extern void bd_set_size(struct block_device *, loff_t size); From 1c6dd645345c75a6cafc6fc0ab41c60c7f381e99 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Feb 2017 15:56:50 +0100 Subject: [PATCH 2/5] block: Use pointer to backing_dev_info from request_queue We will want to have struct backing_dev_info allocated separately from struct request_queue. As the first step add pointer to backing_dev_info to request_queue and convert all users touching it. No functional changes in this patch. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Change-Id: I77fbb181de7e39c83fbfba8cfb128d6ace161f31 Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git Git-commit: 97419acd22a0bacc52dbc34d5bbc96d315e48acb [riteshh@codeaurora.org: resolved merge conflicts] Signed-off-by: Ritesh Harjani --- block/blk-cgroup.c | 6 +++--- block/blk-core.c | 25 +++++++++++++------------ block/blk-integrity.c | 4 ++-- block/blk-sysfs.c | 6 +++--- block/genhd.c | 4 ++-- drivers/block/aoe/aoeblk.c | 4 ++-- drivers/block/drbd/drbd_main.c | 6 +++--- drivers/block/drbd/drbd_nl.c | 8 ++++---- drivers/block/drbd/drbd_proc.c | 2 +- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/pktcdvd.c | 4 ++-- drivers/block/rbd.c | 2 +- drivers/md/bcache/request.c | 10 +++++----- drivers/md/bcache/super.c | 8 ++++---- drivers/md/dm-cache-target.c | 2 +- drivers/md/dm-era-target.c | 2 +- drivers/md/dm-table.c | 2 +- drivers/md/dm-thin.c | 2 +- drivers/md/dm.c | 6 +++--- drivers/md/linear.c | 2 +- drivers/md/md.c | 6 +++--- drivers/md/multipath.c | 2 +- drivers/md/raid0.c | 6 +++--- drivers/md/raid1.c | 4 ++-- drivers/md/raid10.c | 10 +++++----- drivers/md/raid5.c | 12 ++++++------ fs/gfs2/ops_fstype.c | 2 +- fs/nilfs2/super.c | 2 +- fs/super.c | 2 +- include/linux/blkdev.h | 3 ++- mm/page-writeback.c | 4 ++-- 31 files changed, 81 insertions(+), 79 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 8161090a1970..37ade035c956 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -184,7 +184,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, goto err_free_blkg; } - wb_congested = wb_congested_get_create(&q->backing_dev_info, + wb_congested = wb_congested_get_create(q->backing_dev_info, blkcg->css.id, GFP_NOWAIT); if (!wb_congested) { ret = -ENOMEM; @@ -468,8 +468,8 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, const char *blkg_dev_name(struct blkcg_gq *blkg) { /* some drivers (floppy) instantiate a queue w/o disk registered */ - if (blkg->q->backing_dev_info.dev) - return dev_name(blkg->q->backing_dev_info.dev); + if (blkg->q->backing_dev_info->dev) + return dev_name(blkg->q->backing_dev_info->dev); return NULL; } EXPORT_SYMBOL_GPL(blkg_dev_name); diff --git a/block/blk-core.c b/block/blk-core.c index 1bc9f212b3a2..8248d21f4616 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -87,7 +87,7 @@ static void blk_clear_congested(struct request_list *rl, int sync) * flip its congestion state for events on other blkcgs. */ if (rl == &rl->q->root_rl) - clear_wb_congested(rl->q->backing_dev_info.wb.congested, sync); + clear_wb_congested(rl->q->backing_dev_info->wb.congested, sync); #endif } @@ -98,7 +98,7 @@ static void blk_set_congested(struct request_list *rl, int sync) #else /* see blk_clear_congested() */ if (rl == &rl->q->root_rl) - set_wb_congested(rl->q->backing_dev_info.wb.congested, sync); + set_wb_congested(rl->q->backing_dev_info->wb.congested, sync); #endif } @@ -129,7 +129,7 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); - return &q->backing_dev_info; + return q->backing_dev_info; } EXPORT_SYMBOL(blk_get_backing_dev_info); @@ -597,7 +597,7 @@ void blk_cleanup_queue(struct request_queue *q) blk_flush_integrity(); /* @q won't process any more request, flush async actions */ - del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); + del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer); blk_sync_queue(q); if (q->mq_ops) @@ -712,17 +712,18 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (!q->bio_split) goto fail_id; - q->backing_dev_info.ra_pages = + q->backing_dev_info = &q->_backing_dev_info; + q->backing_dev_info->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; - q->backing_dev_info.capabilities = BDI_CAP_CGROUP_WRITEBACK; - q->backing_dev_info.name = "block"; + q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK; + q->backing_dev_info->name = "block"; q->node = node_id; - err = bdi_init(&q->backing_dev_info); + err = bdi_init(q->backing_dev_info); if (err) goto fail_split; - setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, + setup_timer(&q->backing_dev_info->laptop_mode_wb_timer, laptop_mode_timer_fn, (unsigned long) q); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->queue_head); @@ -772,7 +773,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) fail_ref: percpu_ref_exit(&q->q_usage_counter); fail_bdi: - bdi_destroy(&q->backing_dev_info); + bdi_destroy(q->backing_dev_info); fail_split: bioset_free(q->bio_split); fail_id: @@ -1195,7 +1196,7 @@ fail_elvpriv: * disturb iosched and blkcg but weird is bettern than dead. */ printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n", - __func__, dev_name(q->backing_dev_info.dev)); + __func__, dev_name(q->backing_dev_info->dev)); rq->cmd_flags &= ~REQ_ELVPRIV; rq->elv.icq = NULL; @@ -3251,7 +3252,7 @@ void blk_finish_request(struct request *req, int error) BUG_ON(blk_queued_rq(req)); if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS) - laptop_io_completion(&req->q->backing_dev_info); + laptop_io_completion(req->q->backing_dev_info); blk_delete_timer(req); diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 478f572cb1e7..e4ebd79de679 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -418,7 +418,7 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template bi->tuple_size = template->tuple_size; bi->tag_size = template->tag_size; - disk->queue->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; + disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; } EXPORT_SYMBOL(blk_integrity_register); @@ -431,7 +431,7 @@ EXPORT_SYMBOL(blk_integrity_register); */ void blk_integrity_unregister(struct gendisk *disk) { - disk->queue->backing_dev_info.capabilities &= ~BDI_CAP_STABLE_WRITES; + disk->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; memset(&disk->queue->integrity, 0, sizeof(struct blk_integrity)); } EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e140cc487ce1..8e1e284fa159 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -75,7 +75,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) static ssize_t queue_ra_show(struct request_queue *q, char *page) { - unsigned long ra_kb = q->backing_dev_info.ra_pages << + unsigned long ra_kb = q->backing_dev_info->ra_pages << (PAGE_CACHE_SHIFT - 10); return queue_var_show(ra_kb, (page)); @@ -90,7 +90,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count) if (ret < 0) return ret; - q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); + q->backing_dev_info->ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); return ret; } @@ -578,7 +578,7 @@ static void blk_release_queue(struct kobject *kobj) struct request_queue *q = container_of(kobj, struct request_queue, kobj); - bdi_exit(&q->backing_dev_info); + bdi_exit(q->backing_dev_info); blkcg_exit_queue(q); if (q->elevator) { diff --git a/block/genhd.c b/block/genhd.c index 214165c9e52a..53a931b30d78 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -611,7 +611,7 @@ void add_disk(struct gendisk *disk) disk_alloc_events(disk); /* Register BDI before referencing it from bdev */ - bdi = &disk->queue->backing_dev_info; + bdi = disk->queue->backing_dev_info; bdi_register_owner(bdi, disk_to_dev(disk)); blk_register_region(disk_devt(disk), disk->minors, NULL, @@ -663,7 +663,7 @@ void del_gendisk(struct gendisk *disk) * Unregister bdi before releasing device numbers (as they can * get reused and we'd get clashes in sysfs). */ - bdi_unregister(&disk->queue->backing_dev_info); + bdi_unregister(disk->queue->backing_dev_info); blk_unregister_queue(disk); } else { WARN_ON(1); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index dd73e1ff1759..aadab0381e0d 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -396,8 +396,8 @@ aoeblk_gdalloc(void *vp) WARN_ON(d->gd); WARN_ON(d->flags & DEVFL_UP); blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); - q->backing_dev_info.name = "aoe"; - q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE; + q->backing_dev_info->name = "aoe"; + q->backing_dev_info->ra_pages = READ_AHEAD / PAGE_CACHE_SIZE; d->bufpool = mp; d->blkq = gd->queue = q; q->queuedata = d; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 1d58854c4a9f..1f9c77609dd1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2393,7 +2393,7 @@ static int drbd_congested(void *congested_data, int bdi_bits) if (get_ldev(device)) { q = bdev_get_queue(device->ldev->backing_bdev); - r = bdi_congested(&q->backing_dev_info, bdi_bits); + r = bdi_congested(q->backing_dev_info, bdi_bits); put_ldev(device); if (r) reason = 'b'; @@ -2765,8 +2765,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig /* we have no partitions. we contain only ourselves. */ device->this_bdev->bd_contains = device->this_bdev; - q->backing_dev_info.congested_fn = drbd_congested; - q->backing_dev_info.congested_data = device; + q->backing_dev_info->congested_fn = drbd_congested; + q->backing_dev_info->congested_data = device; blk_queue_make_request(q, drbd_make_request); blk_queue_flush(q, REQ_FLUSH | REQ_FUA); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index e80cbefbc2b5..ef03cb25f5bf 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1170,11 +1170,11 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi blk_queue_stack_limits(q, b); - if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { + if (q->backing_dev_info->ra_pages != b->backing_dev_info->ra_pages) { drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", - q->backing_dev_info.ra_pages, - b->backing_dev_info.ra_pages); - q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; + q->backing_dev_info->ra_pages, + b->backing_dev_info->ra_pages); + q->backing_dev_info->ra_pages = b->backing_dev_info->ra_pages; } } } diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 3b10fa6cb039..7a6b9f3e1a9f 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -288,7 +288,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%2d: cs:Unconfigured\n", i); } else { /* reset device->congestion_reason */ - bdi_rw_congested(&device->rq_queue->backing_dev_info); + bdi_rw_congested(device->rq_queue->backing_dev_info); nc = rcu_dereference(first_peer_device(device)->connection->net_conf); wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' '; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3ae2c0086563..17ae4e1ab358 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -937,7 +937,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se switch (rbm) { case RB_CONGESTED_REMOTE: - bdi = &device->ldev->backing_bdev->bd_disk->queue->backing_dev_info; + bdi = device->ldev->backing_bdev->bd_disk->queue->backing_dev_info; return bdi_read_congested(bdi); case RB_LEAST_PENDING: return atomic_read(&device->local_cnt) > diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index d06c62eccdf0..f018318d4466 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1276,7 +1276,7 @@ try_next_bio: && pd->bio_queue_size <= pd->write_congestion_off); spin_unlock(&pd->lock); if (wakeup) { - clear_bdi_congested(&pd->disk->queue->backing_dev_info, + clear_bdi_congested(pd->disk->queue->backing_dev_info, BLK_RW_ASYNC); } @@ -2405,7 +2405,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) spin_lock(&pd->lock); if (pd->write_congestion_on > 0 && pd->bio_queue_size >= pd->write_congestion_on) { - set_bdi_congested(&q->backing_dev_info, BLK_RW_ASYNC); + set_bdi_congested(q->backing_dev_info, BLK_RW_ASYNC); do { spin_unlock(&pd->lock); congestion_wait(BLK_RW_ASYNC, HZ); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index fbdddd6f94b8..55a8671f1979 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3780,7 +3780,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) q->limits.discard_zeroes_data = 1; if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) - q->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; + q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; disk->queue = q; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 6c4c7caea693..0ee41fd9d850 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1014,7 +1014,7 @@ static int cached_dev_congested(void *data, int bits) struct request_queue *q = bdev_get_queue(dc->bdev); int ret = 0; - if (bdi_congested(&q->backing_dev_info, bits)) + if (bdi_congested(q->backing_dev_info, bits)) return 1; if (cached_dev_get(dc)) { @@ -1023,7 +1023,7 @@ static int cached_dev_congested(void *data, int bits) for_each_cache(ca, d->c, i) { q = bdev_get_queue(ca->bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); } cached_dev_put(dc); @@ -1037,7 +1037,7 @@ void bch_cached_dev_request_init(struct cached_dev *dc) struct gendisk *g = dc->disk.disk; g->queue->make_request_fn = cached_dev_make_request; - g->queue->backing_dev_info.congested_fn = cached_dev_congested; + g->queue->backing_dev_info->congested_fn = cached_dev_congested; dc->disk.cache_miss = cached_dev_cache_miss; dc->disk.ioctl = cached_dev_ioctl; } @@ -1130,7 +1130,7 @@ static int flash_dev_congested(void *data, int bits) for_each_cache(ca, d->c, i) { q = bdev_get_queue(ca->bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); } return ret; @@ -1141,7 +1141,7 @@ void bch_flash_dev_request_init(struct bcache_device *d) struct gendisk *g = d->disk; g->queue->make_request_fn = flash_dev_make_request; - g->queue->backing_dev_info.congested_fn = flash_dev_congested; + g->queue->backing_dev_info->congested_fn = flash_dev_congested; d->cache_miss = flash_dev_cache_miss; d->ioctl = flash_dev_ioctl; } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index c5ceea9222ff..13acf48c5210 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -802,7 +802,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, blk_queue_make_request(q, NULL); d->disk->queue = q; q->queuedata = d; - q->backing_dev_info.congested_data = d; + q->backing_dev_info->congested_data = d; q->limits.max_hw_sectors = UINT_MAX; q->limits.max_sectors = UINT_MAX; q->limits.max_segment_size = UINT_MAX; @@ -1129,9 +1129,9 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size) set_capacity(dc->disk.disk, dc->bdev->bd_part->nr_sects - dc->sb.data_offset); - dc->disk.disk->queue->backing_dev_info.ra_pages = - max(dc->disk.disk->queue->backing_dev_info.ra_pages, - q->backing_dev_info.ra_pages); + dc->disk.disk->queue->backing_dev_info->ra_pages = + max(dc->disk.disk->queue->backing_dev_info->ra_pages, + q->backing_dev_info->ra_pages); bch_cached_dev_request_init(dc); bch_cached_dev_writeback_init(dc); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 0da5efaad85c..54e50fc908e9 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2288,7 +2288,7 @@ static void do_waker(struct work_struct *ws) static int is_congested(struct dm_dev *dev, int bdi_bits) { struct request_queue *q = bdev_get_queue(dev->bdev); - return bdi_congested(&q->backing_dev_info, bdi_bits); + return bdi_congested(q->backing_dev_info, bdi_bits); } static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits) diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 32e76c5ee741..11c52567304f 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1379,7 +1379,7 @@ static void stop_worker(struct era *era) static int dev_is_congested(struct dm_dev *dev, int bdi_bits) { struct request_queue *q = bdev_get_queue(dev->bdev); - return bdi_congested(&q->backing_dev_info, bdi_bits); + return bdi_congested(q->backing_dev_info, bdi_bits); } static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index b3d78bba3a79..9411deaaddf9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1660,7 +1660,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) char b[BDEVNAME_SIZE]; if (likely(q)) - r |= bdi_congested(&q->backing_dev_info, bdi_bits); + r |= bdi_congested(q->backing_dev_info, bdi_bits); else DMWARN_LIMIT("%s: any_congested: nonexistent device %s", dm_device_name(t->md), diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index a1cc797fe88f..5f1a943d9e81 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2634,7 +2634,7 @@ static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) return 1; q = bdev_get_queue(pt->data_dev->bdev); - return bdi_congested(&q->backing_dev_info, bdi_bits); + return bdi_congested(q->backing_dev_info, bdi_bits); } static void requeue_bios(struct pool *pool) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1a7b11d57256..47ac131099d9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2220,7 +2220,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits) * the query about congestion status of request_queue */ if (dm_request_based(md)) - r = md->queue->backing_dev_info.wb.state & + r = md->queue->backing_dev_info->wb.state & bdi_bits; else r = dm_table_any_congested(map, bdi_bits); @@ -2302,7 +2302,7 @@ static void dm_init_md_queue(struct mapped_device *md) * - must do so here (in alloc_dev callchain) before queue is used */ md->queue->queuedata = md; - md->queue->backing_dev_info.congested_data = md; + md->queue->backing_dev_info->congested_data = md; } static void dm_init_old_md_queue(struct mapped_device *md) @@ -2313,7 +2313,7 @@ static void dm_init_old_md_queue(struct mapped_device *md) /* * Initialize aspects of queue that aren't relevant for blk-mq */ - md->queue->backing_dev_info.congested_fn = dm_any_congested; + md->queue->backing_dev_info->congested_fn = dm_any_congested; blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); } diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 6ba3227e29b2..79223dceb1c2 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -68,7 +68,7 @@ static int linear_congested(struct mddev *mddev, int bits) for (i = 0; i < conf->raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); } rcu_read_unlock(); diff --git a/drivers/md/md.c b/drivers/md/md.c index 0a856cb181e9..1cd819202553 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5284,8 +5284,8 @@ int md_run(struct mddev *mddev) return err; } if (mddev->queue) { - mddev->queue->backing_dev_info.congested_data = mddev; - mddev->queue->backing_dev_info.congested_fn = md_congested; + mddev->queue->backing_dev_info->congested_data = mddev; + mddev->queue->backing_dev_info->congested_fn = md_congested; } if (pers->sync_request) { if (mddev->kobj.sd && @@ -5642,7 +5642,7 @@ static int do_md_stop(struct mddev *mddev, int mode, __md_stop_writes(mddev); __md_stop(mddev); - mddev->queue->backing_dev_info.congested_fn = NULL; + mddev->queue->backing_dev_info->congested_fn = NULL; /* tell userspace to handle 'inactive' */ sysfs_notify_dirent_safe(mddev->sysfs_state); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index dd483bb2e111..fb03ed86d57a 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -166,7 +166,7 @@ static int multipath_congested(struct mddev *mddev, int bits) if (rdev && !test_bit(Faulty, &rdev->flags)) { struct request_queue *q = bdev_get_queue(rdev->bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); /* Just like multipath_map, we just check the * first available device */ diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f8e5db0cb5aa..7a67e7dcf546 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -35,7 +35,7 @@ static int raid0_congested(struct mddev *mddev, int bits) for (i = 0; i < raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(devlist[i]->bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); } return ret; } @@ -415,8 +415,8 @@ static int raid0_run(struct mddev *mddev) */ int stripe = mddev->raid_disks * (mddev->chunk_sectors << 9) / PAGE_SIZE; - if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) - mddev->queue->backing_dev_info.ra_pages = 2* stripe; + if (mddev->queue->backing_dev_info->ra_pages < 2* stripe) + mddev->queue->backing_dev_info->ra_pages = 2* stripe; } dump_zones(mddev); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f24a9e14021d..a3ec3c5a8ee9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -730,9 +730,9 @@ static int raid1_congested(struct mddev *mddev, int bits) * non-congested targets, it can be removed */ if ((bits & (1 << WB_async_congested)) || 1) - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); else - ret &= bdi_congested(&q->backing_dev_info, bits); + ret &= bdi_congested(q->backing_dev_info, bits); } } rcu_read_unlock(); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e5ee4e9e0ea5..186d753b7fdb 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -838,7 +838,7 @@ static int raid10_congested(struct mddev *mddev, int bits) if (rdev && !test_bit(Faulty, &rdev->flags)) { struct request_queue *q = bdev_get_queue(rdev->bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); } } rcu_read_unlock(); @@ -3698,8 +3698,8 @@ static int run(struct mddev *mddev) * maybe... */ stripe /= conf->geo.near_copies; - if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) - mddev->queue->backing_dev_info.ra_pages = 2 * stripe; + if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe) + mddev->queue->backing_dev_info->ra_pages = 2 * stripe; } if (md_integrity_register(mddev)) @@ -4493,8 +4493,8 @@ static void end_reshape(struct r10conf *conf) int stripe = conf->geo.raid_disks * ((conf->mddev->chunk_sectors << 9) / PAGE_SIZE); stripe /= conf->geo.near_copies; - if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) - conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; + if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe) + conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe; } conf->fullsync = 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5eac08ffc697..165da5b94999 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6123,10 +6123,10 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) mddev_suspend(mddev); conf->skip_copy = new; if (new) - mddev->queue->backing_dev_info.capabilities |= + mddev->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; else - mddev->queue->backing_dev_info.capabilities &= + mddev->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; mddev_resume(mddev); } @@ -6970,8 +6970,8 @@ static int run(struct mddev *mddev) int data_disks = conf->previous_raid_disks - conf->max_degraded; int stripe = data_disks * ((mddev->chunk_sectors << 9) / PAGE_SIZE); - if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) - mddev->queue->backing_dev_info.ra_pages = 2 * stripe; + if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe) + mddev->queue->backing_dev_info->ra_pages = 2 * stripe; chunk_size = mddev->chunk_sectors << 9; blk_queue_io_min(mddev->queue, chunk_size); @@ -7552,8 +7552,8 @@ static void end_reshape(struct r5conf *conf) int data_disks = conf->raid_disks - conf->max_degraded; int stripe = data_disks * ((conf->chunk_sectors << 9) / PAGE_SIZE); - if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) - conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; + if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe) + conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe; } } } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index baab99b69d8a..de3e91817228 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1222,7 +1222,7 @@ static int set_gfs2_super(struct super_block *s, void *data) * We set the bdi here to the queue backing, file systems can * overwrite this in ->fill_super() */ - s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; + s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 354013ea22ec..67c6c650b21e 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1079,7 +1079,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_gran = 1; sb->s_max_links = NILFS_LINK_MAX; - sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info; + sb->s_bdi = bdev_get_queue(sb->s_bdev)->backing_dev_info; err = load_nilfs(nilfs, sb); if (err) diff --git a/fs/super.c b/fs/super.c index c96434ea71e2..cbd4fab271d4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -968,7 +968,7 @@ static int set_bdev_super(struct super_block *s, void *data) * We set the bdi here to the queue backing, file systems can * overwrite this in ->fill_super() */ - s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; + s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ae64a897622c..74f53ea5c267 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -332,7 +332,8 @@ struct request_queue { */ struct delayed_work delay_work; - struct backing_dev_info backing_dev_info; + struct backing_dev_info *backing_dev_info; + struct backing_dev_info _backing_dev_info; /* * The queue owner gets to use this for whatever they like. diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 15cb026ef807..29597fe6bb35 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1993,11 +1993,11 @@ void laptop_mode_timer_fn(unsigned long data) * We want to write everything out, not just down to the dirty * threshold */ - if (!bdi_has_dirty_io(&q->backing_dev_info)) + if (!bdi_has_dirty_io(q->backing_dev_info)) return; rcu_read_lock(); - list_for_each_entry_rcu(wb, &q->backing_dev_info.wb_list, bdi_node) + list_for_each_entry_rcu(wb, &q->backing_dev_info->wb_list, bdi_node) if (wb_has_dirty_io(wb)) wb_start_writeback(wb, nr_pages, true, WB_REASON_LAPTOP_TIMER); From b56ec58cde8aa78d30f0af2f71c5ffcbe5def4ce Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Feb 2017 15:56:51 +0100 Subject: [PATCH 3/5] block: Dynamically allocate and refcount backing_dev_info Instead of storing backing_dev_info inside struct request_queue, allocate it dynamically, reference count it, and free it when the last reference is dropped. Currently only request_queue holds the reference but in the following patch we add other users referencing backing_dev_info. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Change-Id: Ibcee7b4c014018f9243cd3edbfd9c4a8877c3862 Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git Git-commit: d03f6cdc1fc422accb734c7c07a661a0018d8631 [riteshh@codeaurora.org: resolved merge conflicts] Signed-off-by: Ritesh Harjani --- block/blk-core.c | 12 +++++------ block/blk-sysfs.c | 2 +- include/linux/backing-dev-defs.h | 2 ++ include/linux/backing-dev.h | 10 +++++++++- include/linux/blkdev.h | 1 - mm/backing-dev.c | 34 +++++++++++++++++++++++++++++++- 6 files changed, 50 insertions(+), 11 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 8248d21f4616..1c510b2b2549 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -697,7 +697,6 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref) struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) { struct request_queue *q; - int err; q = kmem_cache_alloc_node(blk_requestq_cachep, gfp_mask | __GFP_ZERO, node_id); @@ -712,17 +711,16 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (!q->bio_split) goto fail_id; - q->backing_dev_info = &q->_backing_dev_info; + q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id); + if (!q->backing_dev_info) + goto fail_split; + q->backing_dev_info->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK; q->backing_dev_info->name = "block"; q->node = node_id; - err = bdi_init(q->backing_dev_info); - if (err) - goto fail_split; - setup_timer(&q->backing_dev_info->laptop_mode_wb_timer, laptop_mode_timer_fn, (unsigned long) q); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); @@ -773,7 +771,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) fail_ref: percpu_ref_exit(&q->q_usage_counter); fail_bdi: - bdi_destroy(q->backing_dev_info); + bdi_put(q->backing_dev_info); fail_split: bioset_free(q->bio_split); fail_id: diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 8e1e284fa159..a113dc1e3eb7 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -578,7 +578,7 @@ static void blk_release_queue(struct kobject *kobj) struct request_queue *q = container_of(kobj, struct request_queue, kobj); - bdi_exit(q->backing_dev_info); + bdi_put(q->backing_dev_info); blkcg_exit_queue(q); if (q->elevator) { diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 140c29635069..c104d4aed62a 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -10,6 +10,7 @@ #include #include #include +#include struct page; struct device; @@ -141,6 +142,7 @@ struct backing_dev_info { void *congested_data; /* Pointer to aux data for congested func */ char *name; + struct kref refcnt; /* Reference counter for the structure */ unsigned int min_ratio; unsigned int max_ratio, max_prop_frac; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 89d3de3e096b..125bc67319b4 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -18,7 +18,14 @@ #include int __must_check bdi_init(struct backing_dev_info *bdi); -void bdi_exit(struct backing_dev_info *bdi); + +static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) +{ + kref_get(&bdi->refcnt); + return bdi; +} + +void bdi_put(struct backing_dev_info *bdi); __printf(3, 4) int bdi_register(struct backing_dev_info *bdi, struct device *parent, @@ -29,6 +36,7 @@ void bdi_unregister(struct backing_dev_info *bdi); int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); void bdi_destroy(struct backing_dev_info *bdi); +struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id); void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, bool range_cyclic, enum wb_reason reason); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 74f53ea5c267..8150e164385c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -333,7 +333,6 @@ struct request_queue { struct delayed_work delay_work; struct backing_dev_info *backing_dev_info; - struct backing_dev_info _backing_dev_info; /* * The queue owner gets to use this for whatever they like. diff --git a/mm/backing-dev.c b/mm/backing-dev.c index a988d4ef39da..4c480a20d76c 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -237,6 +237,7 @@ static __init int bdi_class_init(void) bdi_class->dev_groups = bdi_dev_groups; bdi_debug_init(); + return 0; } postcore_initcall(bdi_class_init); @@ -780,6 +781,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi->dev = NULL; + kref_init(&bdi->refcnt); bdi->min_ratio = 0; bdi->max_ratio = 100; bdi->max_prop_frac = FPROP_FRAC_BASE; @@ -795,6 +797,22 @@ int bdi_init(struct backing_dev_info *bdi) } EXPORT_SYMBOL(bdi_init); +struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id) +{ + struct backing_dev_info *bdi; + + bdi = kmalloc_node(sizeof(struct backing_dev_info), + gfp_mask | __GFP_ZERO, node_id); + if (!bdi) + return NULL; + + if (bdi_init(bdi)) { + kfree(bdi); + return NULL; + } + return bdi; +} + int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...) { @@ -875,12 +893,26 @@ void bdi_unregister(struct backing_dev_info *bdi) } } -void bdi_exit(struct backing_dev_info *bdi) +static void bdi_exit(struct backing_dev_info *bdi) { WARN_ON_ONCE(bdi->dev); wb_exit(&bdi->wb); } +static void release_bdi(struct kref *ref) +{ + struct backing_dev_info *bdi = + container_of(ref, struct backing_dev_info, refcnt); + + bdi_exit(bdi); + kfree(bdi); +} + +void bdi_put(struct backing_dev_info *bdi) +{ + kref_put(&bdi->refcnt, release_bdi); +} + void bdi_destroy(struct backing_dev_info *bdi) { bdi_unregister(bdi); From 9c197e78d2888e51936f5794ae9356ef42aca160 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Feb 2017 15:56:52 +0100 Subject: [PATCH 4/5] block: Make blk_get_backing_dev_info() safe without open bdev Currenly blk_get_backing_dev_info() is not safe to be called when the block device is not open as bdev->bd_disk is NULL in that case. However inode_to_bdi() uses this function and may be call called from flusher worker or other writeback related functions without bdev being open which leads to crashes such as: [113031.075540] Unable to handle kernel paging request for data at address 0x00000000 [113031.075614] Faulting instruction address: 0xc0000000003692e0 0:mon> t [c0000000fb65f900] c00000000036cb6c writeback_sb_inodes+0x30c/0x590 [c0000000fb65fa10] c00000000036ced4 __writeback_inodes_wb+0xe4/0x150 [c0000000fb65fa70] c00000000036d33c wb_writeback+0x30c/0x450 [c0000000fb65fb40] c00000000036e198 wb_workfn+0x268/0x580 [c0000000fb65fc50] c0000000000f3470 process_one_work+0x1e0/0x590 [c0000000fb65fce0] c0000000000f38c8 worker_thread+0xa8/0x660 [c0000000fb65fd80] c0000000000fc4b0 kthread+0x110/0x130 [c0000000fb65fe30] c0000000000098f0 ret_from_kernel_thread+0x5c/0x6c Signed-off-by: Jens Axboe Change-Id: I26955b919bd05fe34dc60aab1797ea2739ad5fd7 Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git Git-commit: b1d2dc5659b41741f5a29b2ade76ffb4e5bb13d8 [riteshh@codeaurora.org: resolved merge conflicts] Signed-off-by: Ritesh Harjani --- block/blk-core.c | 8 +++----- fs/block_dev.c | 8 ++++++++ include/linux/fs.h | 1 + 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 1c510b2b2549..56652cd209db 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -122,14 +122,12 @@ void blk_queue_congestion_threshold(struct request_queue *q) * @bdev: device * * Locates the passed device's request queue and returns the address of its - * backing_dev_info. This function can only be called if @bdev is opened - * and the return value is never NULL. + * backing_dev_info. The return value is never NULL however we may return + * &noop_backing_dev_info if the bdev is not currently open. */ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - return q->backing_dev_info; + return bdev->bd_bdi; } EXPORT_SYMBOL(blk_get_backing_dev_info); diff --git a/fs/block_dev.c b/fs/block_dev.c index d180449f07ce..49533c903467 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -557,6 +557,8 @@ static void bdev_evict_inode(struct inode *inode) } list_del_init(&bdev->bd_list); spin_unlock(&bdev_lock); + if (bdev->bd_bdi != &noop_backing_dev_info) + bdi_put(bdev->bd_bdi); } static const struct super_operations bdev_sops = { @@ -655,6 +657,7 @@ struct block_device *bdget(dev_t dev) bdev->bd_contains = NULL; bdev->bd_super = NULL; bdev->bd_inode = inode; + bdev->bd_bdi = &noop_backing_dev_info; bdev->bd_block_size = (1 << inode->i_blkbits); bdev->bd_part_count = 0; bdev->bd_invalidated = 0; @@ -1216,6 +1219,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = disk; bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; + if (bdev->bd_bdi == &noop_backing_dev_info) + bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); + bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0; if (!partno) { ret = -ENXIO; @@ -1317,6 +1323,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_queue = NULL; + bdi_put(bdev->bd_bdi); + bdev->bd_bdi = &noop_backing_dev_info; if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; diff --git a/include/linux/fs.h b/include/linux/fs.h index 82c5da84efe4..d5264dcaaa26 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -481,6 +481,7 @@ struct block_device { int bd_invalidated; struct gendisk * bd_disk; struct request_queue * bd_queue; + struct backing_dev_info *bd_bdi; struct list_head bd_list; /* * Private data. You must have bd_claim'ed the block_device From 16d289189c40a69737bbb59ea6520ab4dec205da Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Mar 2017 16:50:13 +0100 Subject: [PATCH 5/5] block: Initialize bd_bdi on inode initialization So far we initialized bd_bdi only in bdget(). That is fine for normal bdev inodes however for the special case of the root inode of blockdev_superblock that function is never called and thus bd_bdi is left uninitialized. As a result bdev_evict_inode() may oops doing bdi_put(root->bd_bdi) on that inode as can be seen when doing: mount -t bdev none /mnt Fix the problem by initializing bd_bdi when first allocating the inode and then reinitializing bd_bdi in bdev_evict_inode(). Thanks to syzkaller team for finding the problem. Reported-by: Dmitry Vyukov Fixes: b1d2dc5659b4 ("block: Make blk_get_backing_dev_info() safe without open bdev") Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Change-Id: I9c34ca321ab311936946187ed21e25d67caf5ba5 Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git Git-commit: 7ee143f46cb1f58ee07194076b9a3e6ea3f27ad3 [riteshh@codeaurora.org: resolved merge conflicts] Signed-off-by: Ritesh Harjani --- fs/block_dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 49533c903467..d3c296d4eb25 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -532,6 +532,7 @@ static void init_once(void *foo) #ifdef CONFIG_SYSFS INIT_LIST_HEAD(&bdev->bd_holder_disks); #endif + bdev->bd_bdi = &noop_backing_dev_info; inode_init_once(&ei->vfs_inode); /* Initialize mutex for freeze. */ mutex_init(&bdev->bd_fsfreeze_mutex); @@ -557,8 +558,10 @@ static void bdev_evict_inode(struct inode *inode) } list_del_init(&bdev->bd_list); spin_unlock(&bdev_lock); - if (bdev->bd_bdi != &noop_backing_dev_info) + if (bdev->bd_bdi != &noop_backing_dev_info) { bdi_put(bdev->bd_bdi); + bdev->bd_bdi = &noop_backing_dev_info; + } } static const struct super_operations bdev_sops = { @@ -657,7 +660,6 @@ struct block_device *bdget(dev_t dev) bdev->bd_contains = NULL; bdev->bd_super = NULL; bdev->bd_inode = inode; - bdev->bd_bdi = &noop_backing_dev_info; bdev->bd_block_size = (1 << inode->i_blkbits); bdev->bd_part_count = 0; bdev->bd_invalidated = 0;