diff --git a/block/blk-core.c b/block/blk-core.c
index 33e2f62d5062..c3b113e90c96 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1720,7 +1720,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
-	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
+	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_POST_FLUSH_BARRIER |
+			  REQ_BARRIER)) {
 		spin_lock_irq(q->queue_lock);
 		where = ELEVATOR_INSERT_FLUSH;
 		goto get_rq;
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 9c423e53324a..d9a005037179 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -62,6 +62,45 @@
 * The above peculiarity requires that each FLUSH/FUA request has only one
 * bio attached to it, which is guaranteed as they aren't allowed to be
 * merged in the usual way.
+ *
+ * Cache Barrier support:
+ *
+ * A cache barrier is a request that instructs the storage device to apply
+ * ordering when writing data from the device's cache to the medium: write
+ * requests that arrive before a 'cache barrier' request will be written to
+ * the medium before write requests that arrive after the 'cache barrier'.
+ * Since the barrier request is not supported by all block devices, the
+ * appropriate fallback is a flush request. This lets applications rely on
+ * correct ordering without having to consider the capabilities of the
+ * underlying device.
+ *
+ * A queued barrier request follows the same path as a flush request. When
+ * it is time to issue the request, the flush pending list is scanned; if
+ * it contains only requests marked with REQ_BARRIER, a barrier request is
+ * issued. Otherwise, if at least one plain flush is pending, a flush is
+ * issued.
+ * A barrier request is a flush request marked with the REQ_BARRIER flag.
+ * It is the LLD's responsibility to test this flag if it supports the
+ * barrier feature and to decide whether to issue a flush or a barrier to
+ * the device.
+ *
+ * When considering a barrier request, three sequences must be addressed:
+ * 1. (A)Barrier -> (B)Data: marked with WRITE_FLUSH_BARRIER, i.e.
+ *    (REQ_FLUSH | REQ_BARRIER).
+ *    This scenario is split into PREFLUSH and DATA; no additional
+ *    execution phase is required. If barrier is not supported, a flush
+ *    is issued instead of (A).
+ * 2. (A)Data -> (B)Barrier: marked with WRITE_POST_FLUSH_BARRIER, i.e.
+ *    (REQ_POST_FLUSH_BARRIER | REQ_BARRIER).
+ *    When barrier is supported, the request executes DATA and then
+ *    POSTFLUSH.
+ *    If barrier is not supported but FUA is, the barrier may be replaced
+ *    with DATA+FUA.
+ *    If neither barrier nor FUA is supported, a flush must be issued
+ *    instead of (B). This is similar to the existing FUA fallback.
+ * 3. (A)Barrier -> (B)Data -> (C)Barrier: marked with
+ *    WRITE_ORDERED_FLUSH_BARRIER, i.e.
+ *    (REQ_FLUSH | REQ_POST_FLUSH_BARRIER | REQ_BARRIER). This is simply
+ *    a combination of the previous two; no additional logic is required.
 */
 
 #include <linux/kernel.h>
@@ -105,8 +144,26 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
 	if (fflags & REQ_FLUSH) {
 		if (rq->cmd_flags & REQ_FLUSH)
 			policy |= REQ_FSEQ_PREFLUSH;
-		if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
+		/*
+		 * Use a post flush when:
+		 * 1. FUA is desired but not supported,
+		 * 2. a post barrier is desired and supported,
+		 * 3. a post barrier is desired but neither barrier nor
+		 *    FUA is supported.
+		 */
+		if ((!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) ||
+		    ((fflags & REQ_BARRIER) && (rq->cmd_flags &
+		      REQ_POST_FLUSH_BARRIER)) ||
+		    ((!(fflags & REQ_BARRIER) && !(fflags & REQ_FUA) &&
+		      (rq->cmd_flags & REQ_POST_FLUSH_BARRIER))))
 			policy |= REQ_FSEQ_POSTFLUSH;
+		/*
+		 * If a post barrier is desired and not supported but FUA
+		 * is, append the FUA flag instead.
+		 */
+		if ((rq->cmd_flags & REQ_POST_FLUSH_BARRIER) &&
+		    !(fflags & REQ_BARRIER) && (fflags & REQ_FUA))
+			rq->cmd_flags |= REQ_FUA;
 	}
 	return policy;
 }
@@ -290,9 +347,10 @@ static void flush_end_io(struct request *flush_rq, int error)
 static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 {
 	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
-	struct request *first_rq =
+	struct request *rq, *n, *first_rq =
 		list_first_entry(pending, struct request, flush.list);
 	struct request *flush_rq = fq->flush_rq;
+	u64 barrier_flag = REQ_BARRIER;
 
 	/* C1 described at the top of this file */
 	if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
@@ -330,6 +388,12 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 	flush_rq->cmd_type = REQ_TYPE_FS;
 	flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
+	/* Issue a barrier only if all pending flushes request it */
+	list_for_each_entry_safe(rq, n, pending, flush.list) {
+		barrier_flag &= rq->cmd_flags;
+	}
+	flush_rq->cmd_flags |= barrier_flag;
+
 	flush_rq->rq_disk = first_rq->rq_disk;
 	flush_rq->end_io = flush_end_io;
@@ -388,6 +452,8 @@ void blk_insert_flush(struct request *rq)
 	unsigned int policy = blk_flush_policy(fflags, rq);
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
 
+	WARN_ON((rq->cmd_flags & REQ_POST_FLUSH_BARRIER) &&
+		!blk_rq_sectors(rq));
 	/*
 	 * @policy now records what operations need to be done.  Adjust
 	 * REQ_FLUSH and FUA for the driver.
@@ -447,20 +513,8 @@ void blk_insert_flush(struct request *rq)
 	blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
 }
 
-/**
- * blkdev_issue_flush - queue a flush
- * @bdev:	blockdev to issue flush for
- * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @error_sector:	error sector
- *
- * Description:
- *    Issue a flush for the block device in question. Caller can supply
- *    room for storing the error offset in case of a flush error, if they
- *    wish to. If WAIT flag is not passed then caller may check only what
- *    request was pushed in some internal queue for later handling.
- */
-int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
-		sector_t *error_sector)
+static int __blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
+		sector_t *error_sector, int flush_type)
 {
 	struct request_queue *q;
 	struct bio *bio;
@@ -485,7 +539,7 @@
 	bio = bio_alloc(gfp_mask, 0);
 	bio->bi_bdev = bdev;
 
-	ret = submit_bio_wait(WRITE_FLUSH, bio);
+	ret = submit_bio_wait(flush_type, bio);
 
 	/*
 	 * The driver must store the error location in ->bi_sector, if
@@ -498,6 +552,45 @@
 	bio_put(bio);
 	return ret;
 }
+
+/**
+ * blkdev_issue_barrier - queue a barrier
+ * @bdev:	blockdev to issue barrier for
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @error_sector:	error sector
+ *
+ * Description:
+ *    If blkdev supports the barrier API, issue a barrier, otherwise
+ *    issue a flush. Caller can supply room for storing the error offset
+ *    in case of a flush error, if they wish to.
+ *    If WAIT flag is not passed then caller may check only what request
+ *    was pushed in some internal queue for later handling.
+ */
+int blkdev_issue_barrier(struct block_device *bdev, gfp_t gfp_mask,
+		sector_t *error_sector)
+{
+	return __blkdev_issue_flush(bdev, gfp_mask, error_sector,
+			WRITE_FLUSH_BARRIER);
+}
+EXPORT_SYMBOL(blkdev_issue_barrier);
+
+/**
+ * blkdev_issue_flush - queue a flush
+ * @bdev:	blockdev to issue flush for
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @error_sector:	error sector
+ *
+ * Description:
+ *    Issue a flush for the block device in question. Caller can supply
+ *    room for storing the error offset in case of a flush error, if they
+ *    wish to. If WAIT flag is not passed then caller may check only what
+ *    request was pushed in some internal queue for later handling.
+ */
+int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
+		sector_t *error_sector)
+{
+	return __blkdev_issue_flush(bdev, gfp_mask, error_sector, WRITE_FLUSH);
+}
 EXPORT_SYMBOL(blkdev_issue_flush);
 
 struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
diff --git a/block/blk-settings.c b/block/blk-settings.c
index dd4973583978..77a482b88913 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -823,20 +823,24 @@ EXPORT_SYMBOL(blk_queue_update_dma_alignment);
 
 /**
 * blk_queue_flush - configure queue's cache flush capability
 * @q:		the request queue for the device
- * @flush:	0, REQ_FLUSH or REQ_FLUSH | REQ_FUA
+ * @flush:	0, REQ_FLUSH or REQ_FLUSH | REQ_FUA | REQ_BARRIER
 *
 * Tell block layer cache flush capability of @q.  If it supports
 * flushing, REQ_FLUSH should be set.  If it supports bypassing
- * write cache for individual writes, REQ_FUA should be set.
+ * write cache for individual writes, REQ_FUA should be set. If it
+ * supports cache barriers, REQ_BARRIER should be set.
 */
 void blk_queue_flush(struct request_queue *q, unsigned int flush)
 {
-	WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA));
+	WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA | REQ_BARRIER));
 
-	if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA)))
+	if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && ((flush & REQ_FUA) ||
+	    (flush & REQ_BARRIER)))) {
 		flush &= ~REQ_FUA;
+		flush &= ~REQ_BARRIER;
+	}
 
-	q->flush_flags = flush & (REQ_FLUSH | REQ_FUA);
+	q->flush_flags = flush & (REQ_FLUSH | REQ_FUA | REQ_BARRIER);
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 6509574f9b95..d32400f5402b 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -161,6 +161,7 @@ enum rq_flag_bits {
 	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
 	__REQ_FUA,		/* forced unit access */
 	__REQ_FLUSH,		/* request for cache flush */
+	__REQ_POST_FLUSH_BARRIER,/* cache barrier after a data req */
 	__REQ_BARRIER,		/* marks flush req as barrier */
 
 	/* bio only flags */
@@ -239,6 +240,7 @@ enum rq_flag_bits {
 #define REQ_ALLOCED		(1ULL << __REQ_ALLOCED)
 #define REQ_COPY_USER		(1ULL << __REQ_COPY_USER)
 #define REQ_FLUSH		(1ULL << __REQ_FLUSH)
+#define REQ_POST_FLUSH_BARRIER	(1ULL << __REQ_POST_FLUSH_BARRIER)
 #define REQ_FLUSH_SEQ		(1ULL << __REQ_FLUSH_SEQ)
 #define REQ_IO_STAT		(1ULL << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1ULL << __REQ_MIXED_MERGE)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c70e3588a48c..0adfa9e76f64 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1118,6 +1118,7 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
 #define BLKDEV_DISCARD_SECURE	0x01	/* secure discard */
 
 extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
+extern int blkdev_issue_barrier(struct block_device *, gfp_t, sector_t *);
 extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
 extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3aa514254161..605454ccaa0e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -202,8 +202,15 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);
 #define WRITE_SYNC		(WRITE | REQ_SYNC | REQ_NOIDLE)
 #define WRITE_ODIRECT		(WRITE | REQ_SYNC)
 #define WRITE_FLUSH		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
+#define WRITE_FLUSH_BARRIER	(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | \
+				 REQ_BARRIER)
 #define WRITE_FUA		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
 #define WRITE_FLUSH_FUA	(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
+#define WRITE_POST_FLUSH_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | \
+				  REQ_POST_FLUSH_BARRIER | REQ_BARRIER)
+#define WRITE_ORDERED_FLUSH_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | \
+				     REQ_FLUSH | REQ_POST_FLUSH_BARRIER | \
+				     REQ_BARRIER)
 
 /*
 * Attribute flags.  These should be or-ed together to figure out what
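
For illustration only (not part of the patch): a minimal caller-side sketch of the three sequences described in the blk-flush.c comment, assuming the 4.4-era submit_bio_wait(rw, bio) signature and the WRITE_* macros this patch adds to include/linux/fs.h. The example_* function names are hypothetical.

#include <linux/bio.h>
#include <linux/fs.h>

/*
 * Sequence 1: (A)Barrier -> (B)Data. Earlier writes are ordered before
 * @bio; degrades to a preflush when the queue lacks REQ_BARRIER.
 */
static int example_write_after_barrier(struct bio *bio)
{
	return submit_bio_wait(WRITE_FLUSH_BARRIER, bio);
}

/*
 * Sequence 2: (A)Data -> (B)Barrier. @bio is ordered before later
 * writes; degrades to DATA+FUA or DATA+postflush per blk_flush_policy().
 */
static int example_write_before_barrier(struct bio *bio)
{
	return submit_bio_wait(WRITE_POST_FLUSH_BARRIER, bio);
}

/* Sequence 3: (A)Barrier -> (B)Data -> (C)Barrier, both orderings. */
static int example_ordered_write(struct bio *bio)
{
	return submit_bio_wait(WRITE_ORDERED_FLUSH_BARRIER, bio);
}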
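
The post-flush decision in blk_flush_policy() packs three cases into one condition; here it is restated as a hypothetical standalone predicate with the same truth table, purely for readability (illustrative, not part of the patch):

/* @fflags: queue capabilities (q->flush_flags); @cmd_flags: request flags */
static bool example_needs_postflush(u64 fflags, u64 cmd_flags)
{
	bool want_fua     = cmd_flags & REQ_FUA;
	bool want_barrier = cmd_flags & REQ_POST_FLUSH_BARRIER;
	bool has_fua      = fflags & REQ_FUA;
	bool has_barrier  = fflags & REQ_BARRIER;

	if (want_fua && !has_fua)
		return true;	/* case 1: emulate FUA with a post flush */
	if (want_barrier && has_barrier)
		return true;	/* case 2: post barrier rides a flush rq */
	if (want_barrier && !has_barrier && !has_fua)
		return true;	/* case 3: no barrier, no FUA: full flush */
	return false;		/* handled by DATA+FUA, or nothing to do */
}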
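
On the driver side, a queue advertises the capability through blk_queue_flush() and, if it claims REQ_BARRIER, must inspect that flag on the flush requests it receives. A sketch under this patch; struct example_dev and the example_send_*() helpers are hypothetical stand-ins for a real driver's device commands:

#include <linux/blkdev.h>

struct example_dev;
int example_send_cache_barrier(struct example_dev *dev);
int example_send_cache_flush(struct example_dev *dev);

static void example_init_queue(struct request_queue *q)
{
	/* Volatile write cache; FUA and cache barriers both supported. */
	blk_queue_flush(q, REQ_FLUSH | REQ_FUA | REQ_BARRIER);
}

static int example_handle_flush(struct example_dev *dev, struct request *rq)
{
	/*
	 * A barrier request is a flush request with REQ_BARRIER set;
	 * the LLD picks the cheaper device command when it can.
	 */
	if (rq->cmd_flags & REQ_BARRIER)
		return example_send_cache_barrier(dev);
	return example_send_cache_flush(dev);
}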
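
Finally, a caller that needs only ordering, not durability, can use the new blkdev_issue_barrier() export exactly like blkdev_issue_flush(); if I read the patch correctly, drivers that never advertise REQ_BARRIER simply treat the request as a full flush, so the caller need not check the capability itself. example_commit_ordered() is hypothetical:

#include <linux/blkdev.h>

static int example_commit_ordered(struct block_device *bdev)
{
	/* NULL: this caller does not care about the error sector. */
	return blkdev_issue_barrier(bdev, GFP_KERNEL, NULL);
}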