From 014929f975fab25e65776cd2b162e667644ba044 Mon Sep 17 00:00:00 2001
From: Venkat Gopalakrishnan <venkatg@codeaurora.org>
Date: Thu, 1 May 2014 19:15:23 -0700
Subject: [PATCH] block/fs: keep track of the task that dirtied the page

Background writes happen in the context of a background thread. It is
very useful to identify the actual task that generated the request
instead of the background task that submitted it. Hence keep track of
the task when a page gets dirtied, and dump this task's info while
tracing.

Not all the pages in the bio are dirtied by the same task, but most
likely they will be, since the sectors accessed on the device must be
adjacent.

Change-Id: I6afba85a2063dd3350a0141ba87cf8440ce9f777
Signed-off-by: Venkat Gopalakrishnan <venkatg@codeaurora.org>
[venkatg@codeaurora.org: Fixed trivial merge conflicts]
Signed-off-by: Venkat Gopalakrishnan <venkatg@codeaurora.org>
---
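Reviewer note (kept below the "---" fold so it does not land in the
commit message): the guarded page->tsk_dirty lookup this patch adds to
submit_bio() is open-coded again in six places in blktrace.c below. A
shared helper along the following lines could keep the
fallback-to-current logic in one spot. This is a sketch only; the name
blk_get_dirty_task() is hypothetical and is not defined by this patch
or by the kernel:

	/*
	 * Hypothetical helper (sketch, not part of this patch): return
	 * the task recorded when the bio's first page was dirtied,
	 * falling back to the submitting task.
	 */
	static struct task_struct *blk_get_dirty_task(struct bio *bio)
	{
		/*
		 * Not all the pages in the bio are dirtied by the same
		 * task, but most likely they will be, since the sectors
		 * accessed on the device must be adjacent.
		 */
		if (bio && bio_has_data(bio) && bio->bi_io_vec &&
		    bio->bi_io_vec->bv_page &&
		    bio->bi_io_vec->bv_page->tsk_dirty)
			return bio->bi_io_vec->bv_page->tsk_dirty;

		return current;
	}

Call sites would then collapse to tsk = blk_get_dirty_task(bio), or
tsk = blk_get_dirty_task(rq->bio) on the request-based paths.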
 block/blk-core.c         | 13 ++++++-
 fs/buffer.c              |  2 +
 include/linux/mm_types.h |  2 +
 kernel/trace/blktrace.c  | 83 +++++++++++++++++++++++++++++++++-------
 4 files changed, 85 insertions(+), 15 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 0e65201d9633..b5f890e8caab 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2093,6 +2093,7 @@ EXPORT_SYMBOL(generic_make_request);
  */
 blk_qc_t submit_bio(int rw, struct bio *bio)
 {
+	struct task_struct *tsk = current;
 	bio->bi_rw |= rw;
 
 	/*
@@ -2116,8 +2117,18 @@ blk_qc_t submit_bio(int rw, struct bio *bio)
 
 		if (unlikely(block_dump)) {
 			char b[BDEVNAME_SIZE];
+
+			/*
+			 * Not all the pages in the bio are dirtied by the
+			 * same task but most likely they will be, since the
+			 * sectors accessed on the device must be adjacent.
+			 */
+			if (bio->bi_io_vec && bio->bi_io_vec->bv_page &&
+			    bio->bi_io_vec->bv_page->tsk_dirty)
+				tsk = bio->bi_io_vec->bv_page->tsk_dirty;
+
 			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
-			current->comm, task_pid_nr(current),
+			tsk->comm, task_pid_nr(tsk),
 				(rw & WRITE) ? "WRITE" : "READ",
 				(unsigned long long)bio->bi_iter.bi_sector,
 				bdevname(bio->bi_bdev, b),
diff --git a/fs/buffer.c b/fs/buffer.c
index 4f4cd959da7c..06fc75159808 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -641,6 +641,8 @@ static void __set_page_dirty(struct page *page, struct address_space *mapping,
 		account_page_dirtied(page, mapping, memcg);
 		radix_tree_tag_set(&mapping->page_tree, page_index(page),
 				PAGECACHE_TAG_DIRTY);
+		/* Save the task that is dirtying this page */
+		page->tsk_dirty = current;
 	}
 	spin_unlock_irqrestore(&mapping->tree_lock, flags);
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0a732c5e0de1..9751b9354d61 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -207,6 +207,8 @@ struct page {
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
 
+	struct task_struct *tsk_dirty;	/* task that sets this page dirty */
+
 #ifdef CONFIG_KMEMCHECK
 	/*
 	 * kmemcheck wants to track the status of each byte in a page; this
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index a990824c8604..0386cf742a7a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -199,9 +199,9 @@ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
  * blk_io_trace structure and places it in a per-cpu subbuffer.
  */
 static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
-		     int rw, u32 what, int error, int pdu_len, void *pdu_data)
+		     int rw, u32 what, int error, int pdu_len,
+		     void *pdu_data, struct task_struct *tsk)
 {
-	struct task_struct *tsk = current;
 	struct ring_buffer_event *event = NULL;
 	struct ring_buffer *buffer = NULL;
 	struct blk_io_trace *t;
@@ -708,18 +708,33 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
 			     unsigned int nr_bytes, u32 what)
 {
 	struct blk_trace *bt = q->blk_trace;
+	struct task_struct *tsk = current;
 
 	if (likely(!bt))
 		return;
 
+	/*
+	 * Use the bio context for all events except ISSUE and
+	 * COMPLETE events.
+	 *
+	 * Not all the pages in the bio are dirtied by the same task but
+	 * most likely they will be, since the sectors accessed on the
+	 * device must be adjacent.
+	 */
+	if (!((what == BLK_TA_ISSUE) || (what == BLK_TA_COMPLETE)) &&
+	    bio_has_data(rq->bio) && rq->bio->bi_io_vec &&
+	    rq->bio->bi_io_vec->bv_page &&
+	    rq->bio->bi_io_vec->bv_page->tsk_dirty)
+		tsk = rq->bio->bi_io_vec->bv_page->tsk_dirty;
+
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 		what |= BLK_TC_ACT(BLK_TC_PC);
 		__blk_add_trace(bt, 0, nr_bytes, rq->cmd_flags,
-				what, rq->errors, rq->cmd_len, rq->cmd);
+				what, rq->errors, rq->cmd_len, rq->cmd, tsk);
 	} else {
 		what |= BLK_TC_ACT(BLK_TC_FS);
 		__blk_add_trace(bt, blk_rq_pos(rq), nr_bytes,
-				rq->cmd_flags, what, rq->errors, 0, NULL);
+				rq->cmd_flags, what, rq->errors, 0, NULL, tsk);
 	}
 }
@@ -771,12 +786,25 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
 			      u32 what, int error)
 {
 	struct blk_trace *bt = q->blk_trace;
+	struct task_struct *tsk = current;
 
 	if (likely(!bt))
 		return;
 
+	/*
+	 * Not all the pages in the bio are dirtied by the same task but
+	 * most likely they will be, since the sectors accessed on the
+	 * device must be adjacent.
+	 */
+	if (bio_has_data(bio) && bio->bi_io_vec && bio->bi_io_vec->bv_page &&
+	    bio->bi_io_vec->bv_page->tsk_dirty)
+		tsk = bio->bi_io_vec->bv_page->tsk_dirty;
+
+	if (!error && bio->bi_error)
+		error = bio->bi_error;
+
 	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
-			bio->bi_rw, what, error, 0, NULL);
+			bio->bi_rw, what, error, 0, NULL, tsk);
 }
 
 static void blk_add_trace_bio_bounce(void *ignore,
@@ -824,7 +852,8 @@ static void blk_add_trace_getrq(void *ignore,
 		struct blk_trace *bt = q->blk_trace;
 
 		if (bt)
-			__blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
+			__blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0,
+					NULL, current);
 	}
 }
@@ -840,7 +869,7 @@ static void blk_add_trace_sleeprq(void *ignore,
 
 		if (bt)
 			__blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ,
-					0, 0, NULL);
+					0, 0, NULL, current);
 	}
 }
@@ -849,7 +878,8 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q)
 	struct blk_trace *bt = q->blk_trace;
 
 	if (bt)
-		__blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
+		__blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL,
+				current);
 }
 
 static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
@@ -866,7 +896,8 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
 		else
 			what = BLK_TA_UNPLUG_TIMER;
 
-		__blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
+		__blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu,
+				current);
 	}
 }
@@ -875,13 +906,19 @@ static void blk_add_trace_split(void *ignore,
 				unsigned int pdu)
 {
 	struct blk_trace *bt = q->blk_trace;
+	struct task_struct *tsk = current;
 
 	if (bt) {
 		__be64 rpdu = cpu_to_be64(pdu);
 
+		if (bio_has_data(bio) && bio->bi_io_vec &&
+		    bio->bi_io_vec->bv_page &&
+		    bio->bi_io_vec->bv_page->tsk_dirty)
+			tsk = bio->bi_io_vec->bv_page->tsk_dirty;
+
 		__blk_add_trace(bt, bio->bi_iter.bi_sector,
 				bio->bi_iter.bi_size, bio->bi_rw, BLK_TA_SPLIT,
-				bio->bi_error, sizeof(rpdu), &rpdu);
+				bio->bi_error, sizeof(rpdu), &rpdu, tsk);
 	}
 }
@@ -904,6 +941,7 @@ static void blk_add_trace_bio_remap(void *ignore,
 {
 	struct blk_trace *bt = q->blk_trace;
 	struct blk_io_trace_remap r;
+	struct task_struct *tsk = current;
 
 	if (likely(!bt))
 		return;
@@ -912,9 +950,14 @@ static void blk_add_trace_bio_remap(void *ignore,
 	r.device_to   = cpu_to_be32(bio->bi_bdev->bd_dev);
 	r.sector_from = cpu_to_be64(from);
 
+	if (bio_has_data(bio) && bio->bi_io_vec &&
+	    bio->bi_io_vec->bv_page &&
+	    bio->bi_io_vec->bv_page->tsk_dirty)
+		tsk = bio->bi_io_vec->bv_page->tsk_dirty;
+
 	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
 			bio->bi_rw, BLK_TA_REMAP, bio->bi_error,
-			sizeof(r), &r);
+			sizeof(r), &r, tsk);
 }
 
 /**
@@ -937,6 +980,7 @@ static void blk_add_trace_rq_remap(void *ignore,
 {
 	struct blk_trace *bt = q->blk_trace;
 	struct blk_io_trace_remap r;
+	struct task_struct *tsk = current;
 
 	if (likely(!bt))
 		return;
@@ -945,9 +989,14 @@ static void blk_add_trace_rq_remap(void *ignore,
 	r.device_to   = cpu_to_be32(disk_devt(rq->rq_disk));
 	r.sector_from = cpu_to_be64(from);
 
+	if (bio_has_data(rq->bio) && rq->bio->bi_io_vec &&
+	    rq->bio->bi_io_vec->bv_page &&
+	    rq->bio->bi_io_vec->bv_page->tsk_dirty)
+		tsk = rq->bio->bi_io_vec->bv_page->tsk_dirty;
+
 	__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
 			rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors,
-			sizeof(r), &r);
+			sizeof(r), &r, tsk);
 }
 
 /**
@@ -966,16 +1015,22 @@ void blk_add_driver_data(struct request_queue *q,
 			 void *data, size_t len)
 {
 	struct blk_trace *bt = q->blk_trace;
+	struct task_struct *tsk = current;
 
 	if (likely(!bt))
 		return;
 
+	if (bio_has_data(rq->bio) && rq->bio->bi_io_vec &&
+	    rq->bio->bi_io_vec->bv_page &&
+	    rq->bio->bi_io_vec->bv_page->tsk_dirty)
+		tsk = rq->bio->bi_io_vec->bv_page->tsk_dirty;
+
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
 		__blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
-				BLK_TA_DRV_DATA, rq->errors, len, data);
+				BLK_TA_DRV_DATA, rq->errors, len, data, tsk);
 	else
 		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0,
-				BLK_TA_DRV_DATA, rq->errors, len, data);
+				BLK_TA_DRV_DATA, rq->errors, len, data, tsk);
 }
 EXPORT_SYMBOL_GPL(blk_add_driver_data);
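
For illustration: with block-dump logging enabled through the
vm.block_dump sysctl (echo 1 > /proc/sys/vm/block_dump), the KERN_DEBUG
line in submit_bio() previously named whichever thread submitted the
write, typically a flusher thread; with this patch it names the task
that dirtied the page. A hypothetical before/after, with task names,
PIDs, block and device values invented purely for illustration:

	before: kworker/u8:1(712): WRITE block 2104320 on sda1 (8 sectors)
	after:  mysqld(2301): WRITE block 2104320 on sda1 (8 sectors)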