Android: MMC/UFS IO Latency Histograms.

This patch adds a new sysfs node (latency_hist) and reports
IO (svc time) latency histograms. Disabled by default, can be
enabled by echoing 0 into latency_hist, stats can be cleared
by writing 2 into latency_hist. This commit fixes the 32 bit
build breakage in the previous commit. Tested on both 32 bit
and 64 bit arm devices.

Bug: 30677035
Change-Id: I9a615a16616d80f87e75676ac4d078a5c429dcf9
Signed-off-by: Mohan Srinivasan <srmohan@google.com>
This commit is contained in:
Mohan Srinivasan 2016-08-25 18:31:01 -07:00 committed by Amit Pundir
parent 10bd621d38
commit 8e4b2f84a8
9 changed files with 322 additions and 3 deletions

View file

@ -40,6 +40,8 @@
#include "blk.h"
#include "blk-mq.h"
#include <linux/math64.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
@ -3539,3 +3541,83 @@ int __init blk_dev_init(void)
return 0;
}
/*
* Blk IO latency support. We want this to be as cheap as possible, so doing
* this lockless (and avoiding atomics), a few off by a few errors in this
* code is not harmful, and we don't want to do anything that is
* perf-impactful.
* TODO : If necessary, we can make the histograms per-cpu and aggregate
* them when printing them out.
*/
void
blk_zero_latency_hist(struct io_latency_state *s)
{
memset(s->latency_y_axis_read, 0,
sizeof(s->latency_y_axis_read));
memset(s->latency_y_axis_write, 0,
sizeof(s->latency_y_axis_write));
s->latency_reads_elems = 0;
s->latency_writes_elems = 0;
}
ssize_t
blk_latency_hist_show(struct io_latency_state *s, char *buf)
{
int i;
int bytes_written = 0;
u_int64_t num_elem, elem;
int pct;
num_elem = s->latency_reads_elems;
if (num_elem > 0) {
bytes_written += scnprintf(buf + bytes_written,
PAGE_SIZE - bytes_written,
"IO svc_time Read Latency Histogram (n = %llu):\n",
num_elem);
for (i = 0;
i < ARRAY_SIZE(latency_x_axis_us);
i++) {
elem = s->latency_y_axis_read[i];
pct = div64_u64(elem * 100, num_elem);
bytes_written += scnprintf(buf + bytes_written,
PAGE_SIZE - bytes_written,
"\t< %5lluus%15llu%15d%%\n",
latency_x_axis_us[i],
elem, pct);
}
/* Last element in y-axis table is overflow */
elem = s->latency_y_axis_read[i];
pct = div64_u64(elem * 100, num_elem);
bytes_written += scnprintf(buf + bytes_written,
PAGE_SIZE - bytes_written,
"\t> %5dms%15llu%15d%%\n", 10,
elem, pct);
}
num_elem = s->latency_writes_elems;
if (num_elem > 0) {
bytes_written += scnprintf(buf + bytes_written,
PAGE_SIZE - bytes_written,
"IO svc_time Write Latency Histogram (n = %llu):\n",
num_elem);
for (i = 0;
i < ARRAY_SIZE(latency_x_axis_us);
i++) {
elem = s->latency_y_axis_write[i];
pct = div64_u64(elem * 100, num_elem);
bytes_written += scnprintf(buf + bytes_written,
PAGE_SIZE - bytes_written,
"\t< %5lluus%15llu%15d%%\n",
latency_x_axis_us[i],
elem, pct);
}
/* Last element in y-axis table is overflow */
elem = s->latency_y_axis_write[i];
pct = div64_u64(elem * 100, num_elem);
bytes_written += scnprintf(buf + bytes_written,
PAGE_SIZE - bytes_written,
"\t> %5dms%15llu%15d%%\n", 10,
elem, pct);
}
return bytes_written;
}

View file

@ -183,6 +183,17 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
pr_debug("%s: %d bytes transferred: %d\n",
mmc_hostname(host),
mrq->data->bytes_xfered, mrq->data->error);
if (mrq->lat_hist_enabled) {
ktime_t completion;
u_int64_t delta_us;
completion = ktime_get();
delta_us = ktime_us_delta(completion,
mrq->io_start);
blk_update_latency_hist(&host->io_lat_s,
(mrq->data->flags & MMC_DATA_READ),
delta_us);
}
trace_mmc_blk_rw_end(cmd->opcode, cmd->arg, mrq->data);
}
@ -627,6 +638,11 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
}
if (!err && areq) {
if (host->latency_hist_enabled) {
areq->mrq->io_start = ktime_get();
areq->mrq->lat_hist_enabled = 1;
} else
areq->mrq->lat_hist_enabled = 0;
trace_mmc_blk_rw_start(areq->mrq->cmd->opcode,
areq->mrq->cmd->arg,
areq->mrq->data);
@ -1964,7 +1980,7 @@ void mmc_init_erase(struct mmc_card *card)
}
static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card,
unsigned int arg, unsigned int qty)
unsigned int arg, unsigned int qty)
{
unsigned int erase_timeout;
@ -2907,6 +2923,54 @@ static void __exit mmc_exit(void)
destroy_workqueue(workqueue);
}
static ssize_t
latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct mmc_host *host = cls_dev_to_mmc_host(dev);
return blk_latency_hist_show(&host->io_lat_s, buf);
}
/*
* Values permitted 0, 1, 2.
* 0 -> Disable IO latency histograms (default)
* 1 -> Enable IO latency histograms
* 2 -> Zero out IO latency histograms
*/
static ssize_t
latency_hist_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct mmc_host *host = cls_dev_to_mmc_host(dev);
long value;
if (kstrtol(buf, 0, &value))
return -EINVAL;
if (value == BLK_IO_LAT_HIST_ZERO)
blk_zero_latency_hist(&host->io_lat_s);
else if (value == BLK_IO_LAT_HIST_ENABLE ||
value == BLK_IO_LAT_HIST_DISABLE)
host->latency_hist_enabled = value;
return count;
}
static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR,
latency_hist_show, latency_hist_store);
void
mmc_latency_hist_sysfs_init(struct mmc_host *host)
{
if (device_create_file(&host->class_dev, &dev_attr_latency_hist))
dev_err(&host->class_dev,
"Failed to create latency_hist sysfs entry\n");
}
void
mmc_latency_hist_sysfs_exit(struct mmc_host *host)
{
device_remove_file(&host->class_dev, &dev_attr_latency_hist);
}
subsys_initcall(mmc_init);
module_exit(mmc_exit);

View file

@ -32,8 +32,6 @@
#include "slot-gpio.h"
#include "pwrseq.h"
#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev)
static DEFINE_IDR(mmc_host_idr);
static DEFINE_SPINLOCK(mmc_host_lock);
@ -394,6 +392,8 @@ int mmc_add_host(struct mmc_host *host)
mmc_add_host_debugfs(host);
#endif
mmc_latency_hist_sysfs_init(host);
mmc_start_host(host);
if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY))
register_pm_notifier(&host->pm_notify);
@ -422,6 +422,8 @@ void mmc_remove_host(struct mmc_host *host)
mmc_remove_host_debugfs(host);
#endif
mmc_latency_hist_sysfs_exit(host);
device_del(&host->class_dev);
led_trigger_unregister_simple(host->led);

View file

@ -12,6 +12,8 @@
#define _MMC_CORE_HOST_H
#include <linux/mmc/host.h>
#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev)
int mmc_register_host_class(void);
void mmc_unregister_host_class(void);
@ -21,5 +23,8 @@ void mmc_retune_hold(struct mmc_host *host);
void mmc_retune_release(struct mmc_host *host);
int mmc_retune(struct mmc_host *host);
void mmc_latency_hist_sysfs_init(struct mmc_host *host);
void mmc_latency_hist_sysfs_exit(struct mmc_host *host);
#endif

View file

@ -39,6 +39,7 @@
#include <linux/async.h>
#include <linux/devfreq.h>
#include <linux/blkdev.h>
#include "ufshcd.h"
#include "unipro.h"
@ -1332,6 +1333,17 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
clear_bit_unlock(tag, &hba->lrb_in_use);
goto out;
}
/* IO svc time latency histogram */
if (hba != NULL && cmd->request != NULL) {
if (hba->latency_hist_enabled &&
(cmd->request->cmd_type == REQ_TYPE_FS)) {
cmd->request->lat_hist_io_start = ktime_get();
cmd->request->lat_hist_enabled = 1;
} else
cmd->request->lat_hist_enabled = 0;
}
WARN_ON(hba->clk_gating.state != CLKS_ON);
lrbp = &hba->lrb[tag];
@ -3160,6 +3172,7 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba)
u32 tr_doorbell;
int result;
int index;
struct request *req;
/* Resetting interrupt aggregation counters first and reading the
* DOOR_BELL afterward allows us to handle all the completed requests.
@ -3184,6 +3197,22 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba)
/* Mark completed command as NULL in LRB */
lrbp->cmd = NULL;
clear_bit_unlock(index, &hba->lrb_in_use);
req = cmd->request;
if (req) {
/* Update IO svc time latency histogram */
if (req->lat_hist_enabled) {
ktime_t completion;
u_int64_t delta_us;
completion = ktime_get();
delta_us = ktime_us_delta(completion,
req->lat_hist_io_start);
/* rq_data_dir() => true if WRITE */
blk_update_latency_hist(&hba->io_lat_s,
(rq_data_dir(req) == READ),
delta_us);
}
}
/* Do not touch lrbp after scsi done */
cmd->scsi_done(cmd);
__ufshcd_release(hba);
@ -5327,6 +5356,54 @@ out:
}
EXPORT_SYMBOL(ufshcd_shutdown);
/*
* Values permitted 0, 1, 2.
* 0 -> Disable IO latency histograms (default)
* 1 -> Enable IO latency histograms
* 2 -> Zero out IO latency histograms
*/
static ssize_t
latency_hist_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct ufs_hba *hba = dev_get_drvdata(dev);
long value;
if (kstrtol(buf, 0, &value))
return -EINVAL;
if (value == BLK_IO_LAT_HIST_ZERO)
blk_zero_latency_hist(&hba->io_lat_s);
else if (value == BLK_IO_LAT_HIST_ENABLE ||
value == BLK_IO_LAT_HIST_DISABLE)
hba->latency_hist_enabled = value;
return count;
}
ssize_t
latency_hist_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct ufs_hba *hba = dev_get_drvdata(dev);
return blk_latency_hist_show(&hba->io_lat_s, buf);
}
static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR,
latency_hist_show, latency_hist_store);
static void
ufshcd_init_latency_hist(struct ufs_hba *hba)
{
if (device_create_file(hba->dev, &dev_attr_latency_hist))
dev_err(hba->dev, "Failed to create latency_hist sysfs entry\n");
}
static void
ufshcd_exit_latency_hist(struct ufs_hba *hba)
{
device_create_file(hba->dev, &dev_attr_latency_hist);
}
/**
* ufshcd_remove - de-allocate SCSI host and host memory space
* data structure memory
@ -5342,6 +5419,7 @@ void ufshcd_remove(struct ufs_hba *hba)
scsi_host_put(hba->host);
ufshcd_exit_clk_gating(hba);
ufshcd_exit_latency_hist(hba);
if (ufshcd_is_clkscaling_enabled(hba))
devfreq_remove_device(hba->devfreq);
ufshcd_hba_exit(hba);
@ -5639,6 +5717,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
/* Hold auto suspend until async scan completes */
pm_runtime_get_sync(dev);
ufshcd_init_latency_hist(hba);
/*
* The device-initialize-sequence hasn't been invoked yet.
* Set the device to power-off state
@ -5653,6 +5733,7 @@ out_remove_scsi_host:
scsi_remove_host(hba->host);
exit_gating:
ufshcd_exit_clk_gating(hba);
ufshcd_exit_latency_hist(hba);
out_disable:
hba->is_irq_enabled = false;
scsi_host_put(host);

View file

@ -532,6 +532,9 @@ struct ufs_hba {
struct devfreq *devfreq;
struct ufs_clk_scaling clk_scaling;
bool is_sys_suspended;
int latency_hist_enabled;
struct io_latency_state io_lat_s;
};
/* Returns true if clocks can be gated. Otherwise false */

View file

@ -197,6 +197,9 @@ struct request {
/* for bidi */
struct request *next_rq;
ktime_t lat_hist_io_start;
int lat_hist_enabled;
};
static inline unsigned short req_get_ioprio(struct request *req)
@ -1656,6 +1659,79 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *,
struct writeback_control *);
extern long bdev_direct_access(struct block_device *, sector_t,
void __pmem **addr, unsigned long *pfn, long size);
/*
* X-axis for IO latency histogram support.
*/
static const u_int64_t latency_x_axis_us[] = {
100,
200,
300,
400,
500,
600,
700,
800,
900,
1000,
1200,
1400,
1600,
1800,
2000,
2500,
3000,
4000,
5000,
6000,
7000,
9000,
10000
};
#define BLK_IO_LAT_HIST_DISABLE 0
#define BLK_IO_LAT_HIST_ENABLE 1
#define BLK_IO_LAT_HIST_ZERO 2
struct io_latency_state {
u_int64_t latency_y_axis_read[ARRAY_SIZE(latency_x_axis_us) + 1];
u_int64_t latency_reads_elems;
u_int64_t latency_y_axis_write[ARRAY_SIZE(latency_x_axis_us) + 1];
u_int64_t latency_writes_elems;
};
static inline void
blk_update_latency_hist(struct io_latency_state *s,
int read,
u_int64_t delta_us)
{
int i;
for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++) {
if (delta_us < (u_int64_t)latency_x_axis_us[i]) {
if (read)
s->latency_y_axis_read[i]++;
else
s->latency_y_axis_write[i]++;
break;
}
}
if (i == ARRAY_SIZE(latency_x_axis_us)) {
/* Overflowed the histogram */
if (read)
s->latency_y_axis_read[i]++;
else
s->latency_y_axis_write[i]++;
}
if (read)
s->latency_reads_elems++;
else
s->latency_writes_elems++;
}
void blk_zero_latency_hist(struct io_latency_state *s);
ssize_t blk_latency_hist_show(struct io_latency_state *s, char *buf);
#else /* CONFIG_BLOCK */
struct block_device;

View file

@ -136,6 +136,8 @@ struct mmc_request {
struct completion completion;
void (*done)(struct mmc_request *);/* completion function */
struct mmc_host *host;
ktime_t io_start;
int lat_hist_enabled;
};
struct mmc_card;

View file

@ -16,6 +16,7 @@
#include <linux/sched.h>
#include <linux/device.h>
#include <linux/fault-inject.h>
#include <linux/blkdev.h>
#include <linux/mmc/core.h>
#include <linux/mmc/card.h>
@ -379,6 +380,9 @@ struct mmc_host {
} embedded_sdio_data;
#endif
int latency_hist_enabled;
struct io_latency_state io_lat_s;
unsigned long private[0] ____cacheline_aligned;
};