Android: MMC/UFS IO Latency Histograms.
This patch adds a new sysfs node (latency_hist) and reports IO (svc time) latency histograms. Disabled by default, can be enabled by echoing 0 into latency_hist, stats can be cleared by writing 2 into latency_hist. This commit fixes the 32 bit build breakage in the previous commit. Tested on both 32 bit and 64 bit arm devices. Bug: 30677035 Change-Id: I9a615a16616d80f87e75676ac4d078a5c429dcf9 Signed-off-by: Mohan Srinivasan <srmohan@google.com>
This commit is contained in:
parent
10bd621d38
commit
8e4b2f84a8
9 changed files with 322 additions and 3 deletions
|
@ -40,6 +40,8 @@
|
|||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
|
||||
#include <linux/math64.h>
|
||||
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
|
||||
|
@ -3539,3 +3541,83 @@ int __init blk_dev_init(void)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Blk IO latency support. We want this to be as cheap as possible, so doing
|
||||
* this lockless (and avoiding atomics), a few off by a few errors in this
|
||||
* code is not harmful, and we don't want to do anything that is
|
||||
* perf-impactful.
|
||||
* TODO : If necessary, we can make the histograms per-cpu and aggregate
|
||||
* them when printing them out.
|
||||
*/
|
||||
void
|
||||
blk_zero_latency_hist(struct io_latency_state *s)
|
||||
{
|
||||
memset(s->latency_y_axis_read, 0,
|
||||
sizeof(s->latency_y_axis_read));
|
||||
memset(s->latency_y_axis_write, 0,
|
||||
sizeof(s->latency_y_axis_write));
|
||||
s->latency_reads_elems = 0;
|
||||
s->latency_writes_elems = 0;
|
||||
}
|
||||
|
||||
ssize_t
|
||||
blk_latency_hist_show(struct io_latency_state *s, char *buf)
|
||||
{
|
||||
int i;
|
||||
int bytes_written = 0;
|
||||
u_int64_t num_elem, elem;
|
||||
int pct;
|
||||
|
||||
num_elem = s->latency_reads_elems;
|
||||
if (num_elem > 0) {
|
||||
bytes_written += scnprintf(buf + bytes_written,
|
||||
PAGE_SIZE - bytes_written,
|
||||
"IO svc_time Read Latency Histogram (n = %llu):\n",
|
||||
num_elem);
|
||||
for (i = 0;
|
||||
i < ARRAY_SIZE(latency_x_axis_us);
|
||||
i++) {
|
||||
elem = s->latency_y_axis_read[i];
|
||||
pct = div64_u64(elem * 100, num_elem);
|
||||
bytes_written += scnprintf(buf + bytes_written,
|
||||
PAGE_SIZE - bytes_written,
|
||||
"\t< %5lluus%15llu%15d%%\n",
|
||||
latency_x_axis_us[i],
|
||||
elem, pct);
|
||||
}
|
||||
/* Last element in y-axis table is overflow */
|
||||
elem = s->latency_y_axis_read[i];
|
||||
pct = div64_u64(elem * 100, num_elem);
|
||||
bytes_written += scnprintf(buf + bytes_written,
|
||||
PAGE_SIZE - bytes_written,
|
||||
"\t> %5dms%15llu%15d%%\n", 10,
|
||||
elem, pct);
|
||||
}
|
||||
num_elem = s->latency_writes_elems;
|
||||
if (num_elem > 0) {
|
||||
bytes_written += scnprintf(buf + bytes_written,
|
||||
PAGE_SIZE - bytes_written,
|
||||
"IO svc_time Write Latency Histogram (n = %llu):\n",
|
||||
num_elem);
|
||||
for (i = 0;
|
||||
i < ARRAY_SIZE(latency_x_axis_us);
|
||||
i++) {
|
||||
elem = s->latency_y_axis_write[i];
|
||||
pct = div64_u64(elem * 100, num_elem);
|
||||
bytes_written += scnprintf(buf + bytes_written,
|
||||
PAGE_SIZE - bytes_written,
|
||||
"\t< %5lluus%15llu%15d%%\n",
|
||||
latency_x_axis_us[i],
|
||||
elem, pct);
|
||||
}
|
||||
/* Last element in y-axis table is overflow */
|
||||
elem = s->latency_y_axis_write[i];
|
||||
pct = div64_u64(elem * 100, num_elem);
|
||||
bytes_written += scnprintf(buf + bytes_written,
|
||||
PAGE_SIZE - bytes_written,
|
||||
"\t> %5dms%15llu%15d%%\n", 10,
|
||||
elem, pct);
|
||||
}
|
||||
return bytes_written;
|
||||
}
|
||||
|
|
|
@ -183,6 +183,17 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
|
|||
pr_debug("%s: %d bytes transferred: %d\n",
|
||||
mmc_hostname(host),
|
||||
mrq->data->bytes_xfered, mrq->data->error);
|
||||
if (mrq->lat_hist_enabled) {
|
||||
ktime_t completion;
|
||||
u_int64_t delta_us;
|
||||
|
||||
completion = ktime_get();
|
||||
delta_us = ktime_us_delta(completion,
|
||||
mrq->io_start);
|
||||
blk_update_latency_hist(&host->io_lat_s,
|
||||
(mrq->data->flags & MMC_DATA_READ),
|
||||
delta_us);
|
||||
}
|
||||
trace_mmc_blk_rw_end(cmd->opcode, cmd->arg, mrq->data);
|
||||
}
|
||||
|
||||
|
@ -627,6 +638,11 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
|
|||
}
|
||||
|
||||
if (!err && areq) {
|
||||
if (host->latency_hist_enabled) {
|
||||
areq->mrq->io_start = ktime_get();
|
||||
areq->mrq->lat_hist_enabled = 1;
|
||||
} else
|
||||
areq->mrq->lat_hist_enabled = 0;
|
||||
trace_mmc_blk_rw_start(areq->mrq->cmd->opcode,
|
||||
areq->mrq->cmd->arg,
|
||||
areq->mrq->data);
|
||||
|
@ -1964,7 +1980,7 @@ void mmc_init_erase(struct mmc_card *card)
|
|||
}
|
||||
|
||||
static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card,
|
||||
unsigned int arg, unsigned int qty)
|
||||
unsigned int arg, unsigned int qty)
|
||||
{
|
||||
unsigned int erase_timeout;
|
||||
|
||||
|
@ -2907,6 +2923,54 @@ static void __exit mmc_exit(void)
|
|||
destroy_workqueue(workqueue);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct mmc_host *host = cls_dev_to_mmc_host(dev);
|
||||
|
||||
return blk_latency_hist_show(&host->io_lat_s, buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Values permitted 0, 1, 2.
|
||||
* 0 -> Disable IO latency histograms (default)
|
||||
* 1 -> Enable IO latency histograms
|
||||
* 2 -> Zero out IO latency histograms
|
||||
*/
|
||||
static ssize_t
|
||||
latency_hist_store(struct device *dev, struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct mmc_host *host = cls_dev_to_mmc_host(dev);
|
||||
long value;
|
||||
|
||||
if (kstrtol(buf, 0, &value))
|
||||
return -EINVAL;
|
||||
if (value == BLK_IO_LAT_HIST_ZERO)
|
||||
blk_zero_latency_hist(&host->io_lat_s);
|
||||
else if (value == BLK_IO_LAT_HIST_ENABLE ||
|
||||
value == BLK_IO_LAT_HIST_DISABLE)
|
||||
host->latency_hist_enabled = value;
|
||||
return count;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR,
|
||||
latency_hist_show, latency_hist_store);
|
||||
|
||||
void
|
||||
mmc_latency_hist_sysfs_init(struct mmc_host *host)
|
||||
{
|
||||
if (device_create_file(&host->class_dev, &dev_attr_latency_hist))
|
||||
dev_err(&host->class_dev,
|
||||
"Failed to create latency_hist sysfs entry\n");
|
||||
}
|
||||
|
||||
void
|
||||
mmc_latency_hist_sysfs_exit(struct mmc_host *host)
|
||||
{
|
||||
device_remove_file(&host->class_dev, &dev_attr_latency_hist);
|
||||
}
|
||||
|
||||
subsys_initcall(mmc_init);
|
||||
module_exit(mmc_exit);
|
||||
|
||||
|
|
|
@ -32,8 +32,6 @@
|
|||
#include "slot-gpio.h"
|
||||
#include "pwrseq.h"
|
||||
|
||||
#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev)
|
||||
|
||||
static DEFINE_IDR(mmc_host_idr);
|
||||
static DEFINE_SPINLOCK(mmc_host_lock);
|
||||
|
||||
|
@ -394,6 +392,8 @@ int mmc_add_host(struct mmc_host *host)
|
|||
mmc_add_host_debugfs(host);
|
||||
#endif
|
||||
|
||||
mmc_latency_hist_sysfs_init(host);
|
||||
|
||||
mmc_start_host(host);
|
||||
if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY))
|
||||
register_pm_notifier(&host->pm_notify);
|
||||
|
@ -422,6 +422,8 @@ void mmc_remove_host(struct mmc_host *host)
|
|||
mmc_remove_host_debugfs(host);
|
||||
#endif
|
||||
|
||||
mmc_latency_hist_sysfs_exit(host);
|
||||
|
||||
device_del(&host->class_dev);
|
||||
|
||||
led_trigger_unregister_simple(host->led);
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
#define _MMC_CORE_HOST_H
|
||||
#include <linux/mmc/host.h>
|
||||
|
||||
#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev)
|
||||
|
||||
int mmc_register_host_class(void);
|
||||
void mmc_unregister_host_class(void);
|
||||
|
||||
|
@ -21,5 +23,8 @@ void mmc_retune_hold(struct mmc_host *host);
|
|||
void mmc_retune_release(struct mmc_host *host);
|
||||
int mmc_retune(struct mmc_host *host);
|
||||
|
||||
void mmc_latency_hist_sysfs_init(struct mmc_host *host);
|
||||
void mmc_latency_hist_sysfs_exit(struct mmc_host *host);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
|
||||
#include <linux/async.h>
|
||||
#include <linux/devfreq.h>
|
||||
#include <linux/blkdev.h>
|
||||
|
||||
#include "ufshcd.h"
|
||||
#include "unipro.h"
|
||||
|
@ -1332,6 +1333,17 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
|
|||
clear_bit_unlock(tag, &hba->lrb_in_use);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* IO svc time latency histogram */
|
||||
if (hba != NULL && cmd->request != NULL) {
|
||||
if (hba->latency_hist_enabled &&
|
||||
(cmd->request->cmd_type == REQ_TYPE_FS)) {
|
||||
cmd->request->lat_hist_io_start = ktime_get();
|
||||
cmd->request->lat_hist_enabled = 1;
|
||||
} else
|
||||
cmd->request->lat_hist_enabled = 0;
|
||||
}
|
||||
|
||||
WARN_ON(hba->clk_gating.state != CLKS_ON);
|
||||
|
||||
lrbp = &hba->lrb[tag];
|
||||
|
@ -3160,6 +3172,7 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba)
|
|||
u32 tr_doorbell;
|
||||
int result;
|
||||
int index;
|
||||
struct request *req;
|
||||
|
||||
/* Resetting interrupt aggregation counters first and reading the
|
||||
* DOOR_BELL afterward allows us to handle all the completed requests.
|
||||
|
@ -3184,6 +3197,22 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba)
|
|||
/* Mark completed command as NULL in LRB */
|
||||
lrbp->cmd = NULL;
|
||||
clear_bit_unlock(index, &hba->lrb_in_use);
|
||||
req = cmd->request;
|
||||
if (req) {
|
||||
/* Update IO svc time latency histogram */
|
||||
if (req->lat_hist_enabled) {
|
||||
ktime_t completion;
|
||||
u_int64_t delta_us;
|
||||
|
||||
completion = ktime_get();
|
||||
delta_us = ktime_us_delta(completion,
|
||||
req->lat_hist_io_start);
|
||||
/* rq_data_dir() => true if WRITE */
|
||||
blk_update_latency_hist(&hba->io_lat_s,
|
||||
(rq_data_dir(req) == READ),
|
||||
delta_us);
|
||||
}
|
||||
}
|
||||
/* Do not touch lrbp after scsi done */
|
||||
cmd->scsi_done(cmd);
|
||||
__ufshcd_release(hba);
|
||||
|
@ -5327,6 +5356,54 @@ out:
|
|||
}
|
||||
EXPORT_SYMBOL(ufshcd_shutdown);
|
||||
|
||||
/*
|
||||
* Values permitted 0, 1, 2.
|
||||
* 0 -> Disable IO latency histograms (default)
|
||||
* 1 -> Enable IO latency histograms
|
||||
* 2 -> Zero out IO latency histograms
|
||||
*/
|
||||
static ssize_t
|
||||
latency_hist_store(struct device *dev, struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct ufs_hba *hba = dev_get_drvdata(dev);
|
||||
long value;
|
||||
|
||||
if (kstrtol(buf, 0, &value))
|
||||
return -EINVAL;
|
||||
if (value == BLK_IO_LAT_HIST_ZERO)
|
||||
blk_zero_latency_hist(&hba->io_lat_s);
|
||||
else if (value == BLK_IO_LAT_HIST_ENABLE ||
|
||||
value == BLK_IO_LAT_HIST_DISABLE)
|
||||
hba->latency_hist_enabled = value;
|
||||
return count;
|
||||
}
|
||||
|
||||
ssize_t
|
||||
latency_hist_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct ufs_hba *hba = dev_get_drvdata(dev);
|
||||
|
||||
return blk_latency_hist_show(&hba->io_lat_s, buf);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR,
|
||||
latency_hist_show, latency_hist_store);
|
||||
|
||||
static void
|
||||
ufshcd_init_latency_hist(struct ufs_hba *hba)
|
||||
{
|
||||
if (device_create_file(hba->dev, &dev_attr_latency_hist))
|
||||
dev_err(hba->dev, "Failed to create latency_hist sysfs entry\n");
|
||||
}
|
||||
|
||||
static void
|
||||
ufshcd_exit_latency_hist(struct ufs_hba *hba)
|
||||
{
|
||||
device_create_file(hba->dev, &dev_attr_latency_hist);
|
||||
}
|
||||
|
||||
/**
|
||||
* ufshcd_remove - de-allocate SCSI host and host memory space
|
||||
* data structure memory
|
||||
|
@ -5342,6 +5419,7 @@ void ufshcd_remove(struct ufs_hba *hba)
|
|||
scsi_host_put(hba->host);
|
||||
|
||||
ufshcd_exit_clk_gating(hba);
|
||||
ufshcd_exit_latency_hist(hba);
|
||||
if (ufshcd_is_clkscaling_enabled(hba))
|
||||
devfreq_remove_device(hba->devfreq);
|
||||
ufshcd_hba_exit(hba);
|
||||
|
@ -5639,6 +5717,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
|
|||
/* Hold auto suspend until async scan completes */
|
||||
pm_runtime_get_sync(dev);
|
||||
|
||||
ufshcd_init_latency_hist(hba);
|
||||
|
||||
/*
|
||||
* The device-initialize-sequence hasn't been invoked yet.
|
||||
* Set the device to power-off state
|
||||
|
@ -5653,6 +5733,7 @@ out_remove_scsi_host:
|
|||
scsi_remove_host(hba->host);
|
||||
exit_gating:
|
||||
ufshcd_exit_clk_gating(hba);
|
||||
ufshcd_exit_latency_hist(hba);
|
||||
out_disable:
|
||||
hba->is_irq_enabled = false;
|
||||
scsi_host_put(host);
|
||||
|
|
|
@ -532,6 +532,9 @@ struct ufs_hba {
|
|||
struct devfreq *devfreq;
|
||||
struct ufs_clk_scaling clk_scaling;
|
||||
bool is_sys_suspended;
|
||||
|
||||
int latency_hist_enabled;
|
||||
struct io_latency_state io_lat_s;
|
||||
};
|
||||
|
||||
/* Returns true if clocks can be gated. Otherwise false */
|
||||
|
|
|
@ -197,6 +197,9 @@ struct request {
|
|||
|
||||
/* for bidi */
|
||||
struct request *next_rq;
|
||||
|
||||
ktime_t lat_hist_io_start;
|
||||
int lat_hist_enabled;
|
||||
};
|
||||
|
||||
static inline unsigned short req_get_ioprio(struct request *req)
|
||||
|
@ -1656,6 +1659,79 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *,
|
|||
struct writeback_control *);
|
||||
extern long bdev_direct_access(struct block_device *, sector_t,
|
||||
void __pmem **addr, unsigned long *pfn, long size);
|
||||
|
||||
/*
|
||||
* X-axis for IO latency histogram support.
|
||||
*/
|
||||
static const u_int64_t latency_x_axis_us[] = {
|
||||
100,
|
||||
200,
|
||||
300,
|
||||
400,
|
||||
500,
|
||||
600,
|
||||
700,
|
||||
800,
|
||||
900,
|
||||
1000,
|
||||
1200,
|
||||
1400,
|
||||
1600,
|
||||
1800,
|
||||
2000,
|
||||
2500,
|
||||
3000,
|
||||
4000,
|
||||
5000,
|
||||
6000,
|
||||
7000,
|
||||
9000,
|
||||
10000
|
||||
};
|
||||
|
||||
#define BLK_IO_LAT_HIST_DISABLE 0
|
||||
#define BLK_IO_LAT_HIST_ENABLE 1
|
||||
#define BLK_IO_LAT_HIST_ZERO 2
|
||||
|
||||
struct io_latency_state {
|
||||
u_int64_t latency_y_axis_read[ARRAY_SIZE(latency_x_axis_us) + 1];
|
||||
u_int64_t latency_reads_elems;
|
||||
u_int64_t latency_y_axis_write[ARRAY_SIZE(latency_x_axis_us) + 1];
|
||||
u_int64_t latency_writes_elems;
|
||||
};
|
||||
|
||||
static inline void
|
||||
blk_update_latency_hist(struct io_latency_state *s,
|
||||
int read,
|
||||
u_int64_t delta_us)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++) {
|
||||
if (delta_us < (u_int64_t)latency_x_axis_us[i]) {
|
||||
if (read)
|
||||
s->latency_y_axis_read[i]++;
|
||||
else
|
||||
s->latency_y_axis_write[i]++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == ARRAY_SIZE(latency_x_axis_us)) {
|
||||
/* Overflowed the histogram */
|
||||
if (read)
|
||||
s->latency_y_axis_read[i]++;
|
||||
else
|
||||
s->latency_y_axis_write[i]++;
|
||||
}
|
||||
if (read)
|
||||
s->latency_reads_elems++;
|
||||
else
|
||||
s->latency_writes_elems++;
|
||||
}
|
||||
|
||||
void blk_zero_latency_hist(struct io_latency_state *s);
|
||||
ssize_t blk_latency_hist_show(struct io_latency_state *s, char *buf);
|
||||
|
||||
#else /* CONFIG_BLOCK */
|
||||
|
||||
struct block_device;
|
||||
|
|
|
@ -136,6 +136,8 @@ struct mmc_request {
|
|||
struct completion completion;
|
||||
void (*done)(struct mmc_request *);/* completion function */
|
||||
struct mmc_host *host;
|
||||
ktime_t io_start;
|
||||
int lat_hist_enabled;
|
||||
};
|
||||
|
||||
struct mmc_card;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include <linux/sched.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/fault-inject.h>
|
||||
#include <linux/blkdev.h>
|
||||
|
||||
#include <linux/mmc/core.h>
|
||||
#include <linux/mmc/card.h>
|
||||
|
@ -379,6 +380,9 @@ struct mmc_host {
|
|||
} embedded_sdio_data;
|
||||
#endif
|
||||
|
||||
int latency_hist_enabled;
|
||||
struct io_latency_state io_lat_s;
|
||||
|
||||
unsigned long private[0] ____cacheline_aligned;
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue