Squashfs: replace buffer_head with BIO

'll_rw_block' has been deprecated, and BIO is now the basic container
for block I/O within the kernel.

Switching to BIO offers 2 advantages:
  1/ It removes the synchronous wait for buffers to become up to date:
     SquashFS now performs decompressions/copies asynchronously.
     Implementing an asynchronous mechanism to read data is needed to
     efficiently implement .readpages().
  2/ Prior to this patch, merging the read requests entirely depends on
     the IO scheduler. SquashFS has more information than the IO
     scheduler about what could be merged. Moreover, merging the reads
     at the FS level means that we rely less on the IO scheduler.

Signed-off-by: Adrien Schildknecht <adriens@google.com>
Change-Id: I775d2e11f017476e1899518ab52d9d0a8a0bce28
This commit is contained in:
Adrien Schildknecht 2016-11-07 12:37:55 -08:00 committed by Daniel Rosenberg
parent 417aca479b
commit c9994560db
4 changed files with 464 additions and 280 deletions

View file

@ -28,9 +28,12 @@
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@ -38,45 +41,355 @@
#include "decompressor.h"
#include "page_actor.h"
/*
* Read the metadata block length, this is stored in the first two
* bytes of the metadata block.
*/
static struct buffer_head *get_block_length(struct super_block *sb,
u64 *cur_index, int *offset, int *length)
/* Workqueue used to run asynchronous read completions (see read_wq_handler). */
static struct workqueue_struct *squashfs_read_wq;

/*
 * State for one logical SquashFS read (a metadata block or a datablock),
 * carried from BIO submission through completion to copy/decompression.
 */
struct squashfs_read_request {
	struct super_block *sb;
	u64 index;		/* byte offset of the block on the device */
	int length;		/* on-disk (compressed) length of the data */
	int compressed;		/* nonzero if the block is compressed */
	int offset;		/* byte offset of the data within bh[0] */
	u64 read_end;		/* device byte offset one past the region read */
	struct squashfs_page_actor *output;
	enum {
		SQUASHFS_COPY,
		SQUASHFS_DECOMPRESS,
		SQUASHFS_METADATA,
	} data_processing;
	bool synchronous;

	/*
	 * If the read is synchronous, it is possible to retrieve information
	 * about the request by setting these pointers.
	 */
	int *res;
	int *bytes_read;
	int *bytes_uncompressed;

	int nr_buffers;		/* number of entries in bh[] */
	struct buffer_head **bh;
	struct work_struct offload;	/* work item for the async path */
};

/* Per-BIO private data: the slice of the request's bh[] this BIO covers. */
struct squashfs_bio_request {
	struct buffer_head **bh;
	int nr_buffers;
};

static int squashfs_bio_submit(struct squashfs_read_request *req);
/*
 * Create the global read workqueue.  Returns nonzero on success, 0 on
 * failure.  Called once from the module init path.
 */
int squashfs_init_read_wq(void)
{
	/*
	 * NOTE(review): the next two declarations are residue of the removed
	 * get_block_length() left over by the diff rendering — they are not
	 * part of this function.
	 */
	struct squashfs_sb_info *msblk = sb->s_fs_info;
	struct buffer_head *bh;
	squashfs_read_wq = create_workqueue("SquashFS read wq");
	return !!squashfs_read_wq;
}
bh = sb_bread(sb, *cur_index);
if (bh == NULL)
return NULL;
/*
 * Tear down the global read workqueue.  Flush first so any outstanding
 * squashfs_read_request work items complete before the queue is
 * destroyed.  Called from the module exit path.
 */
void squashfs_destroy_read_wq(void)
{
	flush_workqueue(squashfs_read_wq);
	destroy_workqueue(squashfs_read_wq);
}
if (msblk->devblksize - *offset == 1) {
*length = (unsigned char) bh->b_data[*offset];
put_bh(bh);
bh = sb_bread(sb, ++(*cur_index));
if (bh == NULL)
return NULL;
*length |= (unsigned char) bh->b_data[0] << 8;
*offset = 1;
} else {
*length = (unsigned char) bh->b_data[*offset] |
(unsigned char) bh->b_data[*offset + 1] << 8;
*offset += 2;
/*
 * Release a read request and report its final status.
 *
 * Asynchronous requests own their page actor, so it is released here
 * with the error code (which marks/unlocks the pages); synchronous
 * callers free their actor themselves.  req->res, when set (synchronous
 * reads only), receives the error code before the request is freed.
 */
static void free_read_request(struct squashfs_read_request *req, int error)
{
	if (!req->synchronous)
		squashfs_page_actor_free(req->output, error);
	if (req->res)
		*(req->res) = error;
	kfree(req->bh);
	kfree(req);
}
if (*offset == msblk->devblksize) {
put_bh(bh);
bh = sb_bread(sb, ++(*cur_index));
if (bh == NULL)
return NULL;
*offset = 0;
/*
 * Post-I/O stage of a read request: wait for every buffer_head submitted
 * by squashfs_bio_submit() to be completed by squashfs_bio_end_io(),
 * then copy or decompress the data into the request's page actor.
 *
 * Runs synchronously for metadata reads, or from a workqueue for
 * asynchronous data reads (see read_wq_handler()).  Always consumes the
 * request via free_read_request(), except when a metadata read must be
 * resubmitted.
 */
static void squashfs_process_blocks(struct squashfs_read_request *req)
{
	int error = 0;
	int bytes, i, length;
	struct squashfs_sb_info *msblk = req->sb->s_fs_info;
	struct squashfs_page_actor *actor = req->output;
	struct buffer_head **bh = req->bh;
	int nr_buffers = req->nr_buffers;

	/* Wait for all in-flight I/O; any non-uptodate buffer means -EIO. */
	for (i = 0; i < nr_buffers; ++i) {
		if (!bh[i])
			continue;
		wait_on_buffer(bh[i]);
		if (!buffer_uptodate(bh[i]))
			error = -EIO;
	}
	if (error)
		goto cleanup;

	if (req->data_processing == SQUASHFS_METADATA) {
		/* Extract the length of the metadata block */
		if (req->offset != msblk->devblksize - 1)
			length = *((u16 *)(bh[0]->b_data + req->offset));
		else {
			/* The 2-byte length field straddles two device blocks. */
			length = bh[0]->b_data[req->offset];
			length |= bh[1]->b_data[0] << 8;
		}
		req->compressed = SQUASHFS_COMPRESSED(length);
		req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
						       : SQUASHFS_COPY;
		length = SQUASHFS_COMPRESSED_SIZE(length);
		if (req->index + length + 2 > req->read_end) {
			/*
			 * The block extends past the region read for the
			 * 2-byte header: drop these buffers and resubmit the
			 * request for the full block, skipping the header.
			 */
			for (i = 0; i < nr_buffers; ++i)
				put_bh(bh[i]);
			kfree(bh);
			req->length = length;
			req->index += 2;
			squashfs_bio_submit(req);
			return;
		}
		req->length = length;
		/*
		 * Advance past the 2-byte header.  NOTE(review): the modulo
		 * uses PAGE_SIZE while offsets elsewhere are devblksize-based
		 * — presumably assumes devblksize == PAGE_SIZE; confirm.
		 */
		req->offset = (req->offset + 2) % PAGE_SIZE;
		if (req->offset < 2) {
			/* Header consumed the first buffer entirely. */
			put_bh(bh[0]);
			++bh;
			--nr_buffers;
		}
	}
	if (req->bytes_read)
		*(req->bytes_read) = req->length;

	/*
	 * NOTE(review): on these success paths the buffer references appear
	 * to be dropped by squashfs_bh_to_actor()/squashfs_decompress() —
	 * confirm, since no put_bh() happens here when error == 0.
	 */
	if (req->data_processing == SQUASHFS_COPY) {
		squashfs_bh_to_actor(bh, nr_buffers, req->output, req->offset,
			req->length, msblk->devblksize);
	} else if (req->data_processing == SQUASHFS_DECOMPRESS) {
		req->length = squashfs_decompress(msblk, bh, nr_buffers,
			req->offset, req->length, actor);
		if (req->length < 0) {
			error = -EIO;
			goto cleanup;
		}
	}
	/*
	 * NOTE(review): the "return bh;" below is residue of the removed
	 * get_block_length() left over by the diff rendering — it is not
	 * part of this function.
	 */
	return bh;
	/* Last page may have trailing bytes not filled */
	bytes = req->length % PAGE_SIZE;
	if (bytes && actor->page[actor->pages - 1])
		zero_user_segment(actor->page[actor->pages - 1], bytes,
				  PAGE_SIZE);

cleanup:
	if (req->bytes_uncompressed)
		*(req->bytes_uncompressed) = req->length;
	if (error) {
		/* I/O or decompression failed: drop all buffer references. */
		for (i = 0; i < nr_buffers; ++i)
			if (bh[i])
				put_bh(bh[i]);
	}
	free_read_request(req, error);
}
/* Workqueue entry point: recover the request embedding this work item
 * and run the post-I/O processing for an asynchronous read. */
static void read_wq_handler(struct work_struct *work)
{
	squashfs_process_blocks(container_of(work,
		struct squashfs_read_request, offload));
}
/*
 * BIO completion callback (may run in interrupt context).
 *
 * Propagates the BIO status to every buffer_head the BIO covered —
 * up to date on success, cleared on error — and unlocks each one,
 * waking the wait_on_buffer() in squashfs_process_blocks().  The BIO
 * itself and the per-BIO request are freed here.
 */
static void squashfs_bio_end_io(struct bio *bio)
{
	int i;
	int error = bio->bi_error;	/* capture status before bio_put() */
	struct squashfs_bio_request *bio_req = bio->bi_private;

	bio_put(bio);

	for (i = 0; i < bio_req->nr_buffers; ++i) {
		if (!bio_req->bh[i])
			continue;
		if (!error)
			set_buffer_uptodate(bio_req->bh[i]);
		else
			clear_buffer_uptodate(bio_req->bh[i]);
		unlock_buffer(bio_req->bh[i]);
	}
	kfree(bio_req);
}
/*
 * Allocate req->bh and fill it with the buffer_heads covering
 * req->nr_buffers device blocks starting at 'block'.
 *
 * On failure every buffer_head grabbed so far is released; req->bh is
 * left allocated (the caller's error path frees it via
 * free_read_request()).  Returns 0 on success or a negative errno.
 */
static int actor_getblks(struct squashfs_read_request *req, u64 block)
{
	int i;

	req->bh = kmalloc_array(req->nr_buffers, sizeof(*(req->bh)), GFP_NOIO);
	if (!req->bh)
		return -ENOMEM;

	for (i = 0; i < req->nr_buffers; ++i) {
		req->bh[i] = sb_getblk(req->sb, block + i);
		if (!req->bh[i]) {
			/*
			 * Drop every buffer_head acquired so far.  The
			 * previous "while (--i)" loop stopped before index 0
			 * and leaked the reference on req->bh[0]; all earlier
			 * entries are non-NULL (sb_getblk succeeded), so no
			 * NULL check is needed either.
			 */
			while (--i >= 0)
				put_bh(req->bh[i]);
			return -ENOMEM;
		}
	}
	return 0;
}
/*
 * Round the request out to device-block boundaries, grab the covering
 * buffer_heads, and submit as few BIOs as possible for the ones that
 * are not already up to date, merging physically contiguous buffers
 * into a single BIO.  Completion is handled by squashfs_bio_end_io();
 * the data is then processed either inline (synchronous requests) or
 * from a workqueue.
 *
 * Consumes the request on failure (free_read_request()).  Returns 0 on
 * successful submission or -ENOMEM.
 */
static int squashfs_bio_submit(struct squashfs_read_request *req)
{
	struct bio *bio = NULL;
	struct buffer_head *bh;
	struct squashfs_bio_request *bio_req = NULL;
	int b = 0, prev_block = 0;
	struct squashfs_sb_info *msblk = req->sb->s_fs_info;

	/* Expand [index, index+length) to whole device blocks. */
	u64 read_start = round_down(req->index, msblk->devblksize);
	u64 read_end = round_up(req->index + req->length, msblk->devblksize);
	sector_t block = read_start >> msblk->devblksize_log2;
	sector_t block_end = read_end >> msblk->devblksize_log2;
	/* Offset of the first device block within its page. */
	int offset = read_start - round_down(req->index, PAGE_SIZE);
	int nr_buffers = block_end - block;
	int blksz = msblk->devblksize;
	int bio_max_pages = nr_buffers > BIO_MAX_PAGES ? BIO_MAX_PAGES
						       : nr_buffers;

	/* Setup the request */
	req->read_end = read_end;
	req->offset = req->index - read_start;
	req->nr_buffers = nr_buffers;
	if (actor_getblks(req, block) < 0)
		goto getblk_failed;

	/* Create and submit the BIOs */
	for (b = 0; b < nr_buffers; ++b, offset += blksz) {
		bh = req->bh[b];
		/* Skip buffers already being read by someone else. */
		if (!bh || !trylock_buffer(bh))
			continue;
		if (buffer_uptodate(bh)) {
			unlock_buffer(bh);
			continue;
		}
		offset %= PAGE_SIZE;

		/* Append the buffer to the current BIO if it is contiguous */
		if (bio && bio_req && prev_block + 1 == b) {
			if (bio_add_page(bio, bh->b_page, blksz, offset)) {
				bio_req->nr_buffers += 1;
				prev_block = b;
				continue;
			}
		}

		/* Otherwise, submit the current BIO and create a new one */
		if (bio)
			submit_bio(READ, bio);
		bio_req = kcalloc(1, sizeof(struct squashfs_bio_request),
				  GFP_NOIO);
		if (!bio_req)
			goto req_alloc_failed;
		bio_req->bh = &req->bh[b];
		bio = bio_alloc(GFP_NOIO, bio_max_pages);
		if (!bio)
			goto bio_alloc_failed;
		bio->bi_bdev = req->sb->s_bdev;
		/* Device blocks to 512-byte sectors. */
		bio->bi_iter.bi_sector = (block + b)
				       << (msblk->devblksize_log2 - 9);
		bio->bi_private = bio_req;
		bio->bi_end_io = squashfs_bio_end_io;
		bio_add_page(bio, bh->b_page, blksz, offset);
		bio_req->nr_buffers += 1;
		prev_block = b;
	}
	if (bio)
		submit_bio(READ, bio);

	/*
	 * NOTE(review): the async path uses schedule_work() (system
	 * workqueue) even though squashfs_read_wq is created at init —
	 * presumably queue_work(squashfs_read_wq, ...) was intended; confirm.
	 */
	if (req->synchronous)
		squashfs_process_blocks(req);
	else {
		INIT_WORK(&req->offload, read_wq_handler);
		schedule_work(&req->offload);
	}
	return 0;

bio_alloc_failed:
	kfree(bio_req);
req_alloc_failed:
	/* Release buffers not yet submitted; wait out those in flight. */
	unlock_buffer(bh);
	while (--nr_buffers >= b)
		if (req->bh[nr_buffers])
			put_bh(req->bh[nr_buffers]);
	/*
	 * NOTE(review): the already-submitted buffers below are waited on
	 * but never put_bh()'d, and free_read_request() does not drop them
	 * either — looks like a reference leak on this error path; confirm.
	 */
	while (--b >= 0)
		if (req->bh[b])
			wait_on_buffer(req->bh[b]);
getblk_failed:
	free_read_request(req, -ENOMEM);
	return -ENOMEM;
}
/*
 * Read a metadata block.  Only the 2-byte length header position is
 * known up front; squashfs_process_blocks() decodes the real length and
 * resubmits if the initial read did not cover the whole block.
 *
 * Returns the number of uncompressed bytes produced, or a negative
 * errno.  '*next_index', when non-NULL, is advanced past the header and
 * block.  The request is always consumed.
 */
static int read_metadata_block(struct squashfs_read_request *req,
			       u64 *next_index)
{
	int ret, error, bytes_read = 0, bytes_uncompressed = 0;
	struct squashfs_sb_info *msblk = req->sb->s_fs_info;

	if (req->index + 2 > msblk->bytes_used) {
		free_read_request(req, -EINVAL);
		return -EINVAL;
	}
	req->length = 2;

	/*
	 * Do not read beyond the end of the device
	 * (NOTE(review): unreachable — the check above already rejected
	 * index + 2 > bytes_used, and req->length is exactly 2 here.)
	 */
	if (req->index + req->length > msblk->bytes_used)
		req->length = msblk->bytes_used - req->index;
	req->data_processing = SQUASHFS_METADATA;

	/*
	 * Reading metadata is always synchronous because we don't know the
	 * length in advance and the function is expected to update
	 * 'next_index' and return the length.
	 */
	req->synchronous = true;
	req->res = &error;
	req->bytes_read = &bytes_read;
	req->bytes_uncompressed = &bytes_uncompressed;

	/*
	 * NOTE(review): this TRACE fires before submission, so
	 * req->compressed and bytes_read still hold their initial zeroes.
	 */
	TRACE("Metadata block @ 0x%llx, %scompressed size %d, src size %d\n",
	      req->index, req->compressed ? "" : "un", bytes_read,
	      req->output->length);

	ret = squashfs_bio_submit(req);
	if (ret)
		return ret;
	if (error)
		return error;
	if (next_index)
		*next_index += 2 + bytes_read;
	return bytes_uncompressed;
}
/*
 * Read (and, if compressed, decompress) a datablock into req->output.
 *
 * 'length' is the on-disk encoded length word: SQUASHFS_COMPRESSED_BLOCK
 * flags compression and SQUASHFS_COMPRESSED_SIZE_BLOCK yields the byte
 * count.  For synchronous reads the decompressed size (or a negative
 * errno) is returned; asynchronous completion runs on a workqueue and
 * the page actor releases the pages.  '*next_index', when non-NULL, is
 * advanced past the block.  The request is always consumed.
 */
static int read_data_block(struct squashfs_read_request *req, int length,
			   u64 *next_index, bool synchronous)
{
	int ret, error = 0, bytes_uncompressed = 0, bytes_read = 0;
	struct squashfs_sb_info *msblk = req->sb->s_fs_info;

	req->compressed = SQUASHFS_COMPRESSED_BLOCK(length);
	req->length = length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
	req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
					       : SQUASHFS_COPY;

	/*
	 * Sanity-check the decoded length against the output buffer and the
	 * device size before issuing any I/O.  The pre-BIO implementation
	 * performed this validation; without it a corrupted length word
	 * could trigger reads past the end of the filesystem or overflow
	 * the page actor.
	 */
	if (length < 0 || length > req->output->length ||
			req->index + length > msblk->bytes_used) {
		free_read_request(req, -EIO);
		return -EIO;
	}

	req->synchronous = synchronous;
	if (synchronous) {
		req->res = &error;
		req->bytes_read = &bytes_read;
		req->bytes_uncompressed = &bytes_uncompressed;
	}

	TRACE("Data block @ 0x%llx, %scompressed size %d, src size %d\n",
	      req->index, req->compressed ? "" : "un", req->length,
	      req->output->length);

	ret = squashfs_bio_submit(req);
	if (ret)
		return ret;
	if (synchronous)
		ret = error ? error : bytes_uncompressed;
	if (next_index)
		*next_index += length;
	return ret;
}
/*
* Read and decompress a metadata block or datablock. Length is non-zero
@ -87,128 +400,50 @@ static struct buffer_head *get_block_length(struct super_block *sb,
* generated a larger block - this does occasionally happen with compression
* algorithms).
*/
int squashfs_read_data(struct super_block *sb, u64 index, int length,
u64 *next_index, struct squashfs_page_actor *output)
static int __squashfs_read_data(struct super_block *sb, u64 index, int length,
u64 *next_index, struct squashfs_page_actor *output, bool sync)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
struct buffer_head **bh;
int offset = index & ((1 << msblk->devblksize_log2) - 1);
u64 cur_index = index >> msblk->devblksize_log2;
int bytes, compressed, b = 0, k = 0, avail, i;
struct squashfs_read_request *req;
bh = kcalloc(((output->length + msblk->devblksize - 1)
>> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
if (bh == NULL)
req = kcalloc(1, sizeof(struct squashfs_read_request), GFP_KERNEL);
if (!req) {
if (!sync)
squashfs_page_actor_free(output, -ENOMEM);
return -ENOMEM;
if (length) {
/*
* Datablock.
*/
bytes = -offset;
compressed = SQUASHFS_COMPRESSED_BLOCK(length);
length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
if (next_index)
*next_index = index + length;
TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
index, compressed ? "" : "un", length, output->length);
if (length < 0 || length > output->length ||
(index + length) > msblk->bytes_used)
goto read_failure;
for (b = 0; bytes < length; b++, cur_index++) {
bh[b] = sb_getblk(sb, cur_index);
if (bh[b] == NULL)
goto block_release;
bytes += msblk->devblksize;
}
ll_rw_block(READ, b, bh);
} else {
/*
* Metadata block.
*/
if ((index + 2) > msblk->bytes_used)
goto read_failure;
bh[0] = get_block_length(sb, &cur_index, &offset, &length);
if (bh[0] == NULL)
goto read_failure;
b = 1;
bytes = msblk->devblksize - offset;
compressed = SQUASHFS_COMPRESSED(length);
length = SQUASHFS_COMPRESSED_SIZE(length);
if (next_index)
*next_index = index + length + 2;
TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
compressed ? "" : "un", length);
if (length < 0 || length > output->length ||
(index + length) > msblk->bytes_used)
goto block_release;
for (; bytes < length; b++) {
bh[b] = sb_getblk(sb, ++cur_index);
if (bh[b] == NULL)
goto block_release;
bytes += msblk->devblksize;
}
ll_rw_block(READ, b - 1, bh + 1);
}
for (i = 0; i < b; i++) {
wait_on_buffer(bh[i]);
if (!buffer_uptodate(bh[i]))
goto block_release;
req->sb = sb;
req->index = index;
req->output = output;
if (next_index)
*next_index = index;
if (length)
length = read_data_block(req, length, next_index, sync);
else
length = read_metadata_block(req, next_index);
if (length < 0) {
ERROR("squashfs_read_data failed to read block 0x%llx\n",
(unsigned long long)index);
return -EIO;
}
if (compressed) {
length = squashfs_decompress(msblk, bh, b, offset, length,
output);
if (length < 0)
goto read_failure;
} else {
/*
* Block is uncompressed.
*/
int in, pg_offset = 0;
void *data = squashfs_first_page(output);
for (bytes = length; k < b; k++) {
in = min(bytes, msblk->devblksize - offset);
bytes -= in;
while (in) {
if (pg_offset == PAGE_CACHE_SIZE) {
data = squashfs_next_page(output);
pg_offset = 0;
}
avail = min_t(int, in, PAGE_CACHE_SIZE -
pg_offset);
memcpy(data + pg_offset, bh[k]->b_data + offset,
avail);
in -= avail;
pg_offset += avail;
offset += avail;
}
offset = 0;
put_bh(bh[k]);
}
squashfs_finish_page(output);
}
kfree(bh);
return length;
block_release:
for (; k < b; k++)
put_bh(bh[k]);
read_failure:
ERROR("squashfs_read_data failed to read block 0x%llx\n",
(unsigned long long) index);
kfree(bh);
return -EIO;
}
/*
 * Synchronous read: blocks until the data has been read and
 * copied/decompressed into 'output'.  Returns the number of bytes
 * produced or a negative errno; the caller keeps ownership of 'output'.
 */
int squashfs_read_data(struct super_block *sb, u64 index, int length,
	u64 *next_index, struct squashfs_page_actor *output)
{
	return __squashfs_read_data(sb, index, length, next_index, output,
				    true);
}
/*
 * Asynchronous read: returns once the I/O is submitted.  Ownership of
 * 'output' transfers to the request; its release callback runs when the
 * read completes (or fails).
 */
int squashfs_read_data_async(struct super_block *sb, u64 index, int length,
	u64 *next_index, struct squashfs_page_actor *output)
{
	return __squashfs_read_data(sb, index, length, next_index, output,
				    false);
}

View file

@ -20,8 +20,68 @@
#include "squashfs.h"
#include "page_actor.h"
static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
int pages, struct page **page);
/*
 * Completion callback for a page-cache actor: mark every page either up
 * to date (error == 0) or errored and zero-filled, then unlock it and
 * drop its reference.  The page array itself is freed here too.
 */
static void release_actor_pages(struct page **page, int pages, int error)
{
	int idx;

	for (idx = 0; idx < pages; idx++) {
		struct page *pg = page[idx];

		if (!pg)
			continue;
		flush_dcache_page(pg);
		if (error) {
			SetPageError(pg);
			zero_user_segment(pg, 0, PAGE_CACHE_SIZE);
		} else {
			SetPageUptodate(pg);
		}
		unlock_page(pg);
		put_page(pg);
	}
	kfree(page);
}
/*
* Create a "page actor" which will kmap and kunmap the
* page cache pages appropriately within the decompressor
*/
/*
 * Create a "page actor" which will kmap and kunmap the page cache pages
 * appropriately within the decompressor.
 *
 * Grabs the nr_pages page-cache pages covered by the SquashFS block
 * (target_page is used in place for its own index).  Pages that are
 * already up to date are released and left NULL in the array.  Returns
 * NULL on allocation failure.
 */
static struct squashfs_page_actor *actor_from_page_cache(
	struct page *target_page, int start_index, int nr_pages)
{
	int i, n;
	struct page **page;
	struct squashfs_page_actor *actor;

	page = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
	if (!page)
		return NULL;

	/* Try to grab all the pages covered by the SquashFS block */
	for (i = 0, n = start_index; i < nr_pages; i++, n++) {
		if (target_page->index == n) {
			page[i] = target_page;
		} else {
			page[i] = grab_cache_page_nowait(target_page->mapping,
							 n);
			if (page[i] == NULL)
				continue;
		}
		if (PageUptodate(page[i])) {
			unlock_page(page[i]);
			put_page(page[i]);
			page[i] = NULL;
		}
	}

	actor = squashfs_page_actor_init(page, nr_pages, 0,
					 release_actor_pages);
	if (!actor) {
		/*
		 * release_actor_pages() already kfrees the page array, so
		 * do NOT kfree(page) again here — the previous extra
		 * kfree() was a double free.
		 */
		release_actor_pages(page, nr_pages, -ENOMEM);
		return NULL;
	}
	return actor;
}
/* Read separately compressed datablock directly into page cache */
int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
@ -34,143 +94,19 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
int start_index = target_page->index & ~mask;
int end_index = start_index | mask;
int i, n, pages, missing_pages, bytes, res = -ENOMEM;
struct page **page;
int pages, res = -ENOMEM;
struct squashfs_page_actor *actor;
void *pageaddr;
if (end_index > file_end)
end_index = file_end;
pages = end_index - start_index + 1;
page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
if (page == NULL)
return res;
actor = actor_from_page_cache(target_page, start_index, pages);
if (!actor)
return -ENOMEM;
/*
* Create a "page actor" which will kmap and kunmap the
* page cache pages appropriately within the decompressor
*/
actor = squashfs_page_actor_init(page, pages, 0, NULL);
if (actor == NULL)
goto out;
/* Try to grab all the pages covered by the Squashfs block */
for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) {
page[i] = (n == target_page->index) ? target_page :
grab_cache_page_nowait(target_page->mapping, n);
if (page[i] == NULL) {
missing_pages++;
continue;
}
if (PageUptodate(page[i])) {
unlock_page(page[i]);
page_cache_release(page[i]);
page[i] = NULL;
missing_pages++;
}
}
if (missing_pages) {
/*
* Couldn't get one or more pages, this page has either
* been VM reclaimed, but others are still in the page cache
* and uptodate, or we're racing with another thread in
* squashfs_readpage also trying to grab them. Fall back to
* using an intermediate buffer.
*/
res = squashfs_read_cache(target_page, block, bsize, pages,
page);
if (res < 0)
goto mark_errored;
goto out;
}
/* Decompress directly into the page cache buffers */
res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
if (res < 0)
goto mark_errored;
/* Last page may have trailing bytes not filled */
bytes = res % PAGE_CACHE_SIZE;
if (bytes) {
pageaddr = kmap_atomic(page[pages - 1]);
memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
kunmap_atomic(pageaddr);
}
/* Mark pages as uptodate, unlock and release */
for (i = 0; i < pages; i++) {
flush_dcache_page(page[i]);
SetPageUptodate(page[i]);
unlock_page(page[i]);
if (page[i] != target_page)
page_cache_release(page[i]);
}
kfree(actor);
kfree(page);
return 0;
mark_errored:
/* Decompression failed, mark pages as errored. Target_page is
* dealt with by the caller
*/
for (i = 0; i < pages; i++) {
if (page[i] == NULL || page[i] == target_page)
continue;
flush_dcache_page(page[i]);
SetPageError(page[i]);
unlock_page(page[i]);
page_cache_release(page[i]);
}
out:
squashfs_page_actor_free(actor, 0);
kfree(page);
return res;
}
static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
int pages, struct page **page)
{
struct inode *i = target_page->mapping->host;
struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
block, bsize);
int bytes = buffer->length, res = buffer->error, n, offset = 0;
void *pageaddr;
if (res) {
ERROR("Unable to read page, block %llx, size %x\n", block,
bsize);
goto out;
}
for (n = 0; n < pages && bytes > 0; n++,
bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
if (page[n] == NULL)
continue;
pageaddr = kmap_atomic(page[n]);
squashfs_copy_data(pageaddr, buffer, offset, avail);
memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
kunmap_atomic(pageaddr);
flush_dcache_page(page[n]);
SetPageUptodate(page[n]);
unlock_page(page[n]);
if (page[n] != target_page)
page_cache_release(page[n]);
}
out:
squashfs_cache_put(buffer);
return res;
get_page(target_page);
res = squashfs_read_data_async(inode->i_sb, block, bsize, NULL,
actor);
return res < 0 ? res : 0;
}

View file

@ -28,8 +28,14 @@
#define WARNING(s, args...) pr_warn("SQUASHFS: "s, ## args)
/* block.c */
extern int squashfs_init_read_wq(void);
extern void squashfs_destroy_read_wq(void);
extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
struct squashfs_page_actor *);
extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
struct squashfs_page_actor *);
extern int squashfs_read_data_async(struct super_block *, u64, int, u64 *,
struct squashfs_page_actor *);
/* cache.c */
extern struct squashfs_cache *squashfs_cache_init(char *, int, int);

View file

@ -444,9 +444,15 @@ static int __init init_squashfs_fs(void)
if (err)
return err;
if (!squashfs_init_read_wq()) {
destroy_inodecache();
return -ENOMEM;
}
err = register_filesystem(&squashfs_fs_type);
if (err) {
destroy_inodecache();
squashfs_destroy_read_wq();
return err;
}
@ -460,6 +466,7 @@ static void __exit exit_squashfs_fs(void)
{
unregister_filesystem(&squashfs_fs_type);
destroy_inodecache();
squashfs_destroy_read_wq();
}