f2fs: backport from (4c1fad64 - Merge tag 'for-f2fs-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs)

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Jaegeuk Kim 2016-10-12 17:18:56 -07:00 committed by Jaegeuk Kim
parent 650b6a5c41
commit c1286ff41c
43 changed files with 7560 additions and 3943 deletions

Documentation/filesystems/f2fs.txt

@ -102,14 +102,16 @@ background_gc=%s Turn on/off cleaning operations, namely garbage
collection, triggered in background when I/O subsystem is
idle. If background_gc=on, it will turn on the garbage
collection and if background_gc=off, garbage collection
will be truned off. If background_gc=sync, it will turn
will be turned off. If background_gc=sync, it will turn
on synchronous garbage collection running in background.
Default value for this option is on. So garbage
collection is on by default.
disable_roll_forward Disable the roll-forward recovery routine
norecovery Disable the roll-forward recovery routine, mounted read-
only (i.e., -o ro,disable_roll_forward)
discard Issue discard/TRIM commands when a segment is cleaned.
discard/nodiscard Enable/disable real-time discard in f2fs, if discard is
enabled, f2fs will issue discard/TRIM commands when a
segment is cleaned.
no_heap Disable heap-style segment allocation which finds free
segments for data from the beginning of main area, while
for node from the end of main area.
@ -129,6 +131,7 @@ inline_dentry Enable the inline dir feature: data in new created
directory entries can be written into inode block. The
space of inode block which is used to store inline
dentries is limited to ~3.4k.
noinline_dentry Disable the inline dentry feature.
flush_merge Merge concurrent cache_flush commands as much as possible
to eliminate redundant command issues. If the underlying
device handles the cache_flush command relatively slowly,
@ -145,10 +148,15 @@ extent_cache Enable an extent cache based on rb-tree, it can cache
as many as extent which map between contiguous logical
address and physical address per inode, resulting in
increasing the cache hit ratio. Set by default.
noextent_cache Diable an extent cache based on rb-tree explicitly, see
noextent_cache Disable an extent cache based on rb-tree explicitly, see
the above extent_cache mount option.
noinline_data Disable the inline data feature, inline data feature is
enabled by default.
data_flush Enable data flushing before checkpoint in order to
persist data of regular and symlink.
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
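
As an illustration (not part of this commit), these options are passed via the mount data string. A minimal userspace C sketch using the mount(2) syscall, with hypothetical device and mount-point paths:

#include <sys/mount.h>

/* Sketch only: paths and option string are examples, not requirements. */
int mount_f2fs_example(void)
{
	return mount("/dev/sdb1", "/mnt/f2fs", "f2fs", 0,
		     "background_gc=sync,discard,mode=lfs,data_flush");
}
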
================================================================================
DEBUGFS ENTRIES
@ -192,7 +200,7 @@ Files in /sys/fs/f2fs/<devname>
policy for garbage collection. Setting gc_idle = 0
(default) will disable this option. Setting
gc_idle = 1 will select the Cost Benefit approach
& setting gc_idle = 2 will select the greedy aproach.
& setting gc_idle = 2 will select the greedy approach.
reclaim_segments This parameter controls the number of prefree
segments to be reclaimed. If the number of prefree
@ -298,7 +306,7 @@ The dump.f2fs shows the information of specific inode and dumps SSA and SIT to
file. Each file is dump_ssa and dump_sit.
The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem.
It shows on-disk inode information reconized by a given inode number, and is
It shows on-disk inode information recognized by a given inode number, and is
able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and
./dump_sit respectively.

fs/Kconfig

@ -73,6 +73,8 @@ config FILE_LOCKING
for filesystems like NFS and for the flock() system
call. Disabling this option saves about 11k.
source "fs/crypto/Kconfig"
source "fs/notify/Kconfig"
source "fs/quota/Kconfig"

fs/Makefile

@ -30,6 +30,7 @@ obj-$(CONFIG_EVENTFD) += eventfd.o
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
obj-$(CONFIG_AIO) += aio.o
obj-$(CONFIG_FS_DAX) += dax.o
obj-$(CONFIG_FS_ENCRYPTION) += crypto/
obj-$(CONFIG_FILE_LOCKING) += locks.o
obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o

fs/crypto/Kconfig (new file, 18 lines)

@ -0,0 +1,18 @@
config FS_ENCRYPTION
tristate "FS Encryption (Per-file encryption)"
depends on BLOCK
select CRYPTO
select CRYPTO_AES
select CRYPTO_CBC
select CRYPTO_ECB
select CRYPTO_XTS
select CRYPTO_CTS
select CRYPTO_CTR
select CRYPTO_SHA256
select KEYS
select ENCRYPTED_KEYS
help
Enable encryption of files and directories. This
feature is similar to ecryptfs, but it is more memory
efficient since it avoids caching the encrypted and
decrypted pages in the page cache.

fs/crypto/Makefile (new file, 3 lines)

@ -0,0 +1,3 @@
obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o
fscrypto-y := crypto.o fname.o policy.o keyinfo.o

fs/crypto/crypto.c (new file, 568 lines)

@ -0,0 +1,568 @@
/*
* This contains encryption functions for per-file encryption.
*
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2015, Motorola Mobility
*
* Written by Michael Halcrow, 2014.
*
* Filename encryption additions
* Uday Savagaonkar, 2014
* Encryption policy handling additions
* Ildar Muslukhov, 2014
* Add fscrypt_pullback_bio_page()
* Jaegeuk Kim, 2015.
*
* This has not yet undergone a rigorous security audit.
*
* The usage of AES-XTS should conform to recommendations in NIST
* Special Publication 800-38E and IEEE P1619/D16.
*/
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/ratelimit.h>
#include <linux/bio.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/fscrypto.h>
#include <linux/ecryptfs.h>
static unsigned int num_prealloc_crypto_pages = 32;
static unsigned int num_prealloc_crypto_ctxs = 128;
module_param(num_prealloc_crypto_pages, uint, 0444);
MODULE_PARM_DESC(num_prealloc_crypto_pages,
"Number of crypto pages to preallocate");
module_param(num_prealloc_crypto_ctxs, uint, 0444);
MODULE_PARM_DESC(num_prealloc_crypto_ctxs,
"Number of crypto contexts to preallocate");
static mempool_t *fscrypt_bounce_page_pool = NULL;
static LIST_HEAD(fscrypt_free_ctxs);
static DEFINE_SPINLOCK(fscrypt_ctx_lock);
static struct workqueue_struct *fscrypt_read_workqueue;
static DEFINE_MUTEX(fscrypt_init_mutex);
static struct kmem_cache *fscrypt_ctx_cachep;
struct kmem_cache *fscrypt_info_cachep;
/**
* fscrypt_release_ctx() - Releases an encryption context
* @ctx: The encryption context to release.
*
* If the encryption context was allocated from the pre-allocated pool, returns
* it to that pool. Else, frees it.
*
* If there's a bounce page in the context, this frees that.
*/
void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
{
unsigned long flags;
if (ctx->flags & FS_WRITE_PATH_FL && ctx->w.bounce_page) {
mempool_free(ctx->w.bounce_page, fscrypt_bounce_page_pool);
ctx->w.bounce_page = NULL;
}
ctx->w.control_page = NULL;
if (ctx->flags & FS_CTX_REQUIRES_FREE_ENCRYPT_FL) {
kmem_cache_free(fscrypt_ctx_cachep, ctx);
} else {
spin_lock_irqsave(&fscrypt_ctx_lock, flags);
list_add(&ctx->free_list, &fscrypt_free_ctxs);
spin_unlock_irqrestore(&fscrypt_ctx_lock, flags);
}
}
EXPORT_SYMBOL(fscrypt_release_ctx);
/**
* fscrypt_get_ctx() - Gets an encryption context
* @inode: The inode for which we are doing the crypto
* @gfp_flags: The gfp flag for memory allocation
*
* Allocates and initializes an encryption context.
*
* Return: An allocated and initialized encryption context on success; error
* value or NULL otherwise.
*/
struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags)
{
struct fscrypt_ctx *ctx = NULL;
struct fscrypt_info *ci = inode->i_crypt_info;
unsigned long flags;
if (ci == NULL)
return ERR_PTR(-ENOKEY);
/*
* We first try getting the ctx from a free list because in
* the common case the ctx will have an allocated and
* initialized crypto tfm, so it's probably a worthwhile
* optimization. For the bounce page, we first try getting it
* from the kernel allocator because that's just about as fast
* as getting it from a list and because a cache of free pages
* should generally be a "last resort" option for a filesystem
* to be able to do its job.
*/
spin_lock_irqsave(&fscrypt_ctx_lock, flags);
ctx = list_first_entry_or_null(&fscrypt_free_ctxs,
struct fscrypt_ctx, free_list);
if (ctx)
list_del(&ctx->free_list);
spin_unlock_irqrestore(&fscrypt_ctx_lock, flags);
if (!ctx) {
ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, gfp_flags);
if (!ctx)
return ERR_PTR(-ENOMEM);
ctx->flags |= FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
} else {
ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
}
ctx->flags &= ~FS_WRITE_PATH_FL;
return ctx;
}
EXPORT_SYMBOL(fscrypt_get_ctx);
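
A minimal usage sketch (illustrative; the helper name is hypothetical and not part of this diff): callers pair fscrypt_get_ctx() with fscrypt_release_ctx(), so contexts cycle through the free list described above.

#include <linux/fscrypto.h>

static int example_ctx_cycle(struct inode *inode)
{
	struct fscrypt_ctx *ctx = fscrypt_get_ctx(inode, GFP_NOFS);

	if (IS_ERR(ctx))
		return PTR_ERR(ctx);	/* e.g. -ENOKEY without i_crypt_info */
	/* ... stash ctx in a bio and submit the read I/O ... */
	fscrypt_release_ctx(ctx);	/* returns ctx to fscrypt_free_ctxs */
	return 0;
}
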
/**
* fscrypt_complete() - The completion callback for page encryption
* @req: The asynchronous encryption request context
* @res: The result of the encryption operation
*/
static void fscrypt_complete(struct crypto_async_request *req, int res)
{
struct fscrypt_completion_result *ecr = req->data;
if (res == -EINPROGRESS)
return;
ecr->res = res;
complete(&ecr->completion);
}
typedef enum {
FS_DECRYPT = 0,
FS_ENCRYPT,
} fscrypt_direction_t;
static int do_page_crypto(struct inode *inode,
fscrypt_direction_t rw, pgoff_t index,
struct page *src_page, struct page *dest_page,
gfp_t gfp_flags)
{
u8 xts_tweak[FS_XTS_TWEAK_SIZE];
struct skcipher_request *req = NULL;
DECLARE_FS_COMPLETION_RESULT(ecr);
struct scatterlist dst, src;
struct fscrypt_info *ci = inode->i_crypt_info;
struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
req = skcipher_request_alloc(tfm, gfp_flags);
if (!req) {
printk_ratelimited(KERN_ERR
"%s: crypto_request_alloc() failed\n",
__func__);
return -ENOMEM;
}
skcipher_request_set_callback(
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
fscrypt_complete, &ecr);
BUILD_BUG_ON(FS_XTS_TWEAK_SIZE < sizeof(index));
memcpy(xts_tweak, &index, sizeof(index));
memset(&xts_tweak[sizeof(index)], 0,
FS_XTS_TWEAK_SIZE - sizeof(index));
sg_init_table(&dst, 1);
sg_set_page(&dst, dest_page, PAGE_SIZE, 0);
sg_init_table(&src, 1);
sg_set_page(&src, src_page, PAGE_SIZE, 0);
skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE,
xts_tweak);
if (rw == FS_DECRYPT)
res = crypto_skcipher_decrypt(req);
else
res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
BUG_ON(req->base.data != &ecr);
wait_for_completion(&ecr.completion);
res = ecr.res;
}
skcipher_request_free(req);
if (res) {
printk_ratelimited(KERN_ERR
"%s: crypto_skcipher_encrypt() returned %d\n",
__func__, res);
return res;
}
return 0;
}
static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags)
{
ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
if (ctx->w.bounce_page == NULL)
return ERR_PTR(-ENOMEM);
ctx->flags |= FS_WRITE_PATH_FL;
return ctx->w.bounce_page;
}
/**
* fscrypt_encrypt_page() - Encrypts a page
* @inode: The inode for which the encryption should take place
* @plaintext_page: The page to encrypt. Must be locked.
* @gfp_flags: The gfp flag for memory allocation
*
* Allocates a ciphertext page and encrypts plaintext_page into it using the ctx
* encryption context.
*
* Called on the page write path. The caller must call
* fscrypt_restore_control_page() on the returned ciphertext page to
* release the bounce buffer and the encryption context.
*
* Return: An allocated page with the encrypted content on success. Else, an
* error value or NULL.
*/
struct page *fscrypt_encrypt_page(struct inode *inode,
struct page *plaintext_page, gfp_t gfp_flags)
{
struct fscrypt_ctx *ctx;
struct page *ciphertext_page = NULL;
int err;
BUG_ON(!PageLocked(plaintext_page));
ctx = fscrypt_get_ctx(inode, gfp_flags);
if (IS_ERR(ctx))
return (struct page *)ctx;
/* The encryption operation will require a bounce page. */
ciphertext_page = alloc_bounce_page(ctx, gfp_flags);
if (IS_ERR(ciphertext_page))
goto errout;
ctx->w.control_page = plaintext_page;
err = do_page_crypto(inode, FS_ENCRYPT, plaintext_page->index,
plaintext_page, ciphertext_page,
gfp_flags);
if (err) {
ciphertext_page = ERR_PTR(err);
goto errout;
}
SetPagePrivate(ciphertext_page);
set_page_private(ciphertext_page, (unsigned long)ctx);
lock_page(ciphertext_page);
return ciphertext_page;
errout:
fscrypt_release_ctx(ctx);
return ciphertext_page;
}
EXPORT_SYMBOL(fscrypt_encrypt_page);
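
Write-path sketch (hypothetical helper, simplifications noted in comments): the caller submits the returned bounce page in place of the plaintext page, then releases it once the write has completed.

#include <linux/fscrypto.h>

static int example_write_encrypted(struct inode *inode, struct page *page)
{
	struct page *cpage = fscrypt_encrypt_page(inode, page, GFP_NOFS);

	if (IS_ERR(cpage))
		return PTR_ERR(cpage);
	/* ... build a bio against cpage and submit the write; in a real
	 * filesystem the release below happens in the write end_io path ... */
	fscrypt_restore_control_page(cpage);	/* frees bounce page and ctx */
	return 0;
}
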
/**
* fscrypt_decrypt_page() - Decrypts a page in-place
* @page: The page to decrypt. Must be locked.
*
* Decrypts page in-place using the ctx encryption context.
*
* Called from the read completion callback.
*
* Return: Zero on success, non-zero otherwise.
*/
int fscrypt_decrypt_page(struct page *page)
{
BUG_ON(!PageLocked(page));
return do_page_crypto(page->mapping->host,
FS_DECRYPT, page->index, page, page, GFP_NOFS);
}
EXPORT_SYMBOL(fscrypt_decrypt_page);
int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len)
{
struct fscrypt_ctx *ctx;
struct page *ciphertext_page = NULL;
struct bio *bio;
int ret, err = 0;
BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
ctx = fscrypt_get_ctx(inode, GFP_NOFS);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
ciphertext_page = alloc_bounce_page(ctx, GFP_NOWAIT);
if (IS_ERR(ciphertext_page)) {
err = PTR_ERR(ciphertext_page);
goto errout;
}
while (len--) {
err = do_page_crypto(inode, FS_ENCRYPT, lblk,
ZERO_PAGE(0), ciphertext_page,
GFP_NOFS);
if (err)
goto errout;
bio = bio_alloc(GFP_NOWAIT, 1);
if (!bio) {
err = -ENOMEM;
goto errout;
}
bio->bi_bdev = inode->i_sb->s_bdev;
bio->bi_iter.bi_sector =
pblk << (inode->i_sb->s_blocksize_bits - 9);
ret = bio_add_page(bio, ciphertext_page,
inode->i_sb->s_blocksize, 0);
if (ret != inode->i_sb->s_blocksize) {
/* should never happen! */
WARN_ON(1);
bio_put(bio);
err = -EIO;
goto errout;
}
err = submit_bio_wait(WRITE, bio);
if ((err == 0) && bio->bi_error)
err = -EIO;
bio_put(bio);
if (err)
goto errout;
lblk++;
pblk++;
}
err = 0;
errout:
fscrypt_release_ctx(ctx);
return err;
}
EXPORT_SYMBOL(fscrypt_zeroout_range);
/*
* Validate dentries for encrypted directories to make sure we aren't
* potentially caching stale data after a key has been added or
* removed.
*/
static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
{
struct dentry *dir;
struct fscrypt_info *ci;
int dir_has_key, cached_with_key;
if (flags & LOOKUP_RCU)
return -ECHILD;
dir = dget_parent(dentry);
if (!d_inode(dir)->i_sb->s_cop->is_encrypted(d_inode(dir))) {
dput(dir);
return 0;
}
ci = d_inode(dir)->i_crypt_info;
if (ci && ci->ci_keyring_key &&
(ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
(1 << KEY_FLAG_REVOKED) |
(1 << KEY_FLAG_DEAD))))
ci = NULL;
/* this should eventually be a flag in d_flags */
spin_lock(&dentry->d_lock);
cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY;
spin_unlock(&dentry->d_lock);
dir_has_key = (ci != NULL);
dput(dir);
/*
* If the dentry was cached without the key, and it is a
* negative dentry, it might be a valid name. We can't check
* if the key has since been made available due to locking
* reasons, so we fail the validation so ext4_lookup() can do
* this check.
*
* We also fail the validation if the dentry was created with
* the key present, but we no longer have the key, or vice versa.
*/
if ((!cached_with_key && d_is_negative(dentry)) ||
(!cached_with_key && dir_has_key) ||
(cached_with_key && !dir_has_key))
return 0;
return 1;
}
const struct dentry_operations fscrypt_d_ops = {
.d_revalidate = fscrypt_d_revalidate,
};
EXPORT_SYMBOL(fscrypt_d_ops);
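
Sketch of how a filesystem's ->lookup() would install these dentry_operations (hypothetical helper; the condition approximates "directory is encrypted but its key is absent"):

#include <linux/dcache.h>
#include <linux/fscrypto.h>

static void example_set_dentry_ops(struct inode *dir, struct dentry *dentry)
{
	if (dir->i_sb->s_cop->is_encrypted(dir) && !dir->i_crypt_info)
		d_set_d_op(dentry, &fscrypt_d_ops);
}
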
/*
* Call fscrypt_decrypt_page on every single page, reusing the encryption
* context.
*/
static void completion_pages(struct work_struct *work)
{
struct fscrypt_ctx *ctx =
container_of(work, struct fscrypt_ctx, r.work);
struct bio *bio = ctx->r.bio;
struct bio_vec *bv;
int i;
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
int ret = fscrypt_decrypt_page(page);
if (ret) {
WARN_ON_ONCE(1);
SetPageError(page);
} else {
SetPageUptodate(page);
}
unlock_page(page);
}
fscrypt_release_ctx(ctx);
bio_put(bio);
}
void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio)
{
INIT_WORK(&ctx->r.work, completion_pages);
ctx->r.bio = bio;
queue_work(fscrypt_read_workqueue, &ctx->r.work);
}
EXPORT_SYMBOL(fscrypt_decrypt_bio_pages);
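
Read-completion sketch (hypothetical helper): a filesystem's bio end_io handler hands the bio to the workqueue above, which decrypts each segment, marks the pages up to date and unlocks them.

#include <linux/bio.h>
#include <linux/fscrypto.h>

static void example_read_end_io(struct bio *bio)
{
	struct fscrypt_ctx *ctx = bio->bi_private;	/* stashed at submit time */

	if (bio->bi_error) {
		/* error path simplified: a real filesystem would also
		 * mark and unlock the pages */
		fscrypt_release_ctx(ctx);
		bio_put(bio);
		return;
	}
	/* completion_pages() decrypts, sets Uptodate and unlocks the pages */
	fscrypt_decrypt_bio_pages(ctx, bio);
}
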
void fscrypt_pullback_bio_page(struct page **page, bool restore)
{
struct fscrypt_ctx *ctx;
struct page *bounce_page;
/* The bounce data pages are unmapped. */
if ((*page)->mapping)
return;
/* The bounce data page is unmapped. */
bounce_page = *page;
ctx = (struct fscrypt_ctx *)page_private(bounce_page);
/* restore control page */
*page = ctx->w.control_page;
if (restore)
fscrypt_restore_control_page(bounce_page);
}
EXPORT_SYMBOL(fscrypt_pullback_bio_page);
void fscrypt_restore_control_page(struct page *page)
{
struct fscrypt_ctx *ctx;
ctx = (struct fscrypt_ctx *)page_private(page);
set_page_private(page, (unsigned long)NULL);
ClearPagePrivate(page);
unlock_page(page);
fscrypt_release_ctx(ctx);
}
EXPORT_SYMBOL(fscrypt_restore_control_page);
static void fscrypt_destroy(void)
{
struct fscrypt_ctx *pos, *n;
list_for_each_entry_safe(pos, n, &fscrypt_free_ctxs, free_list)
kmem_cache_free(fscrypt_ctx_cachep, pos);
INIT_LIST_HEAD(&fscrypt_free_ctxs);
mempool_destroy(fscrypt_bounce_page_pool);
fscrypt_bounce_page_pool = NULL;
}
/**
* fscrypt_initialize() - allocate major buffers for fs encryption.
*
* We only call this when we start accessing encrypted files, since it
* results in memory getting allocated that wouldn't otherwise be used.
*
* Return: Zero on success, non-zero otherwise.
*/
int fscrypt_initialize(void)
{
int i, res = -ENOMEM;
if (fscrypt_bounce_page_pool)
return 0;
mutex_lock(&fscrypt_init_mutex);
if (fscrypt_bounce_page_pool)
goto already_initialized;
for (i = 0; i < num_prealloc_crypto_ctxs; i++) {
struct fscrypt_ctx *ctx;
ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, GFP_NOFS);
if (!ctx)
goto fail;
list_add(&ctx->free_list, &fscrypt_free_ctxs);
}
fscrypt_bounce_page_pool =
mempool_create_page_pool(num_prealloc_crypto_pages, 0);
if (!fscrypt_bounce_page_pool)
goto fail;
already_initialized:
mutex_unlock(&fscrypt_init_mutex);
return 0;
fail:
fscrypt_destroy();
mutex_unlock(&fscrypt_init_mutex);
return res;
}
EXPORT_SYMBOL(fscrypt_initialize);
/**
* fscrypt_init() - Set up for fs encryption.
*/
static int __init fscrypt_init(void)
{
fscrypt_read_workqueue = alloc_workqueue("fscrypt_read_queue",
WQ_HIGHPRI, 0);
if (!fscrypt_read_workqueue)
goto fail;
fscrypt_ctx_cachep = KMEM_CACHE(fscrypt_ctx, SLAB_RECLAIM_ACCOUNT);
if (!fscrypt_ctx_cachep)
goto fail_free_queue;
fscrypt_info_cachep = KMEM_CACHE(fscrypt_info, SLAB_RECLAIM_ACCOUNT);
if (!fscrypt_info_cachep)
goto fail_free_ctx;
return 0;
fail_free_ctx:
kmem_cache_destroy(fscrypt_ctx_cachep);
fail_free_queue:
destroy_workqueue(fscrypt_read_workqueue);
fail:
return -ENOMEM;
}
module_init(fscrypt_init)
/**
* fscrypt_exit() - Shutdown the fs encryption system
*/
static void __exit fscrypt_exit(void)
{
fscrypt_destroy();
if (fscrypt_read_workqueue)
destroy_workqueue(fscrypt_read_workqueue);
kmem_cache_destroy(fscrypt_ctx_cachep);
kmem_cache_destroy(fscrypt_info_cachep);
}
module_exit(fscrypt_exit);
MODULE_LICENSE("GPL");

fs/f2fs/crypto_fname.c → fs/crypto/fname.c

@ -1,46 +1,32 @@
/*
* linux/fs/f2fs/crypto_fname.c
*
* Copied from linux/fs/ext4/crypto.c
* This contains functions for filename crypto management
*
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2015, Motorola Mobility
*
* This contains functions for filename crypto management in f2fs
*
* Written by Uday Savagaonkar, 2014.
*
* Adjust f2fs dentry structure
* Jaegeuk Kim, 2015.
* Modified by Jaegeuk Kim, 2015.
*
* This has not yet undergone a rigorous security audit.
*/
#include <crypto/hash.h>
#include <crypto/sha.h>
#include <keys/encrypted-type.h>
#include <keys/user-type.h>
#include <linux/crypto.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/key.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/random.h>
#include <linux/scatterlist.h>
#include <linux/spinlock_types.h>
#include <linux/f2fs_fs.h>
#include <linux/ratelimit.h>
#include <linux/fscrypto.h>
#include "f2fs.h"
#include "f2fs_crypto.h"
#include "xattr.h"
static u32 size_round_up(size_t size, size_t blksize)
{
return ((size + blksize - 1) / blksize) * blksize;
}
/**
* f2fs_dir_crypt_complete() -
* dir_crypt_complete() -
*/
static void f2fs_dir_crypt_complete(struct crypto_async_request *req, int res)
static void dir_crypt_complete(struct crypto_async_request *req, int res)
{
struct f2fs_completion_result *ecr = req->data;
struct fscrypt_completion_result *ecr = req->data;
if (res == -EINPROGRESS)
return;
@ -48,45 +34,35 @@ static void f2fs_dir_crypt_complete(struct crypto_async_request *req, int res)
complete(&ecr->completion);
}
bool f2fs_valid_filenames_enc_mode(uint32_t mode)
{
return (mode == F2FS_ENCRYPTION_MODE_AES_256_CTS);
}
static unsigned max_name_len(struct inode *inode)
{
return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize :
F2FS_NAME_LEN;
}
/**
* f2fs_fname_encrypt() -
* fname_encrypt() -
*
* This function encrypts the input filename, and returns the length of the
* ciphertext. Errors are returned as negative numbers. We trust the caller to
* allocate sufficient memory for the oname string.
*/
static int f2fs_fname_encrypt(struct inode *inode,
const struct qstr *iname, struct f2fs_str *oname)
static int fname_encrypt(struct inode *inode,
const struct qstr *iname, struct fscrypt_str *oname)
{
u32 ciphertext_len;
struct ablkcipher_request *req = NULL;
DECLARE_F2FS_COMPLETION_RESULT(ecr);
struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
struct crypto_ablkcipher *tfm = ci->ci_ctfm;
struct skcipher_request *req = NULL;
DECLARE_FS_COMPLETION_RESULT(ecr);
struct fscrypt_info *ci = inode->i_crypt_info;
struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
char iv[F2FS_CRYPTO_BLOCK_SIZE];
char iv[FS_CRYPTO_BLOCK_SIZE];
struct scatterlist src_sg, dst_sg;
int padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK);
int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK);
char *workbuf, buf[32], *alloc_buf = NULL;
unsigned lim = max_name_len(inode);
unsigned lim;
lim = inode->i_sb->s_cop->max_namelen(inode);
if (iname->len <= 0 || iname->len > lim)
return -EIO;
ciphertext_len = (iname->len < F2FS_CRYPTO_BLOCK_SIZE) ?
F2FS_CRYPTO_BLOCK_SIZE : iname->len;
ciphertext_len = f2fs_fname_crypto_round_up(ciphertext_len, padding);
ciphertext_len = (iname->len < FS_CRYPTO_BLOCK_SIZE) ?
FS_CRYPTO_BLOCK_SIZE : iname->len;
ciphertext_len = size_round_up(ciphertext_len, padding);
ciphertext_len = (ciphertext_len > lim) ? lim : ciphertext_len;
if (ciphertext_len <= sizeof(buf)) {
@ -99,16 +75,16 @@ static int f2fs_fname_encrypt(struct inode *inode,
}
/* Allocate request */
req = ablkcipher_request_alloc(tfm, GFP_NOFS);
req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(KERN_ERR
"%s: crypto_request_alloc() failed\n", __func__);
kfree(alloc_buf);
return -ENOMEM;
}
ablkcipher_request_set_callback(req,
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
f2fs_dir_crypt_complete, &ecr);
dir_crypt_complete, &ecr);
/* Copy the input */
memcpy(workbuf, iname->name, iname->len);
@ -116,79 +92,78 @@ static int f2fs_fname_encrypt(struct inode *inode,
memset(workbuf + iname->len, 0, ciphertext_len - iname->len);
/* Initialize IV */
memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE);
memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
/* Create encryption request */
sg_init_one(&src_sg, workbuf, ciphertext_len);
sg_init_one(&dst_sg, oname->name, ciphertext_len);
ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
res = crypto_ablkcipher_encrypt(req);
skcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
BUG_ON(req->base.data != &ecr);
wait_for_completion(&ecr.completion);
res = ecr.res;
}
kfree(alloc_buf);
ablkcipher_request_free(req);
if (res < 0) {
skcipher_request_free(req);
if (res < 0)
printk_ratelimited(KERN_ERR
"%s: Error (error code %d)\n", __func__, res);
}
oname->len = ciphertext_len;
return res;
}
/*
* f2fs_fname_decrypt()
* fname_decrypt()
* This function decrypts the input filename, and returns
* the length of the plaintext.
* Errors are returned as negative numbers.
* We trust the caller to allocate sufficient memory for the oname string.
*/
static int f2fs_fname_decrypt(struct inode *inode,
const struct f2fs_str *iname, struct f2fs_str *oname)
static int fname_decrypt(struct inode *inode,
const struct fscrypt_str *iname,
struct fscrypt_str *oname)
{
struct ablkcipher_request *req = NULL;
DECLARE_F2FS_COMPLETION_RESULT(ecr);
struct skcipher_request *req = NULL;
DECLARE_FS_COMPLETION_RESULT(ecr);
struct scatterlist src_sg, dst_sg;
struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
struct crypto_ablkcipher *tfm = ci->ci_ctfm;
struct fscrypt_info *ci = inode->i_crypt_info;
struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
char iv[F2FS_CRYPTO_BLOCK_SIZE];
unsigned lim = max_name_len(inode);
char iv[FS_CRYPTO_BLOCK_SIZE];
unsigned lim;
lim = inode->i_sb->s_cop->max_namelen(inode);
if (iname->len <= 0 || iname->len > lim)
return -EIO;
/* Allocate request */
req = ablkcipher_request_alloc(tfm, GFP_NOFS);
req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(KERN_ERR
"%s: crypto_request_alloc() failed\n", __func__);
return -ENOMEM;
}
ablkcipher_request_set_callback(req,
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
f2fs_dir_crypt_complete, &ecr);
dir_crypt_complete, &ecr);
/* Initialize IV */
memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE);
memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
/* Create decryption request */
sg_init_one(&src_sg, iname->name, iname->len);
sg_init_one(&dst_sg, oname->name, oname->len);
ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
res = crypto_ablkcipher_decrypt(req);
skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
res = crypto_skcipher_decrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
BUG_ON(req->base.data != &ecr);
wait_for_completion(&ecr.completion);
res = ecr.res;
}
ablkcipher_request_free(req);
skcipher_request_free(req);
if (res < 0) {
printk_ratelimited(KERN_ERR
"%s: Error in f2fs_fname_decrypt (error code %d)\n",
__func__, res);
"%s: Error (error code %d)\n", __func__, res);
return res;
}
@ -200,7 +175,7 @@ static const char *lookup_table =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
/**
* f2fs_fname_encode_digest() -
* digest_encode() -
*
* Encodes the input digest using characters from the set [a-zA-Z0-9_+].
* The encoded string is roughly 4/3 times the size of the input string.
@ -249,148 +224,152 @@ static int digest_decode(const char *src, int len, char *dst)
return cp - dst;
}
/**
* f2fs_fname_crypto_round_up() -
*
* Return: The next multiple of block size
*/
u32 f2fs_fname_crypto_round_up(u32 size, u32 blksize)
u32 fscrypt_fname_encrypted_size(struct inode *inode, u32 ilen)
{
return ((size + blksize - 1) / blksize) * blksize;
int padding = 32;
struct fscrypt_info *ci = inode->i_crypt_info;
if (ci)
padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK);
if (ilen < FS_CRYPTO_BLOCK_SIZE)
ilen = FS_CRYPTO_BLOCK_SIZE;
return size_round_up(ilen, padding);
}
EXPORT_SYMBOL(fscrypt_fname_encrypted_size);
/**
* f2fs_fname_crypto_alloc_obuff() -
* fscrypt_fname_crypto_alloc_obuff() -
*
* Allocates an output buffer that is sufficient for the crypto operation
* specified by the context and the direction.
*/
int f2fs_fname_crypto_alloc_buffer(struct inode *inode,
u32 ilen, struct f2fs_str *crypto_str)
int fscrypt_fname_alloc_buffer(struct inode *inode,
u32 ilen, struct fscrypt_str *crypto_str)
{
unsigned int olen;
int padding = 16;
struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
unsigned int olen = fscrypt_fname_encrypted_size(inode, ilen);
if (ci)
padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK);
if (padding < F2FS_CRYPTO_BLOCK_SIZE)
padding = F2FS_CRYPTO_BLOCK_SIZE;
olen = f2fs_fname_crypto_round_up(ilen, padding);
crypto_str->len = olen;
if (olen < F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2)
olen = F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2;
/* Allocated buffer can hold one more character to null-terminate the
* string */
if (olen < FS_FNAME_CRYPTO_DIGEST_SIZE * 2)
olen = FS_FNAME_CRYPTO_DIGEST_SIZE * 2;
/*
* Allocated buffer can hold one more character to null-terminate the
* string
*/
crypto_str->name = kmalloc(olen + 1, GFP_NOFS);
if (!(crypto_str->name))
return -ENOMEM;
return 0;
}
EXPORT_SYMBOL(fscrypt_fname_alloc_buffer);
/**
* f2fs_fname_crypto_free_buffer() -
* fscrypt_fname_crypto_free_buffer() -
*
* Frees the buffer allocated for crypto operation.
*/
void f2fs_fname_crypto_free_buffer(struct f2fs_str *crypto_str)
void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str)
{
if (!crypto_str)
return;
kfree(crypto_str->name);
crypto_str->name = NULL;
}
EXPORT_SYMBOL(fscrypt_fname_free_buffer);
/**
* f2fs_fname_disk_to_usr() - converts a filename from disk space to user space
* fscrypt_fname_disk_to_usr() - converts a filename from disk space to user
* space
*/
int f2fs_fname_disk_to_usr(struct inode *inode,
f2fs_hash_t *hash,
const struct f2fs_str *iname,
struct f2fs_str *oname)
int fscrypt_fname_disk_to_usr(struct inode *inode,
u32 hash, u32 minor_hash,
const struct fscrypt_str *iname,
struct fscrypt_str *oname)
{
const struct qstr qname = FSTR_TO_QSTR(iname);
char buf[24];
int ret;
if (is_dot_dotdot(&qname)) {
if (fscrypt_is_dot_dotdot(&qname)) {
oname->name[0] = '.';
oname->name[iname->len - 1] = '.';
oname->len = iname->len;
return oname->len;
}
if (F2FS_I(inode)->i_crypt_info)
return f2fs_fname_decrypt(inode, iname, oname);
if (iname->len < FS_CRYPTO_BLOCK_SIZE)
return -EUCLEAN;
if (iname->len <= F2FS_FNAME_CRYPTO_DIGEST_SIZE) {
if (inode->i_crypt_info)
return fname_decrypt(inode, iname, oname);
if (iname->len <= FS_FNAME_CRYPTO_DIGEST_SIZE) {
ret = digest_encode(iname->name, iname->len, oname->name);
oname->len = ret;
return ret;
}
if (hash) {
memcpy(buf, hash, 4);
memset(buf + 4, 0, 4);
} else
memcpy(buf, &hash, 4);
memcpy(buf + 4, &minor_hash, 4);
} else {
memset(buf, 0, 8);
memcpy(buf + 8, iname->name + ((iname->len - 17) & ~15), 16);
}
memcpy(buf + 8, iname->name + iname->len - 16, 16);
oname->name[0] = '_';
ret = digest_encode(buf, 24, oname->name + 1);
oname->len = ret + 1;
return ret + 1;
}
EXPORT_SYMBOL(fscrypt_fname_disk_to_usr);
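
Readdir-side sketch (hypothetical helper): the output buffer comes from fscrypt_fname_alloc_buffer() above and must be freed afterwards.

#include <linux/fscrypto.h>

static int example_show_name(struct inode *dir, u32 hash,
			     const struct fscrypt_str *de_name)
{
	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
	int err = fscrypt_fname_alloc_buffer(dir, de_name->len, &oname);

	if (err)
		return err;
	err = fscrypt_fname_disk_to_usr(dir, hash, 0, de_name, &oname);
	if (err >= 0)
		pr_info("user-visible name: %.*s\n", (int)oname.len,
			oname.name);
	fscrypt_fname_free_buffer(&oname);
	return err < 0 ? err : 0;
}
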
/**
* f2fs_fname_usr_to_disk() - converts a filename from user space to disk space
* fscrypt_fname_usr_to_disk() - converts a filename from user space to disk
* space
*/
int f2fs_fname_usr_to_disk(struct inode *inode,
int fscrypt_fname_usr_to_disk(struct inode *inode,
const struct qstr *iname,
struct f2fs_str *oname)
struct fscrypt_str *oname)
{
int res;
struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
if (is_dot_dotdot(iname)) {
if (fscrypt_is_dot_dotdot(iname)) {
oname->name[0] = '.';
oname->name[iname->len - 1] = '.';
oname->len = iname->len;
return oname->len;
}
if (ci) {
res = f2fs_fname_encrypt(inode, iname, oname);
return res;
}
/* Without a proper key, a user is not allowed to modify the filenames
if (inode->i_crypt_info)
return fname_encrypt(inode, iname, oname);
/*
* Without a proper key, a user is not allowed to modify the filenames
* in a directory. Consequently, a user space name cannot be mapped to
* a disk-space name */
* a disk-space name
*/
return -EACCES;
}
EXPORT_SYMBOL(fscrypt_fname_usr_to_disk);
int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname,
int lookup, struct f2fs_filename *fname)
int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
int lookup, struct fscrypt_name *fname)
{
struct f2fs_crypt_info *ci;
int ret = 0, bigname = 0;
memset(fname, 0, sizeof(struct f2fs_filename));
memset(fname, 0, sizeof(struct fscrypt_name));
fname->usr_fname = iname;
if (!f2fs_encrypted_inode(dir) || is_dot_dotdot(iname)) {
if (!dir->i_sb->s_cop->is_encrypted(dir) ||
fscrypt_is_dot_dotdot(iname)) {
fname->disk_name.name = (unsigned char *)iname->name;
fname->disk_name.len = iname->len;
return 0;
}
ret = f2fs_get_encryption_info(dir);
if (ret)
ret = get_crypt_info(dir);
if (ret && ret != -EOPNOTSUPP)
return ret;
ci = F2FS_I(dir)->i_crypt_info;
if (ci) {
ret = f2fs_fname_crypto_alloc_buffer(dir, iname->len,
&fname->crypto_buf);
if (dir->i_crypt_info) {
ret = fscrypt_fname_alloc_buffer(dir, iname->len,
&fname->crypto_buf);
if (ret < 0)
return ret;
ret = f2fs_fname_encrypt(dir, iname, &fname->crypto_buf);
ret = fname_encrypt(dir, iname, &fname->crypto_buf);
if (ret < 0)
goto errout;
fname->disk_name.name = fname->crypto_buf.name;
@ -400,18 +379,19 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname,
if (!lookup)
return -EACCES;
/* We don't have the key and we are doing a lookup; decode the
/*
* We don't have the key and we are doing a lookup; decode the
* user-supplied name
*/
if (iname->name[0] == '_')
bigname = 1;
if ((bigname && (iname->len != 33)) ||
(!bigname && (iname->len > 43)))
if ((bigname && (iname->len != 33)) || (!bigname && (iname->len > 43)))
return -ENOENT;
fname->crypto_buf.name = kmalloc(32, GFP_KERNEL);
if (fname->crypto_buf.name == NULL)
return -ENOMEM;
ret = digest_decode(iname->name + bigname, iname->len - bigname,
fname->crypto_buf.name);
if (ret < 0) {
@ -421,20 +401,24 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname,
fname->crypto_buf.len = ret;
if (bigname) {
memcpy(&fname->hash, fname->crypto_buf.name, 4);
memcpy(&fname->minor_hash, fname->crypto_buf.name + 4, 4);
} else {
fname->disk_name.name = fname->crypto_buf.name;
fname->disk_name.len = fname->crypto_buf.len;
}
return 0;
errout:
f2fs_fname_crypto_free_buffer(&fname->crypto_buf);
fscrypt_fname_free_buffer(&fname->crypto_buf);
return ret;
}
EXPORT_SYMBOL(fscrypt_setup_filename);
void f2fs_fname_free_filename(struct f2fs_filename *fname)
void fscrypt_free_filename(struct fscrypt_name *fname)
{
kfree(fname->crypto_buf.name);
fname->crypto_buf.name = NULL;
fname->usr_fname = NULL;
fname->disk_name.name = NULL;
}
EXPORT_SYMBOL(fscrypt_free_filename);
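
Lookup-side sketch (hypothetical helper): a directory operation converts the user-supplied name once, matches on-disk entries against fname.disk_name (or fname.hash when only the digest is available), and frees the buffer when done.

#include <linux/fscrypto.h>

static int example_lookup_name(struct inode *dir, const struct qstr *name)
{
	struct fscrypt_name fname;
	int err = fscrypt_setup_filename(dir, name, 1 /* lookup */, &fname);

	if (err)
		return err;
	/* ... scan directory blocks, comparing against fname.disk_name ... */
	fscrypt_free_filename(&fname);
	return 0;
}
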

fs/crypto/keyinfo.c (new file, 304 lines)

@ -0,0 +1,304 @@
/*
* key management facility for FS encryption support.
*
* Copyright (C) 2015, Google, Inc.
*
* This contains encryption key functions.
*
* Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
*/
#include <keys/encrypted-type.h>
#include <keys/user-type.h>
#include <linux/random.h>
#include <linux/scatterlist.h>
#include <uapi/linux/keyctl.h>
#include <linux/fscrypto.h>
static void derive_crypt_complete(struct crypto_async_request *req, int rc)
{
struct fscrypt_completion_result *ecr = req->data;
if (rc == -EINPROGRESS)
return;
ecr->res = rc;
complete(&ecr->completion);
}
/**
* derive_key_aes() - Derive a key using AES-128-ECB
* @deriving_key: Encryption key used for derivation.
* @source_key: Source key to which to apply derivation.
* @derived_key: Derived key.
*
* Return: Zero on success; non-zero otherwise.
*/
static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
u8 source_key[FS_AES_256_XTS_KEY_SIZE],
u8 derived_key[FS_AES_256_XTS_KEY_SIZE])
{
int res = 0;
struct skcipher_request *req = NULL;
DECLARE_FS_COMPLETION_RESULT(ecr);
struct scatterlist src_sg, dst_sg;
struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0);
if (IS_ERR(tfm)) {
res = PTR_ERR(tfm);
tfm = NULL;
goto out;
}
crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
res = -ENOMEM;
goto out;
}
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
derive_crypt_complete, &ecr);
res = crypto_skcipher_setkey(tfm, deriving_key,
FS_AES_128_ECB_KEY_SIZE);
if (res < 0)
goto out;
sg_init_one(&src_sg, source_key, FS_AES_256_XTS_KEY_SIZE);
sg_init_one(&dst_sg, derived_key, FS_AES_256_XTS_KEY_SIZE);
skcipher_request_set_crypt(req, &src_sg, &dst_sg,
FS_AES_256_XTS_KEY_SIZE, NULL);
res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
wait_for_completion(&ecr.completion);
res = ecr.res;
}
out:
skcipher_request_free(req);
crypto_free_skcipher(tfm);
return res;
}
static int validate_user_key(struct fscrypt_info *crypt_info,
struct fscrypt_context *ctx, u8 *raw_key,
u8 *prefix, int prefix_size)
{
u8 *full_key_descriptor;
struct key *keyring_key;
struct fscrypt_key *master_key;
const struct user_key_payload *ukp;
int full_key_len = prefix_size + (FS_KEY_DESCRIPTOR_SIZE * 2) + 1;
int res;
full_key_descriptor = kmalloc(full_key_len, GFP_NOFS);
if (!full_key_descriptor)
return -ENOMEM;
memcpy(full_key_descriptor, prefix, prefix_size);
sprintf(full_key_descriptor + prefix_size,
"%*phN", FS_KEY_DESCRIPTOR_SIZE,
ctx->master_key_descriptor);
full_key_descriptor[full_key_len - 1] = '\0';
keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
kfree(full_key_descriptor);
if (IS_ERR(keyring_key))
return PTR_ERR(keyring_key);
if (keyring_key->type != &key_type_logon) {
printk_once(KERN_WARNING
"%s: key type must be logon\n", __func__);
res = -ENOKEY;
goto out;
}
down_read(&keyring_key->sem);
ukp = user_key_payload(keyring_key);
if (ukp->datalen != sizeof(struct fscrypt_key)) {
res = -EINVAL;
up_read(&keyring_key->sem);
goto out;
}
master_key = (struct fscrypt_key *)ukp->data;
BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE);
if (master_key->size != FS_AES_256_XTS_KEY_SIZE) {
printk_once(KERN_WARNING
"%s: key size incorrect: %d\n",
__func__, master_key->size);
res = -ENOKEY;
up_read(&keyring_key->sem);
goto out;
}
res = derive_key_aes(ctx->nonce, master_key->raw, raw_key);
up_read(&keyring_key->sem);
if (res)
goto out;
crypt_info->ci_keyring_key = keyring_key;
return 0;
out:
key_put(keyring_key);
return res;
}
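
For context, a userspace C sketch (hypothetical key descriptor) of how a master key that validate_user_key() can find is loaded: the description is the filesystem's prefix plus the hex-encoded master_key_descriptor, and the payload is a struct fscrypt_key as laid out by the kernel header.

#include <keyutils.h>
#include <stddef.h>

key_serial_t example_load_master_key(const void *payload, size_t plen)
{
	/* payload points to a struct fscrypt_key; the descriptor hex
	 * string here is purely illustrative */
	return add_key("logon", "fscrypt:0123456789abcdef",
		       payload, plen, KEY_SPEC_SESSION_KEYRING);
}
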
static void put_crypt_info(struct fscrypt_info *ci)
{
if (!ci)
return;
key_put(ci->ci_keyring_key);
crypto_free_skcipher(ci->ci_ctfm);
kmem_cache_free(fscrypt_info_cachep, ci);
}
int get_crypt_info(struct inode *inode)
{
struct fscrypt_info *crypt_info;
struct fscrypt_context ctx;
struct crypto_skcipher *ctfm;
const char *cipher_str;
u8 raw_key[FS_MAX_KEY_SIZE];
u8 mode;
int res;
res = fscrypt_initialize();
if (res)
return res;
if (!inode->i_sb->s_cop->get_context)
return -EOPNOTSUPP;
retry:
crypt_info = ACCESS_ONCE(inode->i_crypt_info);
if (crypt_info) {
if (!crypt_info->ci_keyring_key ||
key_validate(crypt_info->ci_keyring_key) == 0)
return 0;
fscrypt_put_encryption_info(inode, crypt_info);
goto retry;
}
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0) {
if (!fscrypt_dummy_context_enabled(inode))
return res;
ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
} else if (res != sizeof(ctx)) {
return -EINVAL;
}
res = 0;
crypt_info = kmem_cache_alloc(fscrypt_info_cachep, GFP_NOFS);
if (!crypt_info)
return -ENOMEM;
crypt_info->ci_flags = ctx.flags;
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
crypt_info->ci_ctfm = NULL;
crypt_info->ci_keyring_key = NULL;
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
if (S_ISREG(inode->i_mode))
mode = crypt_info->ci_data_mode;
else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
mode = crypt_info->ci_filename_mode;
else
BUG();
switch (mode) {
case FS_ENCRYPTION_MODE_AES_256_XTS:
cipher_str = "xts(aes)";
break;
case FS_ENCRYPTION_MODE_AES_256_CTS:
cipher_str = "cts(cbc(aes))";
break;
default:
printk_once(KERN_WARNING
"%s: unsupported key mode %d (ino %u)\n",
__func__, mode, (unsigned) inode->i_ino);
res = -ENOKEY;
goto out;
}
if (fscrypt_dummy_context_enabled(inode)) {
memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE);
goto got_key;
}
res = validate_user_key(crypt_info, &ctx, raw_key,
FS_KEY_DESC_PREFIX, FS_KEY_DESC_PREFIX_SIZE);
if (res && inode->i_sb->s_cop->key_prefix) {
u8 *prefix = NULL;
int prefix_size, res2;
prefix_size = inode->i_sb->s_cop->key_prefix(inode, &prefix);
res2 = validate_user_key(crypt_info, &ctx, raw_key,
prefix, prefix_size);
if (res2) {
if (res2 == -ENOKEY)
res = -ENOKEY;
goto out;
}
} else if (res) {
goto out;
}
got_key:
ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) {
res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
printk(KERN_DEBUG
"%s: error %d (inode %u) allocating crypto tfm\n",
__func__, res, (unsigned) inode->i_ino);
goto out;
}
crypt_info->ci_ctfm = ctfm;
crypto_skcipher_clear_flags(ctfm, ~0);
crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
res = crypto_skcipher_setkey(ctfm, raw_key, fscrypt_key_size(mode));
if (res)
goto out;
memzero_explicit(raw_key, sizeof(raw_key));
if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) {
put_crypt_info(crypt_info);
goto retry;
}
return 0;
out:
if (res == -ENOKEY)
res = 0;
put_crypt_info(crypt_info);
memzero_explicit(raw_key, sizeof(raw_key));
return res;
}
void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci)
{
struct fscrypt_info *prev;
if (ci == NULL)
ci = ACCESS_ONCE(inode->i_crypt_info);
if (ci == NULL)
return;
prev = cmpxchg(&inode->i_crypt_info, ci, NULL);
if (prev != ci)
return;
put_crypt_info(ci);
}
EXPORT_SYMBOL(fscrypt_put_encryption_info);
int fscrypt_get_encryption_info(struct inode *inode)
{
struct fscrypt_info *ci = inode->i_crypt_info;
if (!ci ||
(ci->ci_keyring_key &&
(ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
(1 << KEY_FLAG_REVOKED) |
(1 << KEY_FLAG_DEAD)))))
return get_crypt_info(inode);
return 0;
}
EXPORT_SYMBOL(fscrypt_get_encryption_info);
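
Open-path sketch (hypothetical helper, modeled on how f2fs gates access elsewhere): set up the key before the file may be used, failing with -ENOKEY when none is available.

#include <linux/fscrypto.h>

static int example_file_open(struct inode *inode, struct file *filp)
{
	if (inode->i_sb->s_cop->is_encrypted(inode)) {
		int err = fscrypt_get_encryption_info(inode);

		if (err)
			return err;
		if (!inode->i_crypt_info)
			return -ENOKEY;	/* key not present in any keyring */
	}
	return 0;
}
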

fs/crypto/policy.c (new file, 246 lines)

@ -0,0 +1,246 @@
/*
* Encryption policy functions for per-file encryption support.
*
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2015, Motorola Mobility.
*
* Written by Michael Halcrow, 2015.
* Modified by Jaegeuk Kim, 2015.
*/
#include <linux/random.h>
#include <linux/string.h>
#include <linux/fscrypto.h>
#include <linux/mount.h>
static int inode_has_encryption_context(struct inode *inode)
{
if (!inode->i_sb->s_cop->get_context)
return 0;
return (inode->i_sb->s_cop->get_context(inode, NULL, 0L) > 0);
}
/*
* check whether the policy is consistent with the encryption context
* for the inode
*/
static int is_encryption_context_consistent_with_policy(struct inode *inode,
const struct fscrypt_policy *policy)
{
struct fscrypt_context ctx;
int res;
if (!inode->i_sb->s_cop->get_context)
return 0;
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res != sizeof(ctx))
return 0;
return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(ctx.flags == policy->flags) &&
(ctx.contents_encryption_mode ==
policy->contents_encryption_mode) &&
(ctx.filenames_encryption_mode ==
policy->filenames_encryption_mode));
}
static int create_encryption_context_from_policy(struct inode *inode,
const struct fscrypt_policy *policy)
{
struct fscrypt_context ctx;
int res;
if (!inode->i_sb->s_cop->set_context)
return -EOPNOTSUPP;
if (inode->i_sb->s_cop->prepare_context) {
res = inode->i_sb->s_cop->prepare_context(inode);
if (res)
return res;
}
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
if (!fscrypt_valid_contents_enc_mode(
policy->contents_encryption_mode)) {
printk(KERN_WARNING
"%s: Invalid contents encryption mode %d\n", __func__,
policy->contents_encryption_mode);
return -EINVAL;
}
if (!fscrypt_valid_filenames_enc_mode(
policy->filenames_encryption_mode)) {
printk(KERN_WARNING
"%s: Invalid filenames encryption mode %d\n", __func__,
policy->filenames_encryption_mode);
return -EINVAL;
}
if (policy->flags & ~FS_POLICY_FLAGS_VALID)
return -EINVAL;
ctx.contents_encryption_mode = policy->contents_encryption_mode;
ctx.filenames_encryption_mode = policy->filenames_encryption_mode;
ctx.flags = policy->flags;
BUILD_BUG_ON(sizeof(ctx.nonce) != FS_KEY_DERIVATION_NONCE_SIZE);
get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL);
}
int fscrypt_process_policy(struct file *filp,
const struct fscrypt_policy *policy)
{
struct inode *inode = file_inode(filp);
int ret;
if (!inode_owner_or_capable(inode))
return -EACCES;
if (policy->version != 0)
return -EINVAL;
ret = mnt_want_write_file(filp);
if (ret)
return ret;
if (!inode_has_encryption_context(inode)) {
if (!S_ISDIR(inode->i_mode))
ret = -EINVAL;
else if (!inode->i_sb->s_cop->empty_dir)
ret = -EOPNOTSUPP;
else if (!inode->i_sb->s_cop->empty_dir(inode))
ret = -ENOTEMPTY;
else
ret = create_encryption_context_from_policy(inode,
policy);
} else if (!is_encryption_context_consistent_with_policy(inode,
policy)) {
printk(KERN_WARNING
"%s: Policy inconsistent with encryption context\n",
__func__);
ret = -EINVAL;
}
mnt_drop_write_file(filp);
return ret;
}
EXPORT_SYMBOL(fscrypt_process_policy);
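
Ioctl-handler sketch (hypothetical helper, mirroring the f2fs FS_IOC_SET_ENCRYPTION_POLICY handler updated elsewhere in this commit):

#include <linux/uaccess.h>
#include <linux/fscrypto.h>

static long example_ioc_set_policy(struct file *filp, unsigned long arg)
{
	struct fscrypt_policy policy;

	if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg,
				sizeof(policy)))
		return -EFAULT;
	return fscrypt_process_policy(filp, &policy);
}
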
int fscrypt_get_policy(struct inode *inode, struct fscrypt_policy *policy)
{
struct fscrypt_context ctx;
int res;
if (!inode->i_sb->s_cop->get_context ||
!inode->i_sb->s_cop->is_encrypted(inode))
return -ENODATA;
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res != sizeof(ctx))
return -ENODATA;
if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1)
return -EINVAL;
policy->version = 0;
policy->contents_encryption_mode = ctx.contents_encryption_mode;
policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
policy->flags = ctx.flags;
memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
return 0;
}
EXPORT_SYMBOL(fscrypt_get_policy);
int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
{
struct fscrypt_info *parent_ci, *child_ci;
int res;
if ((parent == NULL) || (child == NULL)) {
printk(KERN_ERR "parent %p child %p\n", parent, child);
BUG_ON(1);
}
/* no restrictions if the parent directory is not encrypted */
if (!parent->i_sb->s_cop->is_encrypted(parent))
return 1;
/* if the child directory is not encrypted, this is always a problem */
if (!parent->i_sb->s_cop->is_encrypted(child))
return 0;
res = fscrypt_get_encryption_info(parent);
if (res)
return 0;
res = fscrypt_get_encryption_info(child);
if (res)
return 0;
parent_ci = parent->i_crypt_info;
child_ci = child->i_crypt_info;
if (!parent_ci && !child_ci)
return 1;
if (!parent_ci || !child_ci)
return 0;
return (memcmp(parent_ci->ci_master_key,
child_ci->ci_master_key,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
(parent_ci->ci_filename_mode == child_ci->ci_filename_mode) &&
(parent_ci->ci_flags == child_ci->ci_flags));
}
EXPORT_SYMBOL(fscrypt_has_permitted_context);
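
Typical-caller sketch (hypothetical helper): ->link() and ->rename() use this check to refuse placing an inode under a directory protected by a different key.

#include <linux/fscrypto.h>

static int example_link_check(struct inode *dir, struct inode *inode)
{
	if (dir->i_sb->s_cop->is_encrypted(dir) &&
	    !fscrypt_has_permitted_context(dir, inode))
		return -EPERM;
	return 0;
}
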
/**
* fscrypt_inherit_context() - Sets a child context from its parent
* @parent: Parent inode from which the context is inherited.
* @child: Child inode that inherits the context from @parent.
* @fs_data: private data given by FS.
* @preload: preload child i_crypt_info
*
* Return: Zero on success, non-zero otherwise
*/
int fscrypt_inherit_context(struct inode *parent, struct inode *child,
void *fs_data, bool preload)
{
struct fscrypt_context ctx;
struct fscrypt_info *ci;
int res;
if (!parent->i_sb->s_cop->set_context)
return -EOPNOTSUPP;
res = fscrypt_get_encryption_info(parent);
if (res < 0)
return res;
ci = parent->i_crypt_info;
if (ci == NULL)
return -ENOKEY;
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
if (fscrypt_dummy_context_enabled(parent)) {
ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE);
res = 0;
} else {
ctx.contents_encryption_mode = ci->ci_data_mode;
ctx.filenames_encryption_mode = ci->ci_filename_mode;
ctx.flags = ci->ci_flags;
memcpy(ctx.master_key_descriptor, ci->ci_master_key,
FS_KEY_DESCRIPTOR_SIZE);
}
get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
res = parent->i_sb->s_cop->set_context(child, &ctx,
sizeof(ctx), fs_data);
if (res)
return res;
return preload ? fscrypt_get_encryption_info(child): 0;
}
EXPORT_SYMBOL(fscrypt_inherit_context);
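
Creation-path sketch (hypothetical helper): a new inode under an encrypted directory inherits the parent's context; fs_data is filesystem-private (f2fs passes its in-memory inode page here).

#include <linux/fscrypto.h>

static int example_inherit_on_create(struct inode *dir, struct inode *inode,
				     void *fs_data)
{
	if (!dir->i_sb->s_cop->is_encrypted(dir))
		return 0;
	return fscrypt_inherit_context(dir, inode, fs_data, false);
}
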

fs/f2fs/Kconfig

@ -1,6 +1,9 @@
config F2FS_FS
tristate "F2FS filesystem support"
depends on BLOCK
select CRYPTO
select KEYS
select CRYPTO_CRC32
help
F2FS is based on Log-structured File System (LFS), which supports
versatile "flash-friendly" features. The design has been focused on
@ -76,15 +79,7 @@ config F2FS_FS_ENCRYPTION
bool "F2FS Encryption"
depends on F2FS_FS
depends on F2FS_FS_XATTR
select CRYPTO_AES
select CRYPTO_CBC
select CRYPTO_ECB
select CRYPTO_XTS
select CRYPTO_CTS
select CRYPTO_CTR
select CRYPTO_SHA256
select KEYS
select ENCRYPTED_KEYS
select FS_ENCRYPTION
help
Enable encryption of f2fs files and directories. This
feature is similar to ecryptfs, but it is more memory
@ -100,3 +95,11 @@ config F2FS_IO_TRACE
information and block IO patterns in the filesystem level.
If unsure, say N.
config F2FS_FAULT_INJECTION
bool "F2FS fault injection facility"
depends on F2FS_FS
help
Test F2FS to inject faults such as ENOMEM, ENOSPC, and so on.
If unsure, say N.

fs/f2fs/Makefile

@ -7,5 +7,3 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o
f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o \
crypto_key.o crypto_fname.o

fs/f2fs/acl.c

@ -109,14 +109,16 @@ fail:
return ERR_PTR(-EINVAL);
}
static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi,
const struct posix_acl *acl, size_t *size)
{
struct f2fs_acl_header *f2fs_acl;
struct f2fs_acl_entry *entry;
int i;
f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
sizeof(struct f2fs_acl_entry), GFP_NOFS);
f2fs_acl = f2fs_kmalloc(sbi, sizeof(struct f2fs_acl_header) +
acl->a_count * sizeof(struct f2fs_acl_entry),
GFP_NOFS);
if (!f2fs_acl)
return ERR_PTR(-ENOMEM);
@ -175,7 +177,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage);
if (retval > 0) {
value = kmalloc(retval, GFP_F2FS_ZERO);
value = f2fs_kmalloc(F2FS_I_SB(inode), retval, GFP_F2FS_ZERO);
if (!value)
return ERR_PTR(-ENOMEM);
retval = f2fs_getxattr(inode, name_index, "", value,
@ -204,7 +206,6 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
static int __f2fs_set_acl(struct inode *inode, int type,
struct posix_acl *acl, struct page *ipage)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
int name_index;
void *value = NULL;
size_t size = 0;
@ -213,11 +214,13 @@ static int __f2fs_set_acl(struct inode *inode, int type,
switch (type) {
case ACL_TYPE_ACCESS:
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl && !ipage) {
error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
if (error)
if (acl) {
error = posix_acl_equiv_mode(acl, &inode->i_mode);
if (error < 0)
return error;
set_acl_inode(fi, inode->i_mode);
set_acl_inode(inode, inode->i_mode);
if (error == 0)
acl = NULL;
}
break;
@ -232,9 +235,9 @@ static int __f2fs_set_acl(struct inode *inode, int type,
}
if (acl) {
value = f2fs_acl_to_disk(acl, &size);
value = f2fs_acl_to_disk(F2FS_I_SB(inode), acl, &size);
if (IS_ERR(value)) {
clear_inode_flag(fi, FI_ACL_MODE);
clear_inode_flag(inode, FI_ACL_MODE);
return (int)PTR_ERR(value);
}
}
@ -245,7 +248,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
if (!error)
set_cached_acl(inode, type, acl);
clear_inode_flag(fi, FI_ACL_MODE);
clear_inode_flag(inode, FI_ACL_MODE);
return error;
}
@ -386,6 +389,8 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
if (error)
return error;
f2fs_mark_inode_dirty_sync(inode);
if (default_acl) {
error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,
ipage);

fs/f2fs/acl.h

@ -37,11 +37,10 @@ struct f2fs_acl_header {
#ifdef CONFIG_F2FS_FS_POSIX_ACL
extern struct posix_acl *f2fs_get_acl(struct inode *, int);
extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
extern int f2fs_set_acl(struct inode *, struct posix_acl *, int);
extern int f2fs_init_acl(struct inode *, struct inode *, struct page *,
struct page *);
#else
#define f2fs_check_acl NULL
#define f2fs_get_acl NULL
#define f2fs_set_acl NULL

fs/f2fs/checkpoint.c

@ -26,6 +26,14 @@
static struct kmem_cache *ino_entry_slab;
struct kmem_cache *inode_entry_slab;
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
set_ckpt_flags(sbi, CP_ERROR_FLAG);
sbi->sb->s_flags |= MS_RDONLY;
if (!end_io)
f2fs_flush_merged_bios(sbi);
}
/*
* We guarantee no failure on the returned page.
*/
@ -34,13 +42,14 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
struct address_space *mapping = META_MAPPING(sbi);
struct page *page = NULL;
repeat:
page = grab_cache_page(mapping, index);
page = f2fs_grab_cache_page(mapping, index, false);
if (!page) {
cond_resched();
goto repeat;
}
f2fs_wait_on_page_writeback(page, META);
SetPageUptodate(page);
f2fs_wait_on_page_writeback(page, META, true);
if (!PageUptodate(page))
SetPageUptodate(page);
return page;
}
@ -56,14 +65,15 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
.sbi = sbi,
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO,
.blk_addr = index,
.old_blkaddr = index,
.new_blkaddr = index,
.encrypted_page = NULL,
};
if (unlikely(!is_meta))
fio.rw &= ~REQ_META;
repeat:
page = grab_cache_page(mapping, index);
page = f2fs_grab_cache_page(mapping, index, false);
if (!page) {
cond_resched();
goto repeat;
@ -90,7 +100,7 @@ repeat:
* meta page.
*/
if (unlikely(!PageUptodate(page)))
f2fs_stop_checkpoint(sbi);
f2fs_stop_checkpoint(sbi, false);
out:
return page;
}
@ -143,7 +153,6 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync)
{
block_t prev_blk_addr = 0;
struct page *page;
block_t blkno = start;
struct f2fs_io_info fio = {
@ -152,10 +161,12 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
.rw = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : READA,
.encrypted_page = NULL,
};
struct blk_plug plug;
if (unlikely(type == META_POR))
fio.rw &= ~REQ_META;
blk_start_plug(&plug);
for (; nrpages-- > 0; blkno++) {
if (!is_valid_blkaddr(sbi, blkno, type))
@ -167,27 +178,25 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
blkno = 0;
/* get nat block addr */
fio.blk_addr = current_nat_addr(sbi,
fio.new_blkaddr = current_nat_addr(sbi,
blkno * NAT_ENTRY_PER_BLOCK);
break;
case META_SIT:
/* get sit block addr */
fio.blk_addr = current_sit_addr(sbi,
fio.new_blkaddr = current_sit_addr(sbi,
blkno * SIT_ENTRY_PER_BLOCK);
if (blkno != start && prev_blk_addr + 1 != fio.blk_addr)
goto out;
prev_blk_addr = fio.blk_addr;
break;
case META_SSA:
case META_CP:
case META_POR:
fio.blk_addr = blkno;
fio.new_blkaddr = blkno;
break;
default:
BUG();
}
page = grab_cache_page(META_MAPPING(sbi), fio.blk_addr);
page = f2fs_grab_cache_page(META_MAPPING(sbi),
fio.new_blkaddr, false);
if (!page)
continue;
if (PageUptodate(page)) {
@ -196,11 +205,13 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
}
fio.page = page;
fio.old_blkaddr = fio.new_blkaddr;
f2fs_submit_page_mbio(&fio);
f2fs_put_page(page, 0);
}
out:
f2fs_submit_merged_bio(sbi, META, READ);
blk_finish_plug(&plug);
return blkno - start;
}
@ -210,7 +221,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
bool readahead = false;
page = find_get_page(META_MAPPING(sbi), index);
if (!page || (page && !PageUptodate(page)))
if (!page || !PageUptodate(page))
readahead = true;
f2fs_put_page(page, 0);
@ -232,13 +243,17 @@ static int f2fs_write_meta_page(struct page *page,
if (unlikely(f2fs_cp_error(sbi)))
goto redirty_out;
f2fs_wait_on_page_writeback(page, META);
write_meta_page(sbi, page);
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
if (wbc->for_reclaim)
f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);
unlock_page(page);
if (unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, META, WRITE);
return 0;
redirty_out:
@ -252,13 +267,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
long diff, written;
trace_f2fs_writepages(mapping->host, wbc, META);
/* collect a number of dirty meta pages and write together */
if (wbc->for_kupdate ||
get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
goto skip_write;
trace_f2fs_writepages(mapping->host, wbc, META);
/* if mounting is failed, skip writing node pages */
mutex_lock(&sbi->cp_mutex);
diff = nr_pages_to_write(sbi, META, wbc);
@ -269,6 +284,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
skip_write:
wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
trace_f2fs_writepages(mapping->host, wbc, META);
return 0;
}
@ -276,15 +292,18 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write)
{
struct address_space *mapping = META_MAPPING(sbi);
pgoff_t index = 0, end = LONG_MAX, prev = LONG_MAX;
pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
struct pagevec pvec;
long nwritten = 0;
struct writeback_control wbc = {
.for_reclaim = 0,
};
struct blk_plug plug;
pagevec_init(&pvec, 0);
blk_start_plug(&plug);
while (index <= end) {
int i, nr_pages;
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
@ -296,7 +315,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
if (prev == LONG_MAX)
if (prev == ULONG_MAX)
prev = page->index - 1;
if (nr_to_write != LONG_MAX && page->index != prev + 1) {
pagevec_release(&pvec);
@ -315,6 +334,9 @@ continue_unlock:
goto continue_unlock;
}
f2fs_wait_on_page_writeback(page, META, true);
BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@ -334,6 +356,8 @@ stop:
if (nwritten)
f2fs_submit_merged_bio(sbi, type, WRITE);
blk_finish_plug(&plug);
return nwritten;
}
@ -341,9 +365,10 @@ static int f2fs_set_meta_page_dirty(struct page *page)
{
trace_f2fs_set_page_dirty(page, META);
SetPageUptodate(page);
if (!PageUptodate(page))
SetPageUptodate(page);
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
f2fs_set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
SetPagePrivate(page);
f2fs_trace_pid(page);
@ -358,6 +383,9 @@ const struct address_space_operations f2fs_meta_aops = {
.set_page_dirty = f2fs_set_meta_page_dirty,
.invalidatepage = f2fs_invalidate_page,
.releasepage = f2fs_release_page,
#ifdef CONFIG_MIGRATION
.migratepage = f2fs_migrate_page,
#endif
};
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@ -410,13 +438,13 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
spin_unlock(&im->ino_lock);
}
void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* add new dirty ino entry into list */
__add_ino_entry(sbi, ino, type);
}
void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* remove dirty ino entry from list */
__remove_ino_entry(sbi, ino, type);
@ -434,12 +462,12 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
return e ? true : false;
}
void release_dirty_inode(struct f2fs_sb_info *sbi)
void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
{
struct ino_entry *e, *tmp;
int i;
for (i = APPEND_INO; i <= UPDATE_INO; i++) {
for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) {
struct inode_management *im = &sbi->im[i];
spin_lock(&im->ino_lock);
@ -459,6 +487,13 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
int err = 0;
spin_lock(&im->ino_lock);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ORPHAN)) {
spin_unlock(&im->ino_lock);
return -ENOSPC;
}
#endif
if (unlikely(im->ino_num >= sbi->max_orphans))
err = -ENOSPC;
else
@ -478,10 +513,11 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
spin_unlock(&im->ino_lock);
}
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
void add_orphan_inode(struct inode *inode)
{
/* add new orphan ino entry into list */
__add_ino_entry(sbi, ino, ORPHAN_INO);
__add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO);
update_inode_page(inode);
}
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@ -493,8 +529,20 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct inode *inode;
struct node_info ni;
int err = acquire_orphan_inode(sbi);
inode = f2fs_iget(sbi->sb, ino);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: orphan failed (ino=%x), run fsck to fix.",
__func__, ino);
return err;
}
__add_ino_entry(sbi, ino, ORPHAN_INO);
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode)) {
/*
* there should be a bug that we can't find the entry
@ -508,6 +556,18 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
/* truncate all the data during iput */
iput(inode);
get_node_info(sbi, ino, &ni);
/* ENOMEM was fully retried in f2fs_evict_inode. */
if (ni.blk_addr != NULL_ADDR) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: orphan failed (ino=%x), run fsck to fix.",
__func__, ino);
return -EIO;
}
__remove_ino_entry(sbi, ino, ORPHAN_INO);
return 0;
}
@ -516,7 +576,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
block_t start_blk, orphan_blocks, i, j;
int err;
if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
@ -540,7 +600,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
f2fs_put_page(page, 1);
}
/* clear Orphan Flag */
clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
return 0;
}
@ -601,45 +661,55 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
}
}
static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
struct f2fs_checkpoint **cp_block, struct page **cp_page,
unsigned long long *version)
{
unsigned long blk_size = sbi->blocksize;
size_t crc_offset = 0;
__u32 crc = 0;
*cp_page = get_meta_page(sbi, cp_addr);
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
if (crc_offset >= blk_size) {
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid crc_offset: %zu", crc_offset);
return -EINVAL;
}
crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block
+ crc_offset)));
if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
return -EINVAL;
}
*version = cur_cp_version(*cp_block);
return 0;
}
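/*
 * Illustrative sketch (not part of this patch): the shape of the check that
 * the new get_checkpoint_version() helper factors out. A checkpoint block
 * stores the byte offset of its own CRC, and the CRC covers everything
 * before that offset. The struct layout and the crc32() helper below are
 * simplified assumptions, not the real f2fs definitions.
 */
#include <stdint.h>
#include <stddef.h>
#include <string.h>

uint32_t crc32(const void *buf, size_t len);	/* assumed CRC helper */

struct cp_header {				/* simplified stand-in */
	uint64_t checkpoint_ver;
	uint32_t checksum_offset;		/* where the CRC lives in the block */
};

static int cp_block_valid(const void *blk, size_t blk_size)
{
	const struct cp_header *cp = blk;
	uint32_t crc_offset = cp->checksum_offset;
	uint32_t stored;

	if (crc_offset >= blk_size)		/* offset must lie inside the block */
		return 0;
	memcpy(&stored, (const char *)blk + crc_offset, sizeof(stored));
	return stored == crc32(blk, crc_offset);	/* CRC covers the leading bytes */
}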
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
block_t cp_addr, unsigned long long *version)
{
struct page *cp_page_1, *cp_page_2 = NULL;
unsigned long blk_size = sbi->blocksize;
struct f2fs_checkpoint *cp_block;
struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
struct f2fs_checkpoint *cp_block = NULL;
unsigned long long cur_version = 0, pre_version = 0;
size_t crc_offset;
__u32 crc = 0;
int err;
/* Read the 1st cp block in this CP pack */
cp_page_1 = get_meta_page(sbi, cp_addr);
/* get the version number */
cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
crc_offset = le32_to_cpu(cp_block->checksum_offset);
if (crc_offset >= blk_size)
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_1, version);
if (err)
goto invalid_cp1;
pre_version = *version;
crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
if (!f2fs_crc_valid(crc, cp_block, crc_offset))
goto invalid_cp1;
pre_version = cur_cp_version(cp_block);
/* Read the 2nd cp block in this CP pack */
cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
cp_page_2 = get_meta_page(sbi, cp_addr);
cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
crc_offset = le32_to_cpu(cp_block->checksum_offset);
if (crc_offset >= blk_size)
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_2, version);
if (err)
goto invalid_cp2;
crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
if (!f2fs_crc_valid(crc, cp_block, crc_offset))
goto invalid_cp2;
cur_version = cur_cp_version(cp_block);
cur_version = *version;
if (cur_version == pre_version) {
*version = cur_version;
@ -696,6 +766,10 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
/* Sanity checking of checkpoint */
if (sanity_check_ckpt(sbi))
goto fail_no_cp;
if (cp_blks <= 1)
goto done;
@ -722,118 +796,94 @@ fail_no_cp:
return -EINVAL;
}
static int __add_dirty_inode(struct inode *inode, struct inode_entry *new)
static void __add_dirty_inode(struct inode *inode, enum inode_type type)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
return -EEXIST;
if (is_inode_flag_set(inode, flag))
return;
set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
F2FS_I(inode)->dirty_dir = new;
list_add_tail(&new->list, &sbi->dir_inode_list);
stat_inc_dirty_dir(sbi);
return 0;
set_inode_flag(inode, flag);
list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
stat_inc_dirty_inode(sbi, type);
}
static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
{
int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag))
return;
list_del_init(&F2FS_I(inode)->dirty_list);
clear_inode_flag(inode, flag);
stat_dec_dirty_inode(F2FS_I_SB(inode), type);
}
void update_dirty_page(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inode_entry *new;
int ret = 0;
enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
!S_ISLNK(inode->i_mode))
return;
if (!S_ISDIR(inode->i_mode)) {
inode_inc_dirty_pages(inode);
goto out;
}
new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
new->inode = inode;
INIT_LIST_HEAD(&new->list);
spin_lock(&sbi->dir_inode_lock);
ret = __add_dirty_inode(inode, new);
spin_lock(&sbi->inode_lock[type]);
if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH))
__add_dirty_inode(inode, type);
inode_inc_dirty_pages(inode);
spin_unlock(&sbi->dir_inode_lock);
spin_unlock(&sbi->inode_lock[type]);
if (ret)
kmem_cache_free(inode_entry_slab, new);
out:
SetPagePrivate(page);
f2fs_trace_pid(page);
}
void add_dirty_dir_inode(struct inode *inode)
void remove_dirty_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inode_entry *new =
f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
int ret = 0;
enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
new->inode = inode;
INIT_LIST_HEAD(&new->list);
spin_lock(&sbi->dir_inode_lock);
ret = __add_dirty_inode(inode, new);
spin_unlock(&sbi->dir_inode_lock);
if (ret)
kmem_cache_free(inode_entry_slab, new);
}
void remove_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inode_entry *entry;
if (!S_ISDIR(inode->i_mode))
if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
!S_ISLNK(inode->i_mode))
return;
spin_lock(&sbi->dir_inode_lock);
if (get_dirty_pages(inode) ||
!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
spin_unlock(&sbi->dir_inode_lock);
if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH))
return;
}
entry = F2FS_I(inode)->dirty_dir;
list_del(&entry->list);
F2FS_I(inode)->dirty_dir = NULL;
clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
stat_dec_dirty_dir(sbi);
spin_unlock(&sbi->dir_inode_lock);
kmem_cache_free(inode_entry_slab, entry);
/* Only from the recovery routine */
if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
iput(inode);
}
spin_lock(&sbi->inode_lock[type]);
__remove_dirty_inode(inode, type);
spin_unlock(&sbi->inode_lock[type]);
}
void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
{
struct list_head *head;
struct inode_entry *entry;
struct inode *inode;
struct f2fs_inode_info *fi;
bool is_dir = (type == DIR_INODE);
trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
get_pages(sbi, is_dir ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
if (unlikely(f2fs_cp_error(sbi)))
return;
return -EIO;
spin_lock(&sbi->dir_inode_lock);
spin_lock(&sbi->inode_lock[type]);
head = &sbi->dir_inode_list;
head = &sbi->inode_list[type];
if (list_empty(head)) {
spin_unlock(&sbi->dir_inode_lock);
return;
spin_unlock(&sbi->inode_lock[type]);
trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
get_pages(sbi, is_dir ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
return 0;
}
entry = list_entry(head->next, struct inode_entry, list);
inode = igrab(entry->inode);
spin_unlock(&sbi->dir_inode_lock);
fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[type]);
if (inode) {
filemap_fdatawrite(inode->i_mapping);
iput(inode);
@ -848,6 +898,34 @@ retry:
goto retry;
}
int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
{
struct list_head *head = &sbi->inode_list[DIRTY_META];
struct inode *inode;
struct f2fs_inode_info *fi;
s64 total = get_pages(sbi, F2FS_DIRTY_IMETA);
while (total--) {
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
spin_lock(&sbi->inode_lock[DIRTY_META]);
if (list_empty(head)) {
spin_unlock(&sbi->inode_lock[DIRTY_META]);
return 0;
}
fi = list_entry(head->next, struct f2fs_inode_info,
gdirty_list);
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[DIRTY_META]);
if (inode) {
update_inode_page(inode);
iput(inode);
}
};
return 0;
}
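/*
 * Illustrative sketch (not part of this patch): the loop shape shared by
 * sync_dirty_inodes() and f2fs_sync_inode_meta() above. The list lock is
 * held only long enough to pick and pin the next entry; the slow write
 * happens with the lock dropped. All names here are userspace stand-ins,
 * not kernel API; the kernel version pins with igrab()/iput() and relies on
 * the writeback path to delist the inode rather than detaching it here.
 */
#include <pthread.h>
#include <stddef.h>

struct item {
	struct item *next;
	int pinned;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *dirty_head;

static void writeback(struct item *it) { (void)it; /* slow work, lock not held */ }

static void sync_dirty_items(void)
{
	for (;;) {
		struct item *it;

		pthread_mutex_lock(&list_lock);
		it = dirty_head;
		if (!it) {
			pthread_mutex_unlock(&list_lock);
			return;
		}
		dirty_head = it->next;	/* simplified: detach while still locked */
		it->pinned = 1;		/* plays the role of igrab() */
		pthread_mutex_unlock(&list_lock);

		writeback(it);		/* expensive I/O outside the lock */
		it->pinned = 0;		/* plays the role of iput() */
	}
}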
/*
* Freeze all the FS-operations for checkpoint.
*/
@ -868,11 +946,17 @@ retry_flush_dents:
/* write all the dirty dentry pages */
if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
f2fs_unlock_all(sbi);
sync_dirty_dir_inodes(sbi);
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
err = sync_dirty_inodes(sbi, DIR_INODE);
if (err)
goto out;
goto retry_flush_dents;
}
if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
goto out;
}
goto retry_flush_dents;
}
@ -885,10 +969,9 @@ retry_flush_nodes:
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
up_write(&sbi->node_write);
sync_node_pages(sbi, 0, &wbc);
if (unlikely(f2fs_cp_error(sbi))) {
err = sync_node_pages(sbi, &wbc);
if (err) {
f2fs_unlock_all(sbi);
err = -EIO;
goto out;
}
goto retry_flush_nodes;
@ -901,6 +984,8 @@ out:
static void unblock_operations(struct f2fs_sb_info *sbi)
{
up_write(&sbi->node_write);
build_free_nids(sbi);
f2fs_unlock_all(sbi);
}
@ -911,18 +996,48 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
for (;;) {
prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
if (!get_pages(sbi, F2FS_WRITEBACK))
if (!atomic_read(&sbi->nr_wb_bios))
break;
io_schedule();
io_schedule_timeout(5*HZ);
}
finish_wait(&sbi->cp_wait, &wait);
}
static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
spin_lock(&sbi->cp_lock);
if (cpc->reason == CP_UMOUNT)
__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
__clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
if (cpc->reason == CP_FASTBOOT)
__set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
else
__clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
if (orphan_num)
__set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
else
__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
/* set this flag to activate crc|cp_ver for recovery */
__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
spin_unlock(&sbi->cp_lock);
}
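/*
 * Illustrative sketch (not part of this patch): the pattern update_ckpt_flags()
 * introduces, where every read-modify-write of the checkpoint flag word is
 * done under one lock so concurrent updaters cannot lose each other's bits.
 * The flag values and names below are made up for the example.
 */
#include <pthread.h>
#include <stdint.h>

#define EX_UMOUNT_FLAG   0x1u
#define EX_FASTBOOT_FLAG 0x2u

enum ex_reason { EX_CP_SYNC, EX_CP_UMOUNT, EX_CP_FASTBOOT };

static pthread_mutex_t flag_lock = PTHREAD_MUTEX_INITIALIZER;
static uint32_t ex_ckpt_flags;

static void ex_update_flags(enum ex_reason reason)
{
	pthread_mutex_lock(&flag_lock);
	if (reason == EX_CP_UMOUNT)
		ex_ckpt_flags |= EX_UMOUNT_FLAG;
	else
		ex_ckpt_flags &= ~EX_UMOUNT_FLAG;
	if (reason == EX_CP_FASTBOOT)
		ex_ckpt_flags |= EX_FASTBOOT_FLAG;
	else
		ex_ckpt_flags &= ~EX_FASTBOOT_FLAG;
	pthread_mutex_unlock(&flag_lock);
}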
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
nid_t last_nid = nm_i->next_scan_nid;
@ -931,21 +1046,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
__u32 crc32 = 0;
int i;
int cp_payload_blks = __cp_payload(sbi);
block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
bool invalidate = false;
/*
* This avoids to conduct wrong roll-forward operations and uses
* metapages, so should be called prior to sync_meta_pages below.
*/
if (discard_next_dnode(sbi, discard_blk))
invalidate = true;
struct super_block *sb = sbi->sb;
struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
u64 kbytes_written;
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
sync_meta_pages(sbi, META, LONG_MAX);
if (unlikely(f2fs_cp_error(sbi)))
return;
return -EIO;
}
next_free_nid(sbi, &last_nid);
@ -980,10 +1089,12 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* 2 cp + n data seg summary + orphan inode blocks */
data_sum_blocks = npages_for_summary_flush(sbi, false);
spin_lock(&sbi->cp_lock);
if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
else
clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
__clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
spin_unlock(&sbi->cp_lock);
orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
@ -998,29 +1109,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
cp_payload_blks + data_sum_blocks +
orphan_blocks);
if (cpc->reason == CP_UMOUNT)
set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
if (cpc->reason == CP_FASTBOOT)
set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
else
clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
if (orphan_num)
set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
else
clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
set_ckpt_flags(ckpt, CP_FSCK_FLAG);
/* update ckpt flag for checkpoint */
update_ckpt_flags(sbi, cpc);
/* update SIT/NAT bitmap */
get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
*((__le32 *)((unsigned char *)ckpt +
le32_to_cpu(ckpt->checksum_offset)))
= cpu_to_le32(crc32);
@ -1030,7 +1126,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
return;
return -EIO;
/* write out checkpoint buffer at block 0 */
update_meta_page(sbi, ckpt, start_blk++);
@ -1046,6 +1142,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
write_data_summaries(sbi, start_blk);
start_blk += data_sum_blocks;
/* Record write statistics in the hot node summary */
kbytes_written = sbi->kbytes_written;
if (sb->s_bdev->bd_part)
kbytes_written += BD_PART_WRITTEN(sbi);
seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
if (__remain_node_summaries(cpc->reason)) {
write_node_summaries(sbi, start_blk);
start_blk += NR_CURSEG_NODE_TYPE;
@ -1058,14 +1162,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
return;
return -EIO;
filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);
/* update user_block_counts */
sbi->last_valid_block_count = sbi->total_valid_block_count;
sbi->alloc_valid_block_count = 0;
percpu_counter_set(&sbi->alloc_valid_block_count, 0);
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
@ -1073,30 +1177,36 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* wait for previous submitted meta pages writeback */
wait_on_all_pages_writeback(sbi);
/*
* invalidate meta page which is used temporarily for zeroing out
* block at the end of warm node chain.
*/
if (invalidate)
invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
discard_blk);
release_dirty_inode(sbi);
release_ino_entry(sbi, false);
if (unlikely(f2fs_cp_error(sbi)))
return;
return -EIO;
clear_prefree_segments(sbi, cpc);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
clear_sbi_flag(sbi, SBI_NEED_CP);
/*
* redirty superblock if metadata like node page or inode cache is
* updated during writing checkpoint.
*/
if (get_pages(sbi, F2FS_DIRTY_NODES) ||
get_pages(sbi, F2FS_DIRTY_IMETA))
set_sbi_flag(sbi, SBI_IS_DIRTY);
f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));
return 0;
}
/*
* We guarantee that this checkpoint procedure will not fail.
*/
void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned long long ckpt_ver;
int err = 0;
mutex_lock(&sbi->cp_mutex);
@ -1104,21 +1214,35 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
goto out;
if (unlikely(f2fs_cp_error(sbi)))
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
goto out;
if (f2fs_readonly(sbi->sb))
}
if (f2fs_readonly(sbi->sb)) {
err = -EROFS;
goto out;
}
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
if (block_operations(sbi))
err = block_operations(sbi);
if (err)
goto out;
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
f2fs_submit_merged_bio(sbi, DATA, WRITE);
f2fs_submit_merged_bio(sbi, NODE, WRITE);
f2fs_submit_merged_bio(sbi, META, WRITE);
f2fs_flush_merged_bios(sbi);
/* this is the case of multiple fstrims without any changes */
if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) {
f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt);
f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries);
f2fs_bug_on(sbi, prefree_segments(sbi));
flush_sit_entries(sbi, cpc);
clear_prefree_segments(sbi, cpc);
unblock_operations(sbi);
goto out;
}
/*
* update checkpoint pack index
@ -1133,7 +1257,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
flush_sit_entries(sbi, cpc);
/* unlock all the fs_lock[] in do_checkpoint() */
do_checkpoint(sbi, cpc);
err = do_checkpoint(sbi, cpc);
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
@ -1143,10 +1267,11 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
"checkpoint: version = %llx", ckpt_ver);
/* do checkpoint periodically */
sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval);
f2fs_update_time(sbi, CP_TIME);
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
mutex_unlock(&sbi->cp_mutex);
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
return err;
}
void init_ino_entry_info(struct f2fs_sb_info *sbi)

View file

@ -1,491 +0,0 @@
/*
* linux/fs/f2fs/crypto.c
*
* Copied from linux/fs/ext4/crypto.c
*
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2015, Motorola Mobility
*
* This contains encryption functions for f2fs
*
* Written by Michael Halcrow, 2014.
*
* Filename encryption additions
* Uday Savagaonkar, 2014
* Encryption policy handling additions
* Ildar Muslukhov, 2014
* Remove ext4_encrypted_zeroout(),
* add f2fs_restore_and_release_control_page()
* Jaegeuk Kim, 2015.
*
* This has not yet undergone a rigorous security audit.
*
* The usage of AES-XTS should conform to recommendations in NIST
* Special Publication 800-38E and IEEE P1619/D16.
*/
#include <crypto/hash.h>
#include <crypto/sha.h>
#include <keys/user-type.h>
#include <keys/encrypted-type.h>
#include <linux/crypto.h>
#include <linux/ecryptfs.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/key.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/scatterlist.h>
#include <linux/spinlock_types.h>
#include <linux/f2fs_fs.h>
#include <linux/ratelimit.h>
#include <linux/bio.h>
#include "f2fs.h"
#include "xattr.h"
/* Encryption added and removed here! (L: */
static unsigned int num_prealloc_crypto_pages = 32;
static unsigned int num_prealloc_crypto_ctxs = 128;
module_param(num_prealloc_crypto_pages, uint, 0444);
MODULE_PARM_DESC(num_prealloc_crypto_pages,
"Number of crypto pages to preallocate");
module_param(num_prealloc_crypto_ctxs, uint, 0444);
MODULE_PARM_DESC(num_prealloc_crypto_ctxs,
"Number of crypto contexts to preallocate");
static mempool_t *f2fs_bounce_page_pool;
static LIST_HEAD(f2fs_free_crypto_ctxs);
static DEFINE_SPINLOCK(f2fs_crypto_ctx_lock);
static struct workqueue_struct *f2fs_read_workqueue;
static DEFINE_MUTEX(crypto_init);
static struct kmem_cache *f2fs_crypto_ctx_cachep;
struct kmem_cache *f2fs_crypt_info_cachep;
/**
* f2fs_release_crypto_ctx() - Releases an encryption context
* @ctx: The encryption context to release.
*
* If the encryption context was allocated from the pre-allocated pool, returns
* it to that pool. Else, frees it.
*
* If there's a bounce page in the context, this frees that.
*/
void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx)
{
unsigned long flags;
if (ctx->flags & F2FS_WRITE_PATH_FL && ctx->w.bounce_page) {
mempool_free(ctx->w.bounce_page, f2fs_bounce_page_pool);
ctx->w.bounce_page = NULL;
}
ctx->w.control_page = NULL;
if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) {
kmem_cache_free(f2fs_crypto_ctx_cachep, ctx);
} else {
spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags);
list_add(&ctx->free_list, &f2fs_free_crypto_ctxs);
spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags);
}
}
/**
* f2fs_get_crypto_ctx() - Gets an encryption context
* @inode: The inode for which we are doing the crypto
*
* Allocates and initializes an encryption context.
*
* Return: An allocated and initialized encryption context on success; error
* value or NULL otherwise.
*/
struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode)
{
struct f2fs_crypto_ctx *ctx = NULL;
unsigned long flags;
struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
if (ci == NULL)
return ERR_PTR(-ENOKEY);
/*
* We first try getting the ctx from a free list because in
* the common case the ctx will have an allocated and
* initialized crypto tfm, so it's probably a worthwhile
* optimization. For the bounce page, we first try getting it
* from the kernel allocator because that's just about as fast
* as getting it from a list and because a cache of free pages
* should generally be a "last resort" option for a filesystem
* to be able to do its job.
*/
spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags);
ctx = list_first_entry_or_null(&f2fs_free_crypto_ctxs,
struct f2fs_crypto_ctx, free_list);
if (ctx)
list_del(&ctx->free_list);
spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags);
if (!ctx) {
ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_NOFS);
if (!ctx)
return ERR_PTR(-ENOMEM);
ctx->flags |= F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
} else {
ctx->flags &= ~F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
}
ctx->flags &= ~F2FS_WRITE_PATH_FL;
return ctx;
}
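/*
 * Illustrative sketch (not part of this patch): the allocation strategy the
 * comment above describes. Prefer a pre-initialized context from the free
 * list, fall back to the allocator, and remember which path was taken so the
 * release side can either return it to the list or free it (the role played
 * by F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL). Userspace stand-ins only.
 */
#include <pthread.h>
#include <stdlib.h>

struct ex_ctx {
	struct ex_ctx *next;
	int needs_free;
};

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct ex_ctx *pool;

static struct ex_ctx *ex_get_ctx(void)
{
	struct ex_ctx *c;

	pthread_mutex_lock(&pool_lock);
	c = pool;
	if (c)
		pool = c->next;
	pthread_mutex_unlock(&pool_lock);

	if (c) {
		c->needs_free = 0;	/* goes back to the pool on release */
		return c;
	}
	c = calloc(1, sizeof(*c));
	if (c)
		c->needs_free = 1;	/* allocator fallback, freed on release */
	return c;
}

static void ex_put_ctx(struct ex_ctx *c)
{
	if (c->needs_free) {
		free(c);
		return;
	}
	pthread_mutex_lock(&pool_lock);
	c->next = pool;
	pool = c;
	pthread_mutex_unlock(&pool_lock);
}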
/*
* Call f2fs_decrypt on every single page, reusing the encryption
* context.
*/
static void completion_pages(struct work_struct *work)
{
struct f2fs_crypto_ctx *ctx =
container_of(work, struct f2fs_crypto_ctx, r.work);
struct bio *bio = ctx->r.bio;
struct bio_vec *bv;
int i;
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
int ret = f2fs_decrypt(ctx, page);
if (ret) {
WARN_ON_ONCE(1);
SetPageError(page);
} else
SetPageUptodate(page);
unlock_page(page);
}
f2fs_release_crypto_ctx(ctx);
bio_put(bio);
}
void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *ctx, struct bio *bio)
{
INIT_WORK(&ctx->r.work, completion_pages);
ctx->r.bio = bio;
queue_work(f2fs_read_workqueue, &ctx->r.work);
}
static void f2fs_crypto_destroy(void)
{
struct f2fs_crypto_ctx *pos, *n;
list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list)
kmem_cache_free(f2fs_crypto_ctx_cachep, pos);
INIT_LIST_HEAD(&f2fs_free_crypto_ctxs);
if (f2fs_bounce_page_pool)
mempool_destroy(f2fs_bounce_page_pool);
f2fs_bounce_page_pool = NULL;
}
/**
* f2fs_crypto_initialize() - Set up for f2fs encryption.
*
* We only call this when we start accessing encrypted files, since it
* results in memory getting allocated that wouldn't otherwise be used.
*
* Return: Zero on success, non-zero otherwise.
*/
int f2fs_crypto_initialize(void)
{
int i, res = -ENOMEM;
if (f2fs_bounce_page_pool)
return 0;
mutex_lock(&crypto_init);
if (f2fs_bounce_page_pool)
goto already_initialized;
for (i = 0; i < num_prealloc_crypto_ctxs; i++) {
struct f2fs_crypto_ctx *ctx;
ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_KERNEL);
if (!ctx)
goto fail;
list_add(&ctx->free_list, &f2fs_free_crypto_ctxs);
}
/* must be allocated at the last step to avoid race condition above */
f2fs_bounce_page_pool =
mempool_create_page_pool(num_prealloc_crypto_pages, 0);
if (!f2fs_bounce_page_pool)
goto fail;
already_initialized:
mutex_unlock(&crypto_init);
return 0;
fail:
f2fs_crypto_destroy();
mutex_unlock(&crypto_init);
return res;
}
/**
* f2fs_exit_crypto() - Shutdown the f2fs encryption system
*/
void f2fs_exit_crypto(void)
{
f2fs_crypto_destroy();
if (f2fs_read_workqueue)
destroy_workqueue(f2fs_read_workqueue);
if (f2fs_crypto_ctx_cachep)
kmem_cache_destroy(f2fs_crypto_ctx_cachep);
if (f2fs_crypt_info_cachep)
kmem_cache_destroy(f2fs_crypt_info_cachep);
}
int __init f2fs_init_crypto(void)
{
int res = -ENOMEM;
f2fs_read_workqueue = alloc_workqueue("f2fs_crypto", WQ_HIGHPRI, 0);
if (!f2fs_read_workqueue)
goto fail;
f2fs_crypto_ctx_cachep = KMEM_CACHE(f2fs_crypto_ctx,
SLAB_RECLAIM_ACCOUNT);
if (!f2fs_crypto_ctx_cachep)
goto fail;
f2fs_crypt_info_cachep = KMEM_CACHE(f2fs_crypt_info,
SLAB_RECLAIM_ACCOUNT);
if (!f2fs_crypt_info_cachep)
goto fail;
return 0;
fail:
f2fs_exit_crypto();
return res;
}
void f2fs_restore_and_release_control_page(struct page **page)
{
struct f2fs_crypto_ctx *ctx;
struct page *bounce_page;
/* The bounce data pages are unmapped. */
if ((*page)->mapping)
return;
/* The bounce data page is unmapped. */
bounce_page = *page;
ctx = (struct f2fs_crypto_ctx *)page_private(bounce_page);
/* restore control page */
*page = ctx->w.control_page;
f2fs_restore_control_page(bounce_page);
}
void f2fs_restore_control_page(struct page *data_page)
{
struct f2fs_crypto_ctx *ctx =
(struct f2fs_crypto_ctx *)page_private(data_page);
set_page_private(data_page, (unsigned long)NULL);
ClearPagePrivate(data_page);
unlock_page(data_page);
f2fs_release_crypto_ctx(ctx);
}
/**
* f2fs_crypt_complete() - The completion callback for page encryption
* @req: The asynchronous encryption request context
* @res: The result of the encryption operation
*/
static void f2fs_crypt_complete(struct crypto_async_request *req, int res)
{
struct f2fs_completion_result *ecr = req->data;
if (res == -EINPROGRESS)
return;
ecr->res = res;
complete(&ecr->completion);
}
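/*
 * Illustrative sketch (not part of this patch): a userspace analogue of this
 * callback/wait pairing. f2fs_page_crypto() below submits the cipher request
 * and, when the request returns -EINPROGRESS or -EBUSY, blocks until this
 * callback fires and records the result. The pthread-based types are
 * stand-ins for the kernel's struct completion.
 */
#include <pthread.h>

struct ex_result {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
	int res;
};

static void ex_complete(struct ex_result *r, int res)	/* callback side */
{
	pthread_mutex_lock(&r->lock);
	r->res = res;
	r->done = 1;
	pthread_cond_signal(&r->cond);
	pthread_mutex_unlock(&r->lock);
}

static int ex_wait(struct ex_result *r)			/* submitter side */
{
	pthread_mutex_lock(&r->lock);
	while (!r->done)
		pthread_cond_wait(&r->cond, &r->lock);
	pthread_mutex_unlock(&r->lock);
	return r->res;
}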
typedef enum {
F2FS_DECRYPT = 0,
F2FS_ENCRYPT,
} f2fs_direction_t;
static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx,
struct inode *inode,
f2fs_direction_t rw,
pgoff_t index,
struct page *src_page,
struct page *dest_page)
{
u8 xts_tweak[F2FS_XTS_TWEAK_SIZE];
struct ablkcipher_request *req = NULL;
DECLARE_F2FS_COMPLETION_RESULT(ecr);
struct scatterlist dst, src;
struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
struct crypto_ablkcipher *tfm = ci->ci_ctfm;
int res = 0;
req = ablkcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(KERN_ERR
"%s: crypto_request_alloc() failed\n",
__func__);
return -ENOMEM;
}
ablkcipher_request_set_callback(
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
f2fs_crypt_complete, &ecr);
BUILD_BUG_ON(F2FS_XTS_TWEAK_SIZE < sizeof(index));
memcpy(xts_tweak, &index, sizeof(index));
memset(&xts_tweak[sizeof(index)], 0,
F2FS_XTS_TWEAK_SIZE - sizeof(index));
sg_init_table(&dst, 1);
sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0);
sg_init_table(&src, 1);
sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0);
ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
xts_tweak);
if (rw == F2FS_DECRYPT)
res = crypto_ablkcipher_decrypt(req);
else
res = crypto_ablkcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
BUG_ON(req->base.data != &ecr);
wait_for_completion(&ecr.completion);
res = ecr.res;
}
ablkcipher_request_free(req);
if (res) {
printk_ratelimited(KERN_ERR
"%s: crypto_ablkcipher_encrypt() returned %d\n",
__func__, res);
return res;
}
return 0;
}
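/*
 * Illustrative sketch (not part of this patch): the per-page XTS tweak built
 * above is simply the page index copied into the low bytes with the remainder
 * zeroed. The constant and helper names here are examples, not the f2fs
 * definitions.
 */
#include <stdint.h>
#include <string.h>

#define EX_XTS_TWEAK_SIZE 16

static void ex_build_tweak(uint8_t tweak[EX_XTS_TWEAK_SIZE], uint64_t page_index)
{
	memset(tweak, 0, EX_XTS_TWEAK_SIZE);
	memcpy(tweak, &page_index, sizeof(page_index));	/* native byte order, as above */
}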
static struct page *alloc_bounce_page(struct f2fs_crypto_ctx *ctx)
{
ctx->w.bounce_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOWAIT);
if (ctx->w.bounce_page == NULL)
return ERR_PTR(-ENOMEM);
ctx->flags |= F2FS_WRITE_PATH_FL;
return ctx->w.bounce_page;
}
/**
* f2fs_encrypt() - Encrypts a page
* @inode: The inode for which the encryption should take place
* @plaintext_page: The page to encrypt. Must be locked.
*
* Allocates a ciphertext page and encrypts plaintext_page into it using the ctx
* encryption context.
*
* Called on the page write path. The caller must call
* f2fs_restore_control_page() on the returned ciphertext page to
* release the bounce buffer and the encryption context.
*
* Return: An allocated page with the encrypted content on success. Else, an
* error value or NULL.
*/
struct page *f2fs_encrypt(struct inode *inode,
struct page *plaintext_page)
{
struct f2fs_crypto_ctx *ctx;
struct page *ciphertext_page = NULL;
int err;
BUG_ON(!PageLocked(plaintext_page));
ctx = f2fs_get_crypto_ctx(inode);
if (IS_ERR(ctx))
return (struct page *)ctx;
/* The encryption operation will require a bounce page. */
ciphertext_page = alloc_bounce_page(ctx);
if (IS_ERR(ciphertext_page))
goto err_out;
ctx->w.control_page = plaintext_page;
err = f2fs_page_crypto(ctx, inode, F2FS_ENCRYPT, plaintext_page->index,
plaintext_page, ciphertext_page);
if (err) {
ciphertext_page = ERR_PTR(err);
goto err_out;
}
SetPagePrivate(ciphertext_page);
set_page_private(ciphertext_page, (unsigned long)ctx);
lock_page(ciphertext_page);
return ciphertext_page;
err_out:
f2fs_release_crypto_ctx(ctx);
return ciphertext_page;
}
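/*
 * Illustrative sketch (not part of this patch): the write-path contract spelled
 * out in the comment above. The plaintext page must be locked, the returned
 * bounce page is what goes to disk, and f2fs_restore_control_page() releases
 * the bounce buffer and context once the write has completed.
 * submit_for_write_and_wait() is a hypothetical placeholder for the real,
 * asynchronous bio submission path.
 */
void submit_for_write_and_wait(struct page *page);	/* hypothetical placeholder */

static int ex_write_encrypted_page(struct inode *inode, struct page *plaintext)
{
	struct page *cipher = f2fs_encrypt(inode, plaintext);

	if (IS_ERR(cipher))
		return PTR_ERR(cipher);

	submit_for_write_and_wait(cipher);	/* write the bounce page to disk */
	f2fs_restore_control_page(cipher);	/* release bounce buffer and ctx */
	return 0;
}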
/**
* f2fs_decrypt() - Decrypts a page in-place
* @ctx: The encryption context.
* @page: The page to decrypt. Must be locked.
*
* Decrypts page in-place using the ctx encryption context.
*
* Called from the read completion callback.
*
* Return: Zero on success, non-zero otherwise.
*/
int f2fs_decrypt(struct f2fs_crypto_ctx *ctx, struct page *page)
{
BUG_ON(!PageLocked(page));
return f2fs_page_crypto(ctx, page->mapping->host,
F2FS_DECRYPT, page->index, page, page);
}
/*
* Convenience function which takes care of allocating and
* deallocating the encryption context
*/
int f2fs_decrypt_one(struct inode *inode, struct page *page)
{
struct f2fs_crypto_ctx *ctx = f2fs_get_crypto_ctx(inode);
int ret;
if (IS_ERR(ctx))
return PTR_ERR(ctx);
ret = f2fs_decrypt(ctx, page);
f2fs_release_crypto_ctx(ctx);
return ret;
}
bool f2fs_valid_contents_enc_mode(uint32_t mode)
{
return (mode == F2FS_ENCRYPTION_MODE_AES_256_XTS);
}
/**
* f2fs_validate_encryption_key_size() - Validate the encryption key size
* @mode: The key mode.
* @size: The key size to validate.
*
* Return: The validated key size for @mode. Zero if invalid.
*/
uint32_t f2fs_validate_encryption_key_size(uint32_t mode, uint32_t size)
{
if (size == f2fs_encryption_key_size(mode))
return size;
return 0;
}

File diff suppressed because it is too large

View file

@ -38,23 +38,30 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree);
si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
si->total_ext = atomic64_read(&sbi->total_hit_ext);
si->ext_tree = sbi->total_ext_tree;
si->ext_tree = atomic_read(&sbi->total_ext_tree);
si->zombie_tree = atomic_read(&sbi->total_zombie_tree);
si->ext_node = atomic_read(&sbi->total_ext_node);
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
si->ndirty_dirs = sbi->n_dirty_dirs;
si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA);
si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->wb_pages = get_pages(sbi, F2FS_WRITEBACK);
si->wb_bios = atomic_read(&sbi->nr_wb_bios);
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
si->valid_count = valid_user_blocks(sbi);
si->discard_blks = discard_blocks(sbi);
si->valid_node_count = valid_node_count(sbi);
si->valid_inode_count = valid_inode_count(sbi);
si->inline_xattr = atomic_read(&sbi->inline_xattr);
si->inline_inode = atomic_read(&sbi->inline_inode);
si->inline_dir = atomic_read(&sbi->inline_dir);
si->orphans = sbi->im[ORPHAN_INO].ino_num;
si->utilization = utilization(sbi);
si->free_segs = free_segments(sbi);
@ -105,7 +112,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
bimodal = 0;
total_vblocks = 0;
blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
hblks_per_sec = blks_per_sec / 2;
for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
@ -140,6 +147,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
si->base_mem += 2 * sizeof(struct f2fs_inode_info);
si->base_mem += sizeof(*sbi->ckpt);
si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE;
/* build sm */
si->base_mem += sizeof(struct f2fs_sm_info);
@ -148,7 +156,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += sizeof(struct sit_info);
si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry);
si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
si->base_mem += 3 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
if (f2fs_discard_en(sbi))
si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
si->base_mem += SIT_VBLOCK_MAP_SIZE;
if (sbi->segs_per_sec > 1)
si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
@ -161,7 +171,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
/* build curseg */
si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE;
si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE;
si->base_mem += PAGE_SIZE * NR_CURSEG_TYPE;
/* build dirty segmap */
si->base_mem += sizeof(struct dirty_seglist_info);
@ -189,18 +199,18 @@ get_cache:
si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
sizeof(struct nat_entry_set);
si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry);
for (i = 0; i <= UPDATE_INO; i++)
for (i = 0; i <= ORPHAN_INO; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree);
si->cache_mem += atomic_read(&sbi->total_ext_tree) *
sizeof(struct extent_tree);
si->cache_mem += atomic_read(&sbi->total_ext_node) *
sizeof(struct extent_node);
si->page_mem = 0;
npages = NODE_MAPPING(sbi)->nrpages;
si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT;
si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
npages = META_MAPPING(sbi)->nrpages;
si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT;
si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
}
static int stat_show(struct seq_file *s, void *v)
@ -211,20 +221,24 @@ static int stat_show(struct seq_file *s, void *v)
mutex_lock(&f2fs_stat_mutex);
list_for_each_entry(si, &f2fs_stat_list, stat_list) {
char devname[BDEVNAME_SIZE];
update_general_status(si->sbi);
seq_printf(s, "\n=====[ partition info(%s). #%d ]=====\n",
bdevname(si->sbi->sb->s_bdev, devname), i++);
seq_printf(s, "\n=====[ partition info(%pg). #%d, %s]=====\n",
si->sbi->sb->s_bdev, i++,
f2fs_readonly(si->sbi->sb) ? "RO": "RW");
seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
si->sit_area_segs, si->nat_area_segs);
seq_printf(s, "[SSA: %d] [MAIN: %d",
si->ssa_area_segs, si->main_area_segs);
seq_printf(s, "(OverProv:%d Resv:%d)]\n\n",
si->overp_segs, si->rsvd_segs);
seq_printf(s, "Utilization: %d%% (%d valid blocks)\n",
si->utilization, si->valid_count);
if (test_opt(si->sbi, DISCARD))
seq_printf(s, "Utilization: %u%% (%u valid blocks, %u discard blocks)\n",
si->utilization, si->valid_count, si->discard_blks);
else
seq_printf(s, "Utilization: %u%% (%u valid blocks)\n",
si->utilization, si->valid_count);
seq_printf(s, " - Node: %u (Inode: %u, ",
si->valid_node_count, si->valid_inode_count);
seq_printf(s, "Other: %u)\n - Data: %u\n",
@ -236,6 +250,8 @@ static int stat_show(struct seq_file *s, void *v)
si->inline_inode);
seq_printf(s, " - Inline_dentry Inode: %u\n",
si->inline_dir);
seq_printf(s, " - Orphan Inode: %u\n",
si->orphans);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
@ -269,7 +285,8 @@ static int stat_show(struct seq_file *s, void *v)
si->dirty_count);
seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
si->prefree_count, si->free_segs, si->free_secs);
seq_printf(s, "CP calls: %d\n", si->cp_count);
seq_printf(s, "CP calls: %d (BG: %d)\n",
si->cp_count, si->bg_cp_count);
seq_printf(s, "GC calls: %d (BG: %d)\n",
si->call_count, si->bg_gc);
seq_printf(s, " - data segments : %d (%d)\n",
@ -290,17 +307,21 @@ static int stat_show(struct seq_file *s, void *v)
!si->total_ext ? 0 :
div64_u64(si->hit_total * 100, si->total_ext),
si->hit_total, si->total_ext);
seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n",
si->ext_tree, si->ext_node);
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - inmem: %4d, wb: %4d\n",
si->inmem_pages, si->wb_pages);
seq_printf(s, " - nodes: %4d in %4d\n",
seq_printf(s, " - inmem: %4lld, wb_bios: %4d\n",
si->inmem_pages, si->wb_bios);
seq_printf(s, " - nodes: %4lld in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d\n",
si->ndirty_dent, si->ndirty_dirs);
seq_printf(s, " - meta: %4d in %4d\n",
seq_printf(s, " - dents: %4lld in dirs:%4d (%4d)\n",
si->ndirty_dent, si->ndirty_dirs, si->ndirty_all);
seq_printf(s, " - datas: %4lld in files:%4d\n",
si->ndirty_data, si->ndirty_files);
seq_printf(s, " - meta: %4lld in %4d\n",
si->ndirty_meta, si->meta_pages);
seq_printf(s, " - imeta: %4lld\n",
si->ndirty_imeta);
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
si->dirty_nats, si->nats, si->dirty_sits, si->sits);
seq_printf(s, " - free_nids: %9d\n",
@ -407,20 +428,23 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
kfree(si);
}
void __init f2fs_create_root_stats(void)
int __init f2fs_create_root_stats(void)
{
struct dentry *file;
f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
if (!f2fs_debugfs_root)
return;
return -ENOMEM;
file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
NULL, &stat_fops);
if (!file) {
debugfs_remove(f2fs_debugfs_root);
f2fs_debugfs_root = NULL;
return -ENOMEM;
}
return 0;
}
void f2fs_destroy_root_stats(void)

View file

@ -17,8 +17,8 @@
static unsigned long dir_blocks(struct inode *inode)
{
return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1))
>> PAGE_CACHE_SHIFT;
return ((unsigned long long) (i_size_read(inode) + PAGE_SIZE - 1))
>> PAGE_SHIFT;
}
static unsigned int dir_buckets(unsigned int level, int dir_level)
@ -37,7 +37,7 @@ static unsigned int bucket_blocks(unsigned int level)
return 4;
}
unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
[F2FS_FT_UNKNOWN] = DT_UNKNOWN,
[F2FS_FT_REG_FILE] = DT_REG,
[F2FS_FT_DIR] = DT_DIR,
@ -48,7 +48,6 @@ unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
[F2FS_FT_SYMLINK] = DT_LNK,
};
#define S_SHIFT 12
static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = F2FS_FT_DIR,
@ -64,6 +63,13 @@ void set_de_type(struct f2fs_dir_entry *de, umode_t mode)
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
unsigned char get_de_type(struct f2fs_dir_entry *de)
{
if (de->file_type < F2FS_FT_MAX)
return f2fs_filetype_table[de->file_type];
return DT_UNKNOWN;
}
static unsigned long dir_block_index(unsigned int level,
int dir_level, unsigned int idx)
{
@ -77,7 +83,7 @@ static unsigned long dir_block_index(unsigned int level,
}
static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
struct f2fs_filename *fname,
struct fscrypt_name *fname,
f2fs_hash_t namehash,
int *max_slots,
struct page **res_page)
@ -95,23 +101,18 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
else
kunmap(dentry_page);
/*
* For the most part, it should be a bug when name_len is zero.
* We stop here for figuring out where the bugs has occurred.
*/
f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0);
return de;
}
struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *fname,
struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
f2fs_hash_t namehash, int *max_slots,
struct f2fs_dentry_ptr *d)
{
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
int max_len = 0;
struct f2fs_str de_name = FSTR_INIT(NULL, 0);
struct f2fs_str *name = &fname->disk_name;
struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
struct fscrypt_str *name = &fname->disk_name;
if (max_slots)
*max_slots = 0;
@ -124,37 +125,28 @@ struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *fname,
de = &d->dentry[bit_pos];
if (de->hash_code != namehash)
goto not_match;
if (unlikely(!de->name_len)) {
bit_pos++;
continue;
}
/* encrypted case */
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
#ifdef CONFIG_F2FS_FS_ENCRYPTION
if (unlikely(!name->name)) {
if (fname->usr_fname->name[0] == '_') {
if (de_name.len > 32 &&
!memcmp(de_name.name + ((de_name.len - 17) & ~15),
fname->crypto_buf.name + 8, 16))
goto found;
goto not_match;
}
name->name = fname->crypto_buf.name;
name->len = fname->crypto_buf.len;
}
#endif
if (de_name.len == name->len &&
!memcmp(de_name.name, name->name, name->len))
/* show encrypted name */
if (fname->hash) {
if (de->hash_code == fname->hash)
goto found;
} else if (de_name.len == name->len &&
de->hash_code == namehash &&
!memcmp(de_name.name, name->name, name->len))
goto found;
not_match:
if (max_slots && max_len > *max_slots)
*max_slots = max_len;
max_len = 0;
/* remain bug on condition */
if (unlikely(!de->name_len))
d->max = -1;
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
}
@ -167,7 +159,7 @@ found:
static struct f2fs_dir_entry *find_in_level(struct inode *dir,
unsigned int level,
struct f2fs_filename *fname,
struct fscrypt_name *fname,
struct page **res_page)
{
struct qstr name = FSTR_TO_QSTR(&fname->disk_name);
@ -180,9 +172,10 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
int max_slots;
f2fs_hash_t namehash;
namehash = f2fs_dentry_hash(&name, fname);
f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
if(fname->hash)
namehash = cpu_to_le32(fname->hash);
else
namehash = f2fs_dentry_hash(&name);
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
@ -195,8 +188,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
/* no need to allocate new dentry pages to all the indices */
dentry_page = find_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
room = true;
continue;
if (PTR_ERR(dentry_page) == -ENOENT) {
room = true;
continue;
} else {
*res_page = dentry_page;
break;
}
}
de = find_in_block(dentry_page, fname, namehash, &max_slots,
@ -217,6 +215,44 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
return de;
}
struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
struct fscrypt_name *fname, struct page **res_page)
{
unsigned long npages = dir_blocks(dir);
struct f2fs_dir_entry *de = NULL;
unsigned int max_depth;
unsigned int level;
if (f2fs_has_inline_dentry(dir)) {
*res_page = NULL;
de = find_in_inline_dir(dir, fname, res_page);
goto out;
}
if (npages == 0) {
*res_page = NULL;
goto out;
}
max_depth = F2FS_I(dir)->i_current_depth;
if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING,
"Corrupted max_depth of %lu: %u",
dir->i_ino, max_depth);
max_depth = MAX_DIR_HASH_DEPTH;
f2fs_i_depth_write(dir, max_depth);
}
for (level = 0; level < max_depth; level++) {
*res_page = NULL;
de = find_in_level(dir, level, fname, res_page);
if (de || IS_ERR(*res_page))
break;
}
out:
return de;
}
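/*
 * Illustrative sketch (not part of this patch): the lookup shape used by
 * __f2fs_find_entry()/find_in_level(). Levels are tried from 0 up to the
 * directory's current depth; at each level the name hash selects a single
 * bucket and only that bucket's blocks are scanned. All helpers and
 * constants below are simplified stand-ins for the real dir_buckets()/
 * bucket_blocks()/dir_block_index() math, which also adds the blocks used
 * by all previous levels.
 */
#include <stdint.h>
#include <stddef.h>

struct entry;						/* opaque in this sketch */

unsigned int ex_buckets_at(unsigned int level);		/* assumed: buckets per level */
unsigned int ex_blocks_per_bucket(unsigned int level);
struct entry *ex_scan_block(unsigned long block_index, uint32_t hash);

static struct entry *ex_find_entry(uint32_t hash, unsigned int max_depth)
{
	unsigned int level;

	for (level = 0; level < max_depth; level++) {
		unsigned int nbucket = ex_buckets_at(level);
		unsigned int nblock = ex_blocks_per_bucket(level);
		unsigned long bidx = (unsigned long)(hash % nbucket) * nblock;
		unsigned long block;

		for (block = bidx; block < bidx + nblock; block++) {
			struct entry *de = ex_scan_block(block, hash);

			if (de)
				return de;
		}
	}
	return NULL;
}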
/*
* Find an entry in the specified directory with the wanted name.
* It returns the page where the entry was found (as a parameter - res_page),
@ -224,72 +260,42 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
* Entry is guaranteed to be valid.
*/
struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
struct qstr *child, struct page **res_page)
const struct qstr *child, struct page **res_page)
{
unsigned long npages = dir_blocks(dir);
struct f2fs_dir_entry *de = NULL;
unsigned int max_depth;
unsigned int level;
struct f2fs_filename fname;
struct fscrypt_name fname;
int err;
*res_page = NULL;
err = f2fs_fname_setup_filename(dir, child, 1, &fname);
if (err)
err = fscrypt_setup_filename(dir, child, 1, &fname);
if (err) {
*res_page = ERR_PTR(err);
return NULL;
if (f2fs_has_inline_dentry(dir)) {
de = find_in_inline_dir(dir, &fname, res_page);
goto out;
}
if (npages == 0)
goto out;
de = __f2fs_find_entry(dir, &fname, res_page);
max_depth = F2FS_I(dir)->i_current_depth;
for (level = 0; level < max_depth; level++) {
de = find_in_level(dir, level, &fname, res_page);
if (de)
break;
}
out:
f2fs_fname_free_filename(&fname);
fscrypt_free_filename(&fname);
return de;
}
struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p)
{
struct page *page;
struct f2fs_dir_entry *de;
struct f2fs_dentry_block *dentry_blk;
struct qstr dotdot = QSTR_INIT("..", 2);
if (f2fs_has_inline_dentry(dir))
return f2fs_parent_inline_dir(dir, p);
page = get_lock_data_page(dir, 0, false);
if (IS_ERR(page))
return NULL;
dentry_blk = kmap(page);
de = &dentry_blk->dentry[1];
*p = page;
unlock_page(page);
return de;
return f2fs_find_entry(dir, &dotdot, p);
}
ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr,
struct page **page)
{
ino_t res = 0;
struct f2fs_dir_entry *de;
struct page *page;
de = f2fs_find_entry(dir, qstr, &page);
de = f2fs_find_entry(dir, qstr, page);
if (de) {
res = le32_to_cpu(de->ino);
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
f2fs_dentry_kunmap(dir, *page);
f2fs_put_page(*page, 0);
}
return res;
@ -300,14 +306,14 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
{
enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
lock_page(page);
f2fs_wait_on_page_writeback(page, type);
f2fs_wait_on_page_writeback(page, type, true);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode->i_mode);
f2fs_dentry_kunmap(dir, page);
set_page_dirty(page);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
mark_inode_dirty(dir);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
f2fs_mark_inode_dirty_sync(dir);
f2fs_put_page(page, 1);
}
@ -315,7 +321,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
{
struct f2fs_inode *ri;
f2fs_wait_on_page_writeback(ipage, NODE);
f2fs_wait_on_page_writeback(ipage, NODE, true);
/* copy name info. to this inode page */
ri = F2FS_INODE(ipage);
@ -345,24 +351,14 @@ int update_dent_inode(struct inode *inode, struct inode *to,
void do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d)
{
struct f2fs_dir_entry *de;
struct qstr dot = QSTR_INIT(".", 1);
struct qstr dotdot = QSTR_INIT("..", 2);
de = &d->dentry[0];
de->name_len = cpu_to_le16(1);
de->hash_code = 0;
de->ino = cpu_to_le32(inode->i_ino);
memcpy(d->filename[0], ".", 1);
set_de_type(de, inode->i_mode);
/* update dirent of "." */
f2fs_update_dentry(inode->i_ino, inode->i_mode, d, &dot, 0, 0);
de = &d->dentry[1];
de->hash_code = 0;
de->name_len = cpu_to_le16(2);
de->ino = cpu_to_le32(parent->i_ino);
memcpy(d->filename[1], "..", 2);
set_de_type(de, parent->i_mode);
test_and_set_bit_le(0, (void *)d->bitmap);
test_and_set_bit_le(1, (void *)d->bitmap);
/* update dirent of ".." */
f2fs_update_dentry(parent->i_ino, parent->i_mode, d, &dotdot, 0, 1);
}
static int make_empty_dir(struct inode *inode,
@ -392,32 +388,38 @@ static int make_empty_dir(struct inode *inode,
}
struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
const struct qstr *name, struct page *dpage)
const struct qstr *new_name, const struct qstr *orig_name,
struct page *dpage)
{
struct page *page;
int err;
if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
if (is_inode_flag_set(inode, FI_NEW_INODE)) {
page = new_inode_page(inode);
if (IS_ERR(page))
return page;
if (S_ISDIR(inode->i_mode)) {
/* in order to handle error case */
get_page(page);
err = make_empty_dir(inode, dir, page);
if (err)
goto error;
if (err) {
lock_page(page);
goto put_error;
}
put_page(page);
}
err = f2fs_init_acl(inode, dir, page, dpage);
if (err)
goto put_error;
err = f2fs_init_security(inode, dir, name, page);
err = f2fs_init_security(inode, dir, orig_name, page);
if (err)
goto put_error;
if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) {
err = f2fs_inherit_context(dir, inode, page);
err = fscrypt_inherit_context(dir, inode, page, false);
if (err)
goto put_error;
}
@ -429,14 +431,14 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
set_cold_node(inode, page);
}
if (name)
init_dent_inode(name, page);
if (new_name)
init_dent_inode(new_name, page);
/*
* This file should be checkpointed during fsync.
* We lost i_pino from now on.
*/
if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
if (is_inode_flag_set(inode, FI_INC_LINK)) {
file_lost_pino(inode);
/*
* If link the tmpfile to alias through linkat path,
@ -444,41 +446,33 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
*/
if (inode->i_nlink == 0)
remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino);
inc_nlink(inode);
f2fs_i_links_write(inode, true);
}
return page;
put_error:
clear_nlink(inode);
update_inode(inode, page);
f2fs_put_page(page, 1);
error:
/* once the failed inode becomes a bad inode, i_mode is S_IFREG */
truncate_inode_pages(&inode->i_data, 0);
truncate_blocks(inode, 0, false);
remove_dirty_dir_inode(inode);
remove_inode_page(inode);
return ERR_PTR(err);
}
void update_parent_metadata(struct inode *dir, struct inode *inode,
unsigned int current_depth)
{
if (inode && is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
if (S_ISDIR(inode->i_mode)) {
inc_nlink(dir);
set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
if (inode && is_inode_flag_set(inode, FI_NEW_INODE)) {
if (S_ISDIR(inode->i_mode))
f2fs_i_links_write(dir, true);
clear_inode_flag(inode, FI_NEW_INODE);
}
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
mark_inode_dirty(dir);
f2fs_mark_inode_dirty_sync(dir);
if (F2FS_I(dir)->i_current_depth != current_depth) {
F2FS_I(dir)->i_current_depth = current_depth;
set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
if (F2FS_I(dir)->i_current_depth != current_depth)
f2fs_i_depth_write(dir, current_depth);
if (inode && is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
if (inode && is_inode_flag_set(inode, FI_INC_LINK))
clear_inode_flag(inode, FI_INC_LINK);
}
int room_for_filename(const void *bitmap, int slots, int max_slots)
@ -515,15 +509,16 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
memcpy(d->filename[bit_pos], name->name, name->len);
de->ino = cpu_to_le32(ino);
set_de_type(de, mode);
for (i = 0; i < slots; i++)
test_and_set_bit_le(bit_pos + i, (void *)d->bitmap);
for (i = 0; i < slots; i++) {
__set_bit_le(bit_pos + i, (void *)d->bitmap);
/* avoid wrong garbage data for readdir */
if (i)
(de + i)->name_len = 0;
}
}
/*
* Caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op().
*/
int __f2fs_add_link(struct inode *dir, const struct qstr *name,
int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
const struct qstr *orig_name,
struct inode *inode, nid_t ino, umode_t mode)
{
unsigned int bit_pos;
@ -536,28 +531,11 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
struct f2fs_dentry_block *dentry_blk = NULL;
struct f2fs_dentry_ptr d;
struct page *page = NULL;
struct f2fs_filename fname;
struct qstr new_name;
int slots, err;
err = f2fs_fname_setup_filename(dir, name, 0, &fname);
if (err)
return err;
new_name.name = fname_name(&fname);
new_name.len = fname_len(&fname);
if (f2fs_has_inline_dentry(dir)) {
err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode);
if (!err || err != -EAGAIN)
goto out;
else
err = 0;
}
int slots, err = 0;
level = 0;
slots = GET_DENTRY_SLOTS(new_name.len);
dentry_hash = f2fs_dentry_hash(&new_name, NULL);
slots = GET_DENTRY_SLOTS(new_name->len);
dentry_hash = f2fs_dentry_hash(new_name);
current_depth = F2FS_I(dir)->i_current_depth;
if (F2FS_I(dir)->chash == dentry_hash) {
@ -566,10 +544,12 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
}
start:
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) {
err = -ENOSPC;
goto out;
}
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH))
return -ENOSPC;
#endif
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
/* Increase the depth, if required */
if (level == current_depth)
@ -583,10 +563,8 @@ start:
for (block = bidx; block <= (bidx + nblock - 1); block++) {
dentry_page = get_new_data_page(dir, NULL, block, true);
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
goto out;
}
if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);
dentry_blk = kmap(dentry_page);
bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
@ -602,11 +580,12 @@ start:
++level;
goto start;
add_dentry:
f2fs_wait_on_page_writeback(dentry_page, DATA);
f2fs_wait_on_page_writeback(dentry_page, DATA, true);
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
page = init_inode_metadata(inode, dir, &new_name, NULL);
page = init_inode_metadata(inode, dir, new_name,
orig_name, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@ -616,14 +595,12 @@ add_dentry:
}
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
f2fs_update_dentry(ino, mode, &d, &new_name, dentry_hash, bit_pos);
f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos);
set_page_dirty(dentry_page);
if (inode) {
/* we don't need to mark_inode_dirty now */
F2FS_I(inode)->i_pino = dir->i_ino;
update_inode(inode, page);
f2fs_i_pino_write(inode, dir->i_ino);
f2fs_put_page(page, 1);
}
@ -632,14 +609,49 @@ fail:
if (inode)
up_write(&F2FS_I(inode)->i_sem);
if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
update_inode_page(dir);
clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
out:
f2fs_fname_free_filename(&fname);
return err;
}
int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname,
struct inode *inode, nid_t ino, umode_t mode)
{
struct qstr new_name;
int err = -EAGAIN;
new_name.name = fname_name(fname);
new_name.len = fname_len(fname);
if (f2fs_has_inline_dentry(dir))
err = f2fs_add_inline_entry(dir, &new_name, fname->usr_fname,
inode, ino, mode);
if (err == -EAGAIN)
err = f2fs_add_regular_entry(dir, &new_name, fname->usr_fname,
inode, ino, mode);
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
return err;
}
/*
* Caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op().
*/
int __f2fs_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode)
{
struct fscrypt_name fname;
int err;
err = fscrypt_setup_filename(dir, name, 0, &fname);
if (err)
return err;
err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
fscrypt_free_filename(&fname);
return err;
}
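A minimal sketch of the new add-link flow, assuming -EAGAIN from the inline path means "no room left in the inline dentry area, fall back to a regular dentry block". The helpers here are placeholders, not the kernel functions.

#include <errno.h>
#include <stdio.h>

static int dir_is_inline = 1;		/* pretend the dir is still inline */

static int toy_add_inline_entry(const char *name)
{
	(void)name;
	return -EAGAIN;			/* pretend the inline area is full */
}

static int toy_add_regular_entry(const char *name)
{
	printf("added %s to a regular dentry block\n", name);
	return 0;
}

static int toy_do_add_link(const char *name)
{
	int err = -EAGAIN;

	if (dir_is_inline)
		err = toy_add_inline_entry(name);
	if (err == -EAGAIN)		/* inline area full: take the regular path */
		err = toy_add_regular_entry(name);
	return err;
}

int main(void)
{
	return toy_do_add_link("example");
}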
@ -649,46 +661,39 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
int err = 0;
down_write(&F2FS_I(inode)->i_sem);
page = init_inode_metadata(inode, dir, NULL, NULL);
page = init_inode_metadata(inode, dir, NULL, NULL, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
}
/* we don't need to mark_inode_dirty now */
update_inode(inode, page);
f2fs_put_page(page, 1);
clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
clear_inode_flag(inode, FI_NEW_INODE);
fail:
up_write(&F2FS_I(inode)->i_sem);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return err;
}
void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page)
void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
down_write(&F2FS_I(inode)->i_sem);
if (S_ISDIR(inode->i_mode)) {
drop_nlink(dir);
if (page)
update_inode(dir, page);
else
update_inode_page(dir);
}
if (S_ISDIR(inode->i_mode))
f2fs_i_links_write(dir, false);
inode->i_ctime = CURRENT_TIME;
drop_nlink(inode);
f2fs_i_links_write(inode, false);
if (S_ISDIR(inode->i_mode)) {
drop_nlink(inode);
i_size_write(inode, 0);
f2fs_i_links_write(inode, false);
f2fs_i_size_write(inode, 0);
}
up_write(&F2FS_I(inode)->i_sem);
update_inode_page(inode);
if (inode->i_nlink == 0)
add_orphan_inode(sbi, inode->i_ino);
add_orphan_inode(inode);
else
release_orphan_inode(sbi);
}
@ -705,11 +710,13 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
int i;
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
if (f2fs_has_inline_dentry(dir))
return f2fs_delete_inline_entry(dentry, page, dir, inode);
lock_page(page);
f2fs_wait_on_page_writeback(page, DATA);
f2fs_wait_on_page_writeback(page, DATA, true);
dentry_blk = page_address(page);
bit_pos = dentry - dentry_blk->dentry;
@ -724,9 +731,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
set_page_dirty(page);
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
f2fs_mark_inode_dirty_sync(dir);
if (inode)
f2fs_drop_nlink(dir, inode, NULL);
f2fs_drop_nlink(dir, inode);
if (bit_pos == NR_DENTRY_IN_BLOCK &&
!truncate_hole(dir, page->index, page->index + 1)) {
@ -777,12 +785,12 @@ bool f2fs_empty_dir(struct inode *dir)
}
bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
unsigned int start_pos, struct f2fs_str *fstr)
unsigned int start_pos, struct fscrypt_str *fstr)
{
unsigned char d_type = DT_UNKNOWN;
unsigned int bit_pos;
struct f2fs_dir_entry *de = NULL;
struct f2fs_str de_name = FSTR_INIT(NULL, 0);
struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
bit_pos = ((unsigned long)ctx->pos % d->max);
@ -792,10 +800,13 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
break;
de = &d->dentry[bit_pos];
if (de->file_type < F2FS_FT_MAX)
d_type = f2fs_filetype_table[de->file_type];
else
d_type = DT_UNKNOWN;
if (de->name_len == 0) {
bit_pos++;
ctx->pos = start_pos + bit_pos;
continue;
}
d_type = get_de_type(de);
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
@ -804,15 +815,9 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
int save_len = fstr->len;
int ret;
de_name.name = kmalloc(de_name.len, GFP_NOFS);
if (!de_name.name)
return false;
memcpy(de_name.name, d->filename[bit_pos], de_name.len);
ret = f2fs_fname_disk_to_usr(d->inode, &de->hash_code,
&de_name, fstr);
kfree(de_name.name);
ret = fscrypt_fname_disk_to_usr(d->inode,
(u32)de->hash_code, 0,
&de_name, fstr);
if (ret < 0)
return true;
@ -839,16 +844,15 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
struct file_ra_state *ra = &file->f_ra;
unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
struct f2fs_dentry_ptr d;
struct f2fs_str fstr = FSTR_INIT(NULL, 0);
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
int err = 0;
if (f2fs_encrypted_inode(inode)) {
err = f2fs_get_encryption_info(inode);
if (err)
err = fscrypt_get_encryption_info(inode);
if (err && err != -ENOKEY)
return err;
err = f2fs_fname_crypto_alloc_buffer(inode, F2FS_NAME_LEN,
&fstr);
err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr);
if (err < 0)
return err;
}
@ -865,36 +869,47 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
for (; n < npages; n++) {
dentry_page = get_lock_data_page(inode, n, false);
if (IS_ERR(dentry_page))
continue;
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
if (err == -ENOENT)
continue;
else
goto out;
}
dentry_blk = kmap(dentry_page);
make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr))
goto stop;
if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) {
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
break;
}
ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
dentry_page = NULL;
}
stop:
if (dentry_page && !IS_ERR(dentry_page)) {
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
}
err = 0;
out:
f2fs_fname_crypto_free_buffer(&fstr);
fscrypt_fname_free_buffer(&fstr);
return err;
}
static int f2fs_dir_open(struct inode *inode, struct file *filp)
{
if (f2fs_encrypted_inode(inode))
return fscrypt_get_encryption_info(inode) ? -EACCES : 0;
return 0;
}
const struct file_operations f2fs_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.iterate = f2fs_readdir,
.fsync = f2fs_sync_file,
.open = f2fs_dir_open,
.unlocked_ioctl = f2fs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = f2fs_compat_ioctl,

View file

@ -33,10 +33,11 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
en->ei = *ei;
INIT_LIST_HEAD(&en->list);
en->et = et;
rb_link_node(&en->rb_node, parent, p);
rb_insert_color(&en->rb_node, &et->root);
et->count++;
atomic_inc(&et->node_cnt);
atomic_inc(&sbi->total_ext_node);
return en;
}
@ -45,11 +46,29 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_node *en)
{
rb_erase(&en->rb_node, &et->root);
et->count--;
atomic_dec(&et->node_cnt);
atomic_dec(&sbi->total_ext_node);
if (et->cached_en == en)
et->cached_en = NULL;
kmem_cache_free(extent_node_slab, en);
}
/*
* Flow to release an extent_node:
* 1. list_del_init
* 2. __detach_extent_node
* 3. kmem_cache_free.
*/
static void __release_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_node *en)
{
spin_lock(&sbi->extent_lock);
f2fs_bug_on(sbi, list_empty(&en->list));
list_del_init(&en->list);
spin_unlock(&sbi->extent_lock);
__detach_extent_node(sbi, et, en);
}
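A simplified sketch of the release ordering the comment above describes: unlink the node from the shared LRU list under its lock first, then detach it from the owning tree, then free it. The locking primitive and structures below are stand-ins, not the kernel's.

#include <pthread.h>
#include <stdlib.h>

struct toy_node {
	struct toy_node *prev, *next;	/* LRU linkage */
};

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;

static void toy_list_del_init(struct toy_node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	n->prev = n->next = n;
}

static void toy_release_node(struct toy_node *n)
{
	/* 1. drop the node from the global LRU under the list lock */
	pthread_mutex_lock(&lru_lock);
	toy_list_del_init(n);
	pthread_mutex_unlock(&lru_lock);

	/* 2. detach from the per-inode tree (elided), then 3. free it */
	free(n);
}

int main(void)
{
	struct toy_node *n = malloc(sizeof(*n));

	if (!n)
		return 1;
	n->prev = n->next = n;		/* a one-element list */
	toy_release_node(n);
	return 0;
}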
static struct extent_tree *__grab_extent_tree(struct inode *inode)
@ -68,11 +87,13 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
et->root = RB_ROOT;
et->cached_en = NULL;
rwlock_init(&et->lock);
atomic_set(&et->refcount, 0);
et->count = 0;
sbi->total_ext_tree++;
INIT_LIST_HEAD(&et->list);
atomic_set(&et->node_cnt, 0);
atomic_inc(&sbi->total_ext_tree);
} else {
atomic_dec(&sbi->total_zombie_tree);
list_del_init(&et->list);
}
atomic_inc(&et->refcount);
up_write(&sbi->extent_tree_lock);
/* never died until evict_inode */
@ -127,32 +148,21 @@ static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
}
static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, bool free_all)
struct extent_tree *et)
{
struct rb_node *node, *next;
struct extent_node *en;
unsigned int count = et->count;
unsigned int count = atomic_read(&et->node_cnt);
node = rb_first(&et->root);
while (node) {
next = rb_next(node);
en = rb_entry(node, struct extent_node, rb_node);
if (free_all) {
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list))
list_del_init(&en->list);
spin_unlock(&sbi->extent_lock);
}
if (free_all || list_empty(&en->list)) {
__detach_extent_node(sbi, et, en);
kmem_cache_free(extent_node_slab, en);
}
__release_extent_node(sbi, et, en);
node = next;
}
return count - et->count;
return count - atomic_read(&et->node_cnt);
}
static void __drop_largest_extent(struct inode *inode,
@ -160,38 +170,38 @@ static void __drop_largest_extent(struct inode *inode,
{
struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs)
if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) {
largest->len = 0;
f2fs_mark_inode_dirty_sync(inode);
}
}
void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs)
{
if (!f2fs_may_extent_tree(inode))
return;
__drop_largest_extent(inode, fofs, 1);
}
void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
/* return true, if inode page is changed */
bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
struct extent_node *en;
struct extent_info ei;
if (!f2fs_may_extent_tree(inode))
return;
if (!f2fs_may_extent_tree(inode)) {
/* drop largest extent */
if (i_ext && i_ext->len) {
i_ext->len = 0;
return true;
}
return false;
}
et = __grab_extent_tree(inode);
if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
return;
if (!i_ext || !i_ext->len)
return false;
set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
get_extent_info(&ei, i_ext);
write_lock(&et->lock);
if (et->count)
if (atomic_read(&et->node_cnt))
goto out;
en = __init_extent_tree(sbi, et, &ei);
@ -202,6 +212,7 @@ void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
}
out:
write_unlock(&et->lock);
return false;
}
static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
@ -230,9 +241,10 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
if (en) {
*ei = en->ei;
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list))
if (!list_empty(&en->list)) {
list_move_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
et->cached_en = en;
}
spin_unlock(&sbi->extent_lock);
ret = true;
}
@ -325,12 +337,12 @@ lookup_neighbors:
return en;
}
static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
static struct extent_node *__try_merge_extent_node(struct inode *inode,
struct extent_tree *et, struct extent_info *ei,
struct extent_node **den,
struct extent_node *prev_ex,
struct extent_node *next_ex)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_node *en = NULL;
if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
@ -340,28 +352,34 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
}
if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
if (en) {
__detach_extent_node(sbi, et, prev_ex);
*den = prev_ex;
}
if (en)
__release_extent_node(sbi, et, prev_ex);
next_ex->ei.fofs = ei->fofs;
next_ex->ei.blk = ei->blk;
next_ex->ei.len += ei->len;
en = next_ex;
}
if (en) {
__try_update_largest_extent(et, en);
if (!en)
return NULL;
__try_update_largest_extent(inode, et, en);
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list)) {
list_move_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
}
spin_unlock(&sbi->extent_lock);
return en;
}
static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
static struct extent_node *__insert_extent_tree(struct inode *inode,
struct extent_tree *et, struct extent_info *ei,
struct rb_node **insert_p,
struct rb_node *insert_parent)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct rb_node **p = &et->root.rb_node;
struct rb_node *parent = NULL;
struct extent_node *en = NULL;
@ -388,8 +406,13 @@ do_insert:
if (!en)
return NULL;
__try_update_largest_extent(et, en);
__try_update_largest_extent(inode, et, en);
/* update in global extent list */
spin_lock(&sbi->extent_lock);
list_add_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
spin_unlock(&sbi->extent_lock);
return en;
}
@ -412,7 +435,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
write_lock(&et->lock);
if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) {
if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
write_unlock(&et->lock);
return false;
}
@ -454,7 +477,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
set_extent_info(&ei, end,
end - dei.fofs + dei.blk,
org_end - end);
en1 = __insert_extent_tree(sbi, et, &ei,
en1 = __insert_extent_tree(inode, et, &ei,
NULL, NULL);
next_en = en1;
} else {
@ -475,9 +498,9 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
}
if (parts)
__try_update_largest_extent(et, en);
__try_update_largest_extent(inode, et, en);
else
__detach_extent_node(sbi, et, en);
__release_extent_node(sbi, et, en);
/*
* if original extent is split into zero or two parts, extent
@ -488,58 +511,28 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
insert_p = NULL;
insert_parent = NULL;
}
/* update in global extent list */
spin_lock(&sbi->extent_lock);
if (!parts && !list_empty(&en->list))
list_del(&en->list);
if (en1)
list_add_tail(&en1->list, &sbi->extent_list);
spin_unlock(&sbi->extent_lock);
/* release extent node */
if (!parts)
kmem_cache_free(extent_node_slab, en);
en = next_en;
}
/* 3. update extent in extent cache */
if (blkaddr) {
struct extent_node *den = NULL;
set_extent_info(&ei, fofs, blkaddr, len);
en1 = __try_merge_extent_node(sbi, et, &ei, &den,
prev_en, next_en);
if (!en1)
en1 = __insert_extent_tree(sbi, et, &ei,
if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en))
__insert_extent_tree(inode, et, &ei,
insert_p, insert_parent);
/* give up extent_cache, if split and small updates happen */
if (dei.len >= 1 &&
prev.len < F2FS_MIN_EXTENT_LEN &&
et->largest.len < F2FS_MIN_EXTENT_LEN) {
et->largest.len = 0;
set_inode_flag(F2FS_I(inode), FI_NO_EXTENT);
__drop_largest_extent(inode, 0, UINT_MAX);
set_inode_flag(inode, FI_NO_EXTENT);
}
spin_lock(&sbi->extent_lock);
if (en1) {
if (list_empty(&en1->list))
list_add_tail(&en1->list, &sbi->extent_list);
else
list_move_tail(&en1->list, &sbi->extent_list);
}
if (den && !list_empty(&den->list))
list_del(&den->list);
spin_unlock(&sbi->extent_lock);
if (den)
kmem_cache_free(extent_node_slab, den);
}
if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
__free_extent_tree(sbi, et, true);
if (is_inode_flag_set(inode, FI_NO_EXTENT))
__free_extent_tree(sbi, et);
write_unlock(&et->lock);
@ -548,46 +541,42 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
struct extent_node *en, *tmp;
unsigned long ino = F2FS_ROOT_INO(sbi);
struct radix_tree_root *root = &sbi->extent_tree_root;
unsigned int found;
struct extent_tree *et, *next;
struct extent_node *en;
unsigned int node_cnt = 0, tree_cnt = 0;
int remained;
if (!test_opt(sbi, EXTENT_CACHE))
return 0;
if (!atomic_read(&sbi->total_zombie_tree))
goto free_node;
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
/* 1. remove unreferenced extent tree */
while ((found = radix_tree_gang_lookup(root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
ino = treevec[found - 1]->ino + 1;
for (i = 0; i < found; i++) {
struct extent_tree *et = treevec[i];
if (!atomic_read(&et->refcount)) {
write_lock(&et->lock);
node_cnt += __free_extent_tree(sbi, et, true);
write_unlock(&et->lock);
radix_tree_delete(root, et->ino);
kmem_cache_free(extent_tree_slab, et);
sbi->total_ext_tree--;
tree_cnt++;
if (node_cnt + tree_cnt >= nr_shrink)
goto unlock_out;
}
list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
if (atomic_read(&et->node_cnt)) {
write_lock(&et->lock);
node_cnt += __free_extent_tree(sbi, et);
write_unlock(&et->lock);
}
f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
list_del_init(&et->list);
radix_tree_delete(&sbi->extent_tree_root, et->ino);
kmem_cache_free(extent_tree_slab, et);
atomic_dec(&sbi->total_ext_tree);
atomic_dec(&sbi->total_zombie_tree);
tree_cnt++;
if (node_cnt + tree_cnt >= nr_shrink)
goto unlock_out;
cond_resched();
}
up_write(&sbi->extent_tree_lock);
free_node:
/* 2. remove LRU extent entries */
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
@ -595,34 +584,29 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
remained = nr_shrink - (node_cnt + tree_cnt);
spin_lock(&sbi->extent_lock);
list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
if (!remained--)
for (; remained > 0; remained--) {
if (list_empty(&sbi->extent_list))
break;
en = list_first_entry(&sbi->extent_list,
struct extent_node, list);
et = en->et;
if (!write_trylock(&et->lock)) {
/* refresh this extent node's position in extent list */
list_move_tail(&en->list, &sbi->extent_list);
continue;
}
list_del_init(&en->list);
spin_unlock(&sbi->extent_lock);
__detach_extent_node(sbi, et, en);
write_unlock(&et->lock);
node_cnt++;
spin_lock(&sbi->extent_lock);
}
spin_unlock(&sbi->extent_lock);
/*
* reset ino for searching victims from beginning of global extent tree.
*/
ino = F2FS_ROOT_INO(sbi);
while ((found = radix_tree_gang_lookup(root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
ino = treevec[found - 1]->ino + 1;
for (i = 0; i < found; i++) {
struct extent_tree *et = treevec[i];
write_lock(&et->lock);
node_cnt += __free_extent_tree(sbi, et, false);
write_unlock(&et->lock);
if (node_cnt + tree_cnt >= nr_shrink)
goto unlock_out;
}
}
unlock_out:
up_write(&sbi->extent_tree_lock);
out:
@ -637,16 +621,29 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode)
struct extent_tree *et = F2FS_I(inode)->extent_tree;
unsigned int node_cnt = 0;
if (!et)
if (!et || !atomic_read(&et->node_cnt))
return 0;
write_lock(&et->lock);
node_cnt = __free_extent_tree(sbi, et, true);
node_cnt = __free_extent_tree(sbi, et);
write_unlock(&et->lock);
return node_cnt;
}
void f2fs_drop_extent_tree(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
set_inode_flag(inode, FI_NO_EXTENT);
write_lock(&et->lock);
__free_extent_tree(sbi, et);
__drop_largest_extent(inode, 0, UINT_MAX);
write_unlock(&et->lock);
}
void f2fs_destroy_extent_tree(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@ -656,8 +653,12 @@ void f2fs_destroy_extent_tree(struct inode *inode)
if (!et)
return;
if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
atomic_dec(&et->refcount);
if (inode->i_nlink && !is_bad_inode(inode) &&
atomic_read(&et->node_cnt)) {
down_write(&sbi->extent_tree_lock);
list_add_tail(&et->list, &sbi->zombie_list);
atomic_inc(&sbi->total_zombie_tree);
up_write(&sbi->extent_tree_lock);
return;
}
@ -666,11 +667,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)
/* delete extent tree entry in radix tree */
down_write(&sbi->extent_tree_lock);
atomic_dec(&et->refcount);
f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
sbi->total_ext_tree--;
atomic_dec(&sbi->total_ext_tree);
up_write(&sbi->extent_tree_lock);
F2FS_I(inode)->extent_tree = NULL;
@ -689,20 +689,20 @@ bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
void f2fs_update_extent_cache(struct dnode_of_data *dn)
{
struct f2fs_inode_info *fi = F2FS_I(dn->inode);
pgoff_t fofs;
block_t blkaddr;
if (!f2fs_may_extent_tree(dn->inode))
return;
f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
if (dn->data_blkaddr == NEW_ADDR)
blkaddr = NULL_ADDR;
else
blkaddr = dn->data_blkaddr;
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
dn->ofs_in_node;
if (f2fs_update_extent_tree_range(dn->inode, fofs, dn->data_blkaddr, 1))
sync_inode_page(dn);
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
dn->ofs_in_node;
f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1);
}
void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
@ -712,8 +712,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
if (!f2fs_may_extent_tree(dn->inode))
return;
if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len))
sync_inode_page(dn);
f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len);
}
void init_extent_cache_info(struct f2fs_sb_info *sbi)
@ -722,7 +721,9 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi)
init_rwsem(&sbi->extent_tree_lock);
INIT_LIST_HEAD(&sbi->extent_list);
spin_lock_init(&sbi->extent_lock);
sbi->total_ext_tree = 0;
atomic_set(&sbi->total_ext_tree, 0);
INIT_LIST_HEAD(&sbi->zombie_list);
atomic_set(&sbi->total_zombie_tree, 0);
atomic_set(&sbi->total_ext_node, 0);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -16,7 +16,6 @@
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/blkdev.h>
#include "f2fs.h"
#include "node.h"
@ -48,6 +47,11 @@ static int gc_thread_func(void *data)
continue;
}
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT))
f2fs_stop_checkpoint(sbi, false);
#endif
/*
* [GC triggering condition]
* 0. GC is not conducted currently.
@ -97,7 +101,7 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
dev_t dev = sbi->sb->s_bdev->bd_dev;
int err = 0;
gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
if (!gc_th) {
err = -ENOMEM;
goto out;
@ -173,9 +177,9 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
{
/* SSR allocates in a segment unit */
if (p->alloc_mode == SSR)
return 1 << sbi->log_blocks_per_seg;
return sbi->blocks_per_seg;
if (p->gc_mode == GC_GREEDY)
return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
return sbi->blocks_per_seg * p->ofs_unit;
else if (p->gc_mode == GC_CB)
return UINT_MAX;
else /* No other gc_mode */
@ -246,6 +250,18 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
return get_cb_cost(sbi, segno);
}
static unsigned int count_bits(const unsigned long *addr,
unsigned int offset, unsigned int len)
{
unsigned int end = offset + len, sum = 0;
while (offset < end) {
if (test_bit(offset++, addr))
++sum;
}
return sum;
}
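A compilable sketch of the same counting idea, showing how a whole section-sized window of dirty segments can be charged against the search budget in one step; the bitmap and geometry are made up for illustration.

#include <stdio.h>

static unsigned int toy_count_bits(const unsigned long *addr,
				   unsigned int offset, unsigned int len)
{
	unsigned int end = offset + len, sum = 0;

	while (offset < end) {
		if (addr[offset / (8 * sizeof(unsigned long))] &
		    (1UL << (offset % (8 * sizeof(unsigned long)))))
			sum++;
		offset++;
	}
	return sum;
}

int main(void)
{
	unsigned long dirty_segmap[1] = { 0x2d };	/* segments 0,2,3,5 dirty */

	/* charge the whole 6-segment window at once: prints 4 */
	printf("%u\n", toy_count_bits(dirty_segmap, 0, 6));
	return 0;
}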
/*
* This function is called from two paths.
* One is garbage collection and the other is SSR segment selection.
@ -259,9 +275,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct victim_sel_policy p;
unsigned int secno, max_cost;
unsigned int secno, last_victim;
unsigned int last_segment = MAIN_SEGS(sbi);
int nsearched = 0;
unsigned int nsearched = 0;
mutex_lock(&dirty_i->seglist_lock);
@ -269,11 +285,12 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
select_policy(sbi, gc_type, type, &p);
p.min_segno = NULL_SEGNO;
p.min_cost = max_cost = get_max_cost(sbi, &p);
p.min_cost = get_max_cost(sbi, &p);
if (p.max_search == 0)
goto out;
last_victim = sbi->last_victim[p.gc_mode];
if (p.alloc_mode == LFS && gc_type == FG_GC) {
p.min_segno = check_bg_victims(sbi);
if (p.min_segno != NULL_SEGNO)
@ -296,27 +313,35 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
}
p.offset = segno + p.ofs_unit;
if (p.ofs_unit > 1)
if (p.ofs_unit > 1) {
p.offset -= segno % p.ofs_unit;
nsearched += count_bits(p.dirty_segmap,
p.offset - p.ofs_unit,
p.ofs_unit);
} else {
nsearched++;
}
secno = GET_SECNO(sbi, segno);
if (sec_usage_check(sbi, secno))
continue;
goto next;
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
continue;
goto next;
cost = get_gc_cost(sbi, segno, &p);
if (p.min_cost > cost) {
p.min_segno = segno;
p.min_cost = cost;
} else if (unlikely(cost == max_cost)) {
continue;
}
if (nsearched++ >= p.max_search) {
sbi->last_victim[p.gc_mode] = segno;
next:
if (nsearched >= p.max_search) {
if (!sbi->last_victim[p.gc_mode] && segno <= last_victim)
sbi->last_victim[p.gc_mode] = last_victim + 1;
else
sbi->last_victim[p.gc_mode] = segno + 1;
break;
}
}
@ -400,13 +425,13 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
* If the node is valid, copy it with cold status; otherwise (an invalid
* node) ignore it.
*/
static int gc_node_segment(struct f2fs_sb_info *sbi,
static void gc_node_segment(struct f2fs_sb_info *sbi,
struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
bool initial = true;
struct f2fs_summary *entry;
block_t start_addr;
int off;
int phase = 0;
start_addr = START_BLOCK(sbi, segno);
@ -419,16 +444,24 @@ next_step:
struct node_info ni;
/* stop BG_GC if there is not enough free sections. */
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
return 0;
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
return;
if (check_valid_map(sbi, segno, off) == 0)
continue;
if (initial) {
if (phase == 0) {
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
META_NAT, true);
continue;
}
if (phase == 1) {
ra_node_page(sbi, nid);
continue;
}
/* phase == 2 */
node_page = get_node_page(sbi, nid);
if (IS_ERR(node_page))
continue;
@ -445,36 +478,12 @@ next_step:
continue;
}
/* set page dirty and write it */
if (gc_type == FG_GC) {
f2fs_wait_on_page_writeback(node_page, NODE);
set_page_dirty(node_page);
} else {
if (!PageWriteback(node_page))
set_page_dirty(node_page);
}
f2fs_put_page(node_page, 1);
move_node_page(node_page, gc_type);
stat_inc_node_blk_count(sbi, 1, gc_type);
}
if (initial) {
initial = false;
if (++phase < 3)
goto next_step;
}
if (gc_type == FG_GC) {
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
sync_node_pages(sbi, 0, &wbc);
/* return 1 only if FG_GC successfully reclaimed one */
if (get_valid_blocks(sbi, segno, 1) == 0)
return 1;
}
return 0;
}
/*
@ -484,7 +493,7 @@ next_step:
* as indirect or double indirect node blocks, are given, it must be a caller's
* bug.
*/
block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
{
unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
unsigned int bidx;
@ -501,7 +510,7 @@ block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
bidx = node_ofs - 5 - dec;
}
return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi);
return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode);
}
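A rough worked example of the bidx arithmetic, assuming the usual 4KB-block geometry (1018 node IDs per indirect block, 1018 data addresses per block, roughly 923 addresses held in the inode itself). Only the deep-node branch visible in this hunk is reproduced, and all numbers are illustrative.

#include <stdio.h>

int main(void)
{
	unsigned int nids_per_block = 1018;	/* assumed */
	unsigned int addrs_per_block = 1018;	/* assumed */
	unsigned int addrs_per_inode = 923;	/* assumed */
	unsigned int indirect_blks = 2 * nids_per_block + 4;
	unsigned int node_ofs = 2500;		/* some dnode beyond the indirects */

	int dec = (node_ofs - indirect_blks - 3) / (nids_per_block + 1);
	unsigned int bidx = node_ofs - 5 - dec;

	printf("first file block covered by this node: %u\n",
	       bidx * addrs_per_block + addrs_per_inode);
	return 0;
}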
static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
@ -547,6 +556,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
struct f2fs_summary sum;
struct node_info ni;
struct page *page;
block_t newaddr;
int err;
/* do not read out */
@ -568,21 +578,24 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
* don't cache encrypted data into meta inode until previous dirty
* data were writebacked to avoid racing between GC and flush.
*/
f2fs_wait_on_page_writeback(page, DATA);
f2fs_wait_on_page_writeback(page, DATA, true);
get_node_info(fio.sbi, dn.nid, &ni);
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
/* read page */
fio.page = page;
fio.blk_addr = dn.data_blkaddr;
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi),
fio.blk_addr,
FGP_LOCK|FGP_CREAT,
GFP_NOFS);
if (!fio.encrypted_page)
goto put_out;
allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
&sum, CURSEG_COLD_DATA);
fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr,
FGP_LOCK | FGP_CREAT, GFP_NOFS);
if (!fio.encrypted_page) {
err = -ENOMEM;
goto recover_block;
}
err = f2fs_submit_page_bio(&fio);
if (err)
@ -591,33 +604,39 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
/* write page */
lock_page(fio.encrypted_page);
if (unlikely(!PageUptodate(fio.encrypted_page)))
if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) {
err = -EIO;
goto put_page_out;
if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi)))
}
if (unlikely(!PageUptodate(fio.encrypted_page))) {
err = -EIO;
goto put_page_out;
}
set_page_dirty(fio.encrypted_page);
f2fs_wait_on_page_writeback(fio.encrypted_page, DATA);
f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
dec_page_count(fio.sbi, F2FS_DIRTY_META);
set_page_writeback(fio.encrypted_page);
/* allocate block address */
f2fs_wait_on_page_writeback(dn.node_page, NODE);
allocate_data_block(fio.sbi, NULL, fio.blk_addr,
&fio.blk_addr, &sum, CURSEG_COLD_DATA);
f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
fio.rw = WRITE_SYNC;
fio.new_blkaddr = newaddr;
f2fs_submit_page_mbio(&fio);
dn.data_blkaddr = fio.blk_addr;
set_data_blkaddr(&dn);
f2fs_update_extent_cache(&dn);
set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
f2fs_update_data_blkaddr(&dn, newaddr);
set_inode_flag(inode, FI_APPEND_WRITE);
if (page->index == 0)
set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
put_page_out:
f2fs_put_page(fio.encrypted_page, 1);
recover_block:
if (err)
__f2fs_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
true, true);
put_out:
f2fs_put_dnode(&dn);
out:
@ -645,12 +664,23 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
.page = page,
.encrypted_page = NULL,
};
bool is_dirty = PageDirty(page);
int err;
retry:
set_page_dirty(page);
f2fs_wait_on_page_writeback(page, DATA);
f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page))
inode_dec_dirty_pages(inode);
set_cold_data(page);
do_write_data_page(&fio);
err = do_write_data_page(&fio);
if (err == -ENOMEM && is_dirty) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
clear_cold_data(page);
}
out:
@ -664,7 +694,7 @@ out:
* If the parent node is not valid or the data block address is different,
* the victim data block is ignored.
*/
static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
{
struct super_block *sb = sbi->sb;
@ -684,16 +714,23 @@ next_step:
struct node_info dni; /* dnode info for the data */
unsigned int ofs_in_node, nofs;
block_t start_bidx;
nid_t nid = le32_to_cpu(entry->nid);
/* stop BG_GC if there is not enough free sections. */
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
return 0;
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
return;
if (check_valid_map(sbi, segno, off) == 0)
continue;
if (phase == 0) {
ra_node_page(sbi, le32_to_cpu(entry->nid));
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
META_NAT, true);
continue;
}
if (phase == 1) {
ra_node_page(sbi, nid);
continue;
}
@ -701,14 +738,14 @@ next_step:
if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
continue;
if (phase == 1) {
if (phase == 2) {
ra_node_page(sbi, dni.ino);
continue;
}
ofs_in_node = le16_to_cpu(entry->ofs_in_node);
if (phase == 2) {
if (phase == 3) {
inode = f2fs_iget(sb, dni.ino);
if (IS_ERR(inode) || is_bad_inode(inode))
continue;
@ -720,7 +757,7 @@ next_step:
continue;
}
start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
start_bidx = start_bidx_of_node(nofs, inode);
data_page = get_read_data_page(inode,
start_bidx + ofs_in_node, READA, true);
if (IS_ERR(data_page)) {
@ -733,30 +770,41 @@ next_step:
continue;
}
/* phase 3 */
/* phase 4 */
inode = find_gc_inode(gc_list, dni.ino);
if (inode) {
start_bidx = start_bidx_of_node(nofs, F2FS_I(inode))
struct f2fs_inode_info *fi = F2FS_I(inode);
bool locked = false;
if (S_ISREG(inode->i_mode)) {
if (!down_write_trylock(&fi->dio_rwsem[READ]))
continue;
if (!down_write_trylock(
&fi->dio_rwsem[WRITE])) {
up_write(&fi->dio_rwsem[READ]);
continue;
}
locked = true;
}
start_bidx = start_bidx_of_node(nofs, inode)
+ ofs_in_node;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
move_encrypted_block(inode, start_bidx);
else
move_data_page(inode, start_bidx, gc_type);
if (locked) {
up_write(&fi->dio_rwsem[WRITE]);
up_write(&fi->dio_rwsem[READ]);
}
stat_inc_data_blk_count(sbi, 1, gc_type);
}
}
if (++phase < 4)
if (++phase < 5)
goto next_step;
if (gc_type == FG_GC) {
f2fs_submit_merged_bio(sbi, DATA, WRITE);
/* return 1 only if FG_GC successfully reclaimed one */
if (get_valid_blocks(sbi, segno, 1) == 0)
return 1;
}
return 0;
}
static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@ -772,51 +820,84 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
return ret;
}
static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
static int do_garbage_collect(struct f2fs_sb_info *sbi,
unsigned int start_segno,
struct gc_inode_list *gc_list, int gc_type)
{
struct page *sum_page;
struct f2fs_summary_block *sum;
struct blk_plug plug;
int nfree = 0;
unsigned int segno = start_segno;
unsigned int end_segno = start_segno + sbi->segs_per_sec;
int sec_freed = 0;
unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
SUM_TYPE_DATA : SUM_TYPE_NODE;
/* read segment summary of victim */
sum_page = get_sum_page(sbi, segno);
/* readahead multi ssa blocks those have contiguous address */
if (sbi->segs_per_sec > 1)
ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
sbi->segs_per_sec, META_SSA, true);
/* reference all summary page */
while (segno < end_segno) {
sum_page = get_sum_page(sbi, segno++);
unlock_page(sum_page);
}
blk_start_plug(&plug);
sum = page_address(sum_page);
for (segno = start_segno; segno < end_segno; segno++) {
/*
* this is to avoid deadlock:
* - lock_page(sum_page) - f2fs_replace_block
* - check_valid_map() - mutex_lock(sentry_lock)
* - mutex_lock(sentry_lock) - change_curseg()
* - lock_page(sum_page)
*/
unlock_page(sum_page);
if (get_valid_blocks(sbi, segno, 1) == 0 ||
unlikely(f2fs_cp_error(sbi)))
goto next;
switch (GET_SUM_TYPE((&sum->footer))) {
case SUM_TYPE_NODE:
nfree = gc_node_segment(sbi, sum->entries, segno, gc_type);
break;
case SUM_TYPE_DATA:
nfree = gc_data_segment(sbi, sum->entries, gc_list,
segno, gc_type);
break;
/* find segment summary of victim */
sum_page = find_get_page(META_MAPPING(sbi),
GET_SUM_BLOCK(sbi, segno));
f2fs_bug_on(sbi, !PageUptodate(sum_page));
f2fs_put_page(sum_page, 0);
sum = page_address(sum_page);
f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
/*
* this is to avoid deadlock:
* - lock_page(sum_page) - f2fs_replace_block
* - check_valid_map() - mutex_lock(sentry_lock)
* - mutex_lock(sentry_lock) - change_curseg()
* - lock_page(sum_page)
*/
if (type == SUM_TYPE_NODE)
gc_node_segment(sbi, sum->entries, segno, gc_type);
else
gc_data_segment(sbi, sum->entries, gc_list, segno,
gc_type);
stat_inc_seg_count(sbi, type, gc_type);
next:
f2fs_put_page(sum_page, 0);
}
if (gc_type == FG_GC)
f2fs_submit_merged_bio(sbi,
(type == SUM_TYPE_NODE) ? NODE : DATA, WRITE);
blk_finish_plug(&plug);
stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type);
if (gc_type == FG_GC &&
get_valid_blocks(sbi, start_segno, sbi->segs_per_sec) == 0)
sec_freed = 1;
stat_inc_call_count(sbi->stat_info);
f2fs_put_page(sum_page, 0);
return nfree;
return sec_freed;
}
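A bare-bones sketch of the new section-granular loop: reference the summary pages for the whole section up front, collect each segment in turn, and count the section as freed only if no segment keeps valid blocks. Names and the section size are placeholders, not the kernel's.

#include <stdio.h>

#define TOY_SEGS_PER_SEC 4		/* assumed section geometry */

static int toy_segment_is_empty(unsigned int segno)
{
	return segno % 2;		/* fake validity map */
}

static void toy_gc_segment(unsigned int segno)
{
	printf("collecting segment %u\n", segno);
}

static int toy_do_garbage_collect(unsigned int start_segno)
{
	unsigned int segno, end = start_segno + TOY_SEGS_PER_SEC;
	int sec_freed = 1;

	/* 1. reference all summary blocks of the section (elided) */

	/* 2. collect every segment in the section */
	for (segno = start_segno; segno < end; segno++)
		toy_gc_segment(segno);

	/* 3. the section counts as freed only if no segment kept valid blocks */
	for (segno = start_segno; segno < end; segno++)
		if (!toy_segment_is_empty(segno))
			sec_freed = 0;

	return sec_freed;
}

int main(void)
{
	printf("section freed: %d\n", toy_do_garbage_collect(8));
	return 0;
}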
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
{
unsigned int segno, i;
unsigned int segno;
int gc_type = sync ? FG_GC : BG_GC;
int sec_freed = 0;
int ret = -EINVAL;
@ -832,46 +913,48 @@ gc_more:
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
if (unlikely(f2fs_cp_error(sbi)))
if (unlikely(f2fs_cp_error(sbi))) {
ret = -EIO;
goto stop;
}
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed, 0)) {
gc_type = FG_GC;
if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
write_checkpoint(sbi, &cpc);
/*
* If there is no victim and no prefree segment but still not
* enough free sections, we should flush dent/node blocks and do
* garbage collections.
*/
if (__get_victim(sbi, &segno, gc_type) ||
prefree_segments(sbi)) {
ret = write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
segno = NULL_SEGNO;
} else if (has_not_enough_free_secs(sbi, 0, 0)) {
ret = write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
}
}
if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
goto stop;
ret = 0;
/* readahead multi ssa blocks those have contiguous address */
if (sbi->segs_per_sec > 1)
ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
META_SSA, true);
for (i = 0; i < sbi->segs_per_sec; i++) {
/*
* for the FG_GC case, stop GCing the remaining segments once one
* segment in the selected section fails, to avoid long latency.
*/
if (!do_garbage_collect(sbi, segno + i, &gc_list, gc_type) &&
gc_type == FG_GC)
break;
}
if (i == sbi->segs_per_sec && gc_type == FG_GC)
if (do_garbage_collect(sbi, segno, &gc_list, gc_type) &&
gc_type == FG_GC)
sec_freed++;
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
if (!sync) {
if (has_not_enough_free_secs(sbi, sec_freed))
if (has_not_enough_free_secs(sbi, sec_freed, 0))
goto gc_more;
if (gc_type == FG_GC)
write_checkpoint(sbi, &cpc);
ret = write_checkpoint(sbi, &cpc);
}
stop:
mutex_unlock(&sbi->gc_mutex);

View file

@ -100,11 +100,3 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
return true;
return false;
}
static inline int is_idle(struct f2fs_sb_info *sbi)
{
struct block_device *bdev = sbi->sb->s_bdev;
struct request_queue *q = bdev_get_queue(bdev);
struct request_list *rl = &q->root_rl;
return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
}

View file

@ -70,8 +70,7 @@ static void str2hashbuf(const unsigned char *msg, size_t len,
*buf++ = pad;
}
f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
struct f2fs_filename *fname)
f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
{
__u32 hash;
f2fs_hash_t f2fs_hash;
@ -80,10 +79,6 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
const unsigned char *name = name_info->name;
size_t len = name_info->len;
/* encrypted bigname case */
if (fname && !fname->disk_name.name)
return cpu_to_le32(fname->hash);
if (is_dot_dotdot(name_info))
return 0;

View file

@ -17,9 +17,6 @@
bool f2fs_may_inline_data(struct inode *inode)
{
if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
return false;
if (f2fs_is_atomic_file(inode))
return false;
@ -55,7 +52,7 @@ void read_inline_data(struct page *page, struct page *ipage)
f2fs_bug_on(F2FS_P_SB(page), page->index);
zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE);
/* Copy the whole inline data block */
src_addr = inline_data_addr(ipage);
@ -63,7 +60,8 @@ void read_inline_data(struct page *page, struct page *ipage)
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
flush_dcache_page(page);
kunmap_atomic(dst_addr);
SetPageUptodate(page);
if (!PageUptodate(page))
SetPageUptodate(page);
}
bool truncate_inline_inode(struct page *ipage, u64 from)
@ -75,9 +73,9 @@ bool truncate_inline_inode(struct page *ipage, u64 from)
addr = inline_data_addr(ipage);
f2fs_wait_on_page_writeback(ipage, NODE);
f2fs_wait_on_page_writeback(ipage, NODE, true);
memset(addr + from, 0, MAX_INLINE_DATA - from);
set_page_dirty(ipage);
return true;
}
@ -112,11 +110,12 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
}
if (page->index)
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
zero_user_segment(page, 0, PAGE_SIZE);
else
read_inline_data(page, ipage);
SetPageUptodate(page);
if (!PageUptodate(page))
SetPageUptodate(page);
f2fs_put_page(ipage, 1);
trace_android_fs_dataread_end(inode, page_offset(page),
PAGE_SIZE);
@ -126,7 +125,6 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
{
void *src_addr, *dst_addr;
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(dn->inode),
.type = DATA,
@ -136,8 +134,6 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
};
int dirty, err;
f2fs_bug_on(F2FS_I_SB(dn->inode), page->index);
if (!f2fs_exist_data(dn->inode))
goto clear_out;
@ -145,21 +141,9 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (err)
return err;
f2fs_wait_on_page_writeback(page, DATA);
f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
if (PageUptodate(page))
goto no_update;
zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
/* Copy the whole inline data block */
src_addr = inline_data_addr(dn->inode_page);
dst_addr = kmap_atomic(page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
flush_dcache_page(page);
kunmap_atomic(dst_addr);
SetPageUptodate(page);
no_update:
read_inline_data(page, dn->inode_page);
set_page_dirty(page);
/* clear dirty state */
@ -167,23 +151,21 @@ no_update:
/* write data page to try to make data consistent */
set_page_writeback(page);
fio.blk_addr = dn->data_blkaddr;
fio.old_blkaddr = dn->data_blkaddr;
write_data_page(dn, &fio);
set_data_blkaddr(dn);
f2fs_update_extent_cache(dn);
f2fs_wait_on_page_writeback(page, DATA);
f2fs_wait_on_page_writeback(page, DATA, true);
if (dirty)
inode_dec_dirty_pages(dn->inode);
/* this converted inline_data should be recovered. */
set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE);
set_inode_flag(dn->inode, FI_APPEND_WRITE);
/* clear inline data and flag after data writeback */
truncate_inline_inode(dn->inode_page, 0);
clear_inline_node(dn->inode_page);
clear_out:
stat_dec_inline_inode(dn->inode);
f2fs_clear_inline_inode(dn->inode);
sync_inode_page(dn);
f2fs_put_dnode(dn);
return 0;
}
@ -195,7 +177,10 @@ int f2fs_convert_inline_inode(struct inode *inode)
struct page *ipage, *page;
int err = 0;
page = grab_cache_page(inode->i_mapping, 0);
if (!f2fs_has_inline_data(inode))
return 0;
page = f2fs_grab_cache_page(inode->i_mapping, 0, false);
if (!page)
return -ENOMEM;
@ -217,6 +202,9 @@ out:
f2fs_unlock_op(sbi);
f2fs_put_page(page, 1);
f2fs_balance_fs(sbi, dn.node_changed);
return err;
}
@ -238,16 +226,17 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
f2fs_bug_on(F2FS_I_SB(inode), page->index);
f2fs_wait_on_page_writeback(dn.inode_page, NODE);
f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
src_addr = kmap_atomic(page);
dst_addr = inline_data_addr(dn.inode_page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
kunmap_atomic(src_addr);
set_page_dirty(dn.inode_page);
set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST);
sync_inode_page(&dn);
clear_inline_node(dn.inode_page);
f2fs_put_dnode(&dn);
return 0;
}
@ -276,16 +265,16 @@ process_inline:
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
f2fs_wait_on_page_writeback(ipage, NODE);
f2fs_wait_on_page_writeback(ipage, NODE, true);
src_addr = inline_data_addr(npage);
dst_addr = inline_data_addr(ipage);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
set_inode_flag(inode, FI_INLINE_DATA);
set_inode_flag(inode, FI_DATA_EXIST);
update_inode(inode, ipage);
set_page_dirty(ipage);
f2fs_put_page(ipage, 1);
return true;
}
@ -296,7 +285,6 @@ process_inline:
if (!truncate_inline_inode(ipage, 0))
return false;
f2fs_clear_inline_inode(inode);
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
if (truncate_blocks(inode, 0, false))
@ -307,7 +295,7 @@ process_inline:
}
struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
struct f2fs_filename *fname, struct page **res_page)
struct fscrypt_name *fname, struct page **res_page)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct f2fs_inline_dentry *inline_dentry;
@ -318,10 +306,12 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
f2fs_hash_t namehash;
ipage = get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
if (IS_ERR(ipage)) {
*res_page = ipage;
return NULL;
}
namehash = f2fs_dentry_hash(&name, fname);
namehash = f2fs_dentry_hash(&name);
inline_dentry = inline_data_addr(ipage);
@ -333,30 +323,6 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
else
f2fs_put_page(ipage, 0);
/*
* For the most part, it should be a bug when name_len is zero.
* We stop here to figure out where the bug has occurred.
*/
f2fs_bug_on(sbi, d.max < 0);
return de;
}
struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir,
struct page **p)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
struct f2fs_dir_entry *de;
struct f2fs_inline_dentry *dentry_blk;
ipage = get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
return NULL;
dentry_blk = inline_data_addr(ipage);
de = &dentry_blk->dentry[1];
*p = ipage;
unlock_page(ipage);
return de;
}
@ -374,10 +340,8 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent,
set_page_dirty(ipage);
/* update i_size to MAX_INLINE_DATA */
if (i_size_read(inode) < MAX_INLINE_DATA) {
i_size_write(inode, MAX_INLINE_DATA);
set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
}
if (i_size_read(inode) < MAX_INLINE_DATA)
f2fs_i_size_write(inode, MAX_INLINE_DATA);
return 0;
}
@ -385,7 +349,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent,
* NOTE: ipage is grabbed by caller, but if any error occurs, we should
* release ipage in this function.
*/
static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
struct f2fs_inline_dentry *inline_dentry)
{
struct page *page;
@ -393,7 +357,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
struct f2fs_dentry_block *dentry_blk;
int err;
page = grab_cache_page(dir->i_mapping, 0);
page = f2fs_grab_cache_page(dir->i_mapping, 0, false);
if (!page) {
f2fs_put_page(ipage, 1);
return -ENOMEM;
@ -404,8 +368,8 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
if (err)
goto out;
f2fs_wait_on_page_writeback(page, DATA);
zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
f2fs_wait_on_page_writeback(page, DATA, true);
zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE);
dentry_blk = kmap_atomic(page);
@ -426,37 +390,132 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
NR_INLINE_DENTRY * F2FS_SLOT_LEN);
kunmap_atomic(dentry_blk);
SetPageUptodate(page);
if (!PageUptodate(page))
SetPageUptodate(page);
set_page_dirty(page);
/* clear inline dir and flag after data writeback */
truncate_inline_inode(ipage, 0);
stat_dec_inline_dir(dir);
clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
clear_inode_flag(dir, FI_INLINE_DENTRY);
if (i_size_read(dir) < PAGE_CACHE_SIZE) {
i_size_write(dir, PAGE_CACHE_SIZE);
set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
sync_inode_page(&dn);
f2fs_i_depth_write(dir, 1);
if (i_size_read(dir) < PAGE_SIZE)
f2fs_i_size_write(dir, PAGE_SIZE);
out:
f2fs_put_page(page, 1);
return err;
}
int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode)
static int f2fs_add_inline_entries(struct inode *dir,
struct f2fs_inline_dentry *inline_dentry)
{
struct f2fs_dentry_ptr d;
unsigned long bit_pos = 0;
int err = 0;
make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2);
while (bit_pos < d.max) {
struct f2fs_dir_entry *de;
struct qstr new_name;
nid_t ino;
umode_t fake_mode;
if (!test_bit_le(bit_pos, d.bitmap)) {
bit_pos++;
continue;
}
de = &d.dentry[bit_pos];
if (unlikely(!de->name_len)) {
bit_pos++;
continue;
}
new_name.name = d.filename[bit_pos];
new_name.len = de->name_len;
ino = le32_to_cpu(de->ino);
fake_mode = get_de_type(de) << S_SHIFT;
err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL,
ino, fake_mode);
if (err)
goto punch_dentry_pages;
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
}
return 0;
punch_dentry_pages:
truncate_inode_pages(&dir->i_data, 0);
truncate_blocks(dir, 0, false);
remove_dirty_inode(dir);
return err;
}
static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
struct f2fs_inline_dentry *inline_dentry)
{
struct f2fs_inline_dentry *backup_dentry;
int err;
backup_dentry = f2fs_kmalloc(F2FS_I_SB(dir),
sizeof(struct f2fs_inline_dentry), GFP_F2FS_ZERO);
if (!backup_dentry) {
f2fs_put_page(ipage, 1);
return -ENOMEM;
}
memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA);
truncate_inline_inode(ipage, 0);
unlock_page(ipage);
err = f2fs_add_inline_entries(dir, backup_dentry);
if (err)
goto recover;
lock_page(ipage);
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
kfree(backup_dentry);
return 0;
recover:
lock_page(ipage);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA);
f2fs_i_depth_write(dir, 0);
f2fs_i_size_write(dir, MAX_INLINE_DATA);
set_page_dirty(ipage);
f2fs_put_page(ipage, 1);
kfree(backup_dentry);
return err;
}
static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
struct f2fs_inline_dentry *inline_dentry)
{
if (!F2FS_I(dir)->i_dir_level)
return f2fs_move_inline_dirents(dir, ipage, inline_dentry);
else
return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry);
}
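A short sketch of the conversion choice above: directories still at dir level 0 move their inline dentries into a fresh block wholesale, while leveled directories re-add each entry through the regular path so the hash layout stays correct. Helper names are stand-ins.

#include <stdio.h>

static int toy_move_inline_dirents(void)
{
	puts("copy the inline area into a new dentry block");
	return 0;
}

static int toy_move_rehashed_dirents(void)
{
	puts("re-add every inline entry through the regular path");
	return 0;
}

static int toy_convert_inline_dir(int dir_level)
{
	/* depth-0 dirs keep their hash layout; leveled dirs must rehash */
	return dir_level ? toy_move_rehashed_dirents()
			 : toy_move_inline_dirents();
}

int main(void)
{
	return toy_convert_inline_dir(1);
}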
int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
const struct qstr *orig_name,
struct inode *inode, nid_t ino, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
unsigned int bit_pos;
f2fs_hash_t name_hash;
size_t namelen = name->len;
struct f2fs_inline_dentry *dentry_blk = NULL;
struct f2fs_dentry_ptr d;
int slots = GET_DENTRY_SLOTS(namelen);
int slots = GET_DENTRY_SLOTS(new_name->len);
struct page *page = NULL;
int err = 0;
@ -477,25 +536,27 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
page = init_inode_metadata(inode, dir, name, ipage);
page = init_inode_metadata(inode, dir, new_name,
orig_name, ipage);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
}
if (f2fs_encrypted_inode(dir))
file_set_enc_name(inode);
}
f2fs_wait_on_page_writeback(ipage, NODE);
f2fs_wait_on_page_writeback(ipage, NODE, true);
name_hash = f2fs_dentry_hash(name, NULL);
name_hash = f2fs_dentry_hash(new_name);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos);
f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
set_page_dirty(ipage);
/* we don't need to mark_inode_dirty now */
if (inode) {
F2FS_I(inode)->i_pino = dir->i_ino;
update_inode(inode, page);
f2fs_i_pino_write(inode, dir->i_ino);
f2fs_put_page(page, 1);
}
@ -503,11 +564,6 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
fail:
if (inode)
up_write(&F2FS_I(inode)->i_sem);
if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
update_inode(dir, ipage);
clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
out:
f2fs_put_page(ipage, 1);
return err;
@ -522,22 +578,22 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
int i;
lock_page(page);
f2fs_wait_on_page_writeback(page, NODE);
f2fs_wait_on_page_writeback(page, NODE, true);
inline_dentry = inline_data_addr(page);
bit_pos = dentry - inline_dentry->dentry;
for (i = 0; i < slots; i++)
test_and_clear_bit_le(bit_pos + i,
__clear_bit_le(bit_pos + i,
&inline_dentry->dentry_bitmap);
set_page_dirty(page);
f2fs_put_page(page, 1);
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
f2fs_mark_inode_dirty_sync(dir);
if (inode)
f2fs_drop_nlink(dir, inode, page);
f2fs_put_page(page, 1);
f2fs_drop_nlink(dir, inode);
}
bool f2fs_empty_inline_dir(struct inode *dir)
@ -565,7 +621,7 @@ bool f2fs_empty_inline_dir(struct inode *dir)
}
int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
struct f2fs_str *fstr)
struct fscrypt_str *fstr)
{
struct inode *inode = file_inode(file);
struct f2fs_inline_dentry *inline_dentry = NULL;

View file

@ -11,6 +11,7 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include "f2fs.h"
@ -18,6 +19,13 @@
#include <trace/events/f2fs.h>
void f2fs_mark_inode_dirty_sync(struct inode *inode)
{
if (f2fs_inode_dirtied(inode))
return;
mark_inode_dirty_sync(inode);
}
void f2fs_set_inode_flags(struct inode *inode)
{
unsigned int flags = F2FS_I(inode)->i_flags;
@ -35,6 +43,7 @@ void f2fs_set_inode_flags(struct inode *inode)
new_fl |= S_DIRSYNC;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
f2fs_mark_inode_dirty_sync(inode);
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@ -83,10 +92,10 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage)
while (start < end) {
if (*start++) {
f2fs_wait_on_page_writeback(ipage, NODE);
f2fs_wait_on_page_writeback(ipage, NODE, true);
set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage));
set_inode_flag(inode, FI_DATA_EXIST);
set_raw_inline(inode, F2FS_INODE(ipage));
set_page_dirty(ipage);
return;
}
@ -138,9 +147,10 @@ static int do_read_inode(struct inode *inode)
fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level;
f2fs_init_extent_tree(inode, &ri->i_ext);
if (f2fs_init_extent_tree(inode, &ri->i_ext))
set_page_dirty(node_page);
get_inline_info(fi, ri);
get_inline_info(inode, ri);
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
@ -150,7 +160,10 @@ static int do_read_inode(struct inode *inode)
__get_inode_rdev(inode, ri);
if (__written_first_block(ri))
set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
if (!need_inode_block_update(sbi, inode->i_ino))
fi->last_disk_size = inode->i_size;
f2fs_put_page(node_page, 1);
@ -202,6 +215,7 @@ make_now:
inode->i_op = &f2fs_encrypted_symlink_inode_operations;
else
inode->i_op = &f2fs_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
@ -221,11 +235,27 @@ bad_inode:
return ERR_PTR(ret);
}
void update_inode(struct inode *inode, struct page *node_page)
struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino)
{
struct inode *inode;
retry:
inode = f2fs_iget(sb, ino);
if (IS_ERR(inode)) {
if (PTR_ERR(inode) == -ENOMEM) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
}
return inode;
}
int update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_inode *ri;
f2fs_wait_on_page_writeback(node_page, NODE);
f2fs_inode_synced(inode);
f2fs_wait_on_page_writeback(node_page, NODE, true);
ri = F2FS_INODE(node_page);
@ -242,7 +272,7 @@ void update_inode(struct inode *inode, struct page *node_page)
&ri->i_ext);
else
memset(&ri->i_ext, 0, sizeof(ri->i_ext));
set_raw_inline(F2FS_I(inode), ri);
set_raw_inline(inode, ri);
ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
@ -259,15 +289,19 @@ void update_inode(struct inode *inode, struct page *node_page)
__set_inode_rdev(inode, ri);
set_cold_node(inode, node_page);
set_page_dirty(node_page);
clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
/* deleted inode */
if (inode->i_nlink == 0)
clear_inline_node(node_page);
return set_page_dirty(node_page);
}
void update_inode_page(struct inode *inode)
int update_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *node_page;
int ret = 0;
retry:
node_page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page)) {
@ -276,12 +310,14 @@ retry:
cond_resched();
goto retry;
} else if (err != -ENOENT) {
f2fs_stop_checkpoint(sbi);
f2fs_stop_checkpoint(sbi, false);
}
return;
f2fs_inode_synced(inode);
return 0;
}
update_inode(inode, node_page);
ret = update_inode(inode, node_page);
f2fs_put_page(node_page, 1);
return ret;
}
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
@ -292,16 +328,15 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
inode->i_ino == F2FS_META_INO(sbi))
return 0;
if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE))
if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
return 0;
/*
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when running out of free sections.
*/
update_inode_page(inode);
f2fs_balance_fs(sbi);
if (update_inode_page(inode))
f2fs_balance_fs(sbi, true);
return 0;
}
@ -311,13 +346,12 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
void f2fs_evict_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
nid_t xnid = fi->i_xattr_nid;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
int err = 0;
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
commit_inmem_pages(inode, true);
drop_inmem_pages(inode);
trace_f2fs_evict_inode(inode);
truncate_inode_pages_final(&inode->i_data);
@ -327,19 +361,24 @@ void f2fs_evict_inode(struct inode *inode)
goto out_clear;
f2fs_bug_on(sbi, get_dirty_pages(inode));
remove_dirty_dir_inode(inode);
remove_dirty_inode(inode);
f2fs_destroy_extent_tree(inode);
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
sb_start_intwrite(inode->i_sb);
set_inode_flag(fi, FI_NO_ALLOC);
i_size_write(inode, 0);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE))
goto no_delete;
#endif
sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
i_size_write(inode, 0);
retry:
if (F2FS_HAS_BLOCKS(inode))
err = f2fs_truncate(inode, true);
err = f2fs_truncate(inode);
if (!err) {
f2fs_lock_op(sbi);
@ -347,6 +386,14 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_unlock_op(sbi);
}
/* give more chances, if ENOMEM case */
if (err == -ENOMEM) {
err = 0;
goto retry;
}
if (err)
update_inode_page(inode);
sb_end_intwrite(inode->i_sb);
no_delete:
stat_dec_inline_xattr(inode);
@ -356,36 +403,18 @@ no_delete:
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
if (is_inode_flag_set(fi, FI_APPEND_WRITE))
add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
if (is_inode_flag_set(fi, FI_UPDATE_WRITE))
add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
if (is_inode_flag_set(fi, FI_FREE_NID)) {
if (err && err != -ENOENT)
alloc_nid_done(sbi, inode->i_ino);
else
alloc_nid_failed(sbi, inode->i_ino);
clear_inode_flag(fi, FI_FREE_NID);
}
if (err && err != -ENOENT) {
if (!exist_written_data(sbi, inode->i_ino, ORPHAN_INO)) {
/*
* get here because we failed to release resource
* of inode previously, reminder our user to run fsck
* for fixing.
*/
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"inode (ino:%lu) resource leak, run fsck "
"to fix this issue!", inode->i_ino);
}
if (is_inode_flag_set(inode, FI_APPEND_WRITE))
add_ino_entry(sbi, inode->i_ino, APPEND_INO);
if (is_inode_flag_set(inode, FI_UPDATE_WRITE))
add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
if (is_inode_flag_set(inode, FI_FREE_NID)) {
alloc_nid_failed(sbi, inode->i_ino);
clear_inode_flag(inode, FI_FREE_NID);
}
f2fs_bug_on(sbi, err &&
!exist_written_data(sbi, inode->i_ino, ORPHAN_INO));
out_clear:
#ifdef CONFIG_F2FS_FS_ENCRYPTION
if (fi->i_crypt_info)
f2fs_free_encryption_info(inode, fi->i_crypt_info);
#endif
fscrypt_put_encryption_info(inode, NULL);
clear_inode(inode);
}
@ -393,37 +422,32 @@ out_clear:
void handle_failed_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err = 0;
struct node_info ni;
clear_nlink(inode);
make_bad_inode(inode);
/* don't make bad inode, since it becomes a regular file. */
unlock_new_inode(inode);
i_size_write(inode, 0);
if (F2FS_HAS_BLOCKS(inode))
err = f2fs_truncate(inode, false);
if (!err)
err = remove_inode_page(inode);
/*
* if we skip truncate_node in remove_inode_page because we failed
* before, it's better to find another way to release resource of
* this inode (e.g. valid block count, node block or nid). Here we
* choose to add this inode to orphan list, so that we can call iput
* for releasing in orphan recovery flow.
*
* Note: we should add inode to orphan list before f2fs_unlock_op()
* so we can prevent losing this orphan when encountering checkpoint
* and a following sudden power-off.
*/
if (err && err != -ENOENT) {
err = acquire_orphan_inode(sbi);
if (!err)
add_orphan_inode(sbi, inode->i_ino);
get_node_info(sbi, inode->i_ino, &ni);
if (ni.blk_addr != NULL_ADDR) {
int err = acquire_orphan_inode(sbi);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"Too many orphan inodes, run fsck to fix.");
} else {
add_orphan_inode(inode);
}
alloc_nid_done(sbi, inode->i_ino);
} else {
set_inode_flag(inode, FI_FREE_NID);
}
set_inode_flag(F2FS_I(inode), FI_FREE_NID);
f2fs_unlock_op(sbi);
/* iput will drop the inode object */


@ -60,10 +60,14 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
if (f2fs_may_inline_data(inode))
set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
set_inode_flag(inode, FI_NEW_INODE);
if (test_opt(sbi, INLINE_XATTR))
set_inode_flag(inode, FI_INLINE_XATTR);
if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
set_inode_flag(inode, FI_INLINE_DATA);
if (f2fs_may_inline_dentry(inode))
set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
set_inode_flag(inode, FI_INLINE_DENTRY);
f2fs_init_extent_tree(inode, NULL);
@ -72,14 +76,13 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
stat_inc_inline_dir(inode);
trace_f2fs_new_inode(inode, 0);
mark_inode_dirty(inode);
return inode;
fail:
trace_f2fs_new_inode(inode, err);
make_bad_inode(inode);
if (nid_free)
set_inode_flag(F2FS_I(inode), FI_FREE_NID);
set_inode_flag(inode, FI_FREE_NID);
iput(inode);
return ERR_PTR(err);
}
@ -88,18 +91,23 @@ static int is_multimedia_file(const unsigned char *s, const char *sub)
{
size_t slen = strlen(s);
size_t sublen = strlen(sub);
int i;
/*
* filename format of multimedia file should be defined as:
* "filename + '.' + extension".
* "filename + '.' + extension + (optional: '.' + temp extension)".
*/
if (slen < sublen + 2)
return 0;
if (s[slen - sublen - 1] != '.')
return 0;
for (i = 1; i < slen - sublen; i++) {
if (s[i] != '.')
continue;
if (!strncasecmp(s + i + 1, sub, sublen))
return 1;
}
return !strncasecmp(s + slen - sublen, sub, sublen);
return 0;
}
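
The relaxed check above now accepts a known extension anywhere after a dot rather than only at the very end of the name, so temporary download suffixes (e.g. ".part", ".tmp") no longer hide the real extension. A minimal userspace sketch of the same rule, for illustration only (matches_ext and the sample names are not f2fs symbols):

#include <stdio.h>
#include <string.h>
#include <strings.h>

/* mirrors the relaxed rule: any ".<ext>" occurrence matches, case-insensitively */
static int matches_ext(const char *s, const char *sub)
{
	size_t slen = strlen(s), sublen = strlen(sub);
	size_t i;

	if (slen < sublen + 2)
		return 0;
	for (i = 1; i < slen - sublen; i++) {
		if (s[i] != '.')
			continue;
		if (!strncasecmp(s + i + 1, sub, sublen))
			return 1;
	}
	return 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       matches_ext("clip.mp4", "mp4"),       /* 1: plain extension */
	       matches_ext("clip.mp4.part", "mp4"),  /* 1: temp suffix after the extension */
	       matches_ext("clipmp4", "mp4"));       /* 0: no '.' before the extension */
	return 0;
}
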
/*
@ -128,8 +136,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
nid_t ino = 0;
int err;
f2fs_balance_fs(sbi);
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@ -142,6 +148,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
ino = inode->i_ino;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@ -169,15 +177,15 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
int err;
if (f2fs_encrypted_inode(dir) &&
!f2fs_is_child_context_consistent_with_parent(dir, inode))
!fscrypt_has_permitted_context(dir, inode))
return -EPERM;
f2fs_balance_fs(sbi);
f2fs_balance_fs(sbi, true);
inode->i_ctime = CURRENT_TIME;
ihold(inode);
set_inode_flag(F2FS_I(inode), FI_INC_LINK);
set_inode_flag(inode, FI_INC_LINK);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@ -190,7 +198,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
f2fs_sync_fs(sbi->sb, 1);
return 0;
out:
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
clear_inode_flag(inode, FI_INC_LINK);
iput(inode);
f2fs_unlock_op(sbi);
return err;
@ -199,10 +207,14 @@ out:
struct dentry *f2fs_get_parent(struct dentry *child)
{
struct qstr dotdot = QSTR_INIT("..", 2);
unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot);
if (!ino)
struct page *page;
unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot, &page);
if (!ino) {
if (IS_ERR(page))
return ERR_CAST(page);
return ERR_PTR(-ENOENT);
return d_obtain_alias(f2fs_iget(d_inode(child)->i_sb, ino));
}
return d_obtain_alias(f2fs_iget(child->d_sb, ino));
}
static int __recover_dot_dentries(struct inode *dir, nid_t pino)
@ -214,12 +226,24 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
struct page *page;
int err = 0;
if (f2fs_readonly(sbi->sb)) {
f2fs_msg(sbi->sb, KERN_INFO,
"skip recovering inline_dots inode (ino:%lu, pino:%u) "
"in readonly mountpoint", dir->i_ino, pino);
return 0;
}
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
de = f2fs_find_entry(dir, &dot, &page);
if (de) {
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
} else if (IS_ERR(page)) {
err = PTR_ERR(page);
goto out;
} else {
err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR);
if (err)
@ -230,14 +254,14 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
if (de) {
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
} else if (IS_ERR(page)) {
err = PTR_ERR(page);
} else {
err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR);
}
out:
if (!err) {
clear_inode_flag(F2FS_I(dir), FI_INLINE_DOTS);
mark_inode_dirty(dir);
}
if (!err)
clear_inode_flag(dir, FI_INLINE_DOTS);
f2fs_unlock_op(sbi);
return err;
@ -251,13 +275,32 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
struct page *page;
nid_t ino;
int err = 0;
unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir));
if (f2fs_encrypted_inode(dir)) {
int res = fscrypt_get_encryption_info(dir);
/*
* DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is
* created while the directory was encrypted and we
* don't have access to the key.
*/
if (fscrypt_has_encryption_key(dir))
fscrypt_set_encrypted_dentry(dentry);
fscrypt_set_d_op(dentry);
if (res && res != -ENOKEY)
return ERR_PTR(res);
}
if (dentry->d_name.len > F2FS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de)
if (!de) {
if (IS_ERR(page))
return (struct dentry *)page;
return d_splice_alias(inode, dentry);
}
ino = le32_to_cpu(de->ino);
f2fs_dentry_kunmap(dir, page);
@ -267,15 +310,29 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
return ERR_CAST(inode);
if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) {
err = __recover_dot_dentries(dir, root_ino);
if (err)
goto err_out;
}
if (f2fs_has_inline_dots(inode)) {
err = __recover_dot_dentries(inode, dir->i_ino);
if (err)
goto err_out;
}
if (!IS_ERR(inode) && f2fs_encrypted_inode(dir) &&
(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!fscrypt_has_permitted_context(dir, inode)) {
bool nokey = f2fs_encrypted_inode(inode) &&
!fscrypt_has_encryption_key(inode);
err = nokey ? -ENOKEY : -EPERM;
goto err_out;
}
return d_splice_alias(inode, dentry);
err_out:
iget_failed(inode);
iput(inode);
return ERR_PTR(err);
}
@ -288,11 +345,15 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
int err = -ENOENT;
trace_f2fs_unlink_enter(dir, dentry);
f2fs_balance_fs(sbi);
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de)
if (!de) {
if (IS_ERR(page))
err = PTR_ERR(page);
goto fail;
}
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
@ -305,9 +366,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
f2fs_delete_entry(de, page, dir, inode);
f2fs_unlock_op(sbi);
/* In order to evict this inode, we set it dirty */
mark_inode_dirty(inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
fail:
@ -332,16 +390,24 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct inode *inode;
size_t len = strlen(symname);
size_t p_len;
char *p_str;
struct f2fs_str disk_link = FSTR_INIT(NULL, 0);
struct f2fs_encrypted_symlink_data *sd = NULL;
struct fscrypt_str disk_link = FSTR_INIT((char *)symname, len + 1);
struct fscrypt_symlink_data *sd = NULL;
int err;
if (len > dir->i_sb->s_blocksize)
return -ENAMETOOLONG;
if (f2fs_encrypted_inode(dir)) {
err = fscrypt_get_encryption_info(dir);
if (err)
return err;
f2fs_balance_fs(sbi);
if (!fscrypt_has_encryption_key(dir))
return -EPERM;
disk_link.len = (fscrypt_fname_encrypted_size(dir, len) +
sizeof(struct fscrypt_symlink_data));
}
if (disk_link.len > dir->i_sb->s_blocksize)
return -ENAMETOOLONG;
inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
if (IS_ERR(inode))
@ -351,8 +417,11 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_op = &f2fs_encrypted_symlink_inode_operations;
else
inode->i_op = &f2fs_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@ -360,42 +429,36 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
f2fs_unlock_op(sbi);
alloc_nid_done(sbi, inode->i_ino);
if (f2fs_encrypted_inode(dir)) {
if (f2fs_encrypted_inode(inode)) {
struct qstr istr = QSTR_INIT(symname, len);
struct fscrypt_str ostr;
err = f2fs_get_encryption_info(inode);
if (err)
goto err_out;
err = f2fs_fname_crypto_alloc_buffer(inode, len, &disk_link);
if (err)
goto err_out;
err = f2fs_fname_usr_to_disk(inode, &istr, &disk_link);
if (err < 0)
goto err_out;
p_len = encrypted_symlink_data_len(disk_link.len) + 1;
if (p_len > dir->i_sb->s_blocksize) {
err = -ENAMETOOLONG;
goto err_out;
}
sd = kzalloc(p_len, GFP_NOFS);
sd = kzalloc(disk_link.len, GFP_NOFS);
if (!sd) {
err = -ENOMEM;
goto err_out;
}
memcpy(sd->encrypted_path, disk_link.name, disk_link.len);
sd->len = cpu_to_le16(disk_link.len);
p_str = (char *)sd;
} else {
p_len = len + 1;
p_str = (char *)symname;
err = fscrypt_get_encryption_info(inode);
if (err)
goto err_out;
if (!fscrypt_has_encryption_key(inode)) {
err = -EPERM;
goto err_out;
}
ostr.name = sd->encrypted_path;
ostr.len = disk_link.len;
err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr);
if (err < 0)
goto err_out;
sd->len = cpu_to_le16(ostr.len);
disk_link.name = (char *)sd;
}
err = page_symlink(inode, p_str, p_len);
err = page_symlink(inode, disk_link.name, disk_link.len);
err_out:
d_instantiate(dentry, inode);
@ -411,7 +474,8 @@ err_out:
* performance regression.
*/
if (!err) {
filemap_write_and_wait_range(inode->i_mapping, 0, p_len - 1);
filemap_write_and_wait_range(inode->i_mapping, 0,
disk_link.len - 1);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@ -420,7 +484,6 @@ err_out:
}
kfree(sd);
f2fs_fname_crypto_free_buffer(&disk_link);
return err;
out:
handle_failed_inode(inode);
@ -433,8 +496,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
int err;
f2fs_balance_fs(sbi);
inode = f2fs_new_inode(dir, S_IFDIR | mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@ -444,7 +505,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inode->i_mapping->a_ops = &f2fs_dblock_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
set_inode_flag(F2FS_I(inode), FI_INC_LINK);
f2fs_balance_fs(sbi, true);
set_inode_flag(inode, FI_INC_LINK);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@ -461,7 +524,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
return 0;
out_fail:
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
clear_inode_flag(inode, FI_INC_LINK);
handle_failed_inode(inode);
return err;
}
@ -481,8 +544,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode;
int err = 0;
f2fs_balance_fs(sbi);
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@ -490,6 +551,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &f2fs_special_inode_operations;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@ -516,9 +579,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
struct inode *inode;
int err;
if (!whiteout)
f2fs_balance_fs(sbi);
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@ -532,6 +592,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
}
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
if (err)
@ -545,17 +607,17 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
* add this non-linked tmpfile to orphan list, in this way we could
* remove all unused data of tmpfile after abnormal power-off.
*/
add_orphan_inode(sbi, inode->i_ino);
f2fs_unlock_op(sbi);
add_orphan_inode(inode);
alloc_nid_done(sbi, inode->i_ino);
if (whiteout) {
inode_dec_link_count(inode);
f2fs_i_links_write(inode, false);
*whiteout = inode;
} else {
d_tmpfile(dentry, inode);
}
/* link_count was changed by d_tmpfile as well. */
f2fs_unlock_op(sbi);
unlock_new_inode(inode);
return 0;
@ -569,7 +631,7 @@ out:
static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
{
if (f2fs_encrypted_inode(dir)) {
int err = f2fs_get_encryption_info(dir);
int err = fscrypt_get_encryption_info(dir);
if (err)
return err;
}
@ -595,26 +657,29 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct f2fs_dir_entry *old_dir_entry = NULL;
struct f2fs_dir_entry *old_entry;
struct f2fs_dir_entry *new_entry;
bool is_old_inline = f2fs_has_inline_dentry(old_dir);
int err = -ENOENT;
if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
!f2fs_is_child_context_consistent_with_parent(new_dir,
old_inode)) {
!fscrypt_has_permitted_context(new_dir, old_inode)) {
err = -EPERM;
goto out;
}
f2fs_balance_fs(sbi);
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry)
if (!old_entry) {
if (IS_ERR(old_page))
err = PTR_ERR(old_page);
goto out;
}
if (S_ISDIR(old_inode->i_mode)) {
err = -EIO;
old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page);
if (!old_dir_entry)
if (!old_dir_entry) {
if (IS_ERR(old_dir_page))
err = PTR_ERR(old_dir_page);
goto out_old;
}
}
if (flags & RENAME_WHITEOUT) {
@ -632,8 +697,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
err = -ENOENT;
new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name,
&new_page);
if (!new_entry)
if (!new_entry) {
if (IS_ERR(new_page))
err = PTR_ERR(new_page);
goto out_whiteout;
}
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
@ -641,8 +711,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (err)
goto put_out_dir;
if (update_dent_inode(old_inode, new_inode,
&new_dentry->d_name)) {
err = update_dent_inode(old_inode, new_inode,
&new_dentry->d_name);
if (err) {
release_orphan_inode(sbi);
goto put_out_dir;
}
@ -652,20 +723,17 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_inode->i_ctime = CURRENT_TIME;
down_write(&F2FS_I(new_inode)->i_sem);
if (old_dir_entry)
drop_nlink(new_inode);
drop_nlink(new_inode);
f2fs_i_links_write(new_inode, false);
f2fs_i_links_write(new_inode, false);
up_write(&F2FS_I(new_inode)->i_sem);
mark_inode_dirty(new_inode);
if (!new_inode->i_nlink)
add_orphan_inode(sbi, new_inode->i_ino);
add_orphan_inode(new_inode);
else
release_orphan_inode(sbi);
update_inode_page(old_inode);
update_inode_page(new_inode);
} else {
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(new_dentry, old_inode);
@ -674,9 +742,29 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_whiteout;
}
if (old_dir_entry) {
inc_nlink(new_dir);
update_inode_page(new_dir);
if (old_dir_entry)
f2fs_i_links_write(new_dir, true);
/*
* old entry and new entry can be located in the same inline
* dentry in inode; when attaching new entry in inline dentry,
* it could force inline dentry conversion, after that,
* old_entry and old_page will point to wrong address, in
* order to avoid this, let's do the check and update here.
*/
if (is_old_inline && !f2fs_has_inline_dentry(old_dir)) {
f2fs_put_page(old_page, 0);
old_page = NULL;
old_entry = f2fs_find_entry(old_dir,
&old_dentry->d_name, &old_page);
if (!old_entry) {
err = -ENOENT;
if (IS_ERR(old_page))
err = PTR_ERR(old_page);
f2fs_unlock_op(sbi);
goto out_whiteout;
}
}
}
@ -687,13 +775,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
up_write(&F2FS_I(old_inode)->i_sem);
old_inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(old_inode);
f2fs_mark_inode_dirty_sync(old_inode);
f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
if (whiteout) {
whiteout->i_state |= I_LINKABLE;
set_inode_flag(F2FS_I(whiteout), FI_INC_LINK);
set_inode_flag(whiteout, FI_INC_LINK);
err = f2fs_add_link(old_dentry, whiteout);
if (err)
goto put_out_dir;
@ -705,14 +793,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (old_dir != new_dir && !whiteout) {
f2fs_set_link(old_inode, old_dir_entry,
old_dir_page, new_dir);
update_inode_page(old_inode);
} else {
f2fs_dentry_kunmap(old_inode, old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
drop_nlink(old_dir);
mark_inode_dirty(old_dir);
update_inode_page(old_dir);
f2fs_i_links_write(old_dir, false);
}
f2fs_unlock_op(sbi);
@ -756,39 +841,45 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
int err = -ENOENT;
if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) &&
(old_dir != new_dir) &&
(!f2fs_is_child_context_consistent_with_parent(new_dir,
old_inode) ||
!f2fs_is_child_context_consistent_with_parent(old_dir,
new_inode)))
(old_dir != new_dir) &&
(!fscrypt_has_permitted_context(new_dir, old_inode) ||
!fscrypt_has_permitted_context(old_dir, new_inode)))
return -EPERM;
f2fs_balance_fs(sbi);
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry)
if (!old_entry) {
if (IS_ERR(old_page))
err = PTR_ERR(old_page);
goto out;
}
new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page);
if (!new_entry)
if (!new_entry) {
if (IS_ERR(new_page))
err = PTR_ERR(new_page);
goto out_old;
}
/* prepare for updating ".." directory entry info later */
if (old_dir != new_dir) {
if (S_ISDIR(old_inode->i_mode)) {
err = -EIO;
old_dir_entry = f2fs_parent_dir(old_inode,
&old_dir_page);
if (!old_dir_entry)
if (!old_dir_entry) {
if (IS_ERR(old_dir_page))
err = PTR_ERR(old_dir_page);
goto out_new;
}
}
if (S_ISDIR(new_inode->i_mode)) {
err = -EIO;
new_dir_entry = f2fs_parent_dir(new_inode,
&new_dir_page);
if (!new_dir_entry)
if (!new_dir_entry) {
if (IS_ERR(new_dir_page))
err = PTR_ERR(new_dir_page);
goto out_old_dir;
}
}
}
@ -807,6 +898,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_new_dir;
}
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name);
@ -836,19 +929,13 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
file_lost_pino(old_inode);
up_write(&F2FS_I(old_inode)->i_sem);
update_inode_page(old_inode);
old_dir->i_ctime = CURRENT_TIME;
if (old_nlink) {
down_write(&F2FS_I(old_dir)->i_sem);
if (old_nlink < 0)
drop_nlink(old_dir);
else
inc_nlink(old_dir);
f2fs_i_links_write(old_dir, old_nlink > 0);
up_write(&F2FS_I(old_dir)->i_sem);
}
mark_inode_dirty(old_dir);
update_inode_page(old_dir);
f2fs_mark_inode_dirty_sync(old_dir);
/* update directory entry info of new dir inode */
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
@ -857,19 +944,13 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
file_lost_pino(new_inode);
up_write(&F2FS_I(new_inode)->i_sem);
update_inode_page(new_inode);
new_dir->i_ctime = CURRENT_TIME;
if (new_nlink) {
down_write(&F2FS_I(new_dir)->i_sem);
if (new_nlink < 0)
drop_nlink(new_dir);
else
inc_nlink(new_dir);
f2fs_i_links_write(new_dir, new_nlink > 0);
up_write(&F2FS_I(new_dir)->i_sem);
}
mark_inode_dirty(new_dir);
update_inode_page(new_dir);
f2fs_mark_inode_dirty_sync(new_dir);
f2fs_unlock_op(sbi);
@ -922,89 +1003,85 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
}
#ifdef CONFIG_F2FS_FS_ENCRYPTION
static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cookie)
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
struct f2fs_str cstr;
struct f2fs_str pstr = FSTR_INIT(NULL, 0);
struct fscrypt_str cstr = FSTR_INIT(NULL, 0);
struct fscrypt_str pstr = FSTR_INIT(NULL, 0);
struct fscrypt_symlink_data *sd;
struct inode *inode = d_inode(dentry);
struct f2fs_encrypted_symlink_data *sd;
loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
u32 max_size = inode->i_sb->s_blocksize;
int res;
res = f2fs_get_encryption_info(inode);
if (!dentry)
return ERR_PTR(-ECHILD);
res = fscrypt_get_encryption_info(inode);
if (res)
return ERR_PTR(res);
cpage = read_mapping_page(inode->i_mapping, 0, NULL);
if (IS_ERR(cpage))
return ERR_CAST(cpage);
caddr = kmap(cpage);
caddr[size] = 0;
caddr = page_address(cpage);
/* Symlink is encrypted */
sd = (struct f2fs_encrypted_symlink_data *)caddr;
sd = (struct fscrypt_symlink_data *)caddr;
cstr.name = sd->encrypted_path;
cstr.len = le16_to_cpu(sd->len);
cstr.name = kmalloc(cstr.len, GFP_NOFS);
if (!cstr.name) {
res = -ENOMEM;
goto errout;
}
memcpy(cstr.name, sd->encrypted_path, cstr.len);
/* this is broken symlink case */
if (cstr.name[0] == 0 && cstr.len == 0) {
if (unlikely(cstr.len == 0)) {
res = -ENOENT;
goto errout;
}
if ((cstr.len + sizeof(struct f2fs_encrypted_symlink_data) - 1) >
max_size) {
if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > max_size) {
/* Symlink data on the disk is corrupted */
res = -EIO;
goto errout;
}
res = f2fs_fname_crypto_alloc_buffer(inode, cstr.len, &pstr);
res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr);
if (res)
goto errout;
res = f2fs_fname_disk_to_usr(inode, NULL, &cstr, &pstr);
res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr);
if (res < 0)
goto errout;
kfree(cstr.name);
/* this is broken symlink case */
if (unlikely(pstr.name[0] == 0)) {
res = -ENOENT;
goto errout;
}
paddr = pstr.name;
/* Null-terminate the name */
paddr[res] = '\0';
kunmap(cpage);
page_cache_release(cpage);
put_page(cpage);
return *cookie = paddr;
errout:
kfree(cstr.name);
f2fs_fname_crypto_free_buffer(&pstr);
kunmap(cpage);
page_cache_release(cpage);
fscrypt_fname_free_buffer(&pstr);
put_page(cpage);
return ERR_PTR(res);
}
const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = f2fs_encrypted_follow_link,
.put_link = kfree_put_link,
.follow_link = f2fs_encrypted_follow_link,
.put_link = kfree_put_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
#ifdef CONFIG_F2FS_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = f2fs_listxattr,
.removexattr = generic_removexattr,
};
#endif
};
const struct inode_operations f2fs_dir_inode_operations = {
.create = f2fs_create,

File diff suppressed because it is too large


@ -15,15 +15,21 @@
#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK)
/* # of pages to perform synchronous readahead before building free nids */
#define FREE_NID_PAGES 4
#define FREE_NID_PAGES 8
#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
#define DEF_RA_NID_PAGES 4 /* # of nid pages to be readaheaded */
#define DEF_RA_NID_PAGES 0 /* # of nid pages to be readaheaded */
/* maximum readahead size for node during getting data blocks */
#define MAX_RA_NODE 128
/* control the memory footprint threshold (10MB per 1GB ram) */
#define DEF_RAM_THRESHOLD 10
#define DEF_RAM_THRESHOLD 1
/* control dirty nats ratio threshold (default: 10% over max nid count) */
#define DEF_DIRTY_NAT_RATIO_THRESHOLD 10
/* control total # of nats */
#define DEF_NAT_CACHE_THRESHOLD 100000
/* vector size for gang look-up from nat cache that consists of radix tree */
#define NATVEC_SIZE 64
@ -117,6 +123,17 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
raw_ne->version = ni->version;
}
static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi)
{
return NM_I(sbi)->dirty_nat_cnt >= NM_I(sbi)->max_nid *
NM_I(sbi)->dirty_nats_ratio / 100;
}
static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
{
return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD;
}
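
For a sense of scale, assuming (hypothetically) a max_nid of about one million and the default dirty_nats_ratio of 10 taken from DEF_DIRTY_NAT_RATIO_THRESHOLD above, excess_dirty_nats() starts returning true once roughly 1,000,000 * 10 / 100 = 100,000 NAT entries are dirty, while excess_cached_nats() caps the cached entries at the flat DEF_NAT_CACHE_THRESHOLD of 100,000 regardless of max_nid.
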
enum mem_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES, /* indicates the cached nat entry */
@ -183,7 +200,7 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
block_addr = (pgoff_t)(nm_i->nat_blkaddr +
(seg_off << sbi->log_blocks_per_seg << 1) +
(block_off & ((1 << sbi->log_blocks_per_seg) - 1)));
(block_off & (sbi->blocks_per_seg - 1)));
if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
block_addr += sbi->blocks_per_seg;
@ -212,6 +229,37 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
f2fs_change_bit(block_off, nm_i->nat_bitmap);
}
static inline nid_t ino_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.ino);
}
static inline nid_t nid_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.nid);
}
static inline unsigned int ofs_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
unsigned flag = le32_to_cpu(rn->footer.flag);
return flag >> OFFSET_BIT_SHIFT;
}
static inline __u64 cpver_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le64_to_cpu(rn->footer.cp_ver);
}
static inline block_t next_blkaddr_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.next_blkaddr);
}
static inline void fill_node_footer(struct page *page, nid_t nid,
nid_t ino, unsigned int ofs, bool reset)
{
@ -242,40 +290,30 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
struct f2fs_node *rn = F2FS_NODE(page);
size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
rn->footer.cp_ver = ckpt->checkpoint_ver;
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
__u64 crc = le32_to_cpu(*((__le32 *)
((unsigned char *)ckpt + crc_offset)));
cp_ver |= (crc << 32);
}
rn->footer.cp_ver = cpu_to_le64(cp_ver);
rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
}
static inline nid_t ino_of_node(struct page *node_page)
static inline bool is_recoverable_dnode(struct page *page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.ino);
}
struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
__u64 cp_ver = cur_cp_version(ckpt);
static inline nid_t nid_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.nid);
}
static inline unsigned int ofs_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
unsigned flag = le32_to_cpu(rn->footer.flag);
return flag >> OFFSET_BIT_SHIFT;
}
static inline unsigned long long cpver_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le64_to_cpu(rn->footer.cp_ver);
}
static inline block_t next_blkaddr_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.next_blkaddr);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
__u64 crc = le32_to_cpu(*((__le32 *)
((unsigned char *)ckpt + crc_offset)));
cp_ver |= (crc << 32);
}
return cpu_to_le64(cp_ver) == cpver_of_node(page);
}
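
Both helpers above rely on the same packing: when CP_CRC_RECOVERY_FLAG is set, the checkpoint CRC is folded into the upper 32 bits of the 64-bit cp_ver written into the node footer. With made-up numbers, a checkpoint version of 0x5 and a checkpoint CRC of 0x89abcdef give a footer value of 0x89abcdef00000005, and is_recoverable_dnode() treats a dnode as recoverable only when its footer carries that exact value.
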
/*
@ -317,17 +355,17 @@ static inline bool IS_DNODE(struct page *node_page)
return true;
}
static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
static inline int set_nid(struct page *p, int off, nid_t nid, bool i)
{
struct f2fs_node *rn = F2FS_NODE(p);
f2fs_wait_on_page_writeback(p, NODE);
f2fs_wait_on_page_writeback(p, NODE, true);
if (i)
rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
else
rn->in.nid[off] = cpu_to_le32(nid);
set_page_dirty(p);
return set_page_dirty(p);
}
static inline nid_t get_nid(struct page *p, int off, bool i)
@ -370,6 +408,21 @@ static inline int is_node(struct page *page, int type)
#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT)
#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT)
static inline int is_inline_node(struct page *page)
{
return PageChecked(page);
}
static inline void set_inline_node(struct page *page)
{
SetPageChecked(page);
}
static inline void clear_inline_node(struct page *page)
{
ClearPageChecked(page);
}
static inline void set_cold_node(struct inode *inode, struct page *page)
{
struct f2fs_node *rn = F2FS_NODE(page);


@ -49,8 +49,9 @@ static struct kmem_cache *fsync_entry_slab;
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
if (sbi->last_valid_block_count + sbi->alloc_valid_block_count
> sbi->user_block_count)
s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);
if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
return false;
return true;
}
@ -67,42 +68,71 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
return NULL;
}
static int recover_dentry(struct inode *inode, struct page *ipage)
static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
struct list_head *head, nid_t ino)
{
struct inode *inode;
struct fsync_inode_entry *entry;
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
entry->inode = inode;
list_add_tail(&entry->list, head);
return entry;
}
static void del_fsync_inode(struct fsync_inode_entry *entry)
{
iput(entry->inode);
list_del(&entry->list);
kmem_cache_free(fsync_entry_slab, entry);
}
static int recover_dentry(struct inode *inode, struct page *ipage,
struct list_head *dir_list)
{
struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
nid_t pino = le32_to_cpu(raw_inode->i_pino);
struct f2fs_dir_entry *de;
struct qstr name;
struct fscrypt_name fname;
struct page *page;
struct inode *dir, *einode;
struct fsync_inode_entry *entry;
int err = 0;
char *name;
dir = f2fs_iget(inode->i_sb, pino);
if (IS_ERR(dir)) {
err = PTR_ERR(dir);
goto out;
entry = get_fsync_inode(dir_list, pino);
if (!entry) {
entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, pino);
if (IS_ERR(entry)) {
dir = ERR_CAST(entry);
err = PTR_ERR(entry);
goto out;
}
}
if (file_enc_name(inode)) {
iput(dir);
return 0;
}
dir = entry->inode;
name.len = le32_to_cpu(raw_inode->i_namelen);
name.name = raw_inode->i_name;
memset(&fname, 0, sizeof(struct fscrypt_name));
fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
fname.disk_name.name = raw_inode->i_name;
if (unlikely(name.len > F2FS_NAME_LEN)) {
if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
WARN_ON(1);
err = -ENAMETOOLONG;
goto out_err;
goto out;
}
retry:
de = f2fs_find_entry(dir, &name, &page);
de = __f2fs_find_entry(dir, &fname, &page);
if (de && inode->i_ino == le32_to_cpu(de->ino))
goto out_unmap_put;
if (de) {
einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
if (IS_ERR(einode)) {
WARN_ON(1);
err = PTR_ERR(einode);
@ -118,29 +148,27 @@ retry:
f2fs_delete_entry(de, page, dir, einode);
iput(einode);
goto retry;
}
err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
if (err)
goto out_err;
if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
iput(dir);
} else if (IS_ERR(page)) {
err = PTR_ERR(page);
} else {
add_dirty_dir_inode(dir);
set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
err = __f2fs_do_add_link(dir, &fname, inode,
inode->i_ino, inode->i_mode);
}
if (err == -ENOMEM)
goto retry;
goto out;
out_unmap_put:
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
out_err:
iput(dir);
out:
if (file_enc_name(inode))
name = "<encrypted>";
else
name = raw_inode->i_name;
f2fs_msg(inode->i_sb, KERN_NOTICE,
"%s: ino = %x, name = %s, dir = %lx, err = %d",
__func__, ino_of_node(ipage), raw_inode->i_name,
__func__, ino_of_node(ipage), name,
IS_ERR(dir) ? 0 : dir->i_ino, err);
return err;
}
@ -151,7 +179,7 @@ static void recover_inode(struct inode *inode, struct page *page)
char *name;
inode->i_mode = le16_to_cpu(raw->i_mode);
i_size_write(inode, le64_to_cpu(raw->i_size));
f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
@ -168,9 +196,34 @@ static void recover_inode(struct inode *inode, struct page *page)
ino_of_node(page), name);
}
static bool is_same_inode(struct inode *inode, struct page *ipage)
{
struct f2fs_inode *ri = F2FS_INODE(ipage);
struct timespec disk;
if (!IS_INODE(ipage))
return true;
disk.tv_sec = le64_to_cpu(ri->i_ctime);
disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
if (timespec_compare(&inode->i_ctime, &disk) > 0)
return false;
disk.tv_sec = le64_to_cpu(ri->i_atime);
disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
if (timespec_compare(&inode->i_atime, &disk) > 0)
return false;
disk.tv_sec = le64_to_cpu(ri->i_mtime);
disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
if (timespec_compare(&inode->i_mtime, &disk) > 0)
return false;
return true;
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg;
struct page *page = NULL;
block_t blkaddr;
@ -180,8 +233,6 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
ra_meta_pages(sbi, blkaddr, 1, META_POR, true);
while (1) {
struct fsync_inode_entry *entry;
@ -190,49 +241,41 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
page = get_tmp_page(sbi, blkaddr);
if (cp_ver != cpver_of_node(page))
if (!is_recoverable_dnode(page))
break;
if (!is_fsync_dnode(page))
goto next;
entry = get_fsync_inode(head, ino_of_node(page));
if (!entry) {
if (entry) {
if (!is_same_inode(entry->inode, page))
goto next;
} else {
if (IS_INODE(page) && is_dent_dnode(page)) {
err = recover_inode_page(sbi, page);
if (err)
break;
}
/* add this fsync inode to the list */
entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
if (!entry) {
err = -ENOMEM;
break;
}
/*
* CP | dnode(F) | inode(DF)
* For this case, we should not give up now.
*/
entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
if (IS_ERR(entry->inode)) {
err = PTR_ERR(entry->inode);
kmem_cache_free(fsync_entry_slab, entry);
entry = add_fsync_inode(sbi, head, ino_of_node(page));
if (IS_ERR(entry)) {
err = PTR_ERR(entry);
if (err == -ENOENT) {
err = 0;
goto next;
}
break;
}
list_add_tail(&entry->list, head);
}
entry->blkaddr = blkaddr;
if (IS_INODE(page)) {
entry->last_inode = blkaddr;
if (is_dent_dnode(page))
entry->last_dentry = blkaddr;
}
if (IS_INODE(page) && is_dent_dnode(page))
entry->last_dentry = blkaddr;
next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
@ -248,11 +291,8 @@ static void destroy_fsync_dnodes(struct list_head *head)
{
struct fsync_inode_entry *entry, *tmp;
list_for_each_entry_safe(entry, tmp, head, list) {
iput(entry->inode);
list_del(&entry->list);
kmem_cache_free(fsync_entry_slab, entry);
}
list_for_each_entry_safe(entry, tmp, head, list)
del_fsync_inode(entry);
}
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
@ -314,15 +354,14 @@ got_it:
if (ino != dn->inode->i_ino) {
/* Deallocate previous index in the node page */
inode = f2fs_iget(sbi->sb, ino);
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode))
return PTR_ERR(inode);
} else {
inode = dn->inode;
}
bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
le16_to_cpu(sum.ofs_in_node);
bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
/*
* if inode page is locked, unlock temporarily, but its reference
@ -357,10 +396,9 @@ truncate_out:
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
struct page *page, block_t blkaddr)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
unsigned int start, end;
struct dnode_of_data dn;
struct node_info ni;
unsigned int start, end;
int err = 0, recovered = 0;
/* step 1: recover xattr */
@ -380,16 +418,21 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
goto out;
/* step 3: recover data indices */
start = start_bidx_of_node(ofs_of_node(page), fi);
end = start + ADDRS_PER_PAGE(page, fi);
start = start_bidx_of_node(ofs_of_node(page), inode);
end = start + ADDRS_PER_PAGE(page, inode);
set_new_dnode(&dn, inode, NULL, NULL, 0);
retry_dn:
err = get_dnode_of_data(&dn, start, ALLOC_NODE);
if (err)
if (err) {
if (err == -ENOMEM) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry_dn;
}
goto out;
}
f2fs_wait_on_page_writeback(dn.node_page, NODE);
f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
get_node_info(sbi, dn.nid, &ni);
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
@ -411,14 +454,16 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
continue;
}
if ((start + 1) << PAGE_SHIFT > i_size_read(inode))
f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT);
/*
* dest is reserved block, invalidate src block
* and then reserve one new block in dnode page.
*/
if (dest == NEW_ADDR) {
truncate_data_blocks_range(&dn, 1);
err = reserve_new_block(&dn);
f2fs_bug_on(sbi, err);
reserve_new_block(&dn);
continue;
}
@ -427,25 +472,33 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (src == NULL_ADDR) {
err = reserve_new_block(&dn);
#ifdef CONFIG_F2FS_FAULT_INJECTION
while (err)
err = reserve_new_block(&dn);
#endif
/* We should not get -ENOSPC */
f2fs_bug_on(sbi, err);
if (err)
goto err;
}
retry_prev:
/* Check the previous node page having this index */
err = check_index_in_prev_nodes(sbi, dest, &dn);
if (err)
if (err) {
if (err == -ENOMEM) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry_prev;
}
goto err;
}
/* write dummy data page */
f2fs_replace_block(sbi, &dn, src, dest,
ni.version, false);
ni.version, false, false);
recovered++;
}
}
if (IS_INODE(dn.node_page))
sync_inode_page(&dn);
copy_node_footer(dn.node_page, page);
fill_node_footer(dn.node_page, dn.nid, ni.ino,
ofs_of_node(page), false);
@ -459,17 +512,16 @@ out:
return err;
}
static int recover_data(struct f2fs_sb_info *sbi,
struct list_head *head, int type)
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
struct list_head *dir_list)
{
unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg;
struct page *page = NULL;
int err = 0;
block_t blkaddr;
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, type);
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
while (1) {
@ -482,12 +534,12 @@ static int recover_data(struct f2fs_sb_info *sbi,
page = get_tmp_page(sbi, blkaddr);
if (cp_ver != cpver_of_node(page)) {
if (!is_recoverable_dnode(page)) {
f2fs_put_page(page, 1);
break;
}
entry = get_fsync_inode(head, ino_of_node(page));
entry = get_fsync_inode(inode_list, ino_of_node(page));
if (!entry)
goto next;
/*
@ -495,10 +547,10 @@ static int recover_data(struct f2fs_sb_info *sbi,
* In this case, we can lose the latest inode(x).
* So, call recover_inode for the inode update.
*/
if (entry->last_inode == blkaddr)
if (IS_INODE(page))
recover_inode(entry->inode, page);
if (entry->last_dentry == blkaddr) {
err = recover_dentry(entry->inode, page);
err = recover_dentry(entry->inode, page, dir_list);
if (err) {
f2fs_put_page(page, 1);
break;
@ -510,11 +562,8 @@ static int recover_data(struct f2fs_sb_info *sbi,
break;
}
if (entry->blkaddr == blkaddr) {
iput(entry->inode);
list_del(&entry->list);
kmem_cache_free(fsync_entry_slab, entry);
}
if (entry->blkaddr == blkaddr)
del_fsync_inode(entry);
next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
@ -525,12 +574,14 @@ next:
return err;
}
int recover_fsync_data(struct f2fs_sb_info *sbi)
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct list_head inode_list;
struct list_head dir_list;
block_t blkaddr;
int err;
int ret = 0;
bool need_writecp = false;
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
@ -539,6 +590,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
return -ENOMEM;
INIT_LIST_HEAD(&inode_list);
INIT_LIST_HEAD(&dir_list);
/* prevent checkpoint */
mutex_lock(&sbi->cp_mutex);
@ -547,25 +599,26 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list);
if (err)
if (err || list_empty(&inode_list))
goto out;
if (list_empty(&inode_list))
if (check_only) {
ret = 1;
goto out;
}
need_writecp = true;
/* step #2: recover data */
err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
err = recover_data(sbi, &inode_list, &dir_list);
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
destroy_fsync_dnodes(&inode_list);
kmem_cache_destroy(fsync_entry_slab);
/* truncate meta pages to be used by the recovery */
truncate_inode_pages_range(META_MAPPING(sbi),
(loff_t)MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);
if (err) {
truncate_inode_pages_final(NODE_MAPPING(sbi));
@ -573,31 +626,20 @@ out:
}
clear_sbi_flag(sbi, SBI_POR_DOING);
if (err) {
bool invalidate = false;
if (err)
set_ckpt_flags(sbi, CP_ERROR_FLAG);
mutex_unlock(&sbi->cp_mutex);
if (discard_next_dnode(sbi, blkaddr))
invalidate = true;
/* let's drop all the directory inodes for clean checkpoint */
destroy_fsync_dnodes(&dir_list);
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META))
sync_meta_pages(sbi, META, LONG_MAX);
/* invalidate temporary meta page */
if (invalidate)
invalidate_mapping_pages(META_MAPPING(sbi),
blkaddr, blkaddr);
set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
mutex_unlock(&sbi->cp_mutex);
} else if (need_writecp) {
if (!err && need_writecp) {
struct cp_control cpc = {
.reason = CP_RECOVERY,
};
mutex_unlock(&sbi->cp_mutex);
write_checkpoint(sbi, &cpc);
} else {
mutex_unlock(&sbi->cp_mutex);
err = write_checkpoint(sbi, &cpc);
}
return err;
kmem_cache_destroy(fsync_entry_slab);
return ret ? ret: err;
}

File diff suppressed because it is too large


@ -16,6 +16,7 @@
#define NULL_SECNO ((unsigned int)(~0))
#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
#define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
@ -158,16 +159,17 @@ struct victim_sel_policy {
};
struct seg_entry {
unsigned short valid_blocks; /* # of valid blocks */
unsigned int type:6; /* segment type like CURSEG_XXX_TYPE */
unsigned int valid_blocks:10; /* # of valid blocks */
unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */
unsigned int padding:6; /* padding */
unsigned char *cur_valid_map; /* validity bitmap of blocks */
/*
* # of valid blocks and the validity bitmap stored in the last
* checkpoint pack. This information is used by the SSR mode.
*/
unsigned short ckpt_valid_blocks;
unsigned char *ckpt_valid_map;
unsigned char *ckpt_valid_map; /* validity bitmap of blocks last cp */
unsigned char *discard_map;
unsigned char type; /* segment type like CURSEG_XXX_TYPE */
unsigned long long mtime; /* modification time of the segment */
};
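
Since an f2fs segment is 2MB, i.e. 512 blocks of 4KB, a 10-bit field is enough for any valid-block count from 0 to 512; valid_blocks, ckpt_valid_blocks, the 6-bit type and the 6-bit padding therefore pack into one 32-bit word, replacing the two unsigned shorts and the unsigned char of the old layout and shrinking each seg_entry slightly across what can be hundreds of thousands of in-memory entries.
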
@ -183,7 +185,7 @@ struct segment_allocation {
* this value is set in the page as private data, which indicates that
* the page is atomically written, and it is in inmem_pages list.
*/
#define ATOMIC_WRITTEN_PAGE 0x0000ffff
#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1)
#define IS_ATOMIC_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
@ -191,6 +193,7 @@ struct segment_allocation {
struct inmem_pages {
struct list_head list;
struct page *page;
block_t old_addr; /* for revoking when fail to commit */
};
struct sit_info {
@ -257,6 +260,8 @@ struct victim_selection {
struct curseg_info {
struct mutex curseg_mutex; /* lock for consistency */
struct f2fs_summary_block *sum_blk; /* cached summary block */
struct rw_semaphore journal_rwsem; /* protect journal area */
struct f2fs_journal *journal; /* cached journal info */
unsigned char alloc_type; /* current allocation type */
unsigned int segno; /* current segment number */
unsigned short next_blkoff; /* next block offset to write */
@ -466,20 +471,27 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi)
{
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
if (test_opt(sbi, LFS))
return false;
return free_sections(sbi) <= (node_secs + 2 * dent_secs +
reserved_sections(sbi) + 1);
}
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
int freed, int needed)
{
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
node_secs += get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
return false;
return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
reserved_sections(sbi));
return (free_sections(sbi) + freed) <=
(node_secs + 2 * dent_secs + reserved_sections(sbi) + needed);
}
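
As a worked example with made-up numbers: if dirty node plus imeta pages span 3 sections, dirty dentry pages span 1 section, reserved_sections() is 6 and freed = needed = 0, the test reduces to free_sections(sbi) <= 3 + 2 * 1 + 6 = 11, so the filesystem is treated as short of space once 11 or fewer sections remain free; while SBI_POR_DOING is set (recovery in flight) the helper always reports enough space.
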
static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
@ -527,6 +539,9 @@ static inline bool need_inplace_update(struct inode *inode)
if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
return false;
if (test_opt(sbi, LFS))
return false;
if (policy & (0x1 << F2FS_IPU_FORCE))
return true;
if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi))
@ -540,7 +555,7 @@ static inline bool need_inplace_update(struct inode *inode)
/* this is only set during fdatasync */
if (policy & (0x1 << F2FS_IPU_FSYNC) &&
is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
is_inode_flag_set(inode, FI_NEED_IPU))
return true;
return false;
@ -573,8 +588,8 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
{
f2fs_bug_on(sbi, blk_addr < SEG0_BLKADDR(sbi)
|| blk_addr >= MAX_BLKADDR(sbi));
BUG_ON(blk_addr < SEG0_BLKADDR(sbi)
|| blk_addr >= MAX_BLKADDR(sbi));
}
/*
@ -702,9 +717,9 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
if (type == DATA)
return sbi->blocks_per_seg;
else if (type == NODE)
return 3 * sbi->blocks_per_seg;
return 8 * sbi->blocks_per_seg;
else if (type == META)
return MAX_BIO_BLOCKS(sbi);
return 8 * MAX_BIO_BLOCKS(sbi);
else
return 0;
}
@ -722,10 +737,8 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
nr_to_write = wbc->nr_to_write;
if (type == DATA)
desired = 4096;
else if (type == NODE)
desired = 3 * max_hw_blocks(sbi);
if (type == NODE)
desired = 2 * max_hw_blocks(sbi);
else
desired = MAX_BIO_BLOCKS(sbi);


@ -13,6 +13,7 @@
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
static LIST_HEAD(f2fs_list);
static DEFINE_SPINLOCK(f2fs_list_lock);
@ -25,14 +26,15 @@ static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
{
if (NM_I(sbi)->fcnt > NAT_ENTRY_PER_BLOCK)
return NM_I(sbi)->fcnt - NAT_ENTRY_PER_BLOCK;
if (NM_I(sbi)->fcnt > MAX_FREE_NIDS)
return NM_I(sbi)->fcnt - MAX_FREE_NIDS;
return 0;
}
static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
{
return sbi->total_ext_tree + atomic_read(&sbi->total_ext_node);
return atomic_read(&sbi->total_zombie_tree) +
atomic_read(&sbi->total_ext_node);
}
unsigned long f2fs_shrink_count(struct shrinker *shrink,

File diff suppressed because it is too large


@ -29,7 +29,8 @@ static inline void __print_last_io(void)
last_io.major, last_io.minor,
last_io.pid, "----------------",
last_io.type,
last_io.fio.rw, last_io.fio.blk_addr,
last_io.fio.rw,
last_io.fio.new_blkaddr,
last_io.len);
memset(&last_io, 0, sizeof(last_io));
}
@ -101,7 +102,8 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush)
last_io.pid == pid &&
last_io.type == __file_type(inode, pid) &&
last_io.fio.rw == fio->rw &&
last_io.fio.blk_addr + last_io.len == fio->blk_addr) {
last_io.fio.new_blkaddr + last_io.len ==
fio->new_blkaddr) {
last_io.len++;
return;
}


@ -151,7 +151,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
return -EINVAL;
F2FS_I(inode)->i_advise |= *(char *)value;
mark_inode_dirty(inode);
f2fs_mark_inode_dirty_sync(inode);
return 0;
}
@ -264,18 +264,20 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
return entry;
}
static void *read_all_xattrs(struct inode *inode, struct page *ipage)
static int read_all_xattrs(struct inode *inode, struct page *ipage,
void **base_addr)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_xattr_header *header;
size_t size = PAGE_SIZE, inline_size = 0;
void *txattr_addr;
int err;
inline_size = inline_xattr_size(inode);
txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
if (!txattr_addr)
return NULL;
return -ENOMEM;
/* read from inline xattr */
if (inline_size) {
@ -286,8 +288,10 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)
inline_addr = inline_xattr_addr(ipage);
} else {
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page))
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
}
inline_addr = inline_xattr_addr(page);
}
memcpy(txattr_addr, inline_addr, inline_size);
@ -301,8 +305,10 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)
/* The inode already has an extended attribute block. */
xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
if (IS_ERR(xpage))
if (IS_ERR(xpage)) {
err = PTR_ERR(xpage);
goto fail;
}
xattr_addr = page_address(xpage);
memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE);
@ -316,10 +322,11 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)
header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
header->h_refcount = cpu_to_le32(1);
}
return txattr_addr;
*base_addr = txattr_addr;
return 0;
fail:
kzfree(txattr_addr);
return NULL;
return err;
}
static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
@ -345,7 +352,8 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
if (ipage) {
inline_addr = inline_xattr_addr(ipage);
f2fs_wait_on_page_writeback(ipage, NODE);
f2fs_wait_on_page_writeback(ipage, NODE, true);
set_page_dirty(ipage);
} else {
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
@ -353,7 +361,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
return PTR_ERR(page);
}
inline_addr = inline_xattr_addr(page);
f2fs_wait_on_page_writeback(page, NODE);
f2fs_wait_on_page_writeback(page, NODE, true);
}
memcpy(inline_addr, txattr_addr, inline_size);
f2fs_put_page(page, 1);
@ -374,7 +382,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
return PTR_ERR(xpage);
}
f2fs_bug_on(sbi, new_nid);
f2fs_wait_on_page_writeback(xpage, NODE);
f2fs_wait_on_page_writeback(xpage, NODE, true);
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
@ -412,9 +420,9 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
base_addr = read_all_xattrs(inode, ipage);
if (!base_addr)
return -ENOMEM;
error = read_all_xattrs(inode, ipage, &base_addr);
if (error)
return error;
entry = __find_xattr(base_addr, index, len, name);
if (IS_XATTR_LAST_ENTRY(entry)) {
@ -448,9 +456,9 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
int error = 0;
size_t rest = buffer_size;
base_addr = read_all_xattrs(inode, NULL);
if (!base_addr)
return -ENOMEM;
error = read_all_xattrs(inode, NULL, &base_addr);
if (error)
return error;
list_for_each_xattr(entry, base_addr) {
const struct xattr_handler *handler =
@ -481,13 +489,12 @@ static int __f2fs_setxattr(struct inode *inode, int index,
const char *name, const void *value, size_t size,
struct page *ipage, int flags)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_xattr_entry *here, *last;
void *base_addr;
int found, newsize;
size_t len;
__u32 new_hsize;
int error = -ENOMEM;
int error = 0;
if (name == NULL)
return -EINVAL;
@ -503,9 +510,9 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (size > MAX_VALUE_LEN(inode))
return -E2BIG;
base_addr = read_all_xattrs(inode, ipage);
if (!base_addr)
goto exit;
error = read_all_xattrs(inode, ipage, &base_addr);
if (error)
return error;
/* find entry with wanted name. */
here = __find_xattr(base_addr, index, len, name);
@ -538,7 +545,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
free = free + ENTRY_SIZE(here);
if (unlikely(free < newsize)) {
error = -ENOSPC;
error = -E2BIG;
goto exit;
}
}
@ -566,7 +573,6 @@ static int __f2fs_setxattr(struct inode *inode, int index,
* Before we come here, old entry is removed.
* We just write new entry.
*/
memset(last, 0, newsize);
last->e_name_index = index;
last->e_name_len = len;
memcpy(last->e_name, name, len);
@ -580,19 +586,17 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (error)
goto exit;
if (is_inode_flag_set(fi, FI_ACL_MODE)) {
inode->i_mode = fi->i_acl_mode;
if (is_inode_flag_set(inode, FI_ACL_MODE)) {
inode->i_mode = F2FS_I(inode)->i_acl_mode;
inode->i_ctime = CURRENT_TIME;
clear_inode_flag(fi, FI_ACL_MODE);
clear_inode_flag(inode, FI_ACL_MODE);
}
if (index == F2FS_XATTR_INDEX_ENCRYPTION &&
!strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT))
f2fs_set_encrypted_inode(inode);
if (ipage)
update_inode(inode, ipage);
else
update_inode_page(inode);
f2fs_mark_inode_dirty_sync(inode);
if (!error && S_ISDIR(inode->i_mode))
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
exit:
kzfree(base_addr);
return error;
@ -609,7 +613,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
if (ipage)
return __f2fs_setxattr(inode, index, name, value,
size, ipage, flags);
f2fs_balance_fs(sbi);
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
/* protect xattr_ver */
@ -618,5 +622,6 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
up_write(&F2FS_I(inode)->i_sem);
f2fs_unlock_op(sbi);
f2fs_update_time(sbi, REQ_TIME);
return err;
}

View file

@ -126,7 +126,8 @@ extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t);
#define f2fs_xattr_handlers NULL
static inline int f2fs_setxattr(struct inode *inode, int index,
const char *name, const void *value, size_t size, int flags)
const char *name, const void *value, size_t size,
struct page *page, int flags)
{
return -EOPNOTSUPP;
}

View file

@ -229,6 +229,7 @@ struct dentry_operations {
#define DCACHE_MAY_FREE 0x00800000
#define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */
#define DCACHE_OP_SELECT_INODE 0x02000000 /* Unioned entry: dcache op selects inode */
#define DCACHE_ENCRYPTED_WITH_KEY 0x04000000 /* dir is encrypted with a valid key */
#define DCACHE_OP_REAL 0x08000000
extern seqlock_t rename_lock;

View file

@ -21,7 +21,7 @@
#define F2FS_BLKSIZE 4096 /* support only 4KB block */
#define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */
#define F2FS_MAX_EXTENSION 64 /* # of extension entries */
#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)
#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS)
#define NULL_ADDR ((block_t)0) /* used as block_t addresses */
#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */
@ -51,6 +51,7 @@
#define MAX_ACTIVE_DATA_LOGS 8
#define VERSION_LEN 256
#define MAX_VOLUME_NAME 512
/*
* For superblock
@ -84,7 +85,7 @@ struct f2fs_super_block {
__le32 node_ino; /* node inode number */
__le32 meta_ino; /* meta inode number */
__u8 uuid[16]; /* 128-bit uuid for volume */
__le16 volume_name[512]; /* volume name */
__le16 volume_name[MAX_VOLUME_NAME]; /* volume name */
__le32 extension_count; /* # of extensions below */
__u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */
__le32 cp_payload;
@ -99,6 +100,7 @@ struct f2fs_super_block {
/*
* For checkpoint
*/
#define CP_CRC_RECOVERY_FLAG 0x00000040
#define CP_FASTBOOT_FLAG 0x00000020
#define CP_FSCK_FLAG 0x00000010
#define CP_ERROR_FLAG 0x00000008
@ -169,12 +171,12 @@ struct f2fs_extent {
#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */
#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */
#define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */
#define ADDRS_PER_INODE(fi) addrs_per_inode(fi)
#define ADDRS_PER_INODE(inode) addrs_per_inode(inode)
#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */
#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */
#define ADDRS_PER_PAGE(page, fi) \
(IS_INODE(page) ? ADDRS_PER_INODE(fi) : ADDRS_PER_BLOCK)
#define ADDRS_PER_PAGE(page, inode) \
(IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK)
#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1)
#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2)
@ -261,7 +263,7 @@ struct f2fs_node {
/*
* For NAT entries
*/
#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry))
#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
struct f2fs_nat_entry {
__u8 version; /* latest version of cached nat entry */
@ -281,7 +283,7 @@ struct f2fs_nat_block {
* Not allow to change this.
*/
#define SIT_VBLOCK_MAP_SIZE 64
#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry))
#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry))
/*
* F2FS uses 4 bytes to represent block address. As a result, supported size of
@ -350,7 +352,7 @@ struct f2fs_summary {
struct summary_footer {
unsigned char entry_type; /* SUM_TYPE_XXX */
__u32 check_sum; /* summary checksum */
__le32 check_sum; /* summary checksum */
} __packed;
#define SUM_JOURNAL_SIZE (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\
@ -363,6 +365,12 @@ struct summary_footer {
sizeof(struct sit_journal_entry))
#define SIT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\
sizeof(struct sit_journal_entry))
/* Reserved area should make size of f2fs_extra_info equal to
* that of nat_journal and sit_journal.
*/
#define EXTRA_INFO_RESERVED (SUM_JOURNAL_SIZE - 2 - 8)
/*
* frequently updated NAT/SIT entries can be stored in the spare area in
* summary blocks
@ -392,18 +400,28 @@ struct sit_journal {
__u8 reserved[SIT_JOURNAL_RESERVED];
} __packed;
/* 4KB-sized summary block structure */
struct f2fs_summary_block {
struct f2fs_summary entries[ENTRIES_IN_SUM];
struct f2fs_extra_info {
__le64 kbytes_written;
__u8 reserved[EXTRA_INFO_RESERVED];
} __packed;
struct f2fs_journal {
union {
__le16 n_nats;
__le16 n_sits;
};
/* spare area is used by NAT or SIT journals */
/* spare area is used by NAT or SIT journals or extra info */
union {
struct nat_journal nat_j;
struct sit_journal sit_j;
struct f2fs_extra_info info;
};
} __packed;
/* 4KB-sized summary block structure */
struct f2fs_summary_block {
struct f2fs_summary entries[ENTRIES_IN_SUM];
struct f2fs_journal journal;
struct summary_footer footer;
} __packed;
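
As the comment on EXTRA_INFO_RESERVED above notes, every member of the journal union is padded so that, together with the two-byte n_nats/n_sits count, it fills exactly SUM_JOURNAL_SIZE. A stand-alone sketch of that arithmetic follows; the constants are redefined locally for illustration, and several of them (512 entries per summary block, the 7-byte summary, the 13-byte NAT and 78-byte SIT journal entries) are assumed from parts of the header not shown in this hunk.

/* Hypothetical stand-alone check of the journal sizing described above.
 * Constant names mirror the kernel macros but are redefined here. */
#include <assert.h>
#include <stdio.h>

#define F2FS_BLKSIZE        4096
#define ENTRIES_IN_SUM      512
#define SUMMARY_SIZE        7            /* sizeof(struct f2fs_summary), assumed */
#define SUM_FOOTER_SIZE     5            /* entry_type byte + __le32 checksum */
#define SUM_ENTRY_SIZE      (SUMMARY_SIZE * ENTRIES_IN_SUM)
#define SUM_JOURNAL_SIZE    (F2FS_BLKSIZE - SUM_FOOTER_SIZE - SUM_ENTRY_SIZE)

#define NAT_JENTRY_SIZE     13           /* __le32 nid + 9-byte f2fs_nat_entry, assumed */
#define SIT_JENTRY_SIZE     78           /* __le32 segno + 74-byte f2fs_sit_entry, assumed */

int main(void)
{
	int nat_entries  = (SUM_JOURNAL_SIZE - 2) / NAT_JENTRY_SIZE;
	int nat_reserved = (SUM_JOURNAL_SIZE - 2) % NAT_JENTRY_SIZE;
	int sit_entries  = (SUM_JOURNAL_SIZE - 2) / SIT_JENTRY_SIZE;
	int sit_reserved = (SUM_JOURNAL_SIZE - 2) % SIT_JENTRY_SIZE;
	int extra_reserved = SUM_JOURNAL_SIZE - 2 - 8;   /* EXTRA_INFO_RESERVED */

	/* each union member plus the 2-byte count spans the same journal area */
	assert(2 + nat_entries * NAT_JENTRY_SIZE + nat_reserved == SUM_JOURNAL_SIZE);
	assert(2 + sit_entries * SIT_JENTRY_SIZE + sit_reserved == SUM_JOURNAL_SIZE);
	assert(2 + 8 + extra_reserved == SUM_JOURNAL_SIZE);

	printf("journal payload = %d bytes (%d NAT or %d SIT entries)\n",
	       SUM_JOURNAL_SIZE - 2, nat_entries, sit_entries);
	return 0;
}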
@ -497,4 +515,6 @@ enum {
F2FS_FT_MAX
};
#define S_SHIFT 12
#endif /* _LINUX_F2FS_FS_H */

View file

@ -52,6 +52,8 @@ struct swap_info_struct;
struct seq_file;
struct workqueue_struct;
struct iov_iter;
struct fscrypt_info;
struct fscrypt_operations;
extern void __init inode_init(void);
extern void __init inode_init_early(void);
@ -677,6 +679,9 @@ struct inode {
struct hlist_head i_fsnotify_marks;
#endif
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
struct fscrypt_info *i_crypt_info;
#endif
void *i_private; /* fs or device private pointer */
};
@ -1337,6 +1342,8 @@ struct super_block {
#endif
const struct xattr_handler **s_xattr;
const struct fscrypt_operations *s_cop;
struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */
struct list_head s_mounts; /* list of mounts; _not_ for fs use */
struct block_device *s_bdev;

include/linux/fscrypto.h (new file, 435 lines)
View file

@ -0,0 +1,435 @@
/*
* General per-file encryption definition
*
* Copyright (C) 2015, Google, Inc.
*
* Written by Michael Halcrow, 2015.
* Modified by Jaegeuk Kim, 2015.
*/
#ifndef _LINUX_FSCRYPTO_H
#define _LINUX_FSCRYPTO_H
#include <linux/key.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/bio.h>
#include <linux/dcache.h>
#include <crypto/skcipher.h>
#include <uapi/linux/fs.h>
#define FS_KEY_DERIVATION_NONCE_SIZE 16
#define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1
#define FS_POLICY_FLAGS_PAD_4 0x00
#define FS_POLICY_FLAGS_PAD_8 0x01
#define FS_POLICY_FLAGS_PAD_16 0x02
#define FS_POLICY_FLAGS_PAD_32 0x03
#define FS_POLICY_FLAGS_PAD_MASK 0x03
#define FS_POLICY_FLAGS_VALID 0x03
/* Encryption algorithms */
#define FS_ENCRYPTION_MODE_INVALID 0
#define FS_ENCRYPTION_MODE_AES_256_XTS 1
#define FS_ENCRYPTION_MODE_AES_256_GCM 2
#define FS_ENCRYPTION_MODE_AES_256_CBC 3
#define FS_ENCRYPTION_MODE_AES_256_CTS 4
/**
* Encryption context for inode
*
* Protector format:
* 1 byte: Protector format (1 = this version)
* 1 byte: File contents encryption mode
* 1 byte: File names encryption mode
* 1 byte: Flags
* 8 bytes: Master Key descriptor
* 16 bytes: Encryption Key derivation nonce
*/
struct fscrypt_context {
u8 format;
u8 contents_encryption_mode;
u8 filenames_encryption_mode;
u8 flags;
u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
} __packed;
/* Encryption parameters */
#define FS_XTS_TWEAK_SIZE 16
#define FS_AES_128_ECB_KEY_SIZE 16
#define FS_AES_256_GCM_KEY_SIZE 32
#define FS_AES_256_CBC_KEY_SIZE 32
#define FS_AES_256_CTS_KEY_SIZE 32
#define FS_AES_256_XTS_KEY_SIZE 64
#define FS_MAX_KEY_SIZE 64
#define FS_KEY_DESC_PREFIX "fscrypt:"
#define FS_KEY_DESC_PREFIX_SIZE 8
/* This is passed in from userspace into the kernel keyring */
struct fscrypt_key {
u32 mode;
u8 raw[FS_MAX_KEY_SIZE];
u32 size;
} __packed;
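
As the comment above says, this payload is handed to the kernel keyring from user space. A minimal, hedged sketch of how such a master key might be loaded is shown below; the struct layout is copied from this header, while the "fscrypt:" descriptor string, the zeroed key bytes, and the use of the session keyring are illustrative assumptions only (a real key must come from a proper KDF).

/* Hypothetical user-space sketch: load a master key for the payload above. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/keyctl.h>        /* KEY_SPEC_SESSION_KEYRING */

#define FS_MAX_KEY_SIZE 64

struct fscrypt_key {             /* mirrors the definition above */
	uint32_t mode;
	uint8_t  raw[FS_MAX_KEY_SIZE];
	uint32_t size;
} __attribute__((packed));

int main(void)
{
	struct fscrypt_key key;
	long id;

	memset(&key, 0, sizeof(key));
	key.size = FS_MAX_KEY_SIZE;  /* 64 bytes for AES-256-XTS */
	/* key.raw would normally be derived from a passphrase; left zeroed here */

	/* description = FS_KEY_DESC_PREFIX + 16 hex chars of the descriptor */
	id = syscall(__NR_add_key, "logon", "fscrypt:0123456789abcdef",
		     &key, sizeof(key), KEY_SPEC_SESSION_KEYRING);
	if (id < 0)
		perror("add_key");
	else
		printf("key id %ld\n", id);
	return 0;
}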
struct fscrypt_info {
u8 ci_data_mode;
u8 ci_filename_mode;
u8 ci_flags;
struct crypto_skcipher *ci_ctfm;
struct key *ci_keyring_key;
u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
};
#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
#define FS_WRITE_PATH_FL 0x00000002
struct fscrypt_ctx {
union {
struct {
struct page *bounce_page; /* Ciphertext page */
struct page *control_page; /* Original page */
} w;
struct {
struct bio *bio;
struct work_struct work;
} r;
struct list_head free_list; /* Free list */
};
u8 flags; /* Flags */
u8 mode; /* Encryption mode for tfm */
};
struct fscrypt_completion_result {
struct completion completion;
int res;
};
#define DECLARE_FS_COMPLETION_RESULT(ecr) \
struct fscrypt_completion_result ecr = { \
COMPLETION_INITIALIZER((ecr).completion), 0 }
static inline int fscrypt_key_size(int mode)
{
switch (mode) {
case FS_ENCRYPTION_MODE_AES_256_XTS:
return FS_AES_256_XTS_KEY_SIZE;
case FS_ENCRYPTION_MODE_AES_256_GCM:
return FS_AES_256_GCM_KEY_SIZE;
case FS_ENCRYPTION_MODE_AES_256_CBC:
return FS_AES_256_CBC_KEY_SIZE;
case FS_ENCRYPTION_MODE_AES_256_CTS:
return FS_AES_256_CTS_KEY_SIZE;
default:
BUG();
}
return 0;
}
#define FS_FNAME_NUM_SCATTER_ENTRIES 4
#define FS_CRYPTO_BLOCK_SIZE 16
#define FS_FNAME_CRYPTO_DIGEST_SIZE 32
/**
* For encrypted symlinks, the ciphertext length is stored at the beginning
* of the string in little-endian format.
*/
struct fscrypt_symlink_data {
__le16 len;
char encrypted_path[1];
} __packed;
/**
* This function is used to calculate the disk space required to
* store a filename of length l in encrypted symlink format.
*/
static inline u32 fscrypt_symlink_data_len(u32 l)
{
if (l < FS_CRYPTO_BLOCK_SIZE)
l = FS_CRYPTO_BLOCK_SIZE;
return (l + sizeof(struct fscrypt_symlink_data) - 1);
}
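
For example, a five-byte name is first padded up to the 16-byte FS_CRYPTO_BLOCK_SIZE, so fscrypt_symlink_data_len(5) = 16 + sizeof(struct fscrypt_symlink_data) - 1 = 18 bytes of on-disk space (assuming the packed struct above is 3 bytes: the __le16 length plus one array byte).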
struct fscrypt_str {
unsigned char *name;
u32 len;
};
struct fscrypt_name {
const struct qstr *usr_fname;
struct fscrypt_str disk_name;
u32 hash;
u32 minor_hash;
struct fscrypt_str crypto_buf;
};
#define FSTR_INIT(n, l) { .name = n, .len = l }
#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len)
#define fname_name(p) ((p)->disk_name.name)
#define fname_len(p) ((p)->disk_name.len)
/*
 * crypto operations for filesystems
*/
struct fscrypt_operations {
int (*get_context)(struct inode *, void *, size_t);
int (*key_prefix)(struct inode *, u8 **);
int (*prepare_context)(struct inode *);
int (*set_context)(struct inode *, const void *, size_t, void *);
int (*dummy_context)(struct inode *);
bool (*is_encrypted)(struct inode *);
bool (*empty_dir)(struct inode *);
unsigned (*max_namelen)(struct inode *);
};
static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
{
if (inode->i_sb->s_cop->dummy_context &&
inode->i_sb->s_cop->dummy_context(inode))
return true;
return false;
}
static inline bool fscrypt_valid_contents_enc_mode(u32 mode)
{
return (mode == FS_ENCRYPTION_MODE_AES_256_XTS);
}
static inline bool fscrypt_valid_filenames_enc_mode(u32 mode)
{
return (mode == FS_ENCRYPTION_MODE_AES_256_CTS);
}
static inline u32 fscrypt_validate_encryption_key_size(u32 mode, u32 size)
{
if (size == fscrypt_key_size(mode))
return size;
return 0;
}
static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
{
if (str->len == 1 && str->name[0] == '.')
return true;
if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
return true;
return false;
}
static inline struct page *fscrypt_control_page(struct page *page)
{
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
#else
WARN_ON_ONCE(1);
return ERR_PTR(-EINVAL);
#endif
}
static inline int fscrypt_has_encryption_key(struct inode *inode)
{
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
return (inode->i_crypt_info != NULL);
#else
return 0;
#endif
}
static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
{
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY;
spin_unlock(&dentry->d_lock);
#endif
}
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
extern const struct dentry_operations fscrypt_d_ops;
#endif
static inline void fscrypt_set_d_op(struct dentry *dentry)
{
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
d_set_d_op(dentry, &fscrypt_d_ops);
#endif
}
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
/* crypto.c */
extern struct kmem_cache *fscrypt_info_cachep;
int fscrypt_initialize(void);
extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *, gfp_t);
extern void fscrypt_release_ctx(struct fscrypt_ctx *);
extern struct page *fscrypt_encrypt_page(struct inode *, struct page *, gfp_t);
extern int fscrypt_decrypt_page(struct page *);
extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
extern void fscrypt_pullback_bio_page(struct page **, bool);
extern void fscrypt_restore_control_page(struct page *);
extern int fscrypt_zeroout_range(struct inode *, pgoff_t, sector_t,
unsigned int);
/* policy.c */
extern int fscrypt_process_policy(struct file *, const struct fscrypt_policy *);
extern int fscrypt_get_policy(struct inode *, struct fscrypt_policy *);
extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
extern int fscrypt_inherit_context(struct inode *, struct inode *,
void *, bool);
/* keyinfo.c */
extern int get_crypt_info(struct inode *);
extern int fscrypt_get_encryption_info(struct inode *);
extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *);
/* fname.c */
extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
int lookup, struct fscrypt_name *);
extern void fscrypt_free_filename(struct fscrypt_name *);
extern u32 fscrypt_fname_encrypted_size(struct inode *, u32);
extern int fscrypt_fname_alloc_buffer(struct inode *, u32,
struct fscrypt_str *);
extern void fscrypt_fname_free_buffer(struct fscrypt_str *);
extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32,
const struct fscrypt_str *, struct fscrypt_str *);
extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *,
struct fscrypt_str *);
#endif
/* crypto.c */
static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i,
gfp_t f)
{
return ERR_PTR(-EOPNOTSUPP);
}
static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c)
{
return;
}
static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i,
struct page *p, gfp_t f)
{
return ERR_PTR(-EOPNOTSUPP);
}
static inline int fscrypt_notsupp_decrypt_page(struct page *p)
{
return -EOPNOTSUPP;
}
static inline void fscrypt_notsupp_decrypt_bio_pages(struct fscrypt_ctx *c,
struct bio *b)
{
return;
}
static inline void fscrypt_notsupp_pullback_bio_page(struct page **p, bool b)
{
return;
}
static inline void fscrypt_notsupp_restore_control_page(struct page *p)
{
return;
}
static inline int fscrypt_notsupp_zeroout_range(struct inode *i, pgoff_t p,
sector_t s, unsigned int f)
{
return -EOPNOTSUPP;
}
/* policy.c */
static inline int fscrypt_notsupp_process_policy(struct file *f,
const struct fscrypt_policy *p)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_notsupp_get_policy(struct inode *i,
struct fscrypt_policy *p)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_notsupp_has_permitted_context(struct inode *p,
struct inode *i)
{
return 0;
}
static inline int fscrypt_notsupp_inherit_context(struct inode *p,
struct inode *i, void *v, bool b)
{
return -EOPNOTSUPP;
}
/* keyinfo.c */
static inline int fscrypt_notsupp_get_encryption_info(struct inode *i)
{
return -EOPNOTSUPP;
}
static inline void fscrypt_notsupp_put_encryption_info(struct inode *i,
struct fscrypt_info *f)
{
return;
}
/* fname.c */
static inline int fscrypt_notsupp_setup_filename(struct inode *dir,
const struct qstr *iname,
int lookup, struct fscrypt_name *fname)
{
if (dir->i_sb->s_cop->is_encrypted(dir))
return -EOPNOTSUPP;
memset(fname, 0, sizeof(struct fscrypt_name));
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *)iname->name;
fname->disk_name.len = iname->len;
return 0;
}
static inline void fscrypt_notsupp_free_filename(struct fscrypt_name *fname)
{
return;
}
static inline u32 fscrypt_notsupp_fname_encrypted_size(struct inode *i, u32 s)
{
/* never happens */
WARN_ON(1);
return 0;
}
static inline int fscrypt_notsupp_fname_alloc_buffer(struct inode *inode,
u32 ilen, struct fscrypt_str *crypto_str)
{
return -EOPNOTSUPP;
}
static inline void fscrypt_notsupp_fname_free_buffer(struct fscrypt_str *c)
{
return;
}
static inline int fscrypt_notsupp_fname_disk_to_usr(struct inode *inode,
u32 hash, u32 minor_hash,
const struct fscrypt_str *iname,
struct fscrypt_str *oname)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_notsupp_fname_usr_to_disk(struct inode *inode,
const struct qstr *iname,
struct fscrypt_str *oname)
{
return -EOPNOTSUPP;
}
#endif /* _LINUX_FSCRYPTO_H */

View file

@ -52,6 +52,7 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
{ META_FLUSH, "META_FLUSH" }, \
{ INMEM, "INMEM" }, \
{ INMEM_DROP, "INMEM_DROP" }, \
{ INMEM_REVOKE, "INMEM_REVOKE" }, \
{ IPU, "IN-PLACE" }, \
{ OPU, "OUT-OF-PLACE" })
@ -693,28 +694,32 @@ TRACE_EVENT(f2fs_direct_IO_exit,
__entry->ret)
);
TRACE_EVENT(f2fs_reserve_new_block,
TRACE_EVENT(f2fs_reserve_new_blocks,
TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node),
TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node,
blkcnt_t count),
TP_ARGS(inode, nid, ofs_in_node),
TP_ARGS(inode, nid, ofs_in_node, count),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(nid_t, nid)
__field(unsigned int, ofs_in_node)
__field(blkcnt_t, count)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->nid = nid;
__entry->ofs_in_node = ofs_in_node;
__entry->count = count;
),
TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u",
TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u, count = %llu",
show_dev(__entry),
(unsigned int)__entry->nid,
__entry->ofs_in_node)
__entry->ofs_in_node,
(unsigned long long)__entry->count)
);
DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
@ -727,7 +732,8 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
__field(dev_t, dev)
__field(ino_t, ino)
__field(pgoff_t, index)
__field(block_t, blkaddr)
__field(block_t, old_blkaddr)
__field(block_t, new_blkaddr)
__field(int, rw)
__field(int, type)
),
@ -736,16 +742,18 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
__entry->dev = page->mapping->host->i_sb->s_dev;
__entry->ino = page->mapping->host->i_ino;
__entry->index = page->index;
__entry->blkaddr = fio->blk_addr;
__entry->old_blkaddr = fio->old_blkaddr;
__entry->new_blkaddr = fio->new_blkaddr;
__entry->rw = fio->rw;
__entry->type = fio->type;
),
TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, "
"blkaddr = 0x%llx, rw = %s%s, type = %s",
"oldaddr = 0x%llx, newaddr = 0x%llx rw = %s%s, type = %s",
show_dev_ino(__entry),
(unsigned long)__entry->index,
(unsigned long long)__entry->blkaddr,
(unsigned long long)__entry->old_blkaddr,
(unsigned long long)__entry->new_blkaddr,
show_bio_type(__entry->rw),
show_block_type(__entry->type))
);
@ -1265,6 +1273,44 @@ TRACE_EVENT(f2fs_destroy_extent_tree,
__entry->node_cnt)
);
DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes,
TP_PROTO(struct super_block *sb, int type, s64 count),
TP_ARGS(sb, type, count),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(int, type)
__field(s64, count)
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->type = type;
__entry->count = count;
),
TP_printk("dev = (%d,%d), %s, dirty count = %lld",
show_dev(__entry),
show_file_type(__entry->type),
__entry->count)
);
DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_enter,
TP_PROTO(struct super_block *sb, int type, s64 count),
TP_ARGS(sb, type, count)
);
DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit,
TP_PROTO(struct super_block *sb, int type, s64 count),
TP_ARGS(sb, type, count)
);
#endif /* _TRACE_F2FS_H */
/* This part must be outside protection */

View file

@ -172,6 +172,24 @@ struct inodes_stat_t {
#define FS_IOC32_GETVERSION _IOR('v', 1, int)
#define FS_IOC32_SETVERSION _IOW('v', 2, int)
/*
* File system encryption support
*/
/* Policy provided via an ioctl on the topmost directory */
#define FS_KEY_DESCRIPTOR_SIZE 8
struct fscrypt_policy {
__u8 version;
__u8 contents_encryption_mode;
__u8 filenames_encryption_mode;
__u8 flags;
__u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
} __packed;
#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy)
#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy)
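
These ioctls are the user-space entry point for the per-file encryption policy handled by fscrypto.h above. A minimal sketch is shown below; the definitions are copied locally so it builds against an unpatched libc, and the mount path and master key descriptor are purely illustrative (the matching key must already be in the keyring, and the target directory must be empty).

/* Hypothetical user-space sketch: apply an AES-256-XTS/CTS policy to a directory. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define FS_KEY_DESCRIPTOR_SIZE 8

struct fscrypt_policy {          /* mirrors the uapi struct above */
	unsigned char version;
	unsigned char contents_encryption_mode;
	unsigned char filenames_encryption_mode;
	unsigned char flags;
	unsigned char master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
} __attribute__((packed));

#define FS_IOC_SET_ENCRYPTION_POLICY    _IOR('f', 19, struct fscrypt_policy)

/* mode/flag values as defined in fscrypto.h above */
#define FS_ENCRYPTION_MODE_AES_256_XTS  1
#define FS_ENCRYPTION_MODE_AES_256_CTS  4
#define FS_POLICY_FLAGS_PAD_4           0x00

int main(void)
{
	struct fscrypt_policy policy;
	int fd = open("/mnt/f2fs/vault", O_RDONLY);   /* hypothetical path */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&policy, 0, sizeof(policy));
	policy.version = 0;
	policy.contents_encryption_mode  = FS_ENCRYPTION_MODE_AES_256_XTS;
	policy.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
	policy.flags = FS_POLICY_FLAGS_PAD_4;
	memcpy(policy.master_key_descriptor, "\x01\x23\x45\x67\x89\xab\xcd\xef",
	       FS_KEY_DESCRIPTOR_SIZE);

	if (ioctl(fd, FS_IOC_SET_ENCRYPTION_POLICY, &policy) < 0)
		perror("FS_IOC_SET_ENCRYPTION_POLICY");

	close(fd);
	return 0;
}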
/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
*/