This series reworks our current object cache shrinking infrastructure in two main ways: * Noticing that a lot of users copy and paste their own version of LRU lists for objects, we put some effort into providing a generic version. It is modeled after the filesystem users: dentries, inodes, and xfs (for various tasks), but we expect that other users could benefit in the near future with little or no modification. Let us know if you have any issues. * The underlying list_lru being proposed automatically and transparently keeps the elements in per-node lists, and is able to manipulate the node lists individually. Given this infrastructure, we are able to modify the up-to-now hammer called shrink_slab to proceed with node-reclaim instead of always searching memory from all over like it has been doing. Per-node lru lists are also expected to lead to less contention in the lru locks on multi-node scans, since we are now no longer fighting for a global lock. The locks usually disappear from the profilers with this change. Although we have no official benchmarks for this version - be our guest to independently evaluate this - earlier versions of this series were performance tested (details at http://permalink.gmane.org/gmane.linux.kernel.mm/100537), showing no visible performance regressions and better qualitative behavior on NUMA machines. With this infrastructure in place, we can use the list_lru entry point to provide memcg isolation and per-memcg targeted reclaim. Historically, those two pieces of work have been posted together. This version presents only the infrastructure work, deferring the memcg work for a later time, so we can focus on getting this part tested. 
You can see more about the history of such work at http://lwn.net/Articles/552769/ Dave Chinner (18): dcache: convert dentry_stat.nr_unused to per-cpu counters dentry: move to per-sb LRU locks dcache: remove dentries from LRU before putting on dispose list mm: new shrinker API shrinker: convert superblock shrinkers to new API list: add a new LRU list type inode: convert inode lru list to generic lru list code. dcache: convert to use new lru list infrastructure list_lru: per-node list infrastructure shrinker: add node awareness fs: convert inode and dentry shrinking to be node aware xfs: convert buftarg LRU to generic code xfs: rework buffer dispose list tracking xfs: convert dquot cache lru to list_lru fs: convert fs shrinkers to new scan/count API drivers: convert shrinkers to new count/scan API shrinker: convert remaining shrinkers to count/scan API shrinker: Kill old ->shrink API. Glauber Costa (7): fs: bump inode and dentry counters to long super: fix calculation of shrinkable objects for small numbers list_lru: per-node API vmscan: per-node deferred work i915: bail out earlier when shrinker cannot acquire mutex hugepage: convert huge zero page shrinker to new shrinker API list_lru: dynamically adjust node arrays This patch: There are situations in very large machines in which we can have a large quantity of dirty inodes, unused dentries, etc. This is particularly true when umounting a filesystem, since every live object will eventually be discarded. Dave Chinner reported a problem with this while experimenting with the shrinker revamp patchset. So we believe it is time for a change. This patch just converts the int counters to longs. Machines where it matters should have a big long anyway. 
Signed-off-by: Glauber Costa <glommer@openvz.org> Cc: Dave Chinner <dchinner@redhat.com> Cc: "Theodore Ts'o" <tytso@mit.edu> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: Arve Hjønnevåg <arve@android.com> Cc: Carlos Maiolino <cmaiolino@redhat.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Chuck Lever <chuck.lever@oracle.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Dave Chinner <dchinner@redhat.com> Cc: David Rientjes <rientjes@google.com> Cc: Gleb Natapov <gleb@redhat.com> Cc: Greg Thelen <gthelen@google.com> Cc: J. Bruce Fields <bfields@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Jerome Glisse <jglisse@redhat.com> Cc: John Stultz <john.stultz@linaro.org> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Kent Overstreet <koverstreet@google.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Thomas Hellstrom <thellstrom@vmware.com> Cc: Trond Myklebust <Trond.Myklebust@netapp.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
398 lines
12 KiB
C
398 lines
12 KiB
C
#ifndef __LINUX_DCACHE_H
|
|
#define __LINUX_DCACHE_H
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/list.h>
|
|
#include <linux/rculist.h>
|
|
#include <linux/rculist_bl.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/seqlock.h>
|
|
#include <linux/cache.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/lockref.h>
|
|
|
|
struct nameidata;
|
|
struct path;
|
|
struct vfsmount;
|
|
|
|
/*
|
|
* linux/include/linux/dcache.h
|
|
*
|
|
* Dirent cache data structures
|
|
*
|
|
* (C) Copyright 1997 Thomas Schoebel-Theuer,
|
|
* with heavy changes by Linus Torvalds
|
|
*/
|
|
|
|
#define IS_ROOT(x) ((x) == (x)->d_parent)
|
|
|
|
/* The hash is always the low bits of hash_len */
|
|
#ifdef __LITTLE_ENDIAN
|
|
#define HASH_LEN_DECLARE u32 hash; u32 len;
|
|
#else
|
|
#define HASH_LEN_DECLARE u32 len; u32 hash;
|
|
#endif
|
|
|
|
/*
|
|
* "quick string" -- eases parameter passing, but more importantly
|
|
* saves "metadata" about the string (ie length and the hash).
|
|
*
|
|
* hash comes first so it snuggles against d_parent in the
|
|
* dentry.
|
|
*/
|
|
struct qstr {
|
|
union {
|
|
struct {
|
|
HASH_LEN_DECLARE;
|
|
};
|
|
u64 hash_len;
|
|
};
|
|
const unsigned char *name;
|
|
};
|
|
|
|
#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
|
|
#define hashlen_hash(hashlen) ((u32) (hashlen))
|
|
#define hashlen_len(hashlen) ((u32)((hashlen) >> 32))
|
|
|
|
/*
 * Dentry cache statistics.
 *
 * Every member is a long rather than an int so that the counters keep
 * working on very large machines with huge numbers of cached objects.
 */
struct dentry_stat_t {
	long nr_dentry;		/* presumably: total dentries -- confirm in fs/dcache.c */
	long nr_unused;		/* presumably: dentries not in use -- confirm in fs/dcache.c */
	long age_limit;		/* age in seconds */
	long want_pages;	/* pages requested by system */
	long dummy[2];		/* NOTE(review): purpose not visible here; keep for layout */
};
|
|
extern struct dentry_stat_t dentry_stat;
|
|
|
|
/* Name hashing routines. Initial hash value */
|
|
/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
|
|
#define init_name_hash() 0
|
|
|
|
/*
 * partial hash update function. Assume roughly 4 bits per character
 *
 * Folds one more character @c into the running hash @prevhash and
 * returns the new running hash.  Arithmetic is done in unsigned long,
 * so it wraps rather than overflowing.
 */
static inline unsigned long
partial_name_hash(unsigned long c, unsigned long prevhash)
{
	return (prevhash + (c << 4) + (c >> 4)) * 11;
}
|
|
|
|
/*
 * Finally: cut down the number of bits to an int value (and try to avoid
 * losing bits)
 *
 * The running hash is truncated to unsigned int; the result is returned
 * widened back to unsigned long, so any bits above the width of
 * unsigned int are always zero.
 */
static inline unsigned long end_name_hash(unsigned long hash)
{
	return (unsigned int) hash;
}
|
|
|
|
/* Compute the hash for a name string. */
|
|
extern unsigned int full_name_hash(const unsigned char *, unsigned int);
|
|
|
|
/*
|
|
* Try to keep struct dentry aligned on 64 byte cachelines (this will
|
|
* give reasonable cacheline footprint with larger lines without the
|
|
* large memory footprint increase).
|
|
*/
|
|
#ifdef CONFIG_64BIT
|
|
# define DNAME_INLINE_LEN 32 /* 192 bytes */
|
|
#else
|
|
# ifdef CONFIG_SMP
|
|
# define DNAME_INLINE_LEN 36 /* 128 bytes */
|
|
# else
|
|
# define DNAME_INLINE_LEN 40 /* 128 bytes */
|
|
# endif
|
|
#endif
|
|
|
|
#define d_lock d_lockref.lock
|
|
|
|
struct dentry {
|
|
/* RCU lookup touched fields */
|
|
unsigned int d_flags; /* protected by d_lock */
|
|
seqcount_t d_seq; /* per dentry seqlock */
|
|
struct hlist_bl_node d_hash; /* lookup hash list */
|
|
struct dentry *d_parent; /* parent directory */
|
|
struct qstr d_name;
|
|
struct inode *d_inode; /* Where the name belongs to - NULL is
|
|
* negative */
|
|
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
|
|
|
|
/* Ref lookup also touches following */
|
|
struct lockref d_lockref; /* per-dentry lock and refcount */
|
|
const struct dentry_operations *d_op;
|
|
struct super_block *d_sb; /* The root of the dentry tree */
|
|
unsigned long d_time; /* used by d_revalidate */
|
|
void *d_fsdata; /* fs-specific data */
|
|
|
|
struct list_head d_lru; /* LRU list */
|
|
/*
|
|
* d_child and d_rcu can share memory
|
|
*/
|
|
union {
|
|
struct list_head d_child; /* child of parent list */
|
|
struct rcu_head d_rcu;
|
|
} d_u;
|
|
struct list_head d_subdirs; /* our children */
|
|
struct hlist_node d_alias; /* inode alias list */
|
|
};
|
|
|
|
/*
 * dentry->d_lock spinlock nesting subclasses:
 *
 * 0: normal
 * 1: nested
 */
enum dentry_d_lock_class
{
	DENTRY_D_LOCK_NORMAL, /* implicitly used by plain spin_lock() APIs. */
	DENTRY_D_LOCK_NESTED
};
|
|
|
|
struct dentry_operations {
|
|
int (*d_revalidate)(struct dentry *, unsigned int);
|
|
int (*d_weak_revalidate)(struct dentry *, unsigned int);
|
|
int (*d_hash)(const struct dentry *, struct qstr *);
|
|
int (*d_compare)(const struct dentry *, const struct dentry *,
|
|
unsigned int, const char *, const struct qstr *);
|
|
int (*d_delete)(const struct dentry *);
|
|
void (*d_release)(struct dentry *);
|
|
void (*d_prune)(struct dentry *);
|
|
void (*d_iput)(struct dentry *, struct inode *);
|
|
char *(*d_dname)(struct dentry *, char *, int);
|
|
struct vfsmount *(*d_automount)(struct path *);
|
|
int (*d_manage)(struct dentry *, bool);
|
|
} ____cacheline_aligned;
|
|
|
|
/*
|
|
* Locking rules for dentry_operations callbacks are to be found in
|
|
* Documentation/filesystems/Locking. Keep it updated!
|
|
*
|
|
* Further descriptions are found in Documentation/filesystems/vfs.txt.
|
|
* Keep it updated too!
|
|
*/
|
|
|
|
/* d_flags entries */
|
|
#define DCACHE_OP_HASH 0x0001
|
|
#define DCACHE_OP_COMPARE 0x0002
|
|
#define DCACHE_OP_REVALIDATE 0x0004
|
|
#define DCACHE_OP_DELETE 0x0008
|
|
#define DCACHE_OP_PRUNE 0x0010
|
|
|
|
#define DCACHE_DISCONNECTED 0x0020
|
|
/* This dentry is possibly not currently connected to the dcache tree, in
|
|
* which case its parent will either be itself, or will have this flag as
|
|
* well. nfsd will not use a dentry with this bit set, but will first
|
|
* endeavour to clear the bit either by discovering that it is connected,
|
|
* or by performing lookup operations. Any filesystem which supports
|
|
* nfsd_operations MUST have a lookup function which, if it finds a
|
|
* directory inode with a DCACHE_DISCONNECTED dentry, will d_move that
|
|
* dentry into place and return that dentry rather than the passed one,
|
|
* typically using d_splice_alias. */
|
|
|
|
#define DCACHE_REFERENCED 0x0040 /* Recently used, don't discard. */
|
|
#define DCACHE_RCUACCESS 0x0080 /* Entry has ever been RCU-visible */
|
|
|
|
#define DCACHE_CANT_MOUNT 0x0100
|
|
#define DCACHE_GENOCIDE 0x0200
|
|
#define DCACHE_SHRINK_LIST 0x0400
|
|
|
|
#define DCACHE_OP_WEAK_REVALIDATE 0x0800
|
|
|
|
#define DCACHE_NFSFS_RENAMED 0x1000
|
|
/* this dentry has been "silly renamed" and has to be deleted on the last
|
|
* dput() */
|
|
#define DCACHE_COOKIE 0x2000 /* For use by dcookie subsystem */
|
|
#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x4000
|
|
/* Parent inode is watched by some fsnotify listener */
|
|
|
|
#define DCACHE_MOUNTED 0x10000 /* is a mountpoint */
|
|
#define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */
|
|
#define DCACHE_MANAGE_TRANSIT 0x40000 /* manage transit from this dirent */
|
|
#define DCACHE_MANAGED_DENTRY \
|
|
(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
|
|
|
|
#define DCACHE_LRU_LIST 0x80000
|
|
#define DCACHE_DENTRY_KILLED 0x100000
|
|
|
|
extern seqlock_t rename_lock;
|
|
|
|
static inline int dname_external(const struct dentry *dentry)
|
|
{
|
|
return dentry->d_name.name != dentry->d_iname;
|
|
}
|
|
|
|
/*
|
|
* These are the low-level FS interfaces to the dcache..
|
|
*/
|
|
extern void d_instantiate(struct dentry *, struct inode *);
|
|
extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
|
|
extern struct dentry * d_materialise_unique(struct dentry *, struct inode *);
|
|
extern void __d_drop(struct dentry *dentry);
|
|
extern void d_drop(struct dentry *dentry);
|
|
extern void d_delete(struct dentry *);
|
|
extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op);
|
|
|
|
/* allocate/de-allocate */
|
|
extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
|
|
extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
|
|
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
|
|
extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
|
|
extern struct dentry *d_find_any_alias(struct inode *inode);
|
|
extern struct dentry * d_obtain_alias(struct inode *);
|
|
extern void shrink_dcache_sb(struct super_block *);
|
|
extern void shrink_dcache_parent(struct dentry *);
|
|
extern void shrink_dcache_for_umount(struct super_block *);
|
|
extern int d_invalidate(struct dentry *);
|
|
|
|
/* only used at mount-time */
|
|
extern struct dentry * d_make_root(struct inode *);
|
|
|
|
/* <clickety>-<click> the ramfs-type tree */
|
|
extern void d_genocide(struct dentry *);
|
|
|
|
extern void d_tmpfile(struct dentry *, struct inode *);
|
|
|
|
extern struct dentry *d_find_alias(struct inode *);
|
|
extern void d_prune_aliases(struct inode *);
|
|
|
|
/* test whether we have any submounts in a subdir tree */
|
|
extern int have_submounts(struct dentry *);
|
|
extern int check_submounts_and_drop(struct dentry *);
|
|
|
|
/*
|
|
* This adds the entry to the hash queues.
|
|
*/
|
|
extern void d_rehash(struct dentry *);
|
|
|
|
/**
 * d_add - add dentry to hash queues
 * @entry: dentry to add
 * @inode: The inode to attach to this dentry
 *
 * This adds the entry to the hash queues and initializes @inode.
 * The entry was actually filled in earlier during d_alloc().
 *
 * Implemented as d_instantiate() followed by d_rehash().
 */
static inline void d_add(struct dentry *entry, struct inode *inode)
{
	d_instantiate(entry, inode);
	d_rehash(entry);
}
|
|
|
|
/**
|
|
* d_add_unique - add dentry to hash queues without aliasing
|
|
* @entry: dentry to add
|
|
* @inode: The inode to attach to this dentry
|
|
*
|
|
* This adds the entry to the hash queues and initializes @inode.
|
|
* The entry was actually filled in earlier during d_alloc().
|
|
*/
|
|
static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *inode)
|
|
{
|
|
struct dentry *res;
|
|
|
|
res = d_instantiate_unique(entry, inode);
|
|
d_rehash(res != NULL ? res : entry);
|
|
return res;
|
|
}
|
|
|
|
extern void dentry_update_name_case(struct dentry *, struct qstr *);
|
|
|
|
/* used for rename() and baskets */
|
|
extern void d_move(struct dentry *, struct dentry *);
|
|
extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
|
|
|
|
/* appendix may either be NULL or be used for transname suffixes */
|
|
extern struct dentry *d_lookup(const struct dentry *, const struct qstr *);
|
|
extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
|
|
extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *);
|
|
extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
|
|
const struct qstr *name, unsigned *seq);
|
|
|
|
static inline unsigned d_count(const struct dentry *dentry)
|
|
{
|
|
return dentry->d_lockref.count;
|
|
}
|
|
|
|
/* validate "insecure" dentry pointer */
|
|
extern int d_validate(struct dentry *, struct dentry *);
|
|
|
|
/*
|
|
* helper function for dentry_operations.d_dname() members
|
|
*/
|
|
extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
|
|
extern char *simple_dname(struct dentry *, char *, int);
|
|
|
|
extern char *__d_path(const struct path *, const struct path *, char *, int);
|
|
extern char *d_absolute_path(const struct path *, char *, int);
|
|
extern char *d_path(const struct path *, char *, int);
|
|
extern char *dentry_path_raw(struct dentry *, char *, int);
|
|
extern char *dentry_path(struct dentry *, char *, int);
|
|
|
|
/* Allocation counts.. */
|
|
|
|
/**
|
|
* dget, dget_dlock - get a reference to a dentry
|
|
* @dentry: dentry to get a reference to
|
|
*
|
|
* Given a dentry or %NULL pointer increment the reference count
|
|
* if appropriate and return the dentry. A dentry will not be
|
|
* destroyed when it has references.
|
|
*/
|
|
static inline struct dentry *dget_dlock(struct dentry *dentry)
|
|
{
|
|
if (dentry)
|
|
dentry->d_lockref.count++;
|
|
return dentry;
|
|
}
|
|
|
|
static inline struct dentry *dget(struct dentry *dentry)
|
|
{
|
|
if (dentry)
|
|
lockref_get(&dentry->d_lockref);
|
|
return dentry;
|
|
}
|
|
|
|
extern struct dentry *dget_parent(struct dentry *dentry);
|
|
|
|
/**
|
|
* d_unhashed - is dentry hashed
|
|
* @dentry: entry to check
|
|
*
|
|
* Returns true if the dentry passed is not currently hashed.
|
|
*/
|
|
|
|
static inline int d_unhashed(const struct dentry *dentry)
|
|
{
|
|
return hlist_bl_unhashed(&dentry->d_hash);
|
|
}
|
|
|
|
/*
 * Returns true if the dentry is unhashed and is not a root dentry
 * (i.e. it is not its own parent).
 */
static inline int d_unlinked(const struct dentry *dentry)
{
	return d_unhashed(dentry) && !IS_ROOT(dentry);
}
|
|
|
|
static inline int cant_mount(const struct dentry *dentry)
|
|
{
|
|
return (dentry->d_flags & DCACHE_CANT_MOUNT);
|
|
}
|
|
|
|
static inline void dont_mount(struct dentry *dentry)
|
|
{
|
|
spin_lock(&dentry->d_lock);
|
|
dentry->d_flags |= DCACHE_CANT_MOUNT;
|
|
spin_unlock(&dentry->d_lock);
|
|
}
|
|
|
|
extern void dput(struct dentry *);
|
|
|
|
static inline bool d_managed(const struct dentry *dentry)
|
|
{
|
|
return dentry->d_flags & DCACHE_MANAGED_DENTRY;
|
|
}
|
|
|
|
static inline bool d_mountpoint(const struct dentry *dentry)
|
|
{
|
|
return dentry->d_flags & DCACHE_MOUNTED;
|
|
}
|
|
|
|
extern int sysctl_vfs_cache_pressure;
|
|
|
|
#endif /* __LINUX_DCACHE_H */
|