Merge branch 'xfs-sparse-inode' into for-next
This commit is contained in:
commit
b9a350a118
16 changed files with 829 additions and 86 deletions
|
@ -149,13 +149,27 @@ xfs_alloc_compute_aligned(
|
||||||
{
|
{
|
||||||
xfs_agblock_t bno;
|
xfs_agblock_t bno;
|
||||||
xfs_extlen_t len;
|
xfs_extlen_t len;
|
||||||
|
xfs_extlen_t diff;
|
||||||
|
|
||||||
/* Trim busy sections out of found extent */
|
/* Trim busy sections out of found extent */
|
||||||
xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
|
xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we have a largish extent that happens to start before min_agbno,
|
||||||
|
* see if we can shift it into range...
|
||||||
|
*/
|
||||||
|
if (bno < args->min_agbno && bno + len > args->min_agbno) {
|
||||||
|
diff = args->min_agbno - bno;
|
||||||
|
if (len > diff) {
|
||||||
|
bno += diff;
|
||||||
|
len -= diff;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (args->alignment > 1 && len >= args->minlen) {
|
if (args->alignment > 1 && len >= args->minlen) {
|
||||||
xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
|
xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
|
||||||
xfs_extlen_t diff = aligned_bno - bno;
|
|
||||||
|
diff = aligned_bno - bno;
|
||||||
|
|
||||||
*resbno = aligned_bno;
|
*resbno = aligned_bno;
|
||||||
*reslen = diff >= len ? 0 : len - diff;
|
*reslen = diff >= len ? 0 : len - diff;
|
||||||
|
@ -795,9 +809,13 @@ xfs_alloc_find_best_extent(
|
||||||
* The good extent is closer than this one.
|
* The good extent is closer than this one.
|
||||||
*/
|
*/
|
||||||
if (!dir) {
|
if (!dir) {
|
||||||
|
if (*sbnoa > args->max_agbno)
|
||||||
|
goto out_use_good;
|
||||||
if (*sbnoa >= args->agbno + gdiff)
|
if (*sbnoa >= args->agbno + gdiff)
|
||||||
goto out_use_good;
|
goto out_use_good;
|
||||||
} else {
|
} else {
|
||||||
|
if (*sbnoa < args->min_agbno)
|
||||||
|
goto out_use_good;
|
||||||
if (*sbnoa <= args->agbno - gdiff)
|
if (*sbnoa <= args->agbno - gdiff)
|
||||||
goto out_use_good;
|
goto out_use_good;
|
||||||
}
|
}
|
||||||
|
@ -884,6 +902,17 @@ xfs_alloc_ag_vextent_near(
|
||||||
dofirst = prandom_u32() & 1;
|
dofirst = prandom_u32() & 1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* handle uninitialized agbno range so caller doesn't have to */
|
||||||
|
if (!args->min_agbno && !args->max_agbno)
|
||||||
|
args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
|
||||||
|
ASSERT(args->min_agbno <= args->max_agbno);
|
||||||
|
|
||||||
|
/* clamp agbno to the range if it's outside */
|
||||||
|
if (args->agbno < args->min_agbno)
|
||||||
|
args->agbno = args->min_agbno;
|
||||||
|
if (args->agbno > args->max_agbno)
|
||||||
|
args->agbno = args->max_agbno;
|
||||||
|
|
||||||
restart:
|
restart:
|
||||||
bno_cur_lt = NULL;
|
bno_cur_lt = NULL;
|
||||||
bno_cur_gt = NULL;
|
bno_cur_gt = NULL;
|
||||||
|
@ -976,6 +1005,8 @@ restart:
|
||||||
<bnoa, <lena);
|
<bnoa, <lena);
|
||||||
if (ltlena < args->minlen)
|
if (ltlena < args->minlen)
|
||||||
continue;
|
continue;
|
||||||
|
if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
|
||||||
|
continue;
|
||||||
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
|
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
|
||||||
xfs_alloc_fix_len(args);
|
xfs_alloc_fix_len(args);
|
||||||
ASSERT(args->len >= args->minlen);
|
ASSERT(args->len >= args->minlen);
|
||||||
|
@ -1096,11 +1127,11 @@ restart:
|
||||||
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
|
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
|
||||||
xfs_alloc_compute_aligned(args, ltbno, ltlen,
|
xfs_alloc_compute_aligned(args, ltbno, ltlen,
|
||||||
<bnoa, <lena);
|
<bnoa, <lena);
|
||||||
if (ltlena >= args->minlen)
|
if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
|
||||||
break;
|
break;
|
||||||
if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
|
if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
|
||||||
goto error0;
|
goto error0;
|
||||||
if (!i) {
|
if (!i || ltbnoa < args->min_agbno) {
|
||||||
xfs_btree_del_cursor(bno_cur_lt,
|
xfs_btree_del_cursor(bno_cur_lt,
|
||||||
XFS_BTREE_NOERROR);
|
XFS_BTREE_NOERROR);
|
||||||
bno_cur_lt = NULL;
|
bno_cur_lt = NULL;
|
||||||
|
@ -1112,11 +1143,11 @@ restart:
|
||||||
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
|
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
|
||||||
xfs_alloc_compute_aligned(args, gtbno, gtlen,
|
xfs_alloc_compute_aligned(args, gtbno, gtlen,
|
||||||
>bnoa, >lena);
|
>bnoa, >lena);
|
||||||
if (gtlena >= args->minlen)
|
if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
|
||||||
break;
|
break;
|
||||||
if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
|
if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
|
||||||
goto error0;
|
goto error0;
|
||||||
if (!i) {
|
if (!i || gtbnoa > args->max_agbno) {
|
||||||
xfs_btree_del_cursor(bno_cur_gt,
|
xfs_btree_del_cursor(bno_cur_gt,
|
||||||
XFS_BTREE_NOERROR);
|
XFS_BTREE_NOERROR);
|
||||||
bno_cur_gt = NULL;
|
bno_cur_gt = NULL;
|
||||||
|
@ -1216,6 +1247,7 @@ restart:
|
||||||
ASSERT(ltnew >= ltbno);
|
ASSERT(ltnew >= ltbno);
|
||||||
ASSERT(ltnew + rlen <= ltbnoa + ltlena);
|
ASSERT(ltnew + rlen <= ltbnoa + ltlena);
|
||||||
ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
|
ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
|
||||||
|
ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
|
||||||
args->agbno = ltnew;
|
args->agbno = ltnew;
|
||||||
|
|
||||||
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
|
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
|
||||||
|
|
|
@ -112,6 +112,8 @@ typedef struct xfs_alloc_arg {
|
||||||
xfs_extlen_t total; /* total blocks needed in xaction */
|
xfs_extlen_t total; /* total blocks needed in xaction */
|
||||||
xfs_extlen_t alignment; /* align answer to multiple of this */
|
xfs_extlen_t alignment; /* align answer to multiple of this */
|
||||||
xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */
|
xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */
|
||||||
|
xfs_agblock_t min_agbno; /* set an agbno range for NEAR allocs */
|
||||||
|
xfs_agblock_t max_agbno; /* ... */
|
||||||
xfs_extlen_t len; /* output: actual size of extent */
|
xfs_extlen_t len; /* output: actual size of extent */
|
||||||
xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */
|
xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */
|
||||||
xfs_alloctype_t otype; /* original allocation type */
|
xfs_alloctype_t otype; /* original allocation type */
|
||||||
|
|
|
@ -170,7 +170,7 @@ typedef struct xfs_sb {
|
||||||
__uint32_t sb_features_log_incompat;
|
__uint32_t sb_features_log_incompat;
|
||||||
|
|
||||||
__uint32_t sb_crc; /* superblock crc */
|
__uint32_t sb_crc; /* superblock crc */
|
||||||
__uint32_t sb_pad;
|
xfs_extlen_t sb_spino_align; /* sparse inode chunk alignment */
|
||||||
|
|
||||||
xfs_ino_t sb_pquotino; /* project quota inode */
|
xfs_ino_t sb_pquotino; /* project quota inode */
|
||||||
xfs_lsn_t sb_lsn; /* last write sequence */
|
xfs_lsn_t sb_lsn; /* last write sequence */
|
||||||
|
@ -256,7 +256,7 @@ typedef struct xfs_dsb {
|
||||||
__be32 sb_features_log_incompat;
|
__be32 sb_features_log_incompat;
|
||||||
|
|
||||||
__le32 sb_crc; /* superblock crc */
|
__le32 sb_crc; /* superblock crc */
|
||||||
__be32 sb_pad;
|
__be32 sb_spino_align; /* sparse inode chunk alignment */
|
||||||
|
|
||||||
__be64 sb_pquotino; /* project quota inode */
|
__be64 sb_pquotino; /* project quota inode */
|
||||||
__be64 sb_lsn; /* last write sequence */
|
__be64 sb_lsn; /* last write sequence */
|
||||||
|
@ -457,8 +457,10 @@ xfs_sb_has_ro_compat_feature(
|
||||||
}
|
}
|
||||||
|
|
||||||
#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */
|
#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */
|
||||||
|
#define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */
|
||||||
#define XFS_SB_FEAT_INCOMPAT_ALL \
|
#define XFS_SB_FEAT_INCOMPAT_ALL \
|
||||||
(XFS_SB_FEAT_INCOMPAT_FTYPE)
|
(XFS_SB_FEAT_INCOMPAT_FTYPE| \
|
||||||
|
XFS_SB_FEAT_INCOMPAT_SPINODES)
|
||||||
|
|
||||||
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
|
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
|
||||||
static inline bool
|
static inline bool
|
||||||
|
@ -506,6 +508,12 @@ static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
|
||||||
(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
|
(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp)
|
||||||
|
{
|
||||||
|
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
|
||||||
|
xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_SPINODES);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* end of superblock version macros
|
* end of superblock version macros
|
||||||
*/
|
*/
|
||||||
|
@ -1216,26 +1224,54 @@ typedef __uint64_t xfs_inofree_t;
|
||||||
#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1)
|
#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1)
|
||||||
#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))
|
#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))
|
||||||
|
|
||||||
|
#define XFS_INOBT_HOLEMASK_FULL 0 /* holemask for full chunk */
|
||||||
|
#define XFS_INOBT_HOLEMASK_BITS (NBBY * sizeof(__uint16_t))
|
||||||
|
#define XFS_INODES_PER_HOLEMASK_BIT \
|
||||||
|
(XFS_INODES_PER_CHUNK / (NBBY * sizeof(__uint16_t)))
|
||||||
|
|
||||||
static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
|
static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
|
||||||
{
|
{
|
||||||
return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i;
|
return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Data record structure
|
* The on-disk inode record structure has two formats. The original "full"
|
||||||
|
* format uses a 4-byte freecount. The "sparse" format uses a 1-byte freecount
|
||||||
|
* and replaces the 3 high-order freecount bytes with the holemask and inode
|
||||||
|
* count.
|
||||||
|
*
|
||||||
|
* The holemask of the sparse record format allows an inode chunk to have holes
|
||||||
|
* that refer to blocks not owned by the inode record. This facilitates inode
|
||||||
|
* allocation in the event of severe free space fragmentation.
|
||||||
*/
|
*/
|
||||||
typedef struct xfs_inobt_rec {
|
typedef struct xfs_inobt_rec {
|
||||||
__be32 ir_startino; /* starting inode number */
|
__be32 ir_startino; /* starting inode number */
|
||||||
__be32 ir_freecount; /* count of free inodes (set bits) */
|
union {
|
||||||
|
struct {
|
||||||
|
__be32 ir_freecount; /* count of free inodes */
|
||||||
|
} f;
|
||||||
|
struct {
|
||||||
|
__be16 ir_holemask;/* hole mask for sparse chunks */
|
||||||
|
__u8 ir_count; /* total inode count */
|
||||||
|
__u8 ir_freecount; /* count of free inodes */
|
||||||
|
} sp;
|
||||||
|
} ir_u;
|
||||||
__be64 ir_free; /* free inode mask */
|
__be64 ir_free; /* free inode mask */
|
||||||
} xfs_inobt_rec_t;
|
} xfs_inobt_rec_t;
|
||||||
|
|
||||||
typedef struct xfs_inobt_rec_incore {
|
typedef struct xfs_inobt_rec_incore {
|
||||||
xfs_agino_t ir_startino; /* starting inode number */
|
xfs_agino_t ir_startino; /* starting inode number */
|
||||||
__int32_t ir_freecount; /* count of free inodes (set bits) */
|
__uint16_t ir_holemask; /* hole mask for sparse chunks */
|
||||||
|
__uint8_t ir_count; /* total inode count */
|
||||||
|
__uint8_t ir_freecount; /* count of free inodes (set bits) */
|
||||||
xfs_inofree_t ir_free; /* free inode mask */
|
xfs_inofree_t ir_free; /* free inode mask */
|
||||||
} xfs_inobt_rec_incore_t;
|
} xfs_inobt_rec_incore_t;
|
||||||
|
|
||||||
|
static inline bool xfs_inobt_issparse(uint16_t holemask)
|
||||||
|
{
|
||||||
|
/* non-zero holemask represents a sparse rec. */
|
||||||
|
return holemask;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Key structure
|
* Key structure
|
||||||
|
|
|
@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks {
|
||||||
#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
|
#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
|
||||||
#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
|
#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
|
||||||
#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
|
#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
|
||||||
|
#define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Minimum and maximum sizes needed for growth checks.
|
* Minimum and maximum sizes needed for growth checks.
|
||||||
|
|
|
@ -65,6 +65,8 @@ xfs_inobt_lookup(
|
||||||
int *stat) /* success/failure */
|
int *stat) /* success/failure */
|
||||||
{
|
{
|
||||||
cur->bc_rec.i.ir_startino = ino;
|
cur->bc_rec.i.ir_startino = ino;
|
||||||
|
cur->bc_rec.i.ir_holemask = 0;
|
||||||
|
cur->bc_rec.i.ir_count = 0;
|
||||||
cur->bc_rec.i.ir_freecount = 0;
|
cur->bc_rec.i.ir_freecount = 0;
|
||||||
cur->bc_rec.i.ir_free = 0;
|
cur->bc_rec.i.ir_free = 0;
|
||||||
return xfs_btree_lookup(cur, dir, stat);
|
return xfs_btree_lookup(cur, dir, stat);
|
||||||
|
@ -82,7 +84,14 @@ xfs_inobt_update(
|
||||||
union xfs_btree_rec rec;
|
union xfs_btree_rec rec;
|
||||||
|
|
||||||
rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
|
rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
|
||||||
rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);
|
if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
|
||||||
|
rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask);
|
||||||
|
rec.inobt.ir_u.sp.ir_count = irec->ir_count;
|
||||||
|
rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount;
|
||||||
|
} else {
|
||||||
|
/* ir_holemask/ir_count not supported on-disk */
|
||||||
|
rec.inobt.ir_u.f.ir_freecount = cpu_to_be32(irec->ir_freecount);
|
||||||
|
}
|
||||||
rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
|
rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
|
||||||
return xfs_btree_update(cur, &rec);
|
return xfs_btree_update(cur, &rec);
|
||||||
}
|
}
|
||||||
|
@ -100,12 +109,27 @@ xfs_inobt_get_rec(
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
error = xfs_btree_get_rec(cur, &rec, stat);
|
error = xfs_btree_get_rec(cur, &rec, stat);
|
||||||
if (!error && *stat == 1) {
|
if (error || *stat == 0)
|
||||||
irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
|
return error;
|
||||||
irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
|
|
||||||
irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
|
irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
|
||||||
|
if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
|
||||||
|
irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
|
||||||
|
irec->ir_count = rec->inobt.ir_u.sp.ir_count;
|
||||||
|
irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* ir_holemask/ir_count not supported on-disk. Fill in hardcoded
|
||||||
|
* values for full inode chunks.
|
||||||
|
*/
|
||||||
|
irec->ir_holemask = XFS_INOBT_HOLEMASK_FULL;
|
||||||
|
irec->ir_count = XFS_INODES_PER_CHUNK;
|
||||||
|
irec->ir_freecount =
|
||||||
|
be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
|
||||||
}
|
}
|
||||||
return error;
|
irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -114,10 +138,14 @@ xfs_inobt_get_rec(
|
||||||
STATIC int
|
STATIC int
|
||||||
xfs_inobt_insert_rec(
|
xfs_inobt_insert_rec(
|
||||||
struct xfs_btree_cur *cur,
|
struct xfs_btree_cur *cur,
|
||||||
|
__uint16_t holemask,
|
||||||
|
__uint8_t count,
|
||||||
__int32_t freecount,
|
__int32_t freecount,
|
||||||
xfs_inofree_t free,
|
xfs_inofree_t free,
|
||||||
int *stat)
|
int *stat)
|
||||||
{
|
{
|
||||||
|
cur->bc_rec.i.ir_holemask = holemask;
|
||||||
|
cur->bc_rec.i.ir_count = count;
|
||||||
cur->bc_rec.i.ir_freecount = freecount;
|
cur->bc_rec.i.ir_freecount = freecount;
|
||||||
cur->bc_rec.i.ir_free = free;
|
cur->bc_rec.i.ir_free = free;
|
||||||
return xfs_btree_insert(cur, stat);
|
return xfs_btree_insert(cur, stat);
|
||||||
|
@ -154,7 +182,9 @@ xfs_inobt_insert(
|
||||||
}
|
}
|
||||||
ASSERT(i == 0);
|
ASSERT(i == 0);
|
||||||
|
|
||||||
error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
|
error = xfs_inobt_insert_rec(cur, XFS_INOBT_HOLEMASK_FULL,
|
||||||
|
XFS_INODES_PER_CHUNK,
|
||||||
|
XFS_INODES_PER_CHUNK,
|
||||||
XFS_INOBT_ALL_FREE, &i);
|
XFS_INOBT_ALL_FREE, &i);
|
||||||
if (error) {
|
if (error) {
|
||||||
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
||||||
|
@ -220,6 +250,7 @@ xfs_ialloc_inode_init(
|
||||||
struct xfs_mount *mp,
|
struct xfs_mount *mp,
|
||||||
struct xfs_trans *tp,
|
struct xfs_trans *tp,
|
||||||
struct list_head *buffer_list,
|
struct list_head *buffer_list,
|
||||||
|
int icount,
|
||||||
xfs_agnumber_t agno,
|
xfs_agnumber_t agno,
|
||||||
xfs_agblock_t agbno,
|
xfs_agblock_t agbno,
|
||||||
xfs_agblock_t length,
|
xfs_agblock_t length,
|
||||||
|
@ -275,7 +306,7 @@ xfs_ialloc_inode_init(
|
||||||
* they track in the AIL as if they were physically logged.
|
* they track in the AIL as if they were physically logged.
|
||||||
*/
|
*/
|
||||||
if (tp)
|
if (tp)
|
||||||
xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos,
|
xfs_icreate_log(tp, agno, agbno, icount,
|
||||||
mp->m_sb.sb_inodesize, length, gen);
|
mp->m_sb.sb_inodesize, length, gen);
|
||||||
} else
|
} else
|
||||||
version = 2;
|
version = 2;
|
||||||
|
@ -346,6 +377,214 @@ xfs_ialloc_inode_init(
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Align startino and allocmask for a recently allocated sparse chunk such that
|
||||||
|
* they are fit for insertion (or merge) into the on-disk inode btrees.
|
||||||
|
*
|
||||||
|
* Background:
|
||||||
|
*
|
||||||
|
* When enabled, sparse inode support increases the inode alignment from cluster
|
||||||
|
* size to inode chunk size. This means that the minimum range between two
|
||||||
|
* non-adjacent inode records in the inobt is large enough for a full inode
|
||||||
|
* record. This allows for cluster sized, cluster aligned block allocation
|
||||||
|
* without need to worry about whether the resulting inode record overlaps with
|
||||||
|
* another record in the tree. Without this basic rule, we would have to deal
|
||||||
|
* with the consequences of overlap by potentially undoing recent allocations in
|
||||||
|
* the inode allocation codepath.
|
||||||
|
*
|
||||||
|
* Because of this alignment rule (which is enforced on mount), there are two
|
||||||
|
* inobt possibilities for newly allocated sparse chunks. One is that the
|
||||||
|
* aligned inode record for the chunk covers a range of inodes not already
|
||||||
|
* covered in the inobt (i.e., it is safe to insert a new sparse record). The
|
||||||
|
* other is that a record already exists at the aligned startino that considers
|
||||||
|
* the newly allocated range as sparse. In the latter case, record content is
|
||||||
|
* merged in hope that sparse inode chunks fill to full chunks over time.
|
||||||
|
*/
|
||||||
|
STATIC void
|
||||||
|
xfs_align_sparse_ino(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
xfs_agino_t *startino,
|
||||||
|
uint16_t *allocmask)
|
||||||
|
{
|
||||||
|
xfs_agblock_t agbno;
|
||||||
|
xfs_agblock_t mod;
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
agbno = XFS_AGINO_TO_AGBNO(mp, *startino);
|
||||||
|
mod = agbno % mp->m_sb.sb_inoalignmt;
|
||||||
|
if (!mod)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* calculate the inode offset and align startino */
|
||||||
|
offset = mod << mp->m_sb.sb_inopblog;
|
||||||
|
*startino -= offset;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Since startino has been aligned down, left shift allocmask such that
|
||||||
|
* it continues to represent the same physical inodes relative to the
|
||||||
|
* new startino.
|
||||||
|
*/
|
||||||
|
*allocmask <<= offset / XFS_INODES_PER_HOLEMASK_BIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Determine whether the source inode record can merge into the target. Both
|
||||||
|
* records must be sparse, the inode ranges must match and there must be no
|
||||||
|
* allocation overlap between the records.
|
||||||
|
*/
|
||||||
|
STATIC bool
|
||||||
|
__xfs_inobt_can_merge(
|
||||||
|
struct xfs_inobt_rec_incore *trec, /* tgt record */
|
||||||
|
struct xfs_inobt_rec_incore *srec) /* src record */
|
||||||
|
{
|
||||||
|
uint64_t talloc;
|
||||||
|
uint64_t salloc;
|
||||||
|
|
||||||
|
/* records must cover the same inode range */
|
||||||
|
if (trec->ir_startino != srec->ir_startino)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* both records must be sparse */
|
||||||
|
if (!xfs_inobt_issparse(trec->ir_holemask) ||
|
||||||
|
!xfs_inobt_issparse(srec->ir_holemask))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* both records must track some inodes */
|
||||||
|
if (!trec->ir_count || !srec->ir_count)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* can't exceed capacity of a full record */
|
||||||
|
if (trec->ir_count + srec->ir_count > XFS_INODES_PER_CHUNK)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* verify there is no allocation overlap */
|
||||||
|
talloc = xfs_inobt_irec_to_allocmask(trec);
|
||||||
|
salloc = xfs_inobt_irec_to_allocmask(srec);
|
||||||
|
if (talloc & salloc)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Merge the source inode record into the target. The caller must call
|
||||||
|
* __xfs_inobt_can_merge() to ensure the merge is valid.
|
||||||
|
*/
|
||||||
|
STATIC void
|
||||||
|
__xfs_inobt_rec_merge(
|
||||||
|
struct xfs_inobt_rec_incore *trec, /* target */
|
||||||
|
struct xfs_inobt_rec_incore *srec) /* src */
|
||||||
|
{
|
||||||
|
ASSERT(trec->ir_startino == srec->ir_startino);
|
||||||
|
|
||||||
|
/* combine the counts */
|
||||||
|
trec->ir_count += srec->ir_count;
|
||||||
|
trec->ir_freecount += srec->ir_freecount;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Merge the holemask and free mask. For both fields, 0 bits refer to
|
||||||
|
* allocated inodes. We combine the allocated ranges with bitwise AND.
|
||||||
|
*/
|
||||||
|
trec->ir_holemask &= srec->ir_holemask;
|
||||||
|
trec->ir_free &= srec->ir_free;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Insert a new sparse inode chunk into the associated inode btree. The inode
|
||||||
|
* record for the sparse chunk is pre-aligned to a startino that should match
|
||||||
|
* any pre-existing sparse inode record in the tree. This allows sparse chunks
|
||||||
|
* to fill over time.
|
||||||
|
*
|
||||||
|
* This function supports two modes of handling preexisting records depending on
|
||||||
|
* the merge flag. If merge is true, the provided record is merged with the
|
||||||
|
* existing record and updated in place. The merged record is returned in nrec.
|
||||||
|
* If merge is false, an existing record is replaced with the provided record.
|
||||||
|
* If no preexisting record exists, the provided record is always inserted.
|
||||||
|
*
|
||||||
|
* It is considered corruption if a merge is requested and not possible. Given
|
||||||
|
* the sparse inode alignment constraints, this should never happen.
|
||||||
|
*/
|
||||||
|
STATIC int
|
||||||
|
xfs_inobt_insert_sprec(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
struct xfs_trans *tp,
|
||||||
|
struct xfs_buf *agbp,
|
||||||
|
int btnum,
|
||||||
|
struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */
|
||||||
|
bool merge) /* merge or replace */
|
||||||
|
{
|
||||||
|
struct xfs_btree_cur *cur;
|
||||||
|
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
|
||||||
|
xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
|
||||||
|
int error;
|
||||||
|
int i;
|
||||||
|
struct xfs_inobt_rec_incore rec;
|
||||||
|
|
||||||
|
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
|
||||||
|
|
||||||
|
/* the new record is pre-aligned so we know where to look */
|
||||||
|
error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
|
||||||
|
if (error)
|
||||||
|
goto error;
|
||||||
|
/* if nothing there, insert a new record and return */
|
||||||
|
if (i == 0) {
|
||||||
|
error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,
|
||||||
|
nrec->ir_count, nrec->ir_freecount,
|
||||||
|
nrec->ir_free, &i);
|
||||||
|
if (error)
|
||||||
|
goto error;
|
||||||
|
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
|
||||||
|
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A record exists at this startino. Merge or replace the record
|
||||||
|
* depending on what we've been asked to do.
|
||||||
|
*/
|
||||||
|
if (merge) {
|
||||||
|
error = xfs_inobt_get_rec(cur, &rec, &i);
|
||||||
|
if (error)
|
||||||
|
goto error;
|
||||||
|
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
|
||||||
|
XFS_WANT_CORRUPTED_GOTO(mp,
|
||||||
|
rec.ir_startino == nrec->ir_startino,
|
||||||
|
error);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This should never fail. If we have coexisting records that
|
||||||
|
* cannot merge, something is seriously wrong.
|
||||||
|
*/
|
||||||
|
XFS_WANT_CORRUPTED_GOTO(mp, __xfs_inobt_can_merge(nrec, &rec),
|
||||||
|
error);
|
||||||
|
|
||||||
|
trace_xfs_irec_merge_pre(mp, agno, rec.ir_startino,
|
||||||
|
rec.ir_holemask, nrec->ir_startino,
|
||||||
|
nrec->ir_holemask);
|
||||||
|
|
||||||
|
/* merge to nrec to output the updated record */
|
||||||
|
__xfs_inobt_rec_merge(nrec, &rec);
|
||||||
|
|
||||||
|
trace_xfs_irec_merge_post(mp, agno, nrec->ir_startino,
|
||||||
|
nrec->ir_holemask);
|
||||||
|
|
||||||
|
error = xfs_inobt_rec_check_count(mp, nrec);
|
||||||
|
if (error)
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = xfs_inobt_update(cur, nrec);
|
||||||
|
if (error)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
out:
|
||||||
|
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
||||||
|
return 0;
|
||||||
|
error:
|
||||||
|
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate new inodes in the allocation group specified by agbp.
|
* Allocate new inodes in the allocation group specified by agbp.
|
||||||
* Return 0 for success, else error code.
|
* Return 0 for success, else error code.
|
||||||
|
@ -364,11 +603,22 @@ xfs_ialloc_ag_alloc(
|
||||||
xfs_agino_t newlen; /* new number of inodes */
|
xfs_agino_t newlen; /* new number of inodes */
|
||||||
int isaligned = 0; /* inode allocation at stripe unit */
|
int isaligned = 0; /* inode allocation at stripe unit */
|
||||||
/* boundary */
|
/* boundary */
|
||||||
|
uint16_t allocmask = (uint16_t) -1; /* init. to full chunk */
|
||||||
|
struct xfs_inobt_rec_incore rec;
|
||||||
struct xfs_perag *pag;
|
struct xfs_perag *pag;
|
||||||
|
|
||||||
|
int do_sparse = 0;
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
/* randomly do sparse inode allocations */
|
||||||
|
if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb))
|
||||||
|
do_sparse = prandom_u32() & 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
memset(&args, 0, sizeof(args));
|
memset(&args, 0, sizeof(args));
|
||||||
args.tp = tp;
|
args.tp = tp;
|
||||||
args.mp = tp->t_mountp;
|
args.mp = tp->t_mountp;
|
||||||
|
args.fsbno = NULLFSBLOCK;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Locking will ensure that we don't have two callers in here
|
* Locking will ensure that we don't have two callers in here
|
||||||
|
@ -390,6 +640,8 @@ xfs_ialloc_ag_alloc(
|
||||||
agno = be32_to_cpu(agi->agi_seqno);
|
agno = be32_to_cpu(agi->agi_seqno);
|
||||||
args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
|
args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
|
||||||
args.mp->m_ialloc_blks;
|
args.mp->m_ialloc_blks;
|
||||||
|
if (do_sparse)
|
||||||
|
goto sparse_alloc;
|
||||||
if (likely(newino != NULLAGINO &&
|
if (likely(newino != NULLAGINO &&
|
||||||
(args.agbno < be32_to_cpu(agi->agi_length)))) {
|
(args.agbno < be32_to_cpu(agi->agi_length)))) {
|
||||||
args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
|
args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
|
||||||
|
@ -428,8 +680,7 @@ xfs_ialloc_ag_alloc(
|
||||||
* subsequent requests.
|
* subsequent requests.
|
||||||
*/
|
*/
|
||||||
args.minalignslop = 0;
|
args.minalignslop = 0;
|
||||||
} else
|
}
|
||||||
args.fsbno = NULLFSBLOCK;
|
|
||||||
|
|
||||||
if (unlikely(args.fsbno == NULLFSBLOCK)) {
|
if (unlikely(args.fsbno == NULLFSBLOCK)) {
|
||||||
/*
|
/*
|
||||||
|
@ -480,6 +731,46 @@ xfs_ialloc_ag_alloc(
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Finally, try a sparse allocation if the filesystem supports it and
|
||||||
|
* the sparse allocation length is smaller than a full chunk.
|
||||||
|
*/
|
||||||
|
if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) &&
|
||||||
|
args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks &&
|
||||||
|
args.fsbno == NULLFSBLOCK) {
|
||||||
|
sparse_alloc:
|
||||||
|
args.type = XFS_ALLOCTYPE_NEAR_BNO;
|
||||||
|
args.agbno = be32_to_cpu(agi->agi_root);
|
||||||
|
args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
|
||||||
|
args.alignment = args.mp->m_sb.sb_spino_align;
|
||||||
|
args.prod = 1;
|
||||||
|
|
||||||
|
args.minlen = args.mp->m_ialloc_min_blks;
|
||||||
|
args.maxlen = args.minlen;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The inode record will be aligned to full chunk size. We must
|
||||||
|
* prevent sparse allocation from AG boundaries that result in
|
||||||
|
* invalid inode records, such as records that start at agbno 0
|
||||||
|
* or extend beyond the AG.
|
||||||
|
*
|
||||||
|
* Set min agbno to the first aligned, non-zero agbno and max to
|
||||||
|
* the last aligned agbno that is at least one full chunk from
|
||||||
|
* the end of the AG.
|
||||||
|
*/
|
||||||
|
args.min_agbno = args.mp->m_sb.sb_inoalignmt;
|
||||||
|
args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
|
||||||
|
args.mp->m_sb.sb_inoalignmt) -
|
||||||
|
args.mp->m_ialloc_blks;
|
||||||
|
|
||||||
|
error = xfs_alloc_vextent(&args);
|
||||||
|
if (error)
|
||||||
|
return error;
|
||||||
|
|
||||||
|
newlen = args.len << args.mp->m_sb.sb_inopblog;
|
||||||
|
allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1;
|
||||||
|
}
|
||||||
|
|
||||||
if (args.fsbno == NULLFSBLOCK) {
|
if (args.fsbno == NULLFSBLOCK) {
|
||||||
*alloc = 0;
|
*alloc = 0;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -495,8 +786,8 @@ xfs_ialloc_ag_alloc(
|
||||||
* rather than a linear progression to prevent the next generation
|
* rather than a linear progression to prevent the next generation
|
||||||
* number from being easily guessable.
|
* number from being easily guessable.
|
||||||
*/
|
*/
|
||||||
error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno,
|
error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, agno,
|
||||||
args.len, prandom_u32());
|
args.agbno, args.len, prandom_u32());
|
||||||
|
|
||||||
if (error)
|
if (error)
|
||||||
return error;
|
return error;
|
||||||
|
@ -504,6 +795,73 @@ xfs_ialloc_ag_alloc(
|
||||||
* Convert the results.
|
* Convert the results.
|
||||||
*/
|
*/
|
||||||
newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
|
newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
|
||||||
|
|
||||||
|
if (xfs_inobt_issparse(~allocmask)) {
|
||||||
|
/*
|
||||||
|
* We've allocated a sparse chunk. Align the startino and mask.
|
||||||
|
*/
|
||||||
|
xfs_align_sparse_ino(args.mp, &newino, &allocmask);
|
||||||
|
|
||||||
|
rec.ir_startino = newino;
|
||||||
|
rec.ir_holemask = ~allocmask;
|
||||||
|
rec.ir_count = newlen;
|
||||||
|
rec.ir_freecount = newlen;
|
||||||
|
rec.ir_free = XFS_INOBT_ALL_FREE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Insert the sparse record into the inobt and allow for a merge
|
||||||
|
* if necessary. If a merge does occur, rec is updated to the
|
||||||
|
* merged record.
|
||||||
|
*/
|
||||||
|
error = xfs_inobt_insert_sprec(args.mp, tp, agbp, XFS_BTNUM_INO,
|
||||||
|
&rec, true);
|
||||||
|
if (error == -EFSCORRUPTED) {
|
||||||
|
xfs_alert(args.mp,
|
||||||
|
"invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
|
||||||
|
XFS_AGINO_TO_INO(args.mp, agno,
|
||||||
|
rec.ir_startino),
|
||||||
|
rec.ir_holemask, rec.ir_count);
|
||||||
|
xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);
|
||||||
|
}
|
||||||
|
if (error)
|
||||||
|
return error;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We can't merge the part we've just allocated as for the inobt
|
||||||
|
* due to finobt semantics. The original record may or may not
|
||||||
|
* exist independent of whether physical inodes exist in this
|
||||||
|
* sparse chunk.
|
||||||
|
*
|
||||||
|
* We must update the finobt record based on the inobt record.
|
||||||
|
* rec contains the fully merged and up to date inobt record
|
||||||
|
* from the previous call. Set merge false to replace any
|
||||||
|
* existing record with this one.
|
||||||
|
*/
|
||||||
|
if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
|
||||||
|
error = xfs_inobt_insert_sprec(args.mp, tp, agbp,
|
||||||
|
XFS_BTNUM_FINO, &rec,
|
||||||
|
false);
|
||||||
|
if (error)
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* full chunk - insert new records to both btrees */
|
||||||
|
error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
|
||||||
|
XFS_BTNUM_INO);
|
||||||
|
if (error)
|
||||||
|
return error;
|
||||||
|
|
||||||
|
if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
|
||||||
|
error = xfs_inobt_insert(args.mp, tp, agbp, newino,
|
||||||
|
newlen, XFS_BTNUM_FINO);
|
||||||
|
if (error)
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Update AGI counts and newino.
|
||||||
|
*/
|
||||||
be32_add_cpu(&agi->agi_count, newlen);
|
be32_add_cpu(&agi->agi_count, newlen);
|
||||||
be32_add_cpu(&agi->agi_freecount, newlen);
|
be32_add_cpu(&agi->agi_freecount, newlen);
|
||||||
pag = xfs_perag_get(args.mp, agno);
|
pag = xfs_perag_get(args.mp, agno);
|
||||||
|
@ -511,20 +869,6 @@ xfs_ialloc_ag_alloc(
|
||||||
xfs_perag_put(pag);
|
xfs_perag_put(pag);
|
||||||
agi->agi_newino = cpu_to_be32(newino);
|
agi->agi_newino = cpu_to_be32(newino);
|
||||||
|
|
||||||
/*
|
|
||||||
* Insert records describing the new inode chunk into the btrees.
|
|
||||||
*/
|
|
||||||
error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
|
|
||||||
XFS_BTNUM_INO);
|
|
||||||
if (error)
|
|
||||||
return error;
|
|
||||||
|
|
||||||
if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
|
|
||||||
error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
|
|
||||||
XFS_BTNUM_FINO);
|
|
||||||
if (error)
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
/*
|
/*
|
||||||
* Log allocation group header fields
|
* Log allocation group header fields
|
||||||
*/
|
*/
|
||||||
|
@ -645,7 +989,7 @@ xfs_ialloc_ag_select(
|
||||||
* if we fail allocation due to alignment issues then it is most
|
* if we fail allocation due to alignment issues then it is most
|
||||||
* likely a real ENOSPC condition.
|
* likely a real ENOSPC condition.
|
||||||
*/
|
*/
|
||||||
ineed = mp->m_ialloc_blks;
|
ineed = mp->m_ialloc_min_blks;
|
||||||
if (flags && ineed > 1)
|
if (flags && ineed > 1)
|
||||||
ineed += xfs_ialloc_cluster_alignment(mp);
|
ineed += xfs_ialloc_cluster_alignment(mp);
|
||||||
longest = pag->pagf_longest;
|
longest = pag->pagf_longest;
|
||||||
|
@ -731,6 +1075,27 @@ xfs_ialloc_get_rec(
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return the offset of the first free inode in the record. If the inode chunk
|
||||||
|
* is sparsely allocated, we convert the record holemask to inode granularity
|
||||||
|
* and mask off the unallocated regions from the inode free mask.
|
||||||
|
*/
|
||||||
|
STATIC int
|
||||||
|
xfs_inobt_first_free_inode(
|
||||||
|
struct xfs_inobt_rec_incore *rec)
|
||||||
|
{
|
||||||
|
xfs_inofree_t realfree;
|
||||||
|
|
||||||
|
/* if there are no holes, return the first available offset */
|
||||||
|
if (!xfs_inobt_issparse(rec->ir_holemask))
|
||||||
|
return xfs_lowbit64(rec->ir_free);
|
||||||
|
|
||||||
|
realfree = xfs_inobt_irec_to_allocmask(rec);
|
||||||
|
realfree &= rec->ir_free;
|
||||||
|
|
||||||
|
return xfs_lowbit64(realfree);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate an inode using the inobt-only algorithm.
|
* Allocate an inode using the inobt-only algorithm.
|
||||||
*/
|
*/
|
||||||
|
@ -961,7 +1326,7 @@ newino:
|
||||||
}
|
}
|
||||||
|
|
||||||
alloc_inode:
|
alloc_inode:
|
||||||
offset = xfs_lowbit64(rec.ir_free);
|
offset = xfs_inobt_first_free_inode(&rec);
|
||||||
ASSERT(offset >= 0);
|
ASSERT(offset >= 0);
|
||||||
ASSERT(offset < XFS_INODES_PER_CHUNK);
|
ASSERT(offset < XFS_INODES_PER_CHUNK);
|
||||||
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
|
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
|
||||||
|
@ -1210,7 +1575,7 @@ xfs_dialloc_ag(
|
||||||
if (error)
|
if (error)
|
||||||
goto error_cur;
|
goto error_cur;
|
||||||
|
|
||||||
offset = xfs_lowbit64(rec.ir_free);
|
offset = xfs_inobt_first_free_inode(&rec);
|
||||||
ASSERT(offset >= 0);
|
ASSERT(offset >= 0);
|
||||||
ASSERT(offset < XFS_INODES_PER_CHUNK);
|
ASSERT(offset < XFS_INODES_PER_CHUNK);
|
||||||
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
|
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
|
||||||
|
@ -1439,6 +1804,83 @@ out_error:
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Free the blocks of an inode chunk. We must consider that the inode chunk
|
||||||
|
* might be sparse and only free the regions that are allocated as part of the
|
||||||
|
* chunk.
|
||||||
|
*/
|
||||||
|
STATIC void
|
||||||
|
xfs_difree_inode_chunk(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
xfs_agnumber_t agno,
|
||||||
|
struct xfs_inobt_rec_incore *rec,
|
||||||
|
struct xfs_bmap_free *flist)
|
||||||
|
{
|
||||||
|
xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino);
|
||||||
|
int startidx, endidx;
|
||||||
|
int nextbit;
|
||||||
|
xfs_agblock_t agbno;
|
||||||
|
int contigblk;
|
||||||
|
DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
|
||||||
|
|
||||||
|
if (!xfs_inobt_issparse(rec->ir_holemask)) {
|
||||||
|
/* not sparse, calculate extent info directly */
|
||||||
|
xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
|
||||||
|
XFS_AGINO_TO_AGBNO(mp, rec->ir_startino)),
|
||||||
|
mp->m_ialloc_blks, flist, mp);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* holemask is only 16-bits (fits in an unsigned long) */
|
||||||
|
ASSERT(sizeof(rec->ir_holemask) <= sizeof(holemask[0]));
|
||||||
|
holemask[0] = rec->ir_holemask;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find contiguous ranges of zeroes (i.e., allocated regions) in the
|
||||||
|
* holemask and convert the start/end index of each range to an extent.
|
||||||
|
* We start with the start and end index both pointing at the first 0 in
|
||||||
|
* the mask.
|
||||||
|
*/
|
||||||
|
startidx = endidx = find_first_zero_bit(holemask,
|
||||||
|
XFS_INOBT_HOLEMASK_BITS);
|
||||||
|
nextbit = startidx + 1;
|
||||||
|
while (startidx < XFS_INOBT_HOLEMASK_BITS) {
|
||||||
|
nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
|
||||||
|
nextbit);
|
||||||
|
/*
|
||||||
|
* If the next zero bit is contiguous, update the end index of
|
||||||
|
* the current range and continue.
|
||||||
|
*/
|
||||||
|
if (nextbit != XFS_INOBT_HOLEMASK_BITS &&
|
||||||
|
nextbit == endidx + 1) {
|
||||||
|
endidx = nextbit;
|
||||||
|
goto next;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* nextbit is not contiguous with the current end index. Convert
|
||||||
|
* the current start/end to an extent and add it to the free
|
||||||
|
* list.
|
||||||
|
*/
|
||||||
|
agbno = sagbno + (startidx * XFS_INODES_PER_HOLEMASK_BIT) /
|
||||||
|
mp->m_sb.sb_inopblock;
|
||||||
|
contigblk = ((endidx - startidx + 1) *
|
||||||
|
XFS_INODES_PER_HOLEMASK_BIT) /
|
||||||
|
mp->m_sb.sb_inopblock;
|
||||||
|
|
||||||
|
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
|
||||||
|
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
|
||||||
|
xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
|
||||||
|
flist, mp);
|
||||||
|
|
||||||
|
/* reset range to current bit and carry on... */
|
||||||
|
startidx = endidx = nextbit;
|
||||||
|
|
||||||
|
next:
|
||||||
|
nextbit++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
STATIC int
|
STATIC int
|
||||||
xfs_difree_inobt(
|
xfs_difree_inobt(
|
||||||
struct xfs_mount *mp,
|
struct xfs_mount *mp,
|
||||||
|
@ -1446,8 +1888,7 @@ xfs_difree_inobt(
|
||||||
struct xfs_buf *agbp,
|
struct xfs_buf *agbp,
|
||||||
xfs_agino_t agino,
|
xfs_agino_t agino,
|
||||||
struct xfs_bmap_free *flist,
|
struct xfs_bmap_free *flist,
|
||||||
int *deleted,
|
struct xfs_icluster *xic,
|
||||||
xfs_ino_t *first_ino,
|
|
||||||
struct xfs_inobt_rec_incore *orec)
|
struct xfs_inobt_rec_incore *orec)
|
||||||
{
|
{
|
||||||
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
|
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
|
||||||
|
@ -1501,20 +1942,23 @@ xfs_difree_inobt(
|
||||||
rec.ir_freecount++;
|
rec.ir_freecount++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When an inode cluster is free, it becomes eligible for removal
|
* When an inode chunk is free, it becomes eligible for removal. Don't
|
||||||
|
* remove the chunk if the block size is large enough for multiple inode
|
||||||
|
* chunks (that might not be free).
|
||||||
*/
|
*/
|
||||||
if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
|
if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
|
||||||
(rec.ir_freecount == mp->m_ialloc_inos)) {
|
rec.ir_free == XFS_INOBT_ALL_FREE &&
|
||||||
|
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
|
||||||
*deleted = 1;
|
xic->deleted = 1;
|
||||||
*first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
|
xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
|
||||||
|
xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Remove the inode cluster from the AGI B+Tree, adjust the
|
* Remove the inode cluster from the AGI B+Tree, adjust the
|
||||||
* AGI and Superblock inode counts, and mark the disk space
|
* AGI and Superblock inode counts, and mark the disk space
|
||||||
* to be freed when the transaction is committed.
|
* to be freed when the transaction is committed.
|
||||||
*/
|
*/
|
||||||
ilen = mp->m_ialloc_inos;
|
ilen = rec.ir_freecount;
|
||||||
be32_add_cpu(&agi->agi_count, -ilen);
|
be32_add_cpu(&agi->agi_count, -ilen);
|
||||||
be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
|
be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
|
||||||
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
|
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
|
||||||
|
@ -1530,11 +1974,9 @@ xfs_difree_inobt(
|
||||||
goto error0;
|
goto error0;
|
||||||
}
|
}
|
||||||
|
|
||||||
xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
|
xfs_difree_inode_chunk(mp, agno, &rec, flist);
|
||||||
XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
|
|
||||||
mp->m_ialloc_blks, flist, mp);
|
|
||||||
} else {
|
} else {
|
||||||
*deleted = 0;
|
xic->deleted = 0;
|
||||||
|
|
||||||
error = xfs_inobt_update(cur, &rec);
|
error = xfs_inobt_update(cur, &rec);
|
||||||
if (error) {
|
if (error) {
|
||||||
|
@ -1599,7 +2041,9 @@ xfs_difree_finobt(
|
||||||
*/
|
*/
|
||||||
XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
|
XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
|
||||||
|
|
||||||
error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
|
error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask,
|
||||||
|
ibtrec->ir_count,
|
||||||
|
ibtrec->ir_freecount,
|
||||||
ibtrec->ir_free, &i);
|
ibtrec->ir_free, &i);
|
||||||
if (error)
|
if (error)
|
||||||
goto error;
|
goto error;
|
||||||
|
@ -1634,8 +2078,13 @@ xfs_difree_finobt(
|
||||||
* free inode. Hence, if all of the inodes are free and we aren't
|
* free inode. Hence, if all of the inodes are free and we aren't
|
||||||
* keeping inode chunks permanently on disk, remove the record.
|
* keeping inode chunks permanently on disk, remove the record.
|
||||||
* Otherwise, update the record with the new information.
|
* Otherwise, update the record with the new information.
|
||||||
|
*
|
||||||
|
* Note that we currently can't free chunks when the block size is large
|
||||||
|
* enough for multiple chunks. Leave the finobt record to remain in sync
|
||||||
|
* with the inobt.
|
||||||
*/
|
*/
|
||||||
if (rec.ir_freecount == mp->m_ialloc_inos &&
|
if (rec.ir_free == XFS_INOBT_ALL_FREE &&
|
||||||
|
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK &&
|
||||||
!(mp->m_flags & XFS_MOUNT_IKEEP)) {
|
!(mp->m_flags & XFS_MOUNT_IKEEP)) {
|
||||||
error = xfs_btree_delete(cur, &i);
|
error = xfs_btree_delete(cur, &i);
|
||||||
if (error)
|
if (error)
|
||||||
|
@ -1671,8 +2120,7 @@ xfs_difree(
|
||||||
struct xfs_trans *tp, /* transaction pointer */
|
struct xfs_trans *tp, /* transaction pointer */
|
||||||
xfs_ino_t inode, /* inode to be freed */
|
xfs_ino_t inode, /* inode to be freed */
|
||||||
struct xfs_bmap_free *flist, /* extents to free */
|
struct xfs_bmap_free *flist, /* extents to free */
|
||||||
int *deleted,/* set if inode cluster was deleted */
|
struct xfs_icluster *xic) /* cluster info if deleted */
|
||||||
xfs_ino_t *first_ino)/* first inode in deleted cluster */
|
|
||||||
{
|
{
|
||||||
/* REFERENCED */
|
/* REFERENCED */
|
||||||
xfs_agblock_t agbno; /* block number containing inode */
|
xfs_agblock_t agbno; /* block number containing inode */
|
||||||
|
@ -1723,8 +2171,7 @@ xfs_difree(
|
||||||
/*
|
/*
|
||||||
* Fix up the inode allocation btree.
|
* Fix up the inode allocation btree.
|
||||||
*/
|
*/
|
||||||
error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino,
|
error = xfs_difree_inobt(mp, tp, agbp, agino, flist, xic, &rec);
|
||||||
&rec);
|
|
||||||
if (error)
|
if (error)
|
||||||
goto error0;
|
goto error0;
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,13 @@ struct xfs_btree_cur;
|
||||||
/* Move inodes in clusters of this size */
|
/* Move inodes in clusters of this size */
|
||||||
#define XFS_INODE_BIG_CLUSTER_SIZE 8192
|
#define XFS_INODE_BIG_CLUSTER_SIZE 8192
|
||||||
|
|
||||||
|
struct xfs_icluster {
|
||||||
|
bool deleted; /* record is deleted */
|
||||||
|
xfs_ino_t first_ino; /* first inode number */
|
||||||
|
uint64_t alloc; /* inode phys. allocation bitmap for
|
||||||
|
* sparse chunks */
|
||||||
|
};
|
||||||
|
|
||||||
/* Calculate and return the number of filesystem blocks per inode cluster */
|
/* Calculate and return the number of filesystem blocks per inode cluster */
|
||||||
static inline int
|
static inline int
|
||||||
xfs_icluster_size_fsb(
|
xfs_icluster_size_fsb(
|
||||||
|
@ -90,8 +97,7 @@ xfs_difree(
|
||||||
struct xfs_trans *tp, /* transaction pointer */
|
struct xfs_trans *tp, /* transaction pointer */
|
||||||
xfs_ino_t inode, /* inode to be freed */
|
xfs_ino_t inode, /* inode to be freed */
|
||||||
struct xfs_bmap_free *flist, /* extents to free */
|
struct xfs_bmap_free *flist, /* extents to free */
|
||||||
int *deleted, /* set if inode cluster was deleted */
|
struct xfs_icluster *ifree); /* cluster info if deleted */
|
||||||
xfs_ino_t *first_ino); /* first inode in deleted cluster */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return the location of the inode in imap, for mapping it into a buffer.
|
* Return the location of the inode in imap, for mapping it into a buffer.
|
||||||
|
@ -156,7 +162,7 @@ int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
|
||||||
* Inode chunk initialisation routine
|
* Inode chunk initialisation routine
|
||||||
*/
|
*/
|
||||||
int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
|
int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
|
||||||
struct list_head *buffer_list,
|
struct list_head *buffer_list, int icount,
|
||||||
xfs_agnumber_t agno, xfs_agblock_t agbno,
|
xfs_agnumber_t agno, xfs_agblock_t agbno,
|
||||||
xfs_agblock_t length, unsigned int gen);
|
xfs_agblock_t length, unsigned int gen);
|
||||||
|
|
||||||
|
|
|
@ -167,7 +167,16 @@ xfs_inobt_init_rec_from_cur(
|
||||||
union xfs_btree_rec *rec)
|
union xfs_btree_rec *rec)
|
||||||
{
|
{
|
||||||
rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
|
rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
|
||||||
rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
|
if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
|
||||||
|
rec->inobt.ir_u.sp.ir_holemask =
|
||||||
|
cpu_to_be16(cur->bc_rec.i.ir_holemask);
|
||||||
|
rec->inobt.ir_u.sp.ir_count = cur->bc_rec.i.ir_count;
|
||||||
|
rec->inobt.ir_u.sp.ir_freecount = cur->bc_rec.i.ir_freecount;
|
||||||
|
} else {
|
||||||
|
/* ir_holemask/ir_count not supported on-disk */
|
||||||
|
rec->inobt.ir_u.f.ir_freecount =
|
||||||
|
cpu_to_be32(cur->bc_rec.i.ir_freecount);
|
||||||
|
}
|
||||||
rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
|
rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -418,3 +427,85 @@ xfs_inobt_maxrecs(
|
||||||
return blocklen / sizeof(xfs_inobt_rec_t);
|
return blocklen / sizeof(xfs_inobt_rec_t);
|
||||||
return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
|
return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert the inode record holemask to an inode allocation bitmap. The inode
|
||||||
|
* allocation bitmap is inode granularity and specifies whether an inode is
|
||||||
|
* physically allocated on disk (not whether the inode is considered allocated
|
||||||
|
* or free by the fs).
|
||||||
|
*
|
||||||
|
* A bit value of 1 means the inode is allocated, a value of 0 means it is free.
|
||||||
|
*/
|
||||||
|
uint64_t
|
||||||
|
xfs_inobt_irec_to_allocmask(
|
||||||
|
struct xfs_inobt_rec_incore *rec)
|
||||||
|
{
|
||||||
|
uint64_t bitmap = 0;
|
||||||
|
uint64_t inodespbit;
|
||||||
|
int nextbit;
|
||||||
|
uint allocbitmap;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The holemask has 16-bits for a 64 inode record. Therefore each
|
||||||
|
* holemask bit represents multiple inodes. Create a mask of bits to set
|
||||||
|
* in the allocmask for each holemask bit.
|
||||||
|
*/
|
||||||
|
inodespbit = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocated inodes are represented by 0 bits in holemask. Invert the 0
|
||||||
|
* bits to 1 and convert to a uint so we can use xfs_next_bit(). Mask
|
||||||
|
* anything beyond the 16 holemask bits since this casts to a larger
|
||||||
|
* type.
|
||||||
|
*/
|
||||||
|
allocbitmap = ~rec->ir_holemask & ((1 << XFS_INOBT_HOLEMASK_BITS) - 1);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* allocbitmap is the inverted holemask so every set bit represents
|
||||||
|
* allocated inodes. To expand from 16-bit holemask granularity to
|
||||||
|
* 64-bit (e.g., bit-per-inode), set inodespbit bits in the target
|
||||||
|
* bitmap for every holemask bit.
|
||||||
|
*/
|
||||||
|
nextbit = xfs_next_bit(&allocbitmap, 1, 0);
|
||||||
|
while (nextbit != -1) {
|
||||||
|
ASSERT(nextbit < (sizeof(rec->ir_holemask) * NBBY));
|
||||||
|
|
||||||
|
bitmap |= (inodespbit <<
|
||||||
|
(nextbit * XFS_INODES_PER_HOLEMASK_BIT));
|
||||||
|
|
||||||
|
nextbit = xfs_next_bit(&allocbitmap, 1, nextbit + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return bitmap;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(DEBUG) || defined(XFS_WARN)
|
||||||
|
/*
|
||||||
|
* Verify that an in-core inode record has a valid inode count.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
xfs_inobt_rec_check_count(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
struct xfs_inobt_rec_incore *rec)
|
||||||
|
{
|
||||||
|
int inocount = 0;
|
||||||
|
int nextbit = 0;
|
||||||
|
uint64_t allocbmap;
|
||||||
|
int wordsz;
|
||||||
|
|
||||||
|
wordsz = sizeof(allocbmap) / sizeof(unsigned int);
|
||||||
|
allocbmap = xfs_inobt_irec_to_allocmask(rec);
|
||||||
|
|
||||||
|
nextbit = xfs_next_bit((uint *) &allocbmap, wordsz, nextbit);
|
||||||
|
while (nextbit != -1) {
|
||||||
|
inocount++;
|
||||||
|
nextbit = xfs_next_bit((uint *) &allocbmap, wordsz,
|
||||||
|
nextbit + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inocount != rec->ir_count)
|
||||||
|
return -EFSCORRUPTED;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif /* DEBUG */
|
||||||
|
|
|
@ -62,4 +62,14 @@ extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
|
||||||
xfs_btnum_t);
|
xfs_btnum_t);
|
||||||
extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
|
extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
|
||||||
|
|
||||||
|
/* ir_holemask to inode allocation bitmap conversion */
|
||||||
|
uint64_t xfs_inobt_irec_to_allocmask(struct xfs_inobt_rec_incore *);
|
||||||
|
|
||||||
|
#if defined(DEBUG) || defined(XFS_WARN)
|
||||||
|
int xfs_inobt_rec_check_count(struct xfs_mount *,
|
||||||
|
struct xfs_inobt_rec_incore *);
|
||||||
|
#else
|
||||||
|
#define xfs_inobt_rec_check_count(mp, rec) 0
|
||||||
|
#endif /* DEBUG */
|
||||||
|
|
||||||
#endif /* __XFS_IALLOC_BTREE_H__ */
|
#endif /* __XFS_IALLOC_BTREE_H__ */
|
||||||
|
|
|
@ -174,6 +174,27 @@ xfs_mount_validate_sb(
|
||||||
return -EFSCORRUPTED;
|
return -EFSCORRUPTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Full inode chunks must be aligned to inode chunk size when
|
||||||
|
* sparse inodes are enabled to support the sparse chunk
|
||||||
|
* allocation algorithm and prevent overlapping inode records.
|
||||||
|
*/
|
||||||
|
if (xfs_sb_version_hassparseinodes(sbp)) {
|
||||||
|
uint32_t align;
|
||||||
|
|
||||||
|
xfs_alert(mp,
|
||||||
|
"EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
|
||||||
|
|
||||||
|
align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize
|
||||||
|
>> sbp->sb_blocklog;
|
||||||
|
if (sbp->sb_inoalignmt != align) {
|
||||||
|
xfs_warn(mp,
|
||||||
|
"Inode block alignment (%u) must match chunk size (%u) for sparse inodes.",
|
||||||
|
sbp->sb_inoalignmt, align);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (unlikely(
|
if (unlikely(
|
||||||
sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
|
sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
|
||||||
xfs_warn(mp,
|
xfs_warn(mp,
|
||||||
|
@ -374,7 +395,7 @@ __xfs_sb_from_disk(
|
||||||
be32_to_cpu(from->sb_features_log_incompat);
|
be32_to_cpu(from->sb_features_log_incompat);
|
||||||
/* crc is only used on disk, not in memory; just init to 0 here. */
|
/* crc is only used on disk, not in memory; just init to 0 here. */
|
||||||
to->sb_crc = 0;
|
to->sb_crc = 0;
|
||||||
to->sb_pad = 0;
|
to->sb_spino_align = be32_to_cpu(from->sb_spino_align);
|
||||||
to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
|
to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
|
||||||
to->sb_lsn = be64_to_cpu(from->sb_lsn);
|
to->sb_lsn = be64_to_cpu(from->sb_lsn);
|
||||||
/* Convert on-disk flags to in-memory flags? */
|
/* Convert on-disk flags to in-memory flags? */
|
||||||
|
@ -516,7 +537,7 @@ xfs_sb_to_disk(
|
||||||
cpu_to_be32(from->sb_features_incompat);
|
cpu_to_be32(from->sb_features_incompat);
|
||||||
to->sb_features_log_incompat =
|
to->sb_features_log_incompat =
|
||||||
cpu_to_be32(from->sb_features_log_incompat);
|
cpu_to_be32(from->sb_features_log_incompat);
|
||||||
to->sb_pad = 0;
|
to->sb_spino_align = cpu_to_be32(from->sb_spino_align);
|
||||||
to->sb_lsn = cpu_to_be64(from->sb_lsn);
|
to->sb_lsn = cpu_to_be64(from->sb_lsn);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -689,6 +710,11 @@ xfs_sb_mount_common(
|
||||||
mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
|
mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
|
||||||
sbp->sb_inopblock);
|
sbp->sb_inopblock);
|
||||||
mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
|
mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
|
||||||
|
|
||||||
|
if (sbp->sb_spino_align)
|
||||||
|
mp->m_ialloc_min_blks = sbp->sb_spino_align;
|
||||||
|
else
|
||||||
|
mp->m_ialloc_min_blks = mp->m_ialloc_blks;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -101,7 +101,9 @@ xfs_fs_geometry(
|
||||||
(xfs_sb_version_hasftype(&mp->m_sb) ?
|
(xfs_sb_version_hasftype(&mp->m_sb) ?
|
||||||
XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
|
XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
|
||||||
(xfs_sb_version_hasfinobt(&mp->m_sb) ?
|
(xfs_sb_version_hasfinobt(&mp->m_sb) ?
|
||||||
XFS_FSOP_GEOM_FLAGS_FINOBT : 0);
|
XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
|
||||||
|
(xfs_sb_version_hassparseinodes(&mp->m_sb) ?
|
||||||
|
XFS_FSOP_GEOM_FLAGS_SPINODES : 0);
|
||||||
geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
|
geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
|
||||||
mp->m_sb.sb_logsectsize : BBSIZE;
|
mp->m_sb.sb_logsectsize : BBSIZE;
|
||||||
geo->rtsectsize = mp->m_sb.sb_blocksize;
|
geo->rtsectsize = mp->m_sb.sb_blocksize;
|
||||||
|
|
|
@ -2235,9 +2235,9 @@ xfs_iunlink_remove(
|
||||||
*/
|
*/
|
||||||
STATIC int
|
STATIC int
|
||||||
xfs_ifree_cluster(
|
xfs_ifree_cluster(
|
||||||
xfs_inode_t *free_ip,
|
xfs_inode_t *free_ip,
|
||||||
xfs_trans_t *tp,
|
xfs_trans_t *tp,
|
||||||
xfs_ino_t inum)
|
struct xfs_icluster *xic)
|
||||||
{
|
{
|
||||||
xfs_mount_t *mp = free_ip->i_mount;
|
xfs_mount_t *mp = free_ip->i_mount;
|
||||||
int blks_per_cluster;
|
int blks_per_cluster;
|
||||||
|
@ -2250,13 +2250,26 @@ xfs_ifree_cluster(
|
||||||
xfs_inode_log_item_t *iip;
|
xfs_inode_log_item_t *iip;
|
||||||
xfs_log_item_t *lip;
|
xfs_log_item_t *lip;
|
||||||
struct xfs_perag *pag;
|
struct xfs_perag *pag;
|
||||||
|
xfs_ino_t inum;
|
||||||
|
|
||||||
|
inum = xic->first_ino;
|
||||||
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
|
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
|
||||||
blks_per_cluster = xfs_icluster_size_fsb(mp);
|
blks_per_cluster = xfs_icluster_size_fsb(mp);
|
||||||
inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
|
inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
|
||||||
nbufs = mp->m_ialloc_blks / blks_per_cluster;
|
nbufs = mp->m_ialloc_blks / blks_per_cluster;
|
||||||
|
|
||||||
for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) {
|
for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) {
|
||||||
|
/*
|
||||||
|
* The allocation bitmap tells us which inodes of the chunk were
|
||||||
|
* physically allocated. Skip the cluster if an inode falls into
|
||||||
|
* a sparse region.
|
||||||
|
*/
|
||||||
|
if ((xic->alloc & XFS_INOBT_MASK(inum - xic->first_ino)) == 0) {
|
||||||
|
ASSERT(((inum - xic->first_ino) %
|
||||||
|
inodes_per_cluster) == 0);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
|
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
|
||||||
XFS_INO_TO_AGBNO(mp, inum));
|
XFS_INO_TO_AGBNO(mp, inum));
|
||||||
|
|
||||||
|
@ -2414,8 +2427,7 @@ xfs_ifree(
|
||||||
xfs_bmap_free_t *flist)
|
xfs_bmap_free_t *flist)
|
||||||
{
|
{
|
||||||
int error;
|
int error;
|
||||||
int delete;
|
struct xfs_icluster xic = { 0 };
|
||||||
xfs_ino_t first_ino;
|
|
||||||
|
|
||||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
||||||
ASSERT(ip->i_d.di_nlink == 0);
|
ASSERT(ip->i_d.di_nlink == 0);
|
||||||
|
@ -2431,7 +2443,7 @@ xfs_ifree(
|
||||||
if (error)
|
if (error)
|
||||||
return error;
|
return error;
|
||||||
|
|
||||||
error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
|
error = xfs_difree(tp, ip->i_ino, flist, &xic);
|
||||||
if (error)
|
if (error)
|
||||||
return error;
|
return error;
|
||||||
|
|
||||||
|
@ -2448,8 +2460,8 @@ xfs_ifree(
|
||||||
ip->i_d.di_gen++;
|
ip->i_d.di_gen++;
|
||||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||||
|
|
||||||
if (delete)
|
if (xic.deleted)
|
||||||
error = xfs_ifree_cluster(ip, tp, first_ino);
|
error = xfs_ifree_cluster(ip, tp, &xic);
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
|
@ -252,7 +252,7 @@ xfs_bulkstat_grab_ichunk(
|
||||||
}
|
}
|
||||||
|
|
||||||
irec->ir_free |= xfs_inobt_maskn(0, idx);
|
irec->ir_free |= xfs_inobt_maskn(0, idx);
|
||||||
*icount = XFS_INODES_PER_CHUNK - irec->ir_freecount;
|
*icount = irec->ir_count - irec->ir_freecount;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -415,6 +415,8 @@ xfs_bulkstat(
|
||||||
goto del_cursor;
|
goto del_cursor;
|
||||||
if (icount) {
|
if (icount) {
|
||||||
irbp->ir_startino = r.ir_startino;
|
irbp->ir_startino = r.ir_startino;
|
||||||
|
irbp->ir_holemask = r.ir_holemask;
|
||||||
|
irbp->ir_count = r.ir_count;
|
||||||
irbp->ir_freecount = r.ir_freecount;
|
irbp->ir_freecount = r.ir_freecount;
|
||||||
irbp->ir_free = r.ir_free;
|
irbp->ir_free = r.ir_free;
|
||||||
irbp++;
|
irbp++;
|
||||||
|
@ -447,13 +449,15 @@ xfs_bulkstat(
|
||||||
* If this chunk has any allocated inodes, save it.
|
* If this chunk has any allocated inodes, save it.
|
||||||
* Also start read-ahead now for this chunk.
|
* Also start read-ahead now for this chunk.
|
||||||
*/
|
*/
|
||||||
if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
|
if (r.ir_freecount < r.ir_count) {
|
||||||
xfs_bulkstat_ichunk_ra(mp, agno, &r);
|
xfs_bulkstat_ichunk_ra(mp, agno, &r);
|
||||||
irbp->ir_startino = r.ir_startino;
|
irbp->ir_startino = r.ir_startino;
|
||||||
|
irbp->ir_holemask = r.ir_holemask;
|
||||||
|
irbp->ir_count = r.ir_count;
|
||||||
irbp->ir_freecount = r.ir_freecount;
|
irbp->ir_freecount = r.ir_freecount;
|
||||||
irbp->ir_free = r.ir_free;
|
irbp->ir_free = r.ir_free;
|
||||||
irbp++;
|
irbp++;
|
||||||
icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
|
icount += r.ir_count - r.ir_freecount;
|
||||||
}
|
}
|
||||||
error = xfs_btree_increment(cur, 0, &stat);
|
error = xfs_btree_increment(cur, 0, &stat);
|
||||||
if (error || stat == 0) {
|
if (error || stat == 0) {
|
||||||
|
@ -599,8 +603,7 @@ xfs_inumbers(
|
||||||
agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
|
agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
|
||||||
buffer[bufidx].xi_startino =
|
buffer[bufidx].xi_startino =
|
||||||
XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
|
XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
|
||||||
buffer[bufidx].xi_alloccount =
|
buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount;
|
||||||
XFS_INODES_PER_CHUNK - r.ir_freecount;
|
|
||||||
buffer[bufidx].xi_allocmask = ~r.ir_free;
|
buffer[bufidx].xi_allocmask = ~r.ir_free;
|
||||||
if (++bufidx == bcount) {
|
if (++bufidx == bcount) {
|
||||||
long written;
|
long written;
|
||||||
|
|
|
@ -3068,12 +3068,22 @@ xlog_recover_do_icreate_pass2(
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* existing allocation is fixed value */
|
/*
|
||||||
ASSERT(count == mp->m_ialloc_inos);
|
* The inode chunk is either full or sparse and we only support
|
||||||
ASSERT(length == mp->m_ialloc_blks);
|
* m_ialloc_min_blks sized sparse allocations at this time.
|
||||||
if (count != mp->m_ialloc_inos ||
|
*/
|
||||||
length != mp->m_ialloc_blks) {
|
if (length != mp->m_ialloc_blks &&
|
||||||
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");
|
length != mp->m_ialloc_min_blks) {
|
||||||
|
xfs_warn(log->l_mp,
|
||||||
|
"%s: unsupported chunk length", __FUNCTION__);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* verify inode count is consistent with extent length */
|
||||||
|
if ((count >> mp->m_sb.sb_inopblog) != length) {
|
||||||
|
xfs_warn(log->l_mp,
|
||||||
|
"%s: inconsistent inode count and chunk length",
|
||||||
|
__FUNCTION__);
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3091,8 +3101,8 @@ xlog_recover_do_icreate_pass2(
|
||||||
XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))
|
XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length,
|
xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, length,
|
||||||
be32_to_cpu(icl->icl_gen));
|
be32_to_cpu(icl->icl_gen));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -724,6 +724,22 @@ xfs_mountfs(
|
||||||
mp->m_inode_cluster_size = new_size;
|
mp->m_inode_cluster_size = new_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If enabled, sparse inode chunk alignment is expected to match the
|
||||||
|
* cluster size. Full inode chunk alignment must match the chunk size,
|
||||||
|
* but that is checked on sb read verification...
|
||||||
|
*/
|
||||||
|
if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
|
||||||
|
mp->m_sb.sb_spino_align !=
|
||||||
|
XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
|
||||||
|
xfs_warn(mp,
|
||||||
|
"Sparse inode block alignment (%u) must match cluster size (%llu).",
|
||||||
|
mp->m_sb.sb_spino_align,
|
||||||
|
XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out_remove_uuid;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set inode alignment fields
|
* Set inode alignment fields
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -101,6 +101,8 @@ typedef struct xfs_mount {
|
||||||
__uint64_t m_flags; /* global mount flags */
|
__uint64_t m_flags; /* global mount flags */
|
||||||
int m_ialloc_inos; /* inodes in inode allocation */
|
int m_ialloc_inos; /* inodes in inode allocation */
|
||||||
int m_ialloc_blks; /* blocks in inode allocation */
|
int m_ialloc_blks; /* blocks in inode allocation */
|
||||||
|
int m_ialloc_min_blks;/* min blocks in sparse inode
|
||||||
|
* allocation */
|
||||||
int m_inoalign_mask;/* mask sb_inoalignmt if used */
|
int m_inoalign_mask;/* mask sb_inoalignmt if used */
|
||||||
uint m_qflags; /* quota status flags */
|
uint m_qflags; /* quota status flags */
|
||||||
struct xfs_trans_resv m_resv; /* precomputed res values */
|
struct xfs_trans_resv m_resv; /* precomputed res values */
|
||||||
|
|
|
@ -738,6 +738,53 @@ TRACE_EVENT(xfs_iomap_prealloc_size,
|
||||||
__entry->blocks, __entry->shift, __entry->writeio_blocks)
|
__entry->blocks, __entry->shift, __entry->writeio_blocks)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
TRACE_EVENT(xfs_irec_merge_pre,
|
||||||
|
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
|
||||||
|
uint16_t holemask, xfs_agino_t nagino, uint16_t nholemask),
|
||||||
|
TP_ARGS(mp, agno, agino, holemask, nagino, nholemask),
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(dev_t, dev)
|
||||||
|
__field(xfs_agnumber_t, agno)
|
||||||
|
__field(xfs_agino_t, agino)
|
||||||
|
__field(uint16_t, holemask)
|
||||||
|
__field(xfs_agino_t, nagino)
|
||||||
|
__field(uint16_t, nholemask)
|
||||||
|
),
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->dev = mp->m_super->s_dev;
|
||||||
|
__entry->agno = agno;
|
||||||
|
__entry->agino = agino;
|
||||||
|
__entry->holemask = holemask;
|
||||||
|
__entry->nagino = nagino;
|
||||||
|
__entry->nholemask = holemask;
|
||||||
|
),
|
||||||
|
TP_printk("dev %d:%d agno %d inobt (%u:0x%x) new (%u:0x%x)",
|
||||||
|
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
|
||||||
|
__entry->agino, __entry->holemask, __entry->nagino,
|
||||||
|
__entry->nholemask)
|
||||||
|
)
|
||||||
|
|
||||||
|
TRACE_EVENT(xfs_irec_merge_post,
|
||||||
|
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
|
||||||
|
uint16_t holemask),
|
||||||
|
TP_ARGS(mp, agno, agino, holemask),
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(dev_t, dev)
|
||||||
|
__field(xfs_agnumber_t, agno)
|
||||||
|
__field(xfs_agino_t, agino)
|
||||||
|
__field(uint16_t, holemask)
|
||||||
|
),
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->dev = mp->m_super->s_dev;
|
||||||
|
__entry->agno = agno;
|
||||||
|
__entry->agino = agino;
|
||||||
|
__entry->holemask = holemask;
|
||||||
|
),
|
||||||
|
TP_printk("dev %d:%d agno %d inobt (%u:0x%x)", MAJOR(__entry->dev),
|
||||||
|
MINOR(__entry->dev), __entry->agno, __entry->agino,
|
||||||
|
__entry->holemask)
|
||||||
|
)
|
||||||
|
|
||||||
#define DEFINE_IREF_EVENT(name) \
|
#define DEFINE_IREF_EVENT(name) \
|
||||||
DEFINE_EVENT(xfs_iref_class, name, \
|
DEFINE_EVENT(xfs_iref_class, name, \
|
||||||
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
|
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
|
||||||
|
|
Loading…
Add table
Reference in a new issue