xfs: add missing ilock around dio write last extent alignment
The iomap codepath (via get_blocks()) acquires and releases the inode lock in the case of a direct write that requires block allocation. This is because xfs_iomap_write_direct() allocates a transaction, which means the ilock must be dropped and reacquired after the transaction is allocated and reserved. xfs_iomap_write_direct() invokes xfs_iomap_eof_align_last_fsb() before the transaction is created and thus before the ilock is reacquired. This can lead to calls to xfs_iread_extents() and reads of the in-core extent list without any synchronization (via xfs_bmap_eof() and xfs_bmap_last_extent()). xfs_iread_extents() assert fails if the ilock is not held, but this is not currently seen in practice as the current callers had already invoked xfs_bmapi_read(). What has been seen in practice are reports of crashes down in the xfs_bmap_eof() codepath on direct writes due to seemingly bogus pointer references from xfs_iext_get_ext(). While an explicit reproducer is not currently available to confirm the cause of the problem, crash analysis and code inspection from David Jeffery had identified the insufficient locking. xfs_iomap_eof_align_last_fsb() is called from other contexts with the inode lock already held, so we cannot acquire it therein. __xfs_get_blocks() acquires and drops the ilock with variable flags to cover the event that the extent list must be read in. The common case is that __xfs_get_blocks() acquires the shared ilock. To provide locking around the last extent alignment call without adding more lock cycles to the dio path, update xfs_iomap_write_direct() to expect the shared ilock held on entry and do the extent alignment under its protection. Demote the lock, if necessary, from __xfs_get_blocks() and push the xfs_qm_dqattach() call outside of the shared lock critical section. Also, add an assert to document that the extent list is always expected to be present in this path. Otherwise, we risk a call to xfs_iread_extents() while under the shared ilock. 
This is safe as all current callers have executed an xfs_bmapi_read() call under the current iolock context. Reported-by: David Jeffery <djeffery@redhat.com> Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
parent
5cb13dcd0f
commit
009c6e871e
3 changed files with 36 additions and 12 deletions
|
@ -1408,12 +1408,12 @@ __xfs_get_blocks(
|
||||||
imap.br_startblock == DELAYSTARTBLOCK))) {
|
imap.br_startblock == DELAYSTARTBLOCK))) {
|
||||||
if (direct || xfs_get_extsz_hint(ip)) {
|
if (direct || xfs_get_extsz_hint(ip)) {
|
||||||
/*
|
/*
|
||||||
* Drop the ilock in preparation for starting the block
|
* xfs_iomap_write_direct() expects the shared lock. It
|
||||||
* allocation transaction. It will be retaken
|
* is unlocked on return.
|
||||||
* exclusively inside xfs_iomap_write_direct for the
|
|
||||||
* actual allocation.
|
|
||||||
*/
|
*/
|
||||||
xfs_iunlock(ip, lockmode);
|
if (lockmode == XFS_ILOCK_EXCL)
|
||||||
|
xfs_ilock_demote(ip, lockmode);
|
||||||
|
|
||||||
error = xfs_iomap_write_direct(ip, offset, size,
|
error = xfs_iomap_write_direct(ip, offset, size,
|
||||||
&imap, nimaps);
|
&imap, nimaps);
|
||||||
if (error)
|
if (error)
|
||||||
|
|
|
@ -131,20 +131,29 @@ xfs_iomap_write_direct(
|
||||||
uint qblocks, resblks, resrtextents;
|
uint qblocks, resblks, resrtextents;
|
||||||
int committed;
|
int committed;
|
||||||
int error;
|
int error;
|
||||||
|
int lockmode;
|
||||||
error = xfs_qm_dqattach(ip, 0);
|
|
||||||
if (error)
|
|
||||||
return error;
|
|
||||||
|
|
||||||
rt = XFS_IS_REALTIME_INODE(ip);
|
rt = XFS_IS_REALTIME_INODE(ip);
|
||||||
extsz = xfs_get_extsz_hint(ip);
|
extsz = xfs_get_extsz_hint(ip);
|
||||||
|
lockmode = XFS_ILOCK_SHARED; /* locked by caller */
|
||||||
|
|
||||||
|
ASSERT(xfs_isilocked(ip, lockmode));
|
||||||
|
|
||||||
offset_fsb = XFS_B_TO_FSBT(mp, offset);
|
offset_fsb = XFS_B_TO_FSBT(mp, offset);
|
||||||
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
|
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
|
||||||
if ((offset + count) > XFS_ISIZE(ip)) {
|
if ((offset + count) > XFS_ISIZE(ip)) {
|
||||||
|
/*
|
||||||
|
* Assert that the in-core extent list is present since this can
|
||||||
|
* call xfs_iread_extents() and we only have the ilock shared.
|
||||||
|
* This should be safe because the lock was held around a bmapi
|
||||||
|
* call in the caller and we only need it to access the in-core
|
||||||
|
* list.
|
||||||
|
*/
|
||||||
|
ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
|
||||||
|
XFS_IFEXTENTS);
|
||||||
error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
|
error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
|
||||||
if (error)
|
if (error)
|
||||||
return error;
|
goto out_unlock;
|
||||||
} else {
|
} else {
|
||||||
if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
|
if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
|
||||||
last_fsb = MIN(last_fsb, (xfs_fileoff_t)
|
last_fsb = MIN(last_fsb, (xfs_fileoff_t)
|
||||||
|
@ -173,6 +182,15 @@ xfs_iomap_write_direct(
|
||||||
quota_flag = XFS_QMOPT_RES_REGBLKS;
|
quota_flag = XFS_QMOPT_RES_REGBLKS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Drop the shared lock acquired by the caller, attach the dquot if
|
||||||
|
* necessary and move on to transaction setup.
|
||||||
|
*/
|
||||||
|
xfs_iunlock(ip, lockmode);
|
||||||
|
error = xfs_qm_dqattach(ip, 0);
|
||||||
|
if (error)
|
||||||
|
return error;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate and setup the transaction
|
* Allocate and setup the transaction
|
||||||
*/
|
*/
|
||||||
|
@ -187,7 +205,8 @@ xfs_iomap_write_direct(
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
lockmode = XFS_ILOCK_EXCL;
|
||||||
|
xfs_ilock(ip, lockmode);
|
||||||
|
|
||||||
error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
|
error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
|
||||||
if (error)
|
if (error)
|
||||||
|
@ -229,7 +248,7 @@ xfs_iomap_write_direct(
|
||||||
error = xfs_alert_fsblock_zero(ip, imap);
|
error = xfs_alert_fsblock_zero(ip, imap);
|
||||||
|
|
||||||
out_unlock:
|
out_unlock:
|
||||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
xfs_iunlock(ip, lockmode);
|
||||||
return error;
|
return error;
|
||||||
|
|
||||||
out_bmap_cancel:
|
out_bmap_cancel:
|
||||||
|
|
|
@ -181,6 +181,11 @@ xfs_fs_map_blocks(
|
||||||
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
|
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
|
||||||
|
|
||||||
if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
|
if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
|
||||||
|
/*
|
||||||
|
* xfs_iomap_write_direct() expects to take ownership of
|
||||||
|
* the shared ilock.
|
||||||
|
*/
|
||||||
|
xfs_ilock(ip, XFS_ILOCK_SHARED);
|
||||||
error = xfs_iomap_write_direct(ip, offset, length,
|
error = xfs_iomap_write_direct(ip, offset, length,
|
||||||
&imap, nimaps);
|
&imap, nimaps);
|
||||||
if (error)
|
if (error)
|
||||||
|
|
Loading…
Add table
Reference in a new issue