|
View:
New views
4 Messages
—
Rating Filter:
Alert me
|
|
|
[PATCH] xfs: fix mmap_sem vs iolock lock order inversion in xfs_free_eofblocksWhen xfs_free_eofblocks is called from ->release the VM might already
hold the mmap_sem, but in the write path we take the iolock before taking the mmap_sem in the generic write code. Switch xfs_free_eofblocks to only trylock the iolock if called from ->release and skip trimming the prellocated blocks in that case. We'll still free them later on the final iput. Signed-off-by: Christoph Hellwig <hch@...> Index: xfs/fs/xfs/xfs_rw.h =================================================================== --- xfs.orig/fs/xfs/xfs_rw.h 2009-10-14 17:30:02.396028076 +0200 +++ xfs/fs/xfs/xfs_rw.h 2009-10-14 17:30:20.472287472 +0200 @@ -37,13 +37,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_ } /* - * Flags for xfs_free_eofblocks - */ -#define XFS_FREE_EOF_LOCK (1<<0) -#define XFS_FREE_EOF_NOLOCK (1<<1) - - -/* * helper function to extract extent size hint from inode */ STATIC_INLINE xfs_extlen_t Index: xfs/fs/xfs/xfs_vnodeops.c =================================================================== --- xfs.orig/fs/xfs/xfs_vnodeops.c 2009-10-14 17:30:13.363024201 +0200 +++ xfs/fs/xfs/xfs_vnodeops.c 2009-10-14 17:35:39.314256388 +0200 @@ -709,6 +709,11 @@ xfs_fsync( } /* + * Flags for xfs_free_eofblocks + */ +#define XFS_FREE_EOF_TRYLOCK (1<<0) + +/* * This is called by xfs_inactive to free any blocks beyond eof * when the link count isn't zero and by xfs_dm_punch_hole() when * punching a hole to EOF. @@ -726,7 +731,6 @@ xfs_free_eofblocks( xfs_filblks_t map_len; int nimaps; xfs_bmbt_irec_t imap; - int use_iolock = (flags & XFS_FREE_EOF_LOCK); /* * Figure out if there are any blocks beyond the end @@ -768,14 +772,19 @@ xfs_free_eofblocks( * cache and we can't * do that within a transaction. */ - if (use_iolock) + if (flags & XFS_FREE_EOF_TRYLOCK) { + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { + xfs_trans_cancel(tp, 0); + return 0; + } + } else { xfs_ilock(ip, XFS_IOLOCK_EXCL); + } error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, ip->i_size); if (error) { xfs_trans_cancel(tp, 0); - if (use_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } @@ -812,8 +821,7 @@ xfs_free_eofblocks( error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); } - xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) - : XFS_ILOCK_EXCL)); + xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL); } return error; } @@ -1113,7 +1121,17 @@ xfs_release( (ip->i_df.if_flags & XFS_IFEXTENTS)) && (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { - error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + + /* + * If we can't get the iolock just skip truncating + * the blocks past EOF because we could deadlock + * with the mmap_sem otherwise. We'll get another + * chance to drop them once the last reference to + * the inode is dropped, so we'll never leak blocks + * permanently. + */ + error = xfs_free_eofblocks(mp, ip, + XFS_FREE_EOF_TRYLOCK); if (error) return error; } @@ -1184,7 +1202,7 @@ xfs_inactive( (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || (ip->i_delayed_blks != 0)))) { - error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + error = xfs_free_eofblocks(mp, ip, 0); if (error) return VN_INACTIVE_CACHE; } _______________________________________________ xfs mailing list xfs@... http://oss.sgi.com/mailman/listinfo/xfs |
|
|
Re: [PATCH] xfs: fix mmap_sem vs iolock lock order inversion in xfs_free_eofblocksping?
On Mon, Oct 19, 2009 at 12:03:46AM -0400, Christoph Hellwig wrote: > When xfs_free_eofblocks is called from ->release the VM might already > hold the mmap_sem, but in the write path we take the iolock before > taking the mmap_sem in the generic write code. > > Switch xfs_free_eofblocks to only trylock the iolock if called from ->release > and skip trimming the prellocated blocks in that case. We'll still free > them later on the final iput. > > Signed-off-by: Christoph Hellwig <hch@...> > > Index: xfs/fs/xfs/xfs_rw.h > =================================================================== > --- xfs.orig/fs/xfs/xfs_rw.h 2009-10-14 17:30:02.396028076 +0200 > +++ xfs/fs/xfs/xfs_rw.h 2009-10-14 17:30:20.472287472 +0200 > @@ -37,13 +37,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_ > } > > /* > - * Flags for xfs_free_eofblocks > - */ > -#define XFS_FREE_EOF_LOCK (1<<0) > -#define XFS_FREE_EOF_NOLOCK (1<<1) > - > - > -/* > * helper function to extract extent size hint from inode > */ > STATIC_INLINE xfs_extlen_t > Index: xfs/fs/xfs/xfs_vnodeops.c > =================================================================== > --- xfs.orig/fs/xfs/xfs_vnodeops.c 2009-10-14 17:30:13.363024201 +0200 > +++ xfs/fs/xfs/xfs_vnodeops.c 2009-10-14 17:35:39.314256388 +0200 > @@ -709,6 +709,11 @@ xfs_fsync( > } > > /* > + * Flags for xfs_free_eofblocks > + */ > +#define XFS_FREE_EOF_TRYLOCK (1<<0) > + > +/* > * This is called by xfs_inactive to free any blocks beyond eof > * when the link count isn't zero and by xfs_dm_punch_hole() when > * punching a hole to EOF. > @@ -726,7 +731,6 @@ xfs_free_eofblocks( > xfs_filblks_t map_len; > int nimaps; > xfs_bmbt_irec_t imap; > - int use_iolock = (flags & XFS_FREE_EOF_LOCK); > > /* > * Figure out if there are any blocks beyond the end > @@ -768,14 +772,19 @@ xfs_free_eofblocks( > * cache and we can't > * do that within a transaction. > */ > - if (use_iolock) > + if (flags & XFS_FREE_EOF_TRYLOCK) { > + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { > + xfs_trans_cancel(tp, 0); > + return 0; > + } > + } else { > xfs_ilock(ip, XFS_IOLOCK_EXCL); > + } > error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, > ip->i_size); > if (error) { > xfs_trans_cancel(tp, 0); > - if (use_iolock) > - xfs_iunlock(ip, XFS_IOLOCK_EXCL); > + xfs_iunlock(ip, XFS_IOLOCK_EXCL); > return error; > } > > @@ -812,8 +821,7 @@ xfs_free_eofblocks( > error = xfs_trans_commit(tp, > XFS_TRANS_RELEASE_LOG_RES); > } > - xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) > - : XFS_ILOCK_EXCL)); > + xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL); > } > return error; > } > @@ -1113,7 +1121,17 @@ xfs_release( > (ip->i_df.if_flags & XFS_IFEXTENTS)) && > (!(ip->i_d.di_flags & > (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { > - error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); > + > + /* > + * If we can't get the iolock just skip truncating > + * the blocks past EOF because we could deadlock > + * with the mmap_sem otherwise. We'll get another > + * chance to drop them once the last reference to > + * the inode is dropped, so we'll never leak blocks > + * permanently. > + */ > + error = xfs_free_eofblocks(mp, ip, > + XFS_FREE_EOF_TRYLOCK); > if (error) > return error; > } > @@ -1184,7 +1202,7 @@ xfs_inactive( > (!(ip->i_d.di_flags & > (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || > (ip->i_delayed_blks != 0)))) { > - error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); > + error = xfs_free_eofblocks(mp, ip, 0); > if (error) > return VN_INACTIVE_CACHE; > } > > _______________________________________________ > xfs mailing list > xfs@... > http://oss.sgi.com/mailman/listinfo/xfs _______________________________________________ xfs mailing list xfs@... http://oss.sgi.com/mailman/listinfo/xfs |
|
|
RE: [PATCH] xfs: fix mmap_sem vs iolock lock order inversion inxfs_free_eofblocksChristoph Hellwig wrote:
> When xfs_free_eofblocks is called from ->release the VM might already > hold the mmap_sem, but in the write path we take the iolock before > taking the mmap_sem in the generic write code. > > Switch xfs_free_eofblocks to only trylock the iolock if called from ->release > and skip trimming the prellocated blocks in that case. We'll still free > them later on the final iput. > > Signed-off-by: Christoph Hellwig <hch@...> I have a minor suggestion below, but it looks correct to me. I tried to get a better idea of what the conditions were where mmap_sem would be held by VM when ->release gets called, but didn't get to the bottom of that. If it is easily characterized you could mention it in comments. Reviewed-by: Alex Elder <aelder@...> > Index: xfs/fs/xfs/xfs_rw.h > =================================================================== > --- xfs.orig/fs/xfs/xfs_rw.h 2009-10-14 17:30:02.396028076 +0200 > +++ xfs/fs/xfs/xfs_rw.h 2009-10-14 17:30:20.472287472 +0200 > @@ -37,13 +37,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_ > } > > /* > - * Flags for xfs_free_eofblocks > - */ > -#define XFS_FREE_EOF_LOCK (1<<0) > -#define XFS_FREE_EOF_NOLOCK (1<<1) > - > - > -/* > * helper function to extract extent size hint from inode > */ > STATIC_INLINE xfs_extlen_t > Index: xfs/fs/xfs/xfs_vnodeops.c > =================================================================== > --- xfs.orig/fs/xfs/xfs_vnodeops.c 2009-10-14 17:30:13.363024201 +0200 > +++ xfs/fs/xfs/xfs_vnodeops.c 2009-10-14 17:35:39.314256388 +0200 > @@ -709,6 +709,11 @@ xfs_fsync( > } > > /* > + * Flags for xfs_free_eofblocks > + */ > +#define XFS_FREE_EOF_TRYLOCK (1<<0) This is really a Boolean in the place it's used, rather than a flags mask. Unless you have plans to add other flags, maybe just don't bother defining this, and pass a zero/non-zero for a renamed argument to xfs_free_eofblocks(). (I mention this again below, in that context.) > +/* > * This is called by xfs_inactive to free any blocks beyond eof > * when the link count isn't zero and by xfs_dm_punch_hole() when > * punching a hole to EOF. > @@ -726,7 +731,6 @@ xfs_free_eofblocks( > xfs_filblks_t map_len; > int nimaps; > xfs_bmbt_irec_t imap; > - int use_iolock = (flags & XFS_FREE_EOF_LOCK); > > /* > * Figure out if there are any blocks beyond the end > @@ -768,14 +772,19 @@ xfs_free_eofblocks( > * cache and we can't > * do that within a transaction. > */ > - if (use_iolock) > + if (flags & XFS_FREE_EOF_TRYLOCK) { If you rename "flag" to "skip_ok" (or maybe "try_lock": if (skip_ok && ! xfs_ilock_nowait(...)) { xfs_trans_cancel(); return 0; } else xfs_ilock(...); if (try_lock) > + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { > + xfs_trans_cancel(tp, 0); > + return 0; > + } > + } else { > xfs_ilock(ip, XFS_IOLOCK_EXCL); > + } > error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, > ip->i_size); > if (error) { > xfs_trans_cancel(tp, 0); > - if (use_iolock) > - xfs_iunlock(ip, XFS_IOLOCK_EXCL); > + xfs_iunlock(ip, XFS_IOLOCK_EXCL); > return error; > } > > @@ -812,8 +821,7 @@ xfs_free_eofblocks( > error = xfs_trans_commit(tp, > XFS_TRANS_RELEASE_LOG_RES); > } > - xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) > - : XFS_ILOCK_EXCL)); > + xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL); > } > return error; > } > @@ -1113,7 +1121,17 @@ xfs_release( > (ip->i_df.if_flags & XFS_IFEXTENTS)) && > (!(ip->i_d.di_flags & > (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { > - error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); > + > + /* > + * If we can't get the iolock just skip truncating > + * the blocks past EOF because we could deadlock > + * with the mmap_sem otherwise. We'll get another > + * chance to drop them once the last reference to > + * the inode is dropped, so we'll never leak blocks > + * permanently. > + */ > + error = xfs_free_eofblocks(mp, ip, > + XFS_FREE_EOF_TRYLOCK); > if (error) > return error; > } > @@ -1184,7 +1202,7 @@ xfs_inactive( > (!(ip->i_d.di_flags & > (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || > (ip->i_delayed_blks != 0)))) { > - error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); > + error = xfs_free_eofblocks(mp, ip, 0); > if (error) > return VN_INACTIVE_CACHE; > } > > _______________________________________________ > xfs mailing list > xfs@... > http://oss.sgi.com/mailman/listinfo/xfs _______________________________________________ xfs mailing list xfs@... http://oss.sgi.com/mailman/listinfo/xfs |
|
|
Re: [PATCH] xfs: fix mmap_sem vs iolock lock order inversion inxfs_free_eofblocksOn Mon, Nov 02, 2009 at 03:10:33PM -0600, Alex Elder wrote:
> I have a minor suggestion below, but it looks correct to me. > I tried to get a better idea of what the conditions were > where mmap_sem would be held by VM when ->release gets > called, but didn't get to the bottom of that. If it is > easily characterized you could mention it in comments. It's the VMA merging code. But IMHO that's too much of an implementation detail to put into the comment here. Commit message might be fine. _______________________________________________ xfs mailing list xfs@... http://oss.sgi.com/mailman/listinfo/xfs |
| Free embeddable forum powered by Nabble | Forum Help |