|
View:
New views
6 Messages
—
Rating Filter:
Alert me
|
|
|
vget() patchHi,
unless someone objects I'll commit the attached patch and request pullup to netbsd-5 next week-end. This is he same as the one posted in the "panic due to lost vnode flag" thread. Althoug it doesn't fix this special issue, it seems to fix kern/41147 in a better way, and I also hope kern/42205. I've been running with this patch on several servers (and the previous fix for kern/41147 commented out as in the patch below) without problems; this is also being tested by others. -- Manuel Bouyer, LIP6, Universite Paris VI. Manuel.Bouyer@... NetBSD: 26 ans d'experience feront toujours la difference -- Index: sys/ufs/ufs/ufs_ihash.c =================================================================== RCS file: /cvsroot/src/sys/ufs/ufs/ufs_ihash.c,v retrieving revision 1.26.10.1 diff -u -p -u -r1.26.10.1 ufs_ihash.c --- sys/ufs/ufs/ufs_ihash.c 28 Sep 2009 01:43:02 -0000 1.26.10.1 +++ sys/ufs/ufs/ufs_ihash.c 24 Oct 2009 13:33:23 -0000 @@ -152,6 +152,7 @@ ufs_ihashget(dev_t dev, ino_t inum, int mutex_exit(&ufs_ihash_lock); if (vget(vp, flags | LK_INTERLOCK)) goto loop; +#if 0 if (VTOI(vp) != ip || ip->i_number != inum || ip->i_dev != dev) { /* lost race against vclean() */ @@ -161,6 +162,7 @@ ufs_ihashget(dev_t dev, ino_t inum, int vp = NULL; goto loop; } +#endif } return (vp); } Index: sys/kern/vfs_subr.c =================================================================== RCS file: /cvsroot/src/sys/kern/vfs_subr.c,v retrieving revision 1.357.4.5 diff -u -p -u -r1.357.4.5 vfs_subr.c --- sys/kern/vfs_subr.c 21 Jul 2009 00:31:58 -0000 1.357.4.5 +++ sys/kern/vfs_subr.c 24 Oct 2009 13:33:23 -0000 @@ -370,6 +370,17 @@ try_nextlist: vp->v_freelisthd = NULL; mutex_exit(&vnode_free_list_lock); + if (vp->v_usecount != 0) { + /* + * was referenced again before we got the interlock + * Don't return to freelist - the holder of the last + * reference will destroy it. + */ + vrelel(vp, 0); /* releases vp->v_interlock */ + mutex_enter(&vnode_free_list_lock); + goto retry; + } + /* * The vnode is still associated with a file system, so we must * clean it out before reusing it. We need to add a reference @@ -1288,6 +1299,22 @@ vget(vnode_t *vp, int flags) vrelel(vp, 0); return ENOENT; } + + if ((vp->v_iflag & VI_INACTNOW) != 0) { + /* + * if it's being desactived, wait for it to complete. + * Make sure to not return a clean vnode. + */ + if ((flags & LK_NOWAIT) != 0) { + vrelel(vp, 0); + return EBUSY; + } + vwait(vp, VI_INACTNOW); + if ((vp->v_iflag & VI_CLEAN) != 0) { + vrelel(vp, 0); + return ENOENT; + } + } if (flags & LK_TYPE_MASK) { error = vn_lock(vp, flags | LK_INTERLOCK); if (error != 0) { @@ -1427,6 +1454,7 @@ vrelel(vnode_t *vp, int flags) if (++vrele_pending > (desiredvnodes >> 8)) cv_signal(&vrele_cv); mutex_exit(&vrele_lock); + cv_broadcast(&vp->v_cv); mutex_exit(&vp->v_interlock); return; } @@ -1451,6 +1479,7 @@ vrelel(vnode_t *vp, int flags) VOP_INACTIVE(vp, &recycle); mutex_enter(&vp->v_interlock); vp->v_iflag &= ~VI_INACTNOW; + cv_broadcast(&vp->v_cv); if (!recycle) { if (vtryrele(vp)) { mutex_exit(&vp->v_interlock); |
|
|
Re: vget() patchbouyer@... said: > Althoug it doesn't fix this special issue fwiw, I didn't see this panic since I reported it yesterday. If it happens again, I'll let you know. best regards Matthias ------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------ Forschungszentrum Juelich GmbH 52425 Juelich Sitz der Gesellschaft: Juelich Eingetragen im Handelsregister des Amtsgerichts Dueren Nr. HR B 3498 Vorsitzende des Aufsichtsrats: MinDir'in Baerbel Brumme-Bothe Geschaeftsfuehrung: Prof. Dr. Achim Bachem (Vorsitzender), Dr. Ulrich Krafft (stellv. Vorsitzender), Prof. Dr.-Ing. Harald Bolt, Prof. Dr. Sebastian M. Schmidt ------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------ |
|
|
Re: vget() patchOn Wed, Oct 28, 2009 at 06:57:36PM +0100, Manuel Bouyer wrote:
> Hi, > unless someone objects I'll commit the attached patch and request > pullup to netbsd-5 next week-end. This is he same as the one posted in the > "panic due to lost vnode flag" thread. > Althoug it doesn't fix this special issue, it seems to fix > kern/41147 in a better way, and I also hope kern/42205. It doesn't fix kern/42205, This one is still being looked at. -- Manuel Bouyer, LIP6, Universite Paris VI. Manuel.Bouyer@... NetBSD: 26 ans d'experience feront toujours la difference -- |
|
|
Re: vget() patchOn Wed, Oct 28, 2009 at 06:57:36PM +0100, Manuel Bouyer wrote:
> Hi, > unless someone objects I'll commit the attached patch and request > pullup to netbsd-5 next week-end. This is he same as the one posted in the > "panic due to lost vnode flag" thread. > Althoug it doesn't fix this special issue, it seems to fix > kern/41147 in a better way, and I also hope kern/42205. > I've been running with this patch on several servers (and the previous > fix for kern/41147 commented out as in the patch below) without > problems; this is also being tested by others. commited, will request a pullup to netbsd-5 -- Manuel Bouyer, LIP6, Universite Paris VI. Manuel.Bouyer@... NetBSD: 26 ans d'experience feront toujours la difference -- |
|
|
Re: vget() patchOn Wed, Oct 28, 2009 at 06:57:36PM +0100, Manuel Bouyer wrote:
> Hi, > unless someone objects I'll commit the attached patch and request > pullup to netbsd-5 next week-end. This is he same as the one posted in the > "panic due to lost vnode flag" thread. > Althoug it doesn't fix this special issue, it seems to fix > kern/41147 in a better way, and I also hope kern/42205. > I've been running with this patch on several servers (and the previous > fix for kern/41147 commented out as in the patch below) without > problems; this is also being tested by others. This patch cause a deadlock with layered fs: the vrele kernel thread can sleep in vrelel() getting the vnode lock. While it does this another thread may sleep in vget() waiting for VI_INACTNOW to clear (well, not exactly, but it's looping on vget(NOWAIT) in layer_node_find() which leads to the same result) and if this is a vnode in a upper layer of a stacked FS, this thread may hold the vnode lock because it's the same as the vnode lock from the corresponding vnode in the lower layer (i.e. in layer_node_find(), lowervp and the vp we call vget with share the same lock). So the vrele thread will never get the vnode lock to VOP_INACTIVE it and the loop in always restart because vget() fails because vp is in VI_INACTNOW state. A similar deadlock could happen in layer_node_find() with VI_XLOCK, it's avoided by skipping vnode with VI_XLOCK set. We can't do the same for VI_INACTNOW because when VI_INACTNOW is set we don't know if the vnode will really be cleaned or not. Skipping VI_INACTNOW vnode would lead to duplicate entries in the layer mount hash. I think a better way to handle this is to not sleep with VI_INACTNOW set in vrelel(), this is what the attached patch does. Before trying to get the vnode lock (when allowed to sleep, in the LK_NOWAIT case it's not an issue), we drop VI_INACTNOW. Once we got the lock, we set VI_INACTNOW again and test that we did not gain another reference while we were slepping. If we did, we drop VI_INACTNOW, drop the vnode lock and exit from vrelel(). If we didn't gain a reference all is well, we set VI_INACTNOW again and go on VOP_INACTIVEing the vnode. Comments ? -- Manuel Bouyer <bouyer@...> NetBSD: 26 ans d'experience feront toujours la difference -- Index: vfs_subr.c =================================================================== RCS file: /cvsroot/src/sys/kern/vfs_subr.c,v retrieving revision 1.357.4.7 diff -u -p -u -r1.357.4.7 vfs_subr.c --- vfs_subr.c 21 Nov 2009 20:07:49 -0000 1.357.4.7 +++ vfs_subr.c 27 Nov 2009 18:57:41 -0000 @@ -1411,14 +1411,30 @@ vrelel(vnode_t *vp, int flags) /* The pagedaemon can't wait around; defer. */ defer = true; } else if (curlwp == vrele_lwp) { - /* We have to try harder. */ - vp->v_iflag &= ~VI_INACTREDO; + /* + * We have to try harder. But we can't sleep + * with VI_INACTNOW as vget() may be waiting on it. + */ + vp->v_iflag &= ~(VI_INACTREDO|VI_INACTNOW); + cv_broadcast(&vp->v_cv); error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY); if (error != 0) { /* XXX */ vpanic(vp, "vrele: unable to lock %p"); } + mutex_enter(&vp->v_interlock); + /* + * if we did get another reference while + * sleeping, don't try to inactivate it yet. + */ + if (__predict_false(vtryrele(vp))) { + VOP_UNLOCK(vp, 0); + mutex_exit(&vp->v_interlock); + return; + } + vp->v_iflag |= VI_INACTNOW; + mutex_exit(&vp->v_interlock); defer = false; } else if ((vp->v_iflag & VI_LAYER) != 0) { /* |
|
|
Re: vget() patchManuel Bouyer wrote:
> On Wed, Oct 28, 2009 at 06:57:36PM +0100, Manuel Bouyer wrote: > >> Hi, >> unless someone objects I'll commit the attached patch and request >> pullup to netbsd-5 next week-end. This is he same as the one posted in the >> "panic due to lost vnode flag" thread. >> Althoug it doesn't fix this special issue, it seems to fix >> kern/41147 in a better way, and I also hope kern/42205. >> I've been running with this patch on several servers (and the previous >> fix for kern/41147 commented out as in the patch below) without >> problems; this is also being tested by others. >> > > This patch cause a deadlock with layered fs: > [...] > Comments ? > > is now running for more than half an hour whereas it hang the machine after a few minutes with the previous kernel. Thank you |
| Free embeddable forum powered by Nabble | Forum Help |