Message ID | 20210825134545.117521-2-xiubli@redhat.com |
---|---|
State | New |
Headers | show |
Series | [v3,1/3] ceph: remove the capsnaps when removing the caps | expand |
On 8/25/21 10:25 PM, Jeff Layton wrote: > On Wed, 2021-08-25 at 21:45 +0800, xiubli@redhat.com wrote: >> From: Xiubo Li <xiubli@redhat.com> >> >> The capsnaps will ihold the inodes when queuing to flush, so when >> force umounting it will close the sessions first and if the MDSes >> respond very fast and the session connections are closed just >> before killing the superblock, which will flush the msgr queue, >> then the flush capsnap callback won't ever be called, which will >> lead the memory leak bug for the ceph_inode_info. >> >> URL: https://tracker.ceph.com/issues/52295 >> Signed-off-by: Xiubo Li <xiubli@redhat.com> >> --- >> fs/ceph/caps.c | 67 +++++++++++++++++++++++++++++++++----------- >> fs/ceph/mds_client.c | 31 +++++++++++++++++++- >> fs/ceph/super.h | 6 ++++ >> 3 files changed, 86 insertions(+), 18 deletions(-) >> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c >> index 1e6261a16fb5..61326b490b2b 100644 >> --- a/fs/ceph/caps.c >> +++ b/fs/ceph/caps.c >> @@ -3162,7 +3162,15 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, >> break; >> } >> } >> - BUG_ON(!found); >> + >> + /* >> + * The capsnap should already be removed when >> + * removing auth cap in case likes force unmount. >> + */ >> + BUG_ON(!found && ci->i_auth_cap); >> + if (!found) >> + goto unlock; >> + >> capsnap->dirty_pages -= nr; >> if (capsnap->dirty_pages == 0) { >> complete_capsnap = true; >> @@ -3184,6 +3192,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, >> complete_capsnap ? 
" (complete capsnap)" : ""); >> } >> >> +unlock: >> spin_unlock(&ci->i_ceph_lock); >> >> if (last) { >> @@ -3658,6 +3667,43 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, >> iput(inode); >> } >> >> +void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap, >> + bool *wake_ci, bool *wake_mdsc) >> +{ >> + struct ceph_inode_info *ci = ceph_inode(inode); >> + struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; >> + bool ret; >> + >> + lockdep_assert_held(&ci->i_ceph_lock); > Hmm, your earlier patch had a note saying that the s_mutex needed to be > held here too. Is that not the case? The s_mutex is not needed here; I meant the i_ceph_lock. The comment was just copied from somewhere and I forgot to modify it. > >> + >> + dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci); >> + >> + list_del_init(&capsnap->ci_item); >> + ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush); >> + if (wake_ci) >> + *wake_ci = ret; >> + >> + spin_lock(&mdsc->cap_dirty_lock); >> + if (list_empty(&ci->i_cap_flush_list)) >> + list_del_init(&ci->i_flushing_item); >> + >> + ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush); >> + if (wake_mdsc) >> + *wake_mdsc = ret; >> + spin_unlock(&mdsc->cap_dirty_lock); >> +} >> + >> +void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap, >> + bool *wake_ci, bool *wake_mdsc) >> +{ >> + struct ceph_inode_info *ci = ceph_inode(inode); >> + >> + lockdep_assert_held(&ci->i_ceph_lock); >> + >> + WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing); >> + __ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc); >> +} >> + >> /* >> * Handle FLUSHSNAP_ACK. MDS has flushed snap data to disk and we can >> * throw away our cap_snap. 
>> @@ -3695,23 +3741,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, >> capsnap, capsnap->follows); >> } >> } >> - if (flushed) { >> - WARN_ON(capsnap->dirty_pages || capsnap->writing); >> - dout(" removing %p cap_snap %p follows %lld\n", >> - inode, capsnap, follows); >> - list_del(&capsnap->ci_item); >> - wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush); >> - >> - spin_lock(&mdsc->cap_dirty_lock); >> - >> - if (list_empty(&ci->i_cap_flush_list)) >> - list_del_init(&ci->i_flushing_item); >> - >> - wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc, >> - &capsnap->cap_flush); >> - spin_unlock(&mdsc->cap_dirty_lock); >> - } >> + if (flushed) >> + ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc); >> spin_unlock(&ci->i_ceph_lock); >> + >> if (flushed) { >> ceph_put_snap_context(capsnap->context); >> ceph_put_cap_snap(capsnap); >> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >> index df3a735f7837..36ad0ebb2295 100644 >> --- a/fs/ceph/mds_client.c >> +++ b/fs/ceph/mds_client.c >> @@ -1604,14 +1604,39 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session, >> return ret; >> } >> >> +static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode) >> +{ >> + struct ceph_inode_info *ci = ceph_inode(inode); >> + struct ceph_cap_snap *capsnap; >> + int capsnap_release = 0; >> + >> + lockdep_assert_held(&ci->i_ceph_lock); >> + >> + dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode); >> + >> + while (!list_empty(&ci->i_cap_snaps)) { >> + capsnap = list_first_entry(&ci->i_cap_snaps, >> + struct ceph_cap_snap, ci_item); >> + __ceph_remove_capsnap(inode, capsnap, NULL, NULL); >> + ceph_put_snap_context(capsnap->context); >> + ceph_put_cap_snap(capsnap); >> + capsnap_release++; >> + } >> + wake_up_all(&ci->i_cap_wq); >> + wake_up_all(&mdsc->cap_flushing_wq); >> + return capsnap_release; >> +} >> + >> static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, >> void 
*arg) >> { >> struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg; >> + struct ceph_mds_client *mdsc = fsc->mdsc; >> struct ceph_inode_info *ci = ceph_inode(inode); >> LIST_HEAD(to_remove); >> bool dirty_dropped = false; >> bool invalidate = false; >> + int capsnap_release = 0; >> >> dout("removing cap %p, ci is %p, inode is %p\n", >> cap, ci, &ci->vfs_inode); >> @@ -1619,7 +1644,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, >> __ceph_remove_cap(cap, false); >> if (!ci->i_auth_cap) { >> struct ceph_cap_flush *cf; >> - struct ceph_mds_client *mdsc = fsc->mdsc; >> >> if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) { >> if (inode->i_data.nrpages > 0) >> @@ -1683,6 +1707,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, >> list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); >> ci->i_prealloc_cap_flush = NULL; >> } >> + >> + if (!list_empty(&ci->i_cap_snaps)) >> + capsnap_release = remove_capsnaps(mdsc, inode); >> } >> spin_unlock(&ci->i_ceph_lock); >> while (!list_empty(&to_remove)) { >> @@ -1699,6 +1726,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, >> ceph_queue_invalidate(inode); >> if (dirty_dropped) >> iput(inode); >> + while (capsnap_release--) >> + iput(inode); >> return 0; >> } >> >> diff --git a/fs/ceph/super.h b/fs/ceph/super.h >> index 8f4f2747be65..445d13d760d1 100644 >> --- a/fs/ceph/super.h >> +++ b/fs/ceph/super.h >> @@ -1169,6 +1169,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, >> int had); >> extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, >> struct ceph_snap_context *snapc); >> +extern void __ceph_remove_capsnap(struct inode *inode, >> + struct ceph_cap_snap *capsnap, >> + bool *wake_ci, bool *wake_mdsc); >> +extern void ceph_remove_capsnap(struct inode *inode, >> + struct ceph_cap_snap *capsnap, >> + bool *wake_ci, bool *wake_mdsc); >> extern void ceph_flush_snaps(struct 
ceph_inode_info *ci, >> struct ceph_mds_session **psession); >> extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 1e6261a16fb5..61326b490b2b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3162,7 +3162,15 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, break; } } - BUG_ON(!found); + + /* + * The capsnap should already be removed when + * removing auth cap in case likes force unmount. + */ + BUG_ON(!found && ci->i_auth_cap); + if (!found) + goto unlock; + capsnap->dirty_pages -= nr; if (capsnap->dirty_pages == 0) { complete_capsnap = true; @@ -3184,6 +3192,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, complete_capsnap ? " (complete capsnap)" : ""); } +unlock: spin_unlock(&ci->i_ceph_lock); if (last) { @@ -3658,6 +3667,43 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, iput(inode); } +void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap, + bool *wake_ci, bool *wake_mdsc) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + bool ret; + + lockdep_assert_held(&ci->i_ceph_lock); + + dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci); + + list_del_init(&capsnap->ci_item); + ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush); + if (wake_ci) + *wake_ci = ret; + + spin_lock(&mdsc->cap_dirty_lock); + if (list_empty(&ci->i_cap_flush_list)) + list_del_init(&ci->i_flushing_item); + + ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush); + if (wake_mdsc) + *wake_mdsc = ret; + spin_unlock(&mdsc->cap_dirty_lock); +} + +void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap, + bool *wake_ci, bool *wake_mdsc) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + + lockdep_assert_held(&ci->i_ceph_lock); + + WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing); + __ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc); +} + /* * Handle FLUSHSNAP_ACK. 
MDS has flushed snap data to disk and we can * throw away our cap_snap. @@ -3695,23 +3741,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, capsnap, capsnap->follows); } } - if (flushed) { - WARN_ON(capsnap->dirty_pages || capsnap->writing); - dout(" removing %p cap_snap %p follows %lld\n", - inode, capsnap, follows); - list_del(&capsnap->ci_item); - wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush); - - spin_lock(&mdsc->cap_dirty_lock); - - if (list_empty(&ci->i_cap_flush_list)) - list_del_init(&ci->i_flushing_item); - - wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc, - &capsnap->cap_flush); - spin_unlock(&mdsc->cap_dirty_lock); - } + if (flushed) + ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc); spin_unlock(&ci->i_ceph_lock); + if (flushed) { ceph_put_snap_context(capsnap->context); ceph_put_cap_snap(capsnap); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index df3a735f7837..36ad0ebb2295 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1604,14 +1604,39 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session, return ret; } +static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_cap_snap *capsnap; + int capsnap_release = 0; + + lockdep_assert_held(&ci->i_ceph_lock); + + dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode); + + while (!list_empty(&ci->i_cap_snaps)) { + capsnap = list_first_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, ci_item); + __ceph_remove_capsnap(inode, capsnap, NULL, NULL); + ceph_put_snap_context(capsnap->context); + ceph_put_cap_snap(capsnap); + capsnap_release++; + } + wake_up_all(&ci->i_cap_wq); + wake_up_all(&mdsc->cap_flushing_wq); + return capsnap_release; +} + static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) { struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg; + struct ceph_mds_client *mdsc = 
fsc->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); LIST_HEAD(to_remove); bool dirty_dropped = false; bool invalidate = false; + int capsnap_release = 0; dout("removing cap %p, ci is %p, inode is %p\n", cap, ci, &ci->vfs_inode); @@ -1619,7 +1644,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, __ceph_remove_cap(cap, false); if (!ci->i_auth_cap) { struct ceph_cap_flush *cf; - struct ceph_mds_client *mdsc = fsc->mdsc; if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) { if (inode->i_data.nrpages > 0) @@ -1683,6 +1707,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); ci->i_prealloc_cap_flush = NULL; } + + if (!list_empty(&ci->i_cap_snaps)) + capsnap_release = remove_capsnaps(mdsc, inode); } spin_unlock(&ci->i_ceph_lock); while (!list_empty(&to_remove)) { @@ -1699,6 +1726,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, ceph_queue_invalidate(inode); if (dirty_dropped) iput(inode); + while (capsnap_release--) + iput(inode); return 0; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 8f4f2747be65..445d13d760d1 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1169,6 +1169,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, int had); extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, struct ceph_snap_context *snapc); +extern void __ceph_remove_capsnap(struct inode *inode, + struct ceph_cap_snap *capsnap, + bool *wake_ci, bool *wake_mdsc); +extern void ceph_remove_capsnap(struct inode *inode, + struct ceph_cap_snap *capsnap, + bool *wake_ci, bool *wake_mdsc); extern void ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session **psession); extern bool __ceph_should_report_size(struct ceph_inode_info *ci);