diff mbox series

[3/5] ceph: flush mdlog before umounting

Message ID 20210629044241.30359-4-xiubli@redhat.com
State New
Headers show
Series [1/5] ceph: export ceph_create_session_msg | expand

Commit Message

Xiubo Li June 29, 2021, 4:42 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/mds_client.c         | 29 +++++++++++++++++++++++++++++
 fs/ceph/mds_client.h         |  1 +
 include/linux/ceph/ceph_fs.h |  1 +
 3 files changed, 31 insertions(+)

Comments

Xiubo Li June 30, 2021, 12:36 a.m. UTC | #1
On 6/29/21 11:34 PM, Jeff Layton wrote:
> On Tue, 2021-06-29 at 12:42 +0800, xiubli@redhat.com wrote:

>> From: Xiubo Li <xiubli@redhat.com>

>>

>> Signed-off-by: Xiubo Li <xiubli@redhat.com>

>> ---

>>   fs/ceph/mds_client.c         | 29 +++++++++++++++++++++++++++++

>>   fs/ceph/mds_client.h         |  1 +

>>   include/linux/ceph/ceph_fs.h |  1 +

>>   3 files changed, 31 insertions(+)

>>

>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c

>> index 96bef289f58f..2db87a5c68d4 100644

>> --- a/fs/ceph/mds_client.c

>> +++ b/fs/ceph/mds_client.c

>> @@ -4689,6 +4689,34 @@ static void wait_requests(struct ceph_mds_client *mdsc)

>>   	dout("wait_requests done\n");

>>   }

>>   

>> +static void send_flush_mdlog(struct ceph_mds_session *s)

>> +{

>> +	u64 seq = s->s_seq;

>> +	struct ceph_msg *msg;

>> +

> The s_seq field is protected by the s_mutex (at least, AFAICT). I think

> you probably need to take it before fetching the s_seq and release it

> after calling ceph_con_send.


Will fix it.


>

> Long term, we probably need to rethink how the session sequence number

> handling is done. The s_mutex is a terribly heavyweight mechanism for

> this.


Yeah, makes sense.


>> +	/*

>> +	 * For the MDS daemons lower than Luminous will crash when it

>> +	 * saw this unknown session request.

>> +	 */

>> +	if (!CEPH_HAVE_FEATURE(s->s_con.peer_features, SERVER_LUMINOUS))

>> +		return;

>> +

>> +	dout("send_flush_mdlog to mds%d (%s)s seq %lld\n",

>> +	     s->s_mds, ceph_session_state_name(s->s_state), seq);

>> +	msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_FLUSH_MDLOG, seq);

>> +	if (!msg) {

>> +		pr_err("failed to send_flush_mdlog to mds%d (%s)s seq %lld\n",

>> +		       s->s_mds, ceph_session_state_name(s->s_state), seq);

>> +	} else {

>> +		ceph_con_send(&s->s_con, msg);

>> +	}

>> +}

>> +

>> +void flush_mdlog(struct ceph_mds_client *mdsc)

>> +{

>> +	ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);

>> +}

>> +

>>   /*

>>    * called before mount is ro, and before dentries are torn down.

>>    * (hmm, does this still race with new lookups?)

>> @@ -4698,6 +4726,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)

>>   	dout("pre_umount\n");

>>   	mdsc->stopping = 1;

>>   

>> +	flush_mdlog(mdsc);

>>   	ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);

>>   	ceph_flush_dirty_caps(mdsc);

>>   	wait_requests(mdsc);

>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h

>> index fca2cf427eaf..79d5b8ed62bf 100644

>> --- a/fs/ceph/mds_client.h

>> +++ b/fs/ceph/mds_client.h

>> @@ -537,6 +537,7 @@ extern int ceph_iterate_session_caps(struct ceph_mds_session *session,

>>   				     int (*cb)(struct inode *,

>>   					       struct ceph_cap *, void *),

>>   				     void *arg);

>> +extern void flush_mdlog(struct ceph_mds_client *mdsc);

>>   extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);

>>   

>>   static inline void ceph_mdsc_free_path(char *path, int len)

>> diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h

>> index 57e5bd63fb7a..ae60696fe40b 100644

>> --- a/include/linux/ceph/ceph_fs.h

>> +++ b/include/linux/ceph/ceph_fs.h

>> @@ -300,6 +300,7 @@ enum {

>>   	CEPH_SESSION_FLUSHMSG_ACK,

>>   	CEPH_SESSION_FORCE_RO,

>>   	CEPH_SESSION_REJECT,

>> +	CEPH_SESSION_REQUEST_FLUSH_MDLOG,

>>   };

>>   

>>   extern const char *ceph_session_op_name(int op);
Ilya Dryomov June 30, 2021, 12:39 p.m. UTC | #2
On Tue, Jun 29, 2021 at 6:42 AM <xiubli@redhat.com> wrote:
>

> From: Xiubo Li <xiubli@redhat.com>

>

> Signed-off-by: Xiubo Li <xiubli@redhat.com>

> ---

>  fs/ceph/mds_client.c         | 29 +++++++++++++++++++++++++++++

>  fs/ceph/mds_client.h         |  1 +

>  include/linux/ceph/ceph_fs.h |  1 +

>  3 files changed, 31 insertions(+)

>

> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c

> index 96bef289f58f..2db87a5c68d4 100644

> --- a/fs/ceph/mds_client.c

> +++ b/fs/ceph/mds_client.c

> @@ -4689,6 +4689,34 @@ static void wait_requests(struct ceph_mds_client *mdsc)

>         dout("wait_requests done\n");

>  }

>

> +static void send_flush_mdlog(struct ceph_mds_session *s)

> +{

> +       u64 seq = s->s_seq;

> +       struct ceph_msg *msg;

> +

> +       /*

> +        * For the MDS daemons lower than Luminous will crash when it

> +        * saw this unknown session request.


"Pre-luminous MDS crashes when it sees an unknown session request"

> +        */

> +       if (!CEPH_HAVE_FEATURE(s->s_con.peer_features, SERVER_LUMINOUS))

> +               return;

> +

> +       dout("send_flush_mdlog to mds%d (%s)s seq %lld\n",


Should (%s)s be just (%s)?

> +            s->s_mds, ceph_session_state_name(s->s_state), seq);

> +       msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_FLUSH_MDLOG, seq);

> +       if (!msg) {

> +               pr_err("failed to send_flush_mdlog to mds%d (%s)s seq %lld\n",


Same here and let's avoid function names in error messages.  Something
like "failed to request mdlog flush ...".

> +                      s->s_mds, ceph_session_state_name(s->s_state), seq);

> +       } else {

> +               ceph_con_send(&s->s_con, msg);

> +       }

> +}

> +

> +void flush_mdlog(struct ceph_mds_client *mdsc)

> +{

> +       ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);

> +}


Is this wrapper really needed?

> +

>  /*

>   * called before mount is ro, and before dentries are torn down.

>   * (hmm, does this still race with new lookups?)

> @@ -4698,6 +4726,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)

>         dout("pre_umount\n");

>         mdsc->stopping = 1;

>

> +       flush_mdlog(mdsc);

>         ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);

>         ceph_flush_dirty_caps(mdsc);

>         wait_requests(mdsc);

> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h

> index fca2cf427eaf..79d5b8ed62bf 100644

> --- a/fs/ceph/mds_client.h

> +++ b/fs/ceph/mds_client.h

> @@ -537,6 +537,7 @@ extern int ceph_iterate_session_caps(struct ceph_mds_session *session,

>                                      int (*cb)(struct inode *,

>                                                struct ceph_cap *, void *),

>                                      void *arg);

> +extern void flush_mdlog(struct ceph_mds_client *mdsc);

>  extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);

>

>  static inline void ceph_mdsc_free_path(char *path, int len)

> diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h

> index 57e5bd63fb7a..ae60696fe40b 100644

> --- a/include/linux/ceph/ceph_fs.h

> +++ b/include/linux/ceph/ceph_fs.h

> @@ -300,6 +300,7 @@ enum {

>         CEPH_SESSION_FLUSHMSG_ACK,

>         CEPH_SESSION_FORCE_RO,

>         CEPH_SESSION_REJECT,

> +       CEPH_SESSION_REQUEST_FLUSH_MDLOG,


Need to update ceph_session_op_name().

Thanks,

                Ilya
Xiubo Li July 1, 2021, 1:18 a.m. UTC | #3
On 6/30/21 8:39 PM, Ilya Dryomov wrote:
> On Tue, Jun 29, 2021 at 6:42 AM <xiubli@redhat.com> wrote:

>> From: Xiubo Li <xiubli@redhat.com>

>>

>> Signed-off-by: Xiubo Li <xiubli@redhat.com>

>> ---

>>   fs/ceph/mds_client.c         | 29 +++++++++++++++++++++++++++++

>>   fs/ceph/mds_client.h         |  1 +

>>   include/linux/ceph/ceph_fs.h |  1 +

>>   3 files changed, 31 insertions(+)

>>

>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c

>> index 96bef289f58f..2db87a5c68d4 100644

>> --- a/fs/ceph/mds_client.c

>> +++ b/fs/ceph/mds_client.c

>> @@ -4689,6 +4689,34 @@ static void wait_requests(struct ceph_mds_client *mdsc)

>>          dout("wait_requests done\n");

>>   }

>>

>> +static void send_flush_mdlog(struct ceph_mds_session *s)

>> +{

>> +       u64 seq = s->s_seq;

>> +       struct ceph_msg *msg;

>> +

>> +       /*

>> +        * For the MDS daemons lower than Luminous will crash when it

>> +        * saw this unknown session request.

> "Pre-luminous MDS crashes when it sees an unknown session request"

Will fix it.
>

>> +        */

>> +       if (!CEPH_HAVE_FEATURE(s->s_con.peer_features, SERVER_LUMINOUS))

>> +               return;

>> +

>> +       dout("send_flush_mdlog to mds%d (%s)s seq %lld\n",

> Should (%s)s be just (%s)?

Will fix it.
>

>> +            s->s_mds, ceph_session_state_name(s->s_state), seq);

>> +       msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_FLUSH_MDLOG, seq);

>> +       if (!msg) {

>> +               pr_err("failed to send_flush_mdlog to mds%d (%s)s seq %lld\n",

> Same here and let's avoid function names in error messages.  Something

> like "failed to request mdlog flush ...".

Okay.
>

>> +                      s->s_mds, ceph_session_state_name(s->s_state), seq);

>> +       } else {

>> +               ceph_con_send(&s->s_con, msg);

>> +       }

>> +}

>> +

>> +void flush_mdlog(struct ceph_mds_client *mdsc)

>> +{

>> +       ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);

>> +}

> Is this wrapper really needed?

Yeah, I can remove it.
>

>> +

>>   /*

>>    * called before mount is ro, and before dentries are torn down.

>>    * (hmm, does this still race with new lookups?)

>> @@ -4698,6 +4726,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)

>>          dout("pre_umount\n");

>>          mdsc->stopping = 1;

>>

>> +       flush_mdlog(mdsc);

>>          ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);

>>          ceph_flush_dirty_caps(mdsc);

>>          wait_requests(mdsc);

>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h

>> index fca2cf427eaf..79d5b8ed62bf 100644

>> --- a/fs/ceph/mds_client.h

>> +++ b/fs/ceph/mds_client.h

>> @@ -537,6 +537,7 @@ extern int ceph_iterate_session_caps(struct ceph_mds_session *session,

>>                                       int (*cb)(struct inode *,

>>                                                 struct ceph_cap *, void *),

>>                                       void *arg);

>> +extern void flush_mdlog(struct ceph_mds_client *mdsc);

>>   extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);

>>

>>   static inline void ceph_mdsc_free_path(char *path, int len)

>> diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h

>> index 57e5bd63fb7a..ae60696fe40b 100644

>> --- a/include/linux/ceph/ceph_fs.h

>> +++ b/include/linux/ceph/ceph_fs.h

>> @@ -300,6 +300,7 @@ enum {

>>          CEPH_SESSION_FLUSHMSG_ACK,

>>          CEPH_SESSION_FORCE_RO,

>>          CEPH_SESSION_REJECT,

>> +       CEPH_SESSION_REQUEST_FLUSH_MDLOG,

> Need to update ceph_session_op_name().


Sure.

Thanks

BRs

> Thanks,

>

>                  Ilya

>
diff mbox series

Patch

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 96bef289f58f..2db87a5c68d4 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4689,6 +4689,34 @@  static void wait_requests(struct ceph_mds_client *mdsc)
 	dout("wait_requests done\n");
 }
 
+static void send_flush_mdlog(struct ceph_mds_session *s)
+{
+	u64 seq = s->s_seq;
+	struct ceph_msg *msg;
+
+	/*
+	 * For the MDS daemons lower than Luminous will crash when it
+	 * saw this unknown session request.
+	 */
+	if (!CEPH_HAVE_FEATURE(s->s_con.peer_features, SERVER_LUMINOUS))
+		return;
+
+	dout("send_flush_mdlog to mds%d (%s)s seq %lld\n",
+	     s->s_mds, ceph_session_state_name(s->s_state), seq);
+	msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_FLUSH_MDLOG, seq);
+	if (!msg) {
+		pr_err("failed to send_flush_mdlog to mds%d (%s)s seq %lld\n",
+		       s->s_mds, ceph_session_state_name(s->s_state), seq);
+	} else {
+		ceph_con_send(&s->s_con, msg);
+	}
+}
+
+void flush_mdlog(struct ceph_mds_client *mdsc)
+{
+	ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
+}
+
 /*
  * called before mount is ro, and before dentries are torn down.
  * (hmm, does this still race with new lookups?)
@@ -4698,6 +4726,7 @@  void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
 	dout("pre_umount\n");
 	mdsc->stopping = 1;
 
+	flush_mdlog(mdsc);
 	ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
 	ceph_flush_dirty_caps(mdsc);
 	wait_requests(mdsc);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index fca2cf427eaf..79d5b8ed62bf 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -537,6 +537,7 @@  extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
 				     int (*cb)(struct inode *,
 					       struct ceph_cap *, void *),
 				     void *arg);
+extern void flush_mdlog(struct ceph_mds_client *mdsc);
 extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
 
 static inline void ceph_mdsc_free_path(char *path, int len)
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 57e5bd63fb7a..ae60696fe40b 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -300,6 +300,7 @@  enum {
 	CEPH_SESSION_FLUSHMSG_ACK,
 	CEPH_SESSION_FORCE_RO,
 	CEPH_SESSION_REJECT,
+	CEPH_SESSION_REQUEST_FLUSH_MDLOG,
 };
 
 extern const char *ceph_session_op_name(int op);