
ceph: put the requests/sessions when it fails to alloc memory

Message ID 20220112042904.8557-1-xiubli@redhat.com
State New
Series ceph: put the requests/sessions when it fails to alloc memory

Commit Message

Xiubo Li Jan. 12, 2022, 4:29 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

When the sessions memory allocation fails, make sure that req1, req2
and any sessions already taken get put. Also, if max_sessions has
decreased by the time the array is krealloc'ed on retry, the sessions
stored beyond the new size may be missed and never put.

And if max_sessions is 0, krealloc() returns ZERO_SIZE_PTR, which is
not NULL, so dereferencing it later leads to an access fault.

URL: https://tracker.ceph.com/issues/53819
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs")
---
 fs/ceph/caps.c | 55 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 18 deletions(-)
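
To illustrate the failure mode described in the commit message, here is a
minimal sketch of the pre-patch allocation pattern. It is simplified and not
the exact kernel code; only krealloc(), ZERO_SIZE_PTR and the fields shown in
the diff below are taken from the source.

	/*
	 * Sketch of the pre-patch pattern (not the exact kernel code).
	 * With mdsc->max_sessions == 0, krealloc() returns ZERO_SIZE_PTR,
	 * a non-NULL poison value, so the NULL check passes and any later
	 * sessions[i] access faults.  On a genuine allocation failure the
	 * early return also leaks the req1/req2 references taken earlier.
	 */
	unsigned int max = mdsc->max_sessions;		/* can be 0 */
	struct ceph_mds_session **sessions = NULL;

	sessions = krealloc(sessions, max * sizeof(*sessions), __GFP_ZERO);
	if (!sessions)					/* false for ZERO_SIZE_PTR */
		return -ENOMEM;				/* req1/req2 never put */

	sessions[0] = NULL;				/* faults when max == 0 */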

Comments

Xiubo Li Jan. 13, 2022, 1:39 a.m. UTC | #1
cc Niklas.

On 1/12/22 12:29 PM, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> When the sessions memory allocation fails, make sure that req1, req2
> and any sessions already taken get put. Also, if max_sessions has
> decreased by the time the array is krealloc'ed on retry, the sessions
> stored beyond the new size may be missed and never put.
>
> And if max_sessions is 0, krealloc() returns ZERO_SIZE_PTR, which is
> not NULL, so dereferencing it later leads to an access fault.
>
> URL: https://tracker.ceph.com/issues/53819
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs")

This was reported by:

Reported-by: Niklas Hambuechen <mail@nh2.me>

I have just received the correct email address from Niklas.

Regards

- Xiubo


> ---
>   fs/ceph/caps.c | 55 +++++++++++++++++++++++++++++++++-----------------
>   1 file changed, 37 insertions(+), 18 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 944b18b4e217..5c2719f66f62 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -2276,6 +2276,7 @@ static int unsafe_request_wait(struct inode *inode)
>   	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>   	struct ceph_inode_info *ci = ceph_inode(inode);
>   	struct ceph_mds_request *req1 = NULL, *req2 = NULL;
> +	unsigned int max_sessions;
>   	int ret, err = 0;
>   
>   	spin_lock(&ci->i_unsafe_lock);
> @@ -2293,37 +2294,45 @@ static int unsafe_request_wait(struct inode *inode)
>   	}
>   	spin_unlock(&ci->i_unsafe_lock);
>   
> +	/*
> +	 * The mdsc->max_sessions is unlikely to be changed
> +	 * mostly, here we will retry it by reallocating the
> +	 * sessions arrary memory to get rid of the mdsc->mutex
> +	 * lock.
> +	 */
> +retry:
> +	max_sessions = mdsc->max_sessions;
> +
>   	/*
>   	 * Trigger to flush the journal logs in all the relevant MDSes
>   	 * manually, or in the worst case we must wait at most 5 seconds
>   	 * to wait the journal logs to be flushed by the MDSes periodically.
>   	 */
> -	if (req1 || req2) {
> +	if ((req1 || req2) && likely(max_sessions)) {
>   		struct ceph_mds_session **sessions = NULL;
>   		struct ceph_mds_session *s;
>   		struct ceph_mds_request *req;
> -		unsigned int max;
>   		int i;
>   
> -		/*
> -		 * The mdsc->max_sessions is unlikely to be changed
> -		 * mostly, here we will retry it by reallocating the
> -		 * sessions arrary memory to get rid of the mdsc->mutex
> -		 * lock.
> -		 */
> -retry:
> -		max = mdsc->max_sessions;
> -		sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
> -		if (!sessions)
> -			return -ENOMEM;
> +		sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
> +		if (!sessions) {
> +			err = -ENOMEM;
> +			goto out;
> +		}
>   
>   		spin_lock(&ci->i_unsafe_lock);
>   		if (req1) {
>   			list_for_each_entry(req, &ci->i_unsafe_dirops,
>   					    r_unsafe_dir_item) {
>   				s = req->r_session;
> -				if (unlikely(s->s_mds >= max)) {
> +				if (unlikely(s->s_mds >= max_sessions)) {
>   					spin_unlock(&ci->i_unsafe_lock);
> +					for (i = 0; i < max_sessions; i++) {
> +						s = sessions[i];
> +						if (s)
> +							ceph_put_mds_session(s);
> +					}
> +					kfree(sessions);
>   					goto retry;
>   				}
>   				if (!sessions[s->s_mds]) {
> @@ -2336,8 +2345,14 @@ static int unsafe_request_wait(struct inode *inode)
>   			list_for_each_entry(req, &ci->i_unsafe_iops,
>   					    r_unsafe_target_item) {
>   				s = req->r_session;
> -				if (unlikely(s->s_mds >= max)) {
> +				if (unlikely(s->s_mds >= max_sessions)) {
>   					spin_unlock(&ci->i_unsafe_lock);
> +					for (i = 0; i < max_sessions; i++) {
> +						s = sessions[i];
> +						if (s)
> +							ceph_put_mds_session(s);
> +					}
> +					kfree(sessions);
>   					goto retry;
>   				}
>   				if (!sessions[s->s_mds]) {
> @@ -2358,7 +2373,7 @@ static int unsafe_request_wait(struct inode *inode)
>   		spin_unlock(&ci->i_ceph_lock);
>   
>   		/* send flush mdlog request to MDSes */
> -		for (i = 0; i < max; i++) {
> +		for (i = 0; i < max_sessions; i++) {
>   			s = sessions[i];
>   			if (s) {
>   				send_flush_mdlog(s);
> @@ -2375,15 +2390,19 @@ static int unsafe_request_wait(struct inode *inode)
>   					ceph_timeout_jiffies(req1->r_timeout));
>   		if (ret)
>   			err = -EIO;
> -		ceph_mdsc_put_request(req1);
>   	}
>   	if (req2) {
>   		ret = !wait_for_completion_timeout(&req2->r_safe_completion,
>   					ceph_timeout_jiffies(req2->r_timeout));
>   		if (ret)
>   			err = -EIO;
> -		ceph_mdsc_put_request(req2);
>   	}
> +
> +out:
> +	if (req1)
> +		ceph_mdsc_put_request(req1);
> +	if (req2)
> +		ceph_mdsc_put_request(req2);
>   	return err;
>   }
>
Venky Shankar Jan. 13, 2022, 7:05 a.m. UTC | #2
On Wed, Jan 12, 2022 at 9:59 AM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> When the sessions memory allocation fails, make sure that req1, req2
> and any sessions already taken get put. Also, if max_sessions has
> decreased by the time the array is krealloc'ed on retry, the sessions
> stored beyond the new size may be missed and never put.
>
> And if max_sessions is 0, krealloc() returns ZERO_SIZE_PTR, which is
> not NULL, so dereferencing it later leads to an access fault.
>
> URL: https://tracker.ceph.com/issues/53819
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs")
> ---
>  fs/ceph/caps.c | 55 +++++++++++++++++++++++++++++++++-----------------
>  1 file changed, 37 insertions(+), 18 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 944b18b4e217..5c2719f66f62 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -2276,6 +2276,7 @@ static int unsafe_request_wait(struct inode *inode)
>         struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>         struct ceph_inode_info *ci = ceph_inode(inode);
>         struct ceph_mds_request *req1 = NULL, *req2 = NULL;
> +       unsigned int max_sessions;
>         int ret, err = 0;
>
>         spin_lock(&ci->i_unsafe_lock);
> @@ -2293,37 +2294,45 @@ static int unsafe_request_wait(struct inode *inode)
>         }
>         spin_unlock(&ci->i_unsafe_lock);
>
> +       /*
> +        * The mdsc->max_sessions is unlikely to be changed
> +        * mostly, here we will retry it by reallocating the
> +        * sessions arrary memory to get rid of the mdsc->mutex
> +        * lock.
> +        */
> +retry:
> +       max_sessions = mdsc->max_sessions;
> +
>         /*
>          * Trigger to flush the journal logs in all the relevant MDSes
>          * manually, or in the worst case we must wait at most 5 seconds
>          * to wait the journal logs to be flushed by the MDSes periodically.
>          */
> -       if (req1 || req2) {
> +       if ((req1 || req2) && likely(max_sessions)) {
>                 struct ceph_mds_session **sessions = NULL;
>                 struct ceph_mds_session *s;
>                 struct ceph_mds_request *req;
> -               unsigned int max;
>                 int i;
>
> -               /*
> -                * The mdsc->max_sessions is unlikely to be changed
> -                * mostly, here we will retry it by reallocating the
> -                * sessions arrary memory to get rid of the mdsc->mutex
> -                * lock.
> -                */

"array"

> -retry:
> -               max = mdsc->max_sessions;
> -               sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
> -               if (!sessions)
> -                       return -ENOMEM;
> +               sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
> +               if (!sessions) {
> +                       err = -ENOMEM;
> +                       goto out;
> +               }
>
>                 spin_lock(&ci->i_unsafe_lock);
>                 if (req1) {
>                         list_for_each_entry(req, &ci->i_unsafe_dirops,
>                                             r_unsafe_dir_item) {
>                                 s = req->r_session;
> -                               if (unlikely(s->s_mds >= max)) {
> +                               if (unlikely(s->s_mds >= max_sessions)) {
>                                         spin_unlock(&ci->i_unsafe_lock);
> +                                       for (i = 0; i < max_sessions; i++) {
> +                                               s = sessions[i];
> +                                               if (s)
> +                                                       ceph_put_mds_session(s);
> +                                       }
> +                                       kfree(sessions);

nit: this cleanup can be a separate function since it gets repeated below.
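Something along these lines perhaps (untested sketch, the helper name is just
illustrative):

static void put_flush_sessions(struct ceph_mds_session **sessions,
			       unsigned int max_sessions)
{
	unsigned int i;

	/* Drop the session refs collected so far and free the array. */
	for (i = 0; i < max_sessions; i++) {
		if (sessions[i])
			ceph_put_mds_session(sessions[i]);
	}
	kfree(sessions);
}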

>                                         goto retry;
>                                 }
>                                 if (!sessions[s->s_mds]) {
> @@ -2336,8 +2345,14 @@ static int unsafe_request_wait(struct inode *inode)
>                         list_for_each_entry(req, &ci->i_unsafe_iops,
>                                             r_unsafe_target_item) {
>                                 s = req->r_session;
> -                               if (unlikely(s->s_mds >= max)) {
> +                               if (unlikely(s->s_mds >= max_sessions)) {
>                                         spin_unlock(&ci->i_unsafe_lock);
> +                                       for (i = 0; i < max_sessions; i++) {
> +                                               s = sessions[i];
> +                                               if (s)
> +                                                       ceph_put_mds_session(s);
> +                                       }
> +                                       kfree(sessions);
>                                         goto retry;
>                                 }
>                                 if (!sessions[s->s_mds]) {
> @@ -2358,7 +2373,7 @@ static int unsafe_request_wait(struct inode *inode)
>                 spin_unlock(&ci->i_ceph_lock);
>
>                 /* send flush mdlog request to MDSes */
> -               for (i = 0; i < max; i++) {
> +               for (i = 0; i < max_sessions; i++) {
>                         s = sessions[i];
>                         if (s) {
>                                 send_flush_mdlog(s);
> @@ -2375,15 +2390,19 @@ static int unsafe_request_wait(struct inode *inode)
>                                         ceph_timeout_jiffies(req1->r_timeout));
>                 if (ret)
>                         err = -EIO;
> -               ceph_mdsc_put_request(req1);
>         }
>         if (req2) {
>                 ret = !wait_for_completion_timeout(&req2->r_safe_completion,
>                                         ceph_timeout_jiffies(req2->r_timeout));
>                 if (ret)
>                         err = -EIO;
> -               ceph_mdsc_put_request(req2);
>         }
> +
> +out:
> +       if (req1)
> +               ceph_mdsc_put_request(req1);
> +       if (req2)
> +               ceph_mdsc_put_request(req2);
>         return err;
>  }

Looks good.

Reviewed-by: Venky Shankar <vshankar@redhat.com>

>
> --
> 2.27.0
>
Jeff Layton Jan. 13, 2022, 11:01 a.m. UTC | #3
On Wed, 2022-01-12 at 12:29 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> When the sessions memory allocation fails, make sure that req1, req2
> and any sessions already taken get put. Also, if max_sessions has
> decreased by the time the array is krealloc'ed on retry, the sessions
> stored beyond the new size may be missed and never put.
>
> And if max_sessions is 0, krealloc() returns ZERO_SIZE_PTR, which is
> not NULL, so dereferencing it later leads to an access fault.
> 
> URL: https://tracker.ceph.com/issues/53819
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs")
> ---
>  fs/ceph/caps.c | 55 +++++++++++++++++++++++++++++++++-----------------
>  1 file changed, 37 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 944b18b4e217..5c2719f66f62 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -2276,6 +2276,7 @@ static int unsafe_request_wait(struct inode *inode)
>  	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	struct ceph_mds_request *req1 = NULL, *req2 = NULL;
> +	unsigned int max_sessions;
>  	int ret, err = 0;
>  
>  	spin_lock(&ci->i_unsafe_lock);
> @@ -2293,37 +2294,45 @@ static int unsafe_request_wait(struct inode *inode)
>  	}
>  	spin_unlock(&ci->i_unsafe_lock);
>  
> +	/*
> +	 * The mdsc->max_sessions is unlikely to be changed
> +	 * mostly, here we will retry it by reallocating the
> +	 * sessions arrary memory to get rid of the mdsc->mutex
> +	 * lock.
> +	 */
> +retry:
> +	max_sessions = mdsc->max_sessions;
> +
>  	/*
>  	 * Trigger to flush the journal logs in all the relevant MDSes
>  	 * manually, or in the worst case we must wait at most 5 seconds
>  	 * to wait the journal logs to be flushed by the MDSes periodically.
>  	 */
> -	if (req1 || req2) {
> +	if ((req1 || req2) && likely(max_sessions)) {
>  		struct ceph_mds_session **sessions = NULL;
>  		struct ceph_mds_session *s;
>  		struct ceph_mds_request *req;
> -		unsigned int max;
>  		int i;
>  
> -		/*
> -		 * The mdsc->max_sessions is unlikely to be changed
> -		 * mostly, here we will retry it by reallocating the
> -		 * sessions arrary memory to get rid of the mdsc->mutex
> -		 * lock.
> -		 */
> -retry:
> -		max = mdsc->max_sessions;
> -		sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
> -		if (!sessions)
> -			return -ENOMEM;
> +		sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
> +		if (!sessions) {
> +			err = -ENOMEM;
> +			goto out;
> +		}
>  
>  		spin_lock(&ci->i_unsafe_lock);
>  		if (req1) {
>  			list_for_each_entry(req, &ci->i_unsafe_dirops,
>  					    r_unsafe_dir_item) {
>  				s = req->r_session;
> -				if (unlikely(s->s_mds >= max)) {
> +				if (unlikely(s->s_mds >= max_sessions)) {
>  					spin_unlock(&ci->i_unsafe_lock);
> +					for (i = 0; i < max_sessions; i++) {
> +						s = sessions[i];
> +						if (s)
> +							ceph_put_mds_session(s);
> +					}
> +					kfree(sessions);
>  					goto retry;
>  				}
>  				if (!sessions[s->s_mds]) {
> @@ -2336,8 +2345,14 @@ static int unsafe_request_wait(struct inode *inode)
>  			list_for_each_entry(req, &ci->i_unsafe_iops,
>  					    r_unsafe_target_item) {
>  				s = req->r_session;
> -				if (unlikely(s->s_mds >= max)) {
> +				if (unlikely(s->s_mds >= max_sessions)) {
>  					spin_unlock(&ci->i_unsafe_lock);
> +					for (i = 0; i < max_sessions; i++) {
> +						s = sessions[i];
> +						if (s)
> +							ceph_put_mds_session(s);
> +					}
> +					kfree(sessions);
>  					goto retry;
>  				}
>  				if (!sessions[s->s_mds]) {
> @@ -2358,7 +2373,7 @@ static int unsafe_request_wait(struct inode *inode)
>  		spin_unlock(&ci->i_ceph_lock);
>  
>  		/* send flush mdlog request to MDSes */
> -		for (i = 0; i < max; i++) {
> +		for (i = 0; i < max_sessions; i++) {
>  			s = sessions[i];
>  			if (s) {
>  				send_flush_mdlog(s);
> @@ -2375,15 +2390,19 @@ static int unsafe_request_wait(struct inode *inode)
>  					ceph_timeout_jiffies(req1->r_timeout));
>  		if (ret)
>  			err = -EIO;
> -		ceph_mdsc_put_request(req1);
>  	}
>  	if (req2) {
>  		ret = !wait_for_completion_timeout(&req2->r_safe_completion,
>  					ceph_timeout_jiffies(req2->r_timeout));
>  		if (ret)
>  			err = -EIO;
> -		ceph_mdsc_put_request(req2);
>  	}
> +
> +out:
> +	if (req1)
> +		ceph_mdsc_put_request(req1);
> +	if (req2)
> +		ceph_mdsc_put_request(req2);
>  	return err;
>  }
>  

Looks good. I fixed up the minor spelling error in the comment that
Venky noticed too. Merged into testing branch.

Thanks,
Xiubo Li Jan. 13, 2022, 12:43 p.m. UTC | #4
On 1/13/22 7:01 PM, Jeff Layton wrote:
> On Wed, 2022-01-12 at 12:29 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> When the sessions memory allocation fails, make sure that req1, req2
>> and any sessions already taken get put. Also, if max_sessions has
>> decreased by the time the array is krealloc'ed on retry, the sessions
>> stored beyond the new size may be missed and never put.
>>
>> And if max_sessions is 0, krealloc() returns ZERO_SIZE_PTR, which is
>> not NULL, so dereferencing it later leads to an access fault.
>>
>> URL: https://tracker.ceph.com/issues/53819
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs")
>> ---
>>   fs/ceph/caps.c | 55 +++++++++++++++++++++++++++++++++-----------------
>>   1 file changed, 37 insertions(+), 18 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 944b18b4e217..5c2719f66f62 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -2276,6 +2276,7 @@ static int unsafe_request_wait(struct inode *inode)
>>   	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>>   	struct ceph_inode_info *ci = ceph_inode(inode);
>>   	struct ceph_mds_request *req1 = NULL, *req2 = NULL;
>> +	unsigned int max_sessions;
>>   	int ret, err = 0;
>>   
>>   	spin_lock(&ci->i_unsafe_lock);
>> @@ -2293,37 +2294,45 @@ static int unsafe_request_wait(struct inode *inode)
>>   	}
>>   	spin_unlock(&ci->i_unsafe_lock);
>>   
>> +	/*
>> +	 * The mdsc->max_sessions is unlikely to be changed
>> +	 * mostly, here we will retry it by reallocating the
>> +	 * sessions arrary memory to get rid of the mdsc->mutex
>> +	 * lock.
>> +	 */
>> +retry:
>> +	max_sessions = mdsc->max_sessions;
>> +
>>   	/*
>>   	 * Trigger to flush the journal logs in all the relevant MDSes
>>   	 * manually, or in the worst case we must wait at most 5 seconds
>>   	 * to wait the journal logs to be flushed by the MDSes periodically.
>>   	 */
>> -	if (req1 || req2) {
>> +	if ((req1 || req2) && likely(max_sessions)) {
>>   		struct ceph_mds_session **sessions = NULL;
>>   		struct ceph_mds_session *s;
>>   		struct ceph_mds_request *req;
>> -		unsigned int max;
>>   		int i;
>>   
>> -		/*
>> -		 * The mdsc->max_sessions is unlikely to be changed
>> -		 * mostly, here we will retry it by reallocating the
>> -		 * sessions arrary memory to get rid of the mdsc->mutex
>> -		 * lock.
>> -		 */
>> -retry:
>> -		max = mdsc->max_sessions;
>> -		sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
>> -		if (!sessions)
>> -			return -ENOMEM;
>> +		sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
>> +		if (!sessions) {
>> +			err = -ENOMEM;
>> +			goto out;
>> +		}
>>   
>>   		spin_lock(&ci->i_unsafe_lock);
>>   		if (req1) {
>>   			list_for_each_entry(req, &ci->i_unsafe_dirops,
>>   					    r_unsafe_dir_item) {
>>   				s = req->r_session;
>> -				if (unlikely(s->s_mds >= max)) {
>> +				if (unlikely(s->s_mds >= max_sessions)) {
>>   					spin_unlock(&ci->i_unsafe_lock);
>> +					for (i = 0; i < max_sessions; i++) {
>> +						s = sessions[i];
>> +						if (s)
>> +							ceph_put_mds_session(s);
>> +					}
>> +					kfree(sessions);
>>   					goto retry;
>>   				}
>>   				if (!sessions[s->s_mds]) {
>> @@ -2336,8 +2345,14 @@ static int unsafe_request_wait(struct inode *inode)
>>   			list_for_each_entry(req, &ci->i_unsafe_iops,
>>   					    r_unsafe_target_item) {
>>   				s = req->r_session;
>> -				if (unlikely(s->s_mds >= max)) {
>> +				if (unlikely(s->s_mds >= max_sessions)) {
>>   					spin_unlock(&ci->i_unsafe_lock);
>> +					for (i = 0; i < max_sessions; i++) {
>> +						s = sessions[i];
>> +						if (s)
>> +							ceph_put_mds_session(s);
>> +					}
>> +					kfree(sessions);
>>   					goto retry;
>>   				}
>>   				if (!sessions[s->s_mds]) {
>> @@ -2358,7 +2373,7 @@ static int unsafe_request_wait(struct inode *inode)
>>   		spin_unlock(&ci->i_ceph_lock);
>>   
>>   		/* send flush mdlog request to MDSes */
>> -		for (i = 0; i < max; i++) {
>> +		for (i = 0; i < max_sessions; i++) {
>>   			s = sessions[i];
>>   			if (s) {
>>   				send_flush_mdlog(s);
>> @@ -2375,15 +2390,19 @@ static int unsafe_request_wait(struct inode *inode)
>>   					ceph_timeout_jiffies(req1->r_timeout));
>>   		if (ret)
>>   			err = -EIO;
>> -		ceph_mdsc_put_request(req1);
>>   	}
>>   	if (req2) {
>>   		ret = !wait_for_completion_timeout(&req2->r_safe_completion,
>>   					ceph_timeout_jiffies(req2->r_timeout));
>>   		if (ret)
>>   			err = -EIO;
>> -		ceph_mdsc_put_request(req2);
>>   	}
>> +
>> +out:
>> +	if (req1)
>> +		ceph_mdsc_put_request(req1);
>> +	if (req2)
>> +		ceph_mdsc_put_request(req2);
>>   	return err;
>>   }
>>   
> Looks good. I fixed up the minor spelling error in the comment that
> Venky noticed too. Merged into testing branch.

Okay, thanks Venky and Jeff.


>
> Thanks,

Patch

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 944b18b4e217..5c2719f66f62 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2276,6 +2276,7 @@  static int unsafe_request_wait(struct inode *inode)
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_mds_request *req1 = NULL, *req2 = NULL;
+	unsigned int max_sessions;
 	int ret, err = 0;
 
 	spin_lock(&ci->i_unsafe_lock);
@@ -2293,37 +2294,45 @@  static int unsafe_request_wait(struct inode *inode)
 	}
 	spin_unlock(&ci->i_unsafe_lock);
 
+	/*
+	 * The mdsc->max_sessions is unlikely to be changed
+	 * mostly, here we will retry it by reallocating the
+	 * sessions arrary memory to get rid of the mdsc->mutex
+	 * lock.
+	 */
+retry:
+	max_sessions = mdsc->max_sessions;
+
 	/*
 	 * Trigger to flush the journal logs in all the relevant MDSes
 	 * manually, or in the worst case we must wait at most 5 seconds
 	 * to wait the journal logs to be flushed by the MDSes periodically.
 	 */
-	if (req1 || req2) {
+	if ((req1 || req2) && likely(max_sessions)) {
 		struct ceph_mds_session **sessions = NULL;
 		struct ceph_mds_session *s;
 		struct ceph_mds_request *req;
-		unsigned int max;
 		int i;
 
-		/*
-		 * The mdsc->max_sessions is unlikely to be changed
-		 * mostly, here we will retry it by reallocating the
-		 * sessions arrary memory to get rid of the mdsc->mutex
-		 * lock.
-		 */
-retry:
-		max = mdsc->max_sessions;
-		sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
-		if (!sessions)
-			return -ENOMEM;
+		sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
+		if (!sessions) {
+			err = -ENOMEM;
+			goto out;
+		}
 
 		spin_lock(&ci->i_unsafe_lock);
 		if (req1) {
 			list_for_each_entry(req, &ci->i_unsafe_dirops,
 					    r_unsafe_dir_item) {
 				s = req->r_session;
-				if (unlikely(s->s_mds >= max)) {
+				if (unlikely(s->s_mds >= max_sessions)) {
 					spin_unlock(&ci->i_unsafe_lock);
+					for (i = 0; i < max_sessions; i++) {
+						s = sessions[i];
+						if (s)
+							ceph_put_mds_session(s);
+					}
+					kfree(sessions);
 					goto retry;
 				}
 				if (!sessions[s->s_mds]) {
@@ -2336,8 +2345,14 @@  static int unsafe_request_wait(struct inode *inode)
 			list_for_each_entry(req, &ci->i_unsafe_iops,
 					    r_unsafe_target_item) {
 				s = req->r_session;
-				if (unlikely(s->s_mds >= max)) {
+				if (unlikely(s->s_mds >= max_sessions)) {
 					spin_unlock(&ci->i_unsafe_lock);
+					for (i = 0; i < max_sessions; i++) {
+						s = sessions[i];
+						if (s)
+							ceph_put_mds_session(s);
+					}
+					kfree(sessions);
 					goto retry;
 				}
 				if (!sessions[s->s_mds]) {
@@ -2358,7 +2373,7 @@  static int unsafe_request_wait(struct inode *inode)
 		spin_unlock(&ci->i_ceph_lock);
 
 		/* send flush mdlog request to MDSes */
-		for (i = 0; i < max; i++) {
+		for (i = 0; i < max_sessions; i++) {
 			s = sessions[i];
 			if (s) {
 				send_flush_mdlog(s);
@@ -2375,15 +2390,19 @@  static int unsafe_request_wait(struct inode *inode)
 					ceph_timeout_jiffies(req1->r_timeout));
 		if (ret)
 			err = -EIO;
-		ceph_mdsc_put_request(req1);
 	}
 	if (req2) {
 		ret = !wait_for_completion_timeout(&req2->r_safe_completion,
 					ceph_timeout_jiffies(req2->r_timeout));
 		if (ret)
 			err = -EIO;
-		ceph_mdsc_put_request(req2);
 	}
+
+out:
+	if (req1)
+		ceph_mdsc_put_request(req1);
+	if (req2)
+		ceph_mdsc_put_request(req2);
 	return err;
 }