diff mbox series

[v2] ceph: defer flushing the capsnap if the Fb is used

Message ID 20210107023051.119063-1-xiubli@redhat.com
State New
Headers show
Series [v2] ceph: defer flushing the capsnap if the Fb is used | expand

Commit Message

Xiubo Li Jan. 7, 2021, 2:30 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

If the Fb cap is in use, it means the client is still flushing dirty
data to the OSD, so defer flushing the capsnap.

URL: https://tracker.ceph.com/issues/48679
URL: https://tracker.ceph.com/issues/48640
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---

V2:
- Fix inode reference leak bug

 fs/ceph/caps.c | 32 +++++++++++++++++++-------------
 fs/ceph/snap.c |  6 +++---
 2 files changed, 22 insertions(+), 16 deletions(-)

Comments

Jeff Layton Jan. 8, 2021, 6:24 p.m. UTC | #1
On Thu, 2021-01-07 at 10:30 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>

> 

> If the Fb cap is used it means the client is flushing the dirty

> data to OSD, just defer flushing the capsnap.

> 

> URL: https://tracker.ceph.com/issues/48679

> URL: https://tracker.ceph.com/issues/48640

> Signed-off-by: Xiubo Li <xiubli@redhat.com>

> ---

> 

> V2:

> - Fix inode reference leak bug

> 

>  fs/ceph/caps.c | 32 +++++++++++++++++++-------------

>  fs/ceph/snap.c |  6 +++---

>  2 files changed, 22 insertions(+), 16 deletions(-)

> 

> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c

> index abbf48fc6230..2f2451d563bd 100644

> --- a/fs/ceph/caps.c

> +++ b/fs/ceph/caps.c

> @@ -3047,6 +3047,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,

>  {

>  	struct inode *inode = &ci->vfs_inode;

>  	int last = 0, put = 0, flushsnaps = 0, wake = 0;

> +	bool check_flushsnaps = false;

>  

> 

> 

> 

>  	spin_lock(&ci->i_ceph_lock);

>  	if (had & CEPH_CAP_PIN)

> @@ -3064,25 +3065,15 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,

>  		if (--ci->i_wb_ref == 0) {

>  			last++;

>  			put++;

> +			check_flushsnaps = true;

>  		}

>  		dout("put_cap_refs %p wb %d -> %d (?)\n",

>  		     inode, ci->i_wb_ref+1, ci->i_wb_ref);

>  	}

> -	if (had & CEPH_CAP_FILE_WR)

> +	if (had & CEPH_CAP_FILE_WR) {

>  		if (--ci->i_wr_ref == 0) {

>  			last++;

> -			if (__ceph_have_pending_cap_snap(ci)) {

> -				struct ceph_cap_snap *capsnap =

> -					list_last_entry(&ci->i_cap_snaps,

> -							struct ceph_cap_snap,

> -							ci_item);

> -				capsnap->writing = 0;

> -				if (ceph_try_drop_cap_snap(ci, capsnap))

> -					put++;

> -				else if (__ceph_finish_cap_snap(ci, capsnap))

> -					flushsnaps = 1;

> -				wake = 1;

> -			}

> +			check_flushsnaps = true;

>  			if (ci->i_wrbuffer_ref_head == 0 &&

>  			    ci->i_dirty_caps == 0 &&

>  			    ci->i_flushing_caps == 0) {

> @@ -3094,6 +3085,21 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,

>  			if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)

>  				drop_inode_snap_realm(ci);

>  		}

> +	}

> +	if (check_flushsnaps) {

> +		if (__ceph_have_pending_cap_snap(ci)) {

> +			struct ceph_cap_snap *capsnap =

> +				list_last_entry(&ci->i_cap_snaps,

> +						struct ceph_cap_snap,

> +						ci_item);

> +			capsnap->writing = 0;

> +			if (ceph_try_drop_cap_snap(ci, capsnap))

> +				put++;

> +			else if (__ceph_finish_cap_snap(ci, capsnap))

> +				flushsnaps = 1;

> +			wake = 1;

> +		}

> +	}



Ok, so let's assume you're putting Fb. You increment put and set
check_flushsnaps to true. Later, you get down to here and call
ceph_try_drop_cap_snap and it returns true and now you've incremented
"put" twice.

Is that right? Do Fb caps hold two inode references?

Either way, I think this function needs some better
documentation/comments, particularly since you're making a significant
change to how it works.

>  	spin_unlock(&ci->i_ceph_lock);

>  

> 

> 

> 

>  	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),

> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c

> index b611f829cb61..639fb91cc9db 100644

> --- a/fs/ceph/snap.c

> +++ b/fs/ceph/snap.c

> @@ -561,10 +561,10 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)

>  	capsnap->context = old_snapc;

>  	list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);

>  

> 

> 

> 

> -	if (used & CEPH_CAP_FILE_WR) {

> +	if (used & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER)) {

>  		dout("queue_cap_snap %p cap_snap %p snapc %p"

> -		     " seq %llu used WR, now pending\n", inode,

> -		     capsnap, old_snapc, old_snapc->seq);

> +		     " seq %llu used WR | BUFFFER, now pending\n",

> +		     inode, capsnap, old_snapc, old_snapc->seq);

>  		capsnap->writing = 1;

>  	} else {

>  		/* note mtime, size NOW. */


-- 
Jeff Layton <jlayton@kernel.org>
Xiubo Li Jan. 9, 2021, 2:08 a.m. UTC | #2
On 2021/1/9 2:24, Jeff Layton wrote:
> On Thu, 2021-01-07 at 10:30 +0800, xiubli@redhat.com wrote:

>> From: Xiubo Li <xiubli@redhat.com>

>>

>> If the Fb cap is used it means the client is flushing the dirty

>> data to OSD, just defer flushing the capsnap.

>>

>> URL: https://tracker.ceph.com/issues/48679

>> URL: https://tracker.ceph.com/issues/48640

>> Signed-off-by: Xiubo Li <xiubli@redhat.com>

>> ---

>>

>> V2:

>> - Fix inode reference leak bug

>>

>>   fs/ceph/caps.c | 32 +++++++++++++++++++-------------

>>   fs/ceph/snap.c |  6 +++---

>>   2 files changed, 22 insertions(+), 16 deletions(-)

>>

>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c

>> index abbf48fc6230..2f2451d563bd 100644

>> --- a/fs/ceph/caps.c

>> +++ b/fs/ceph/caps.c

>> @@ -3047,6 +3047,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,

>>   {

>>   	struct inode *inode = &ci->vfs_inode;

>>   	int last = 0, put = 0, flushsnaps = 0, wake = 0;

>> +	bool check_flushsnaps = false;

>>   

>>

>>

>>

>>   	spin_lock(&ci->i_ceph_lock);

>>   	if (had & CEPH_CAP_PIN)

>> @@ -3064,25 +3065,15 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,

>>   		if (--ci->i_wb_ref == 0) {

>>   			last++;

>>   			put++;

>> +			check_flushsnaps = true;

>>   		}

>>   		dout("put_cap_refs %p wb %d -> %d (?)\n",

>>   		     inode, ci->i_wb_ref+1, ci->i_wb_ref);

>>   	}

>> -	if (had & CEPH_CAP_FILE_WR)

>> +	if (had & CEPH_CAP_FILE_WR) {

>>   		if (--ci->i_wr_ref == 0) {

>>   			last++;

>> -			if (__ceph_have_pending_cap_snap(ci)) {

>> -				struct ceph_cap_snap *capsnap =

>> -					list_last_entry(&ci->i_cap_snaps,

>> -							struct ceph_cap_snap,

>> -							ci_item);

>> -				capsnap->writing = 0;

>> -				if (ceph_try_drop_cap_snap(ci, capsnap))

>> -					put++;

>> -				else if (__ceph_finish_cap_snap(ci, capsnap))

>> -					flushsnaps = 1;

>> -				wake = 1;

>> -			}

>> +			check_flushsnaps = true;

>>   			if (ci->i_wrbuffer_ref_head == 0 &&

>>   			    ci->i_dirty_caps == 0 &&

>>   			    ci->i_flushing_caps == 0) {

>> @@ -3094,6 +3085,21 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,

>>   			if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)

>>   				drop_inode_snap_realm(ci);

>>   		}

>> +	}

>> +	if (check_flushsnaps) {

>> +		if (__ceph_have_pending_cap_snap(ci)) {

>> +			struct ceph_cap_snap *capsnap =

>> +				list_last_entry(&ci->i_cap_snaps,

>> +						struct ceph_cap_snap,

>> +						ci_item);

>> +			capsnap->writing = 0;

>> +			if (ceph_try_drop_cap_snap(ci, capsnap))

>> +				put++;

>> +			else if (__ceph_finish_cap_snap(ci, capsnap))

>> +				flushsnaps = 1;

>> +			wake = 1;

>> +		}

>> +	}

>

> Ok, so let's assume you're putting Fb. You increment put and set

> check_flushsnaps to true. Later, you get down to here and call

> ceph_try_drop_cap_snap and it returns true and now you've incremented

> "put" twice.

>

> Is that right? Do Fb caps hold two inode references?


Yeah, one in ceph_take_cap_refs().

The other one is taken in ceph_queue_cap_snap(): when `used & (Fb | Fw)` is 
true, flushing the capsnap is delayed while the inode ref is held, so we 
need to put that inode ref here or in __ceph_finish_cap_snap().


> Either way, I think this function needs some better

> documentation/comments, particularly since you're making a significant

> change to how it works.


Okay, I will post a V3 once I am back, adding more comments about 
this.

Thanks

>

>>   	spin_unlock(&ci->i_ceph_lock);

>>   

>>

>>

>>

>>   	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),

>> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c

>> index b611f829cb61..639fb91cc9db 100644

>> --- a/fs/ceph/snap.c

>> +++ b/fs/ceph/snap.c

>> @@ -561,10 +561,10 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)

>>   	capsnap->context = old_snapc;

>>   	list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);

>>   

>>

>>

>>

>> -	if (used & CEPH_CAP_FILE_WR) {

>> +	if (used & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER)) {

>>   		dout("queue_cap_snap %p cap_snap %p snapc %p"

>> -		     " seq %llu used WR, now pending\n", inode,

>> -		     capsnap, old_snapc, old_snapc->seq);

>> +		     " seq %llu used WR | BUFFFER, now pending\n",

>> +		     inode, capsnap, old_snapc, old_snapc->seq);

>>   		capsnap->writing = 1;

>>   	} else {

>>   		/* note mtime, size NOW. */
diff mbox series

Patch

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index abbf48fc6230..2f2451d563bd 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3047,6 +3047,7 @@  static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
 {
 	struct inode *inode = &ci->vfs_inode;
 	int last = 0, put = 0, flushsnaps = 0, wake = 0;
+	bool check_flushsnaps = false;
 
 	spin_lock(&ci->i_ceph_lock);
 	if (had & CEPH_CAP_PIN)
@@ -3064,25 +3065,15 @@  static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
 		if (--ci->i_wb_ref == 0) {
 			last++;
 			put++;
+			check_flushsnaps = true;
 		}
 		dout("put_cap_refs %p wb %d -> %d (?)\n",
 		     inode, ci->i_wb_ref+1, ci->i_wb_ref);
 	}
-	if (had & CEPH_CAP_FILE_WR)
+	if (had & CEPH_CAP_FILE_WR) {
 		if (--ci->i_wr_ref == 0) {
 			last++;
-			if (__ceph_have_pending_cap_snap(ci)) {
-				struct ceph_cap_snap *capsnap =
-					list_last_entry(&ci->i_cap_snaps,
-							struct ceph_cap_snap,
-							ci_item);
-				capsnap->writing = 0;
-				if (ceph_try_drop_cap_snap(ci, capsnap))
-					put++;
-				else if (__ceph_finish_cap_snap(ci, capsnap))
-					flushsnaps = 1;
-				wake = 1;
-			}
+			check_flushsnaps = true;
 			if (ci->i_wrbuffer_ref_head == 0 &&
 			    ci->i_dirty_caps == 0 &&
 			    ci->i_flushing_caps == 0) {
@@ -3094,6 +3085,21 @@  static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
 			if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
 				drop_inode_snap_realm(ci);
 		}
+	}
+	if (check_flushsnaps) {
+		if (__ceph_have_pending_cap_snap(ci)) {
+			struct ceph_cap_snap *capsnap =
+				list_last_entry(&ci->i_cap_snaps,
+						struct ceph_cap_snap,
+						ci_item);
+			capsnap->writing = 0;
+			if (ceph_try_drop_cap_snap(ci, capsnap))
+				put++;
+			else if (__ceph_finish_cap_snap(ci, capsnap))
+				flushsnaps = 1;
+			wake = 1;
+		}
+	}
 	spin_unlock(&ci->i_ceph_lock);
 
 	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index b611f829cb61..639fb91cc9db 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -561,10 +561,10 @@  void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 	capsnap->context = old_snapc;
 	list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
 
-	if (used & CEPH_CAP_FILE_WR) {
+	if (used & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER)) {
 		dout("queue_cap_snap %p cap_snap %p snapc %p"
-		     " seq %llu used WR, now pending\n", inode,
-		     capsnap, old_snapc, old_snapc->seq);
+		     " seq %llu used WR | BUFFFER, now pending\n",
+		     inode, capsnap, old_snapc, old_snapc->seq);
 		capsnap->writing = 1;
 	} else {
 		/* note mtime, size NOW. */