diff mbox series

rbd: always kick acquire on "acquired" and "released" notifications

Message ID 20210706171137.851-1-idryomov@gmail.com
State New
Headers show
Series rbd: always kick acquire on "acquired" and "released" notifications | expand

Commit Message

Ilya Dryomov July 6, 2021, 5:11 p.m. UTC
Skipping the "lock has been released" notification if the lock owner
is not what we expect based on owner_cid can lead to I/O hangs.
One example is our own notifications: because owner_cid is cleared
in rbd_unlock(), when we get our own notification it is processed as
unexpected/duplicate and maybe_kick_acquire() isn't called.  If a peer
that requested the lock then doesn't go through with acquiring it,
I/O requests that came in while the lock was being quiesced would
be stalled until another I/O request is submitted and kicks acquire
from rbd_img_exclusive_lock().

This makes the comment in rbd_release_lock() actually true: prior to
this change the canceled work was being requeued in response to the
"lock has been acquired" notification from rbd_handle_acquired_lock().

Cc: stable@vger.kernel.org # 5.3+
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index be033e179299..a3caecae5d6a 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4203,15 +4203,11 @@  static void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v,
 	if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
 		down_write(&rbd_dev->lock_rwsem);
 		if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
-			/*
-			 * we already know that the remote client is
-			 * the owner
-			 */
-			up_write(&rbd_dev->lock_rwsem);
-			return;
+			dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n",
+			     __func__, rbd_dev, cid.gid, cid.handle);
+		} else {
+			rbd_set_owner_cid(rbd_dev, &cid);
 		}
-
-		rbd_set_owner_cid(rbd_dev, &cid);
 		downgrade_write(&rbd_dev->lock_rwsem);
 	} else {
 		down_read(&rbd_dev->lock_rwsem);
@@ -4236,14 +4232,12 @@  static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v,
 	if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
 		down_write(&rbd_dev->lock_rwsem);
 		if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
-			dout("%s rbd_dev %p unexpected owner, cid %llu-%llu != owner_cid %llu-%llu\n",
+			dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n",
 			     __func__, rbd_dev, cid.gid, cid.handle,
 			     rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle);
-			up_write(&rbd_dev->lock_rwsem);
-			return;
+		} else {
+			rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
 		}
-
-		rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
 		downgrade_write(&rbd_dev->lock_rwsem);
 	} else {
 		down_read(&rbd_dev->lock_rwsem);