diff mbox series

[2/5] block/io: introduce bdrv_co_range_try_lock

Message ID 20200620143649.225852-3-vsementsov@virtuozzo.com
State New
Headers show
Series preallocate filter | expand

Commit Message

Vladimir Sementsov-Ogievskiy June 20, 2020, 2:36 p.m. UTC
Introduce an interface to make a "critical section" on top of
serialising requests feature. This is needed to avoid intersecting with
other requests during a set of operations. Will be used in a next
commit to implement preallocate filter.

To keep assertions during intersecting requests handling, we need to
add _locked versions of read/write requests, to be used inside locked
section. The following commit will need only locked version of
write-zeroes, so add only it.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 include/block/block.h     |  9 ++++
 include/block/block_int.h | 16 ++++++++
 include/qemu/typedefs.h   |  1 +
 block/io.c                | 86 +++++++++++++++++++++++++++++++--------
 4 files changed, 95 insertions(+), 17 deletions(-)
diff mbox series

Patch

diff --git a/include/block/block.h b/include/block/block.h
index 25e299605e..c4b2a493b4 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -391,6 +391,15 @@  int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
  */
 int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
                                        int bytes, BdrvRequestFlags flags);
+
+/*
+ * Version of bdrv_co_pwrite_zeroes to be used inside bdrv_co_range_try_lock()
+ * .. bdrv_co_range_unlock() pair.
+ */
+int coroutine_fn bdrv_co_pwrite_zeroes_locked(BdrvChild *child,
+    int64_t offset, int bytes, BdrvRequestFlags flags,
+    BdrvTrackedRequest *lock);
+
 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
     const char *backing_file);
 void bdrv_refresh_filename(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 791de6a59c..9ec56f1825 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -66,6 +66,7 @@  enum BdrvTrackedRequestType {
     BDRV_TRACKED_WRITE,
     BDRV_TRACKED_DISCARD,
     BDRV_TRACKED_TRUNCATE,
+    BDRV_TRACKED_LOCK,
 };
 
 typedef struct BdrvTrackedRequest {
@@ -83,6 +84,12 @@  typedef struct BdrvTrackedRequest {
     CoQueue wait_queue; /* coroutines blocked on this request */
 
     struct BdrvTrackedRequest *waiting_for;
+
+    /*
+     * If non-zero, the request is under lock, so it's allowed to intersect
+     * (actully it must be inside) the @lock request.
+     */
+    struct BdrvTrackedRequest *lock;
 } BdrvTrackedRequest;
 
 struct BlockDriver {
@@ -994,6 +1001,15 @@  int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
     int64_t offset, unsigned int bytes,
     QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
 
+/*
+ * Creates serializing BDRV_TRACKED_LOCK request if no conflicts, on success
+ * return pointer to it, to be used in bdrv_co_range_unlock().
+ */
+BdrvTrackedRequest *coroutine_fn bdrv_co_range_try_lock(BlockDriverState *bs,
+                                                        int64_t offset,
+                                                        int64_t bytes);
+void coroutine_fn bdrv_co_range_unlock(BdrvTrackedRequest *req);
+
 static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
     int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
 {
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index ecf3cde26c..e2bbe3af96 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -28,6 +28,7 @@  typedef struct Aml Aml;
 typedef struct AnnounceTimer AnnounceTimer;
 typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
 typedef struct BdrvDirtyBitmapIter BdrvDirtyBitmapIter;
+typedef struct BdrvTrackedRequest BdrvTrackedRequest;
 typedef struct BlockBackend BlockBackend;
 typedef struct BlockBackendRootState BlockBackendRootState;
 typedef struct BlockDriverState BlockDriverState;
diff --git a/block/io.c b/block/io.c
index 60cee1d759..0de72cdb4c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -691,7 +691,8 @@  static void tracked_request_begin(BdrvTrackedRequest *req,
                                   BlockDriverState *bs,
                                   int64_t offset,
                                   uint64_t bytes,
-                                  enum BdrvTrackedRequestType type)
+                                  enum BdrvTrackedRequestType type,
+                                  BdrvTrackedRequest *lock)
 {
     assert(bytes <= INT64_MAX && offset <= INT64_MAX - bytes);
 
@@ -704,6 +705,7 @@  static void tracked_request_begin(BdrvTrackedRequest *req,
         .serialising    = false,
         .overlap_offset = offset,
         .overlap_bytes  = bytes,
+        .lock           = lock,
     };
 
     qemu_co_queue_init(&req->wait_queue);
@@ -745,15 +747,26 @@  static bool coroutine_fn wait_or_find_conflicts(BdrvTrackedRequest *self,
             if (tracked_request_overlaps(req, self->overlap_offset,
                                          self->overlap_bytes))
             {
-                /* Hitting this means there was a reentrant request, for
-                 * example, a block driver issuing nested requests.  This must
-                 * never happen since it means deadlock.
+                if (self->lock == req) {
+                    /* This is atomic request under range_lock */
+                    assert(req->type == BDRV_TRACKED_LOCK);
+                    assert(self->offset >= req->offset);
+                    assert(self->bytes <= req->bytes);
+                    continue;
+                }
+                /*
+                 * Hitting this means there was a reentrant request (except for
+                 * range_lock, handled above), for example, a block driver
+                 * issuing nested requests.  This must never happen since it
+                 * means deadlock.
                  */
                 assert(qemu_coroutine_self() != req->co);
 
-                /* If the request is already (indirectly) waiting for us, or
+                /*
+                 * If the request is already (indirectly) waiting for us, or
                  * will wait for us as soon as it wakes up, then just go on
-                 * (instead of producing a deadlock in the former case). */
+                 * (instead of producing a deadlock in the former case).
+                 */
                 if (!req->waiting_for) {
                     if (!wait) {
                         return true;
@@ -821,7 +834,6 @@  bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
 }
 
 /* Return true on success, false if there are some conflicts */
-__attribute__ ((unused))
 static bool bdrv_try_mark_request_serialising(BdrvTrackedRequest *req,
                                               uint64_t align)
 {
@@ -1796,7 +1808,7 @@  int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
 
     bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad);
 
-    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
+    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ, NULL);
     ret = bdrv_aligned_preadv(child, &req, offset, bytes,
                               bs->bl.request_alignment,
                               qiov, qiov_offset, flags);
@@ -2169,9 +2181,9 @@  int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
     return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
 }
 
-int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+static int coroutine_fn bdrv_co_pwritev_part_locked(BdrvChild *child,
     int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
-    BdrvRequestFlags flags)
+    BdrvRequestFlags flags, BdrvTrackedRequest *lock)
 {
     BlockDriverState *bs = child->bs;
     BdrvTrackedRequest req;
@@ -2215,7 +2227,7 @@  int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
      * Pad qiov with the read parts and be sure to have a tracked request not
      * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
      */
-    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
+    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE, lock);
 
     if (flags & BDRV_REQ_ZERO_WRITE) {
         ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
@@ -2239,8 +2251,23 @@  out:
     return ret;
 }
 
+int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
+    BdrvRequestFlags flags)
+{
+    return bdrv_co_pwritev_part_locked(child, offset, bytes, qiov, qiov_offset,
+                                       flags, NULL);
+}
+
 int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
                                        int bytes, BdrvRequestFlags flags)
+{
+    return bdrv_co_pwrite_zeroes_locked(child, offset, bytes, flags, NULL);
+}
+
+int coroutine_fn bdrv_co_pwrite_zeroes_locked(BdrvChild *child, int64_t offset,
+                                              int bytes, BdrvRequestFlags flags,
+                                              BdrvTrackedRequest *lock)
 {
     trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
 
@@ -2248,8 +2275,8 @@  int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
         flags &= ~BDRV_REQ_MAY_UNMAP;
     }
 
-    return bdrv_co_pwritev(child, offset, bytes, NULL,
-                           BDRV_REQ_ZERO_WRITE | flags);
+    return bdrv_co_pwritev_part_locked(child, offset, bytes, NULL, 0,
+                                       BDRV_REQ_ZERO_WRITE | flags, lock);
 }
 
 /*
@@ -2980,7 +3007,7 @@  int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
     tail = (offset + bytes) % align;
 
     bdrv_inc_in_flight(bs);
-    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);
+    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD, NULL);
 
     ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
     if (ret < 0) {
@@ -3252,7 +3279,7 @@  static int coroutine_fn bdrv_co_copy_range_internal(
     if (recurse_src) {
         bdrv_inc_in_flight(src->bs);
         tracked_request_begin(&req, src->bs, src_offset, bytes,
-                              BDRV_TRACKED_READ);
+                              BDRV_TRACKED_READ, NULL);
 
         /* BDRV_REQ_SERIALISING is only for write operation */
         assert(!(read_flags & BDRV_REQ_SERIALISING));
@@ -3269,7 +3296,7 @@  static int coroutine_fn bdrv_co_copy_range_internal(
     } else {
         bdrv_inc_in_flight(dst->bs);
         tracked_request_begin(&req, dst->bs, dst_offset, bytes,
-                              BDRV_TRACKED_WRITE);
+                              BDRV_TRACKED_WRITE, NULL);
         ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
                                         write_flags);
         if (!ret) {
@@ -3381,7 +3408,7 @@  int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
 
     bdrv_inc_in_flight(bs);
     tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
-                          BDRV_TRACKED_TRUNCATE);
+                          BDRV_TRACKED_TRUNCATE, NULL);
 
     /* If we are growing the image and potentially using preallocation for the
      * new area, we need to make sure that no write requests are made to it
@@ -3494,3 +3521,28 @@  int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
 
     return bdrv_run_co(child->bs, bdrv_truncate_co_entry, &tco);
 }
+
+BdrvTrackedRequest *coroutine_fn bdrv_co_range_try_lock(BlockDriverState *bs,
+                                                        int64_t offset,
+                                                        int64_t bytes)
+{
+    BdrvTrackedRequest *req = g_new(BdrvTrackedRequest, 1);
+    bool success;
+
+    tracked_request_begin(req, bs, offset, bytes, BDRV_TRACKED_LOCK, NULL);
+    success = bdrv_try_mark_request_serialising(req, bdrv_get_cluster_size(bs));
+    if (!success) {
+        g_free(req);
+        req = NULL;
+    }
+
+    return req;
+}
+
+void coroutine_fn bdrv_co_range_unlock(BdrvTrackedRequest *req)
+{
+    assert(req->type == BDRV_TRACKED_LOCK);
+
+    tracked_request_end(req);
+    g_free(req);
+}