diff mbox series

[v2,5/9] block/io: expand in_flight inc/dec section: simple cases

Message ID 20200427143907.5710-6-vsementsov@virtuozzo.com
State New
Headers show
Series block/io: safer inc/dec in_flight sections | expand

Commit Message

Vladimir Sementsov-Ogievskiy April 27, 2020, 2:39 p.m. UTC
It's safer to expand the in_flight section so that it starts before
entering the coroutine in synchronous wrappers, due to the following
(theoretical) problem:

Consider a write.
It's possible that qemu_coroutine_enter only schedules execution;
assume that this is the case.

Then we may possibly have the following:

1. The outer code somehow checks that we are not in a drained section.

2. It calls bdrv_pwritev(), assuming that it will increase in_flight,
which will protect us from starting a drained section.

3. bdrv_pwritev() calls bdrv_prwv_co() -> bdrv_coroutine_enter()
(in_flight is not yet increased).

4. Assume the coroutine is not actually entered yet, only scheduled, and
we go on to some code which starts a drained section (as in_flight is
zero).

5. The scheduled coroutine starts and blindly increases in_flight, so we
are now in a drained section with an in-flight request.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/io.c | 161 +++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 124 insertions(+), 37 deletions(-)
diff mbox series

Patch

diff --git a/block/io.c b/block/io.c
index 061f3f2590..a91d8c1e21 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1511,7 +1511,8 @@  int coroutine_fn bdrv_co_preadv(BdrvChild *child,
     return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
 }
 
-int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
+static int coroutine_fn bdrv_do_preadv_part(BdrvChild *child,
     int64_t offset, unsigned int bytes,
     QEMUIOVector *qiov, size_t qiov_offset,
     BdrvRequestFlags flags)
@@ -1540,8 +1541,6 @@  int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
         return 0;
     }
 
-    bdrv_inc_in_flight(bs);
-
     /* Don't do copy-on-read if we read data before write operation */
     if (atomic_read(&bs->copy_on_read)) {
         flags |= BDRV_REQ_COPY_ON_READ;
@@ -1554,13 +1553,26 @@  int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
                               bs->bl.request_alignment,
                               qiov, qiov_offset, flags);
     tracked_request_end(&req);
-    bdrv_dec_in_flight(bs);
 
     bdrv_padding_destroy(&pad);
 
     return ret;
 }
 
+int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
+    int64_t offset, unsigned int bytes,
+    QEMUIOVector *qiov, size_t qiov_offset,
+    BdrvRequestFlags flags)
+{
+    int ret;
+
+    bdrv_inc_in_flight(child->bs);
+    ret = bdrv_do_preadv_part(child, offset, bytes, qiov, qiov_offset, flags);
+    bdrv_dec_in_flight(child->bs);
+
+    return ret;
+}
+
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
     int64_t offset, int bytes, BdrvRequestFlags flags)
 {
@@ -1922,7 +1934,8 @@  int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
     return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
 }
 
-int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
+static int coroutine_fn bdrv_do_pwritev_part(BdrvChild *child,
     int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
     BdrvRequestFlags flags)
 {
@@ -1962,7 +1975,6 @@  int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
         return 0;
     }
 
-    bdrv_inc_in_flight(bs);
     /*
      * Align write if necessary by performing a read-modify-write cycle.
      * Pad qiov with the read parts and be sure to have a tracked request not
@@ -1987,7 +1999,19 @@  int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
 
 out:
     tracked_request_end(&req);
-    bdrv_dec_in_flight(bs);
+
+    return ret;
+}
+
+int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
+    BdrvRequestFlags flags)
+{
+    int ret;
+
+    bdrv_inc_in_flight(child->bs);
+    ret = bdrv_do_pwritev_part(child, offset, bytes, qiov, qiov_offset, flags);
+    bdrv_dec_in_flight(child->bs);
 
     return ret;
 }
@@ -2014,17 +2038,18 @@  typedef struct RwCo {
     BdrvRequestFlags flags;
 } RwCo;
 
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
 {
     RwCo *rwco = opaque;
 
     if (!rwco->is_write) {
-        rwco->ret = bdrv_co_preadv(rwco->child, rwco->offset,
-                                   rwco->qiov->size, rwco->qiov,
+        rwco->ret = bdrv_do_preadv_part(rwco->child, rwco->offset,
+                                   rwco->qiov->size, rwco->qiov, 0,
                                    rwco->flags);
     } else {
-        rwco->ret = bdrv_co_pwritev(rwco->child, rwco->offset,
-                                    rwco->qiov->size, rwco->qiov,
+        rwco->ret = bdrv_do_pwritev_part(rwco->child, rwco->offset,
+                                    rwco->qiov->size, rwco->qiov, 0,
                                     rwco->flags);
     }
     aio_wait_kick();
@@ -2047,6 +2072,8 @@  static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
         .flags = flags,
     };
 
+    bdrv_inc_in_flight(child->bs);
+
     if (qemu_in_coroutine()) {
         /* Fast-path if already in coroutine context */
         bdrv_rw_co_entry(&rwco);
@@ -2055,6 +2082,9 @@  static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
         bdrv_coroutine_enter(child->bs, co);
         BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
     }
+
+    bdrv_dec_in_flight(child->bs);
+
     return rwco.ret;
 }
 
@@ -2699,15 +2729,14 @@  typedef struct BdrvVmstateCo {
     int                 ret;
 } BdrvVmstateCo;
 
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
 static int coroutine_fn
-bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+bdrv_do_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
                    bool is_read)
 {
     BlockDriver *drv = bs->drv;
     int ret = -ENOTSUP;
 
-    bdrv_inc_in_flight(bs);
-
     if (!drv) {
         ret = -ENOMEDIUM;
     } else if (drv->bdrv_load_vmstate) {
@@ -2717,17 +2746,19 @@  bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
             ret = drv->bdrv_save_vmstate(bs, qiov, pos);
         }
     } else if (bs->file) {
-        ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
+        bdrv_inc_in_flight(bs->file->bs);
+        ret = bdrv_do_rw_vmstate(bs->file->bs, qiov, pos, is_read);
+        bdrv_dec_in_flight(bs->file->bs);
     }
 
-    bdrv_dec_in_flight(bs);
     return ret;
 }
 
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
 static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
 {
     BdrvVmstateCo *co = opaque;
-    co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
+    co->ret = bdrv_do_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
     aio_wait_kick();
 }
 
@@ -2735,8 +2766,12 @@  static inline int
 bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
                 bool is_read)
 {
+    int ret;
+
+    bdrv_inc_in_flight(bs);
+
     if (qemu_in_coroutine()) {
-        return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
+        ret = bdrv_do_rw_vmstate(bs, qiov, pos, is_read);
     } else {
         BdrvVmstateCo data = {
             .bs         = bs,
@@ -2749,8 +2784,12 @@  bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
 
         bdrv_coroutine_enter(bs, co);
         BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS);
-        return data.ret;
+        ret = data.ret;
     }
+
+    bdrv_dec_in_flight(bs);
+
+    return ret;
 }
 
 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
@@ -2828,16 +2867,14 @@  void bdrv_aio_cancel_async(BlockAIOCB *acb)
 /**************************************************************/
 /* Coroutine block device emulation */
 
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
+static int coroutine_fn bdrv_do_flush(BlockDriverState *bs)
 {
     int current_gen;
-    int ret = 0;
-
-    bdrv_inc_in_flight(bs);
+    int ret;
 
-    if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
-        bdrv_is_sg(bs)) {
-        goto early_exit;
+    if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) || bdrv_is_sg(bs)) {
+        return 0;
     }
 
     qemu_co_mutex_lock(&bs->reqs_lock);
@@ -2935,8 +2972,17 @@  out:
     qemu_co_queue_next(&bs->flush_queue);
     qemu_co_mutex_unlock(&bs->reqs_lock);
 
-early_exit:
+    return ret;
+}
+
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
+{
+    int ret;
+
+    bdrv_inc_in_flight(bs);
+    ret = bdrv_do_flush(bs);
     bdrv_dec_in_flight(bs);
+
     return ret;
 }
 
@@ -2945,11 +2991,12 @@  typedef struct FlushCo {
     int ret;
 } FlushCo;
 
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
 static void coroutine_fn bdrv_flush_co_entry(void *opaque)
 {
     FlushCo *rwco = opaque;
 
-    rwco->ret = bdrv_co_flush(rwco->bs);
+    rwco->ret = bdrv_do_flush(rwco->bs);
     aio_wait_kick();
 }
 
@@ -2961,6 +3008,8 @@  int bdrv_flush(BlockDriverState *bs)
         .ret = NOT_DONE,
     };
 
+    bdrv_inc_in_flight(bs);
+
     if (qemu_in_coroutine()) {
         /* Fast-path if already in coroutine context */
         bdrv_flush_co_entry(&flush_co);
@@ -2970,11 +3019,14 @@  int bdrv_flush(BlockDriverState *bs)
         BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
     }
 
+    bdrv_dec_in_flight(bs);
+
     return flush_co.ret;
 }
 
-int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
-                                  int64_t bytes)
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
+static int coroutine_fn bdrv_do_pdiscard(BdrvChild *child, int64_t offset,
+                                         int64_t bytes)
 {
     BdrvTrackedRequest req;
     int max_pdiscard, ret;
@@ -3012,7 +3064,6 @@  int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
     head = offset % align;
     tail = (offset + bytes) % align;
 
-    bdrv_inc_in_flight(bs);
     tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);
 
     ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
@@ -3083,7 +3134,18 @@  int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
 out:
     bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret);
     tracked_request_end(&req);
-    bdrv_dec_in_flight(bs);
+    return ret;
+}
+
+int coroutine_fn bdrv_co_pdiscard(BdrvChild *child,
+                                  int64_t offset, int64_t bytes)
+{
+    int ret;
+
+    bdrv_inc_in_flight(child->bs);
+    ret = bdrv_do_pdiscard(child, offset, bytes);
+    bdrv_dec_in_flight(child->bs);
+
     return ret;
 }
 
@@ -3094,11 +3156,12 @@  typedef struct DiscardCo {
     int ret;
 } DiscardCo;
 
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
 static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
 {
     DiscardCo *rwco = opaque;
 
-    rwco->ret = bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes);
+    rwco->ret = bdrv_do_pdiscard(rwco->child, rwco->offset, rwco->bytes);
     aio_wait_kick();
 }
 
@@ -3112,6 +3175,8 @@  int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes)
         .ret = NOT_DONE,
     };
 
+    bdrv_inc_in_flight(child->bs);
+
     if (qemu_in_coroutine()) {
         /* Fast-path if already in coroutine context */
         bdrv_pdiscard_co_entry(&rwco);
@@ -3121,6 +3186,8 @@  int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes)
         BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
     }
 
+    bdrv_dec_in_flight(child->bs);
+
     return rwco.ret;
 }
 
@@ -3411,9 +3478,12 @@  static void bdrv_parent_cb_resize(BlockDriverState *bs)
  * If 'exact' is true, the file must be resized to exactly the given
  * 'offset'.  Otherwise, it is sufficient for the node to be at least
  * 'offset' bytes in length.
+ *
+ * To be called between exactly one pair of bdrv_inc/dec_in_flight()
  */
-int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
-                                  PreallocMode prealloc, Error **errp)
+static int coroutine_fn bdrv_do_truncate(BdrvChild *child,
+                                         int64_t offset, bool exact,
+                                         PreallocMode prealloc, Error **errp)
 {
     BlockDriverState *bs = child->bs;
     BlockDriver *drv = bs->drv;
@@ -3444,7 +3514,6 @@  int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
         new_bytes = 0;
     }
 
-    bdrv_inc_in_flight(bs);
     tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
                           BDRV_TRACKED_TRUNCATE);
 
@@ -3493,6 +3562,19 @@  int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
 
 out:
     tracked_request_end(&req);
+
+    return ret;
+}
+
+int coroutine_fn bdrv_co_truncate(BdrvChild *child,
+                                  int64_t offset, bool exact,
+                                  PreallocMode prealloc, Error **errp)
+{
+    int ret;
+    BlockDriverState *bs = child->bs;
+
+    bdrv_inc_in_flight(bs);
+    ret = bdrv_do_truncate(child, offset, exact, prealloc, errp);
     bdrv_dec_in_flight(bs);
 
     return ret;
@@ -3507,10 +3589,11 @@  typedef struct TruncateCo {
     int ret;
 } TruncateCo;
 
+/* To be called between exactly one pair of bdrv_inc/dec_in_flight() */
 static void coroutine_fn bdrv_truncate_co_entry(void *opaque)
 {
     TruncateCo *tco = opaque;
-    tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact,
+    tco->ret = bdrv_do_truncate(tco->child, tco->offset, tco->exact,
                                 tco->prealloc, tco->errp);
     aio_wait_kick();
 }
@@ -3528,6 +3611,8 @@  int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
         .ret        = NOT_DONE,
     };
 
+    bdrv_inc_in_flight(child->bs);
+
     if (qemu_in_coroutine()) {
         /* Fast-path if already in coroutine context */
         bdrv_truncate_co_entry(&tco);
@@ -3537,5 +3622,7 @@  int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
         BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE);
     }
 
+    bdrv_dec_in_flight(child->bs);
+
     return tco.ret;
 }