diff mbox series

[PULL,31/38] device-core: use RCU for list of children of a bus

Message ID 20201012203343.1105018-32-pbonzini@redhat.com
State Superseded
Headers show
Series SCSI, qdev, qtest, meson patches for 2020-10-10 | expand

Commit Message

Paolo Bonzini Oct. 12, 2020, 8:33 p.m. UTC
From: Maxim Levitsky <mlevitsk@redhat.com>

This fixes the race between device emulation code that tries to find
a child device to dispatch the request to (e.g a scsi disk),
and hotplug of a new device to that bus.

Note that this doesn't convert all the readers of the list
but only these that might go over that list without BQL held.

This is a very small first step to make this code thread safe.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200913160259.32145-5-mlevitsk@redhat.com>
[Use RCU_READ_LOCK_GUARD in more places, adjust testcase now that
 the delay in DEVICE_DELETED due to RCU is more consistent. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20201006123904.610658-9-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/core/bus.c          | 28 +++++++++++++++++-----------
 hw/core/qdev.c         | 37 +++++++++++++++++++++++--------------
 hw/scsi/scsi-bus.c     | 12 +++++++++---
 hw/scsi/virtio-scsi.c  |  6 +++++-
 include/hw/qdev-core.h |  9 +++++++++
 5 files changed, 63 insertions(+), 29 deletions(-)

Comments

Eric Blake Oct. 27, 2020, 7:34 p.m. UTC | #1
On 10/12/20 3:33 PM, Paolo Bonzini wrote:
> From: Maxim Levitsky <mlevitsk@redhat.com>

> 

> This fixes the race between device emulation code that tries to find

> a child device to dispatch the request to (e.g a scsi disk),

> and hotplug of a new device to that bus.

> 

> Note that this doesn't convert all the readers of the list

> but only these that might go over that list without BQL held.

> 

> This is a very small first step to make this code thread safe.

> 

> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>

> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>

> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

> Message-Id: <20200913160259.32145-5-mlevitsk@redhat.com>

> [Use RCU_READ_LOCK_GUARD in more places, adjust testcase now that

>  the delay in DEVICE_DELETED due to RCU is more consistent. - Paolo]

> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

> Message-Id: <20201006123904.610658-9-mlevitsk@redhat.com>

> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>


Git bisect lands on this commit as the reason that iotest 240 is failing:

--- /home/eblake/qemu-tmp2/tests/qemu-iotests/240.out	2020-10-23
10:47:02.268392745 -0500
+++ /home/eblake/qemu-tmp2/build/tests/qemu-iotests/240.out.bad
2020-10-27 14:27:38.417188285 -0500
@@ -10,10 +10,10 @@
 {"return": {}}
 {"return": {}}
 {"return": {}}
+{"error": {"class": "GenericError", "desc": "Duplicate ID 'scsi-hd0'
for device"}}
+{"error": {"class": "DeviceNotFound", "desc": "Device 'scsi-hd0' not
found"}}
 {"return": {}}
-{"return": {}}
-{"return": {}}
-{"return": {}}
+{"error": {"class": "GenericError", "desc": "Node hd0 is in use"}}
 {"return": {}}

 === Attach two SCSI disks using the same block device and the same
iothread ===
@@ -29,7 +29,7 @@
 {"return": {}}
 {"return": {}}
 {"return": {}}
-{"return": {}}
+{"error": {"class": "GenericError", "desc": "Node hd0 is in use"}}
 {"return": {}}

 === Attach two SCSI disks using the same block device but different
iothreads ===
@@ -45,8 +45,8 @@
 {"return": {}}
 {"error": {"class": "GenericError", "desc": "Cannot change iothread of
active block backend"}}
 {"return": {}}
-{"return": {}}
-{"return": {}}
+{"error": {"class": "GenericError", "desc": "Cannot change iothread of
active block backend"}}
+{"error": {"class": "DeviceNotFound", "desc": "Device 'scsi-hd1' not
found"}}
 {"return": {}}
 {"return": {}}
 {"return": {}}
Failures: 240
Failed 1 of 1 iotests


-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org
Paolo Bonzini Oct. 27, 2020, 7:48 p.m. UTC | #2
Maxim has already posted a fix for the test, Kevin should pull it.

Paolo

Il mar 27 ott 2020, 20:34 Eric Blake <eblake@redhat.com> ha scritto:

> On 10/12/20 3:33 PM, Paolo Bonzini wrote:

> > From: Maxim Levitsky <mlevitsk@redhat.com>

> >

> > This fixes the race between device emulation code that tries to find

> > a child device to dispatch the request to (e.g a scsi disk),

> > and hotplug of a new device to that bus.

> >

> > Note that this doesn't convert all the readers of the list

> > but only these that might go over that list without BQL held.

> >

> > This is a very small first step to make this code thread safe.

> >

> > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>

> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>

> > Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

> > Message-Id: <20200913160259.32145-5-mlevitsk@redhat.com>

> > [Use RCU_READ_LOCK_GUARD in more places, adjust testcase now that

> >  the delay in DEVICE_DELETED due to RCU is more consistent. - Paolo]

> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

> > Message-Id: <20201006123904.610658-9-mlevitsk@redhat.com>

> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

>

> Git bisect lands on this commit as the reason that iotest 240 is failing:

>

> --- /home/eblake/qemu-tmp2/tests/qemu-iotests/240.out   2020-10-23

> 10:47:02.268392745 -0500

> +++ /home/eblake/qemu-tmp2/build/tests/qemu-iotests/240.out.bad

> 2020-10-27 14:27:38.417188285 -0500

> @@ -10,10 +10,10 @@

>  {"return": {}}

>  {"return": {}}

>  {"return": {}}

> +{"error": {"class": "GenericError", "desc": "Duplicate ID 'scsi-hd0'

> for device"}}

> +{"error": {"class": "DeviceNotFound", "desc": "Device 'scsi-hd0' not

> found"}}

>  {"return": {}}

> -{"return": {}}

> -{"return": {}}

> -{"return": {}}

> +{"error": {"class": "GenericError", "desc": "Node hd0 is in use"}}

>  {"return": {}}

>

>  === Attach two SCSI disks using the same block device and the same

> iothread ===

> @@ -29,7 +29,7 @@

>  {"return": {}}

>  {"return": {}}

>  {"return": {}}

> -{"return": {}}

> +{"error": {"class": "GenericError", "desc": "Node hd0 is in use"}}

>  {"return": {}}

>

>  === Attach two SCSI disks using the same block device but different

> iothreads ===

> @@ -45,8 +45,8 @@

>  {"return": {}}

>  {"error": {"class": "GenericError", "desc": "Cannot change iothread of

> active block backend"}}

>  {"return": {}}

> -{"return": {}}

> -{"return": {}}

> +{"error": {"class": "GenericError", "desc": "Cannot change iothread of

> active block backend"}}

> +{"error": {"class": "DeviceNotFound", "desc": "Device 'scsi-hd1' not

> found"}}

>  {"return": {}}

>  {"return": {}}

>  {"return": {}}

> Failures: 240

> Failed 1 of 1 iotests

>

>

> --

> Eric Blake, Principal Software Engineer

> Red Hat, Inc.           +1-919-301-3226

> Virtualization:  qemu.org | libvirt.org

>

>
<div dir="auto">Maxim has already posted a fix for the test, Kevin should pull it.<div dir="auto"><br></div><div dir="auto">Paolo</div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">Il mar 27 ott 2020, 20:34 Eric Blake &lt;<a href="mailto:eblake@redhat.com">eblake@redhat.com</a>&gt; ha scritto:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">On 10/12/20 3:33 PM, Paolo Bonzini wrote:<br>
&gt; From: Maxim Levitsky &lt;<a href="mailto:mlevitsk@redhat.com" target="_blank" rel="noreferrer">mlevitsk@redhat.com</a>&gt;<br>
&gt; <br>
&gt; This fixes the race between device emulation code that tries to find<br>
&gt; a child device to dispatch the request to (e.g a scsi disk),<br>
&gt; and hotplug of a new device to that bus.<br>
&gt; <br>
&gt; Note that this doesn&#39;t convert all the readers of the list<br>
&gt; but only these that might go over that list without BQL held.<br>
&gt; <br>
&gt; This is a very small first step to make this code thread safe.<br>
&gt; <br>
&gt; Suggested-by: Paolo Bonzini &lt;<a href="mailto:pbonzini@redhat.com" target="_blank" rel="noreferrer">pbonzini@redhat.com</a>&gt;<br>
&gt; Signed-off-by: Maxim Levitsky &lt;<a href="mailto:mlevitsk@redhat.com" target="_blank" rel="noreferrer">mlevitsk@redhat.com</a>&gt;<br>
&gt; Reviewed-by: Stefan Hajnoczi &lt;<a href="mailto:stefanha@redhat.com" target="_blank" rel="noreferrer">stefanha@redhat.com</a>&gt;<br>
&gt; Message-Id: &lt;<a href="mailto:20200913160259.32145-5-mlevitsk@redhat.com" target="_blank" rel="noreferrer">20200913160259.32145-5-mlevitsk@redhat.com</a>&gt;<br>
&gt; [Use RCU_READ_LOCK_GUARD in more places, adjust testcase now that<br>
&gt;  the delay in DEVICE_DELETED due to RCU is more consistent. - Paolo]<br>
&gt; Signed-off-by: Paolo Bonzini &lt;<a href="mailto:pbonzini@redhat.com" target="_blank" rel="noreferrer">pbonzini@redhat.com</a>&gt;<br>
&gt; Message-Id: &lt;<a href="mailto:20201006123904.610658-9-mlevitsk@redhat.com" target="_blank" rel="noreferrer">20201006123904.610658-9-mlevitsk@redhat.com</a>&gt;<br>
&gt; Signed-off-by: Paolo Bonzini &lt;<a href="mailto:pbonzini@redhat.com" target="_blank" rel="noreferrer">pbonzini@redhat.com</a>&gt;<br>
<br>
Git bisect lands on this commit as the reason that iotest 240 is failing:<br>
<br>
--- /home/eblake/qemu-tmp2/tests/qemu-iotests/240.out   2020-10-23<br>
10:47:02.268392745 -0500<br>
+++ /home/eblake/qemu-tmp2/build/tests/qemu-iotests/240.out.bad<br>
2020-10-27 14:27:38.417188285 -0500<br>
@@ -10,10 +10,10 @@<br>
 {&quot;return&quot;: {}}<br>
 {&quot;return&quot;: {}}<br>
 {&quot;return&quot;: {}}<br>
+{&quot;error&quot;: {&quot;class&quot;: &quot;GenericError&quot;, &quot;desc&quot;: &quot;Duplicate ID &#39;scsi-hd0&#39;<br>
for device&quot;}}<br>
+{&quot;error&quot;: {&quot;class&quot;: &quot;DeviceNotFound&quot;, &quot;desc&quot;: &quot;Device &#39;scsi-hd0&#39; not<br>
found&quot;}}<br>
 {&quot;return&quot;: {}}<br>
-{&quot;return&quot;: {}}<br>
-{&quot;return&quot;: {}}<br>
-{&quot;return&quot;: {}}<br>
+{&quot;error&quot;: {&quot;class&quot;: &quot;GenericError&quot;, &quot;desc&quot;: &quot;Node hd0 is in use&quot;}}<br>
 {&quot;return&quot;: {}}<br>
<br>
 === Attach two SCSI disks using the same block device and the same<br>
iothread ===<br>
@@ -29,7 +29,7 @@<br>
 {&quot;return&quot;: {}}<br>
 {&quot;return&quot;: {}}<br>
 {&quot;return&quot;: {}}<br>
-{&quot;return&quot;: {}}<br>
+{&quot;error&quot;: {&quot;class&quot;: &quot;GenericError&quot;, &quot;desc&quot;: &quot;Node hd0 is in use&quot;}}<br>
 {&quot;return&quot;: {}}<br>
<br>
 === Attach two SCSI disks using the same block device but different<br>
iothreads ===<br>
@@ -45,8 +45,8 @@<br>
 {&quot;return&quot;: {}}<br>
 {&quot;error&quot;: {&quot;class&quot;: &quot;GenericError&quot;, &quot;desc&quot;: &quot;Cannot change iothread of<br>
active block backend&quot;}}<br>
 {&quot;return&quot;: {}}<br>
-{&quot;return&quot;: {}}<br>
-{&quot;return&quot;: {}}<br>
+{&quot;error&quot;: {&quot;class&quot;: &quot;GenericError&quot;, &quot;desc&quot;: &quot;Cannot change iothread of<br>
active block backend&quot;}}<br>
+{&quot;error&quot;: {&quot;class&quot;: &quot;DeviceNotFound&quot;, &quot;desc&quot;: &quot;Device &#39;scsi-hd1&#39; not<br>
found&quot;}}<br>
 {&quot;return&quot;: {}}<br>
 {&quot;return&quot;: {}}<br>
 {&quot;return&quot;: {}}<br>
Failures: 240<br>
Failed 1 of 1 iotests<br>
<br>
<br>
-- <br>
Eric Blake, Principal Software Engineer<br>
Red Hat, Inc.           +1-919-301-3226<br>
Virtualization:  <a href="http://qemu.org" rel="noreferrer noreferrer" target="_blank">qemu.org</a> | <a href="http://libvirt.org" rel="noreferrer noreferrer" target="_blank">libvirt.org</a><br>
<br>
</blockquote></div>
diff mbox series

Patch

diff --git a/hw/core/bus.c b/hw/core/bus.c
index 6b987b6946..a0483859ae 100644
--- a/hw/core/bus.c
+++ b/hw/core/bus.c
@@ -49,12 +49,14 @@  int qbus_walk_children(BusState *bus,
         }
     }
 
-    QTAILQ_FOREACH(kid, &bus->children, sibling) {
-        err = qdev_walk_children(kid->child,
-                                 pre_devfn, pre_busfn,
-                                 post_devfn, post_busfn, opaque);
-        if (err < 0) {
-            return err;
+    WITH_RCU_READ_LOCK_GUARD() {
+        QTAILQ_FOREACH_RCU(kid, &bus->children, sibling) {
+            err = qdev_walk_children(kid->child,
+                                     pre_devfn, pre_busfn,
+                                     post_devfn, post_busfn, opaque);
+            if (err < 0) {
+                return err;
+            }
         }
     }
 
@@ -90,8 +92,10 @@  static void bus_reset_child_foreach(Object *obj, ResettableChildCallback cb,
     BusState *bus = BUS(obj);
     BusChild *kid;
 
-    QTAILQ_FOREACH(kid, &bus->children, sibling) {
-        cb(OBJECT(kid->child), opaque, type);
+    WITH_RCU_READ_LOCK_GUARD() {
+        QTAILQ_FOREACH_RCU(kid, &bus->children, sibling) {
+            cb(OBJECT(kid->child), opaque, type);
+        }
     }
 }
 
@@ -194,9 +198,11 @@  static void bus_set_realized(Object *obj, bool value, Error **errp)
 
         /* TODO: recursive realization */
     } else if (!value && bus->realized) {
-        QTAILQ_FOREACH(kid, &bus->children, sibling) {
-            DeviceState *dev = kid->child;
-            qdev_unrealize(dev);
+        WITH_RCU_READ_LOCK_GUARD() {
+            QTAILQ_FOREACH_RCU(kid, &bus->children, sibling) {
+                DeviceState *dev = kid->child;
+                qdev_unrealize(dev);
+            }
         }
         if (bc->unrealize) {
             bc->unrealize(bus);
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 74db78df36..59e5e710b7 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -51,6 +51,12 @@  const VMStateDescription *qdev_get_vmsd(DeviceState *dev)
     return dc->vmsd;
 }
 
+static void bus_free_bus_child(BusChild *kid)
+{
+    object_unref(OBJECT(kid->child));
+    g_free(kid);
+}
+
 static void bus_remove_child(BusState *bus, DeviceState *child)
 {
     BusChild *kid;
@@ -60,15 +66,16 @@  static void bus_remove_child(BusState *bus, DeviceState *child)
             char name[32];
 
             snprintf(name, sizeof(name), "child[%d]", kid->index);
-            QTAILQ_REMOVE(&bus->children, kid, sibling);
+            QTAILQ_REMOVE_RCU(&bus->children, kid, sibling);
 
             bus->num_children--;
 
             /* This gives back ownership of kid->child back to us.  */
             object_property_del(OBJECT(bus), name);
-            object_unref(OBJECT(kid->child));
-            g_free(kid);
-            return;
+
+            /* free the bus kid, when it is safe to do so*/
+            call_rcu(kid, bus_free_bus_child, rcu);
+            break;
         }
     }
 }
@@ -83,7 +90,7 @@  static void bus_add_child(BusState *bus, DeviceState *child)
     kid->child = child;
     object_ref(OBJECT(kid->child));
 
-    QTAILQ_INSERT_HEAD(&bus->children, kid, sibling);
+    QTAILQ_INSERT_HEAD_RCU(&bus->children, kid, sibling);
 
     /* This transfers ownership of kid->child to the property.  */
     snprintf(name, sizeof(name), "child[%d]", kid->index);
@@ -672,17 +679,19 @@  DeviceState *qdev_find_recursive(BusState *bus, const char *id)
     DeviceState *ret;
     BusState *child;
 
-    QTAILQ_FOREACH(kid, &bus->children, sibling) {
-        DeviceState *dev = kid->child;
+    WITH_RCU_READ_LOCK_GUARD() {
+        QTAILQ_FOREACH_RCU(kid, &bus->children, sibling) {
+            DeviceState *dev = kid->child;
 
-        if (dev->id && strcmp(dev->id, id) == 0) {
-            return dev;
-        }
+            if (dev->id && strcmp(dev->id, id) == 0) {
+                return dev;
+            }
 
-        QLIST_FOREACH(child, &dev->child_bus, sibling) {
-            ret = qdev_find_recursive(child, id);
-            if (ret) {
-                return ret;
+            QLIST_FOREACH(child, &dev->child_bus, sibling) {
+                ret = qdev_find_recursive(child, id);
+                if (ret) {
+                    return ret;
+                }
             }
         }
     }
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index 6b1ed7ae9a..4cf1f404b4 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -400,7 +400,10 @@  static bool scsi_target_emulate_report_luns(SCSITargetReq *r)
     id = r->req.dev->id;
     found_lun0 = false;
     n = 0;
-    QTAILQ_FOREACH(kid, &r->req.bus->qbus.children, sibling) {
+
+    RCU_READ_LOCK_GUARD();
+
+    QTAILQ_FOREACH_RCU(kid, &r->req.bus->qbus.children, sibling) {
         DeviceState *qdev = kid->child;
         SCSIDevice *dev = SCSI_DEVICE(qdev);
 
@@ -421,7 +424,7 @@  static bool scsi_target_emulate_report_luns(SCSITargetReq *r)
     memset(r->buf, 0, len);
     stl_be_p(&r->buf[0], n);
     i = found_lun0 ? 8 : 16;
-    QTAILQ_FOREACH(kid, &r->req.bus->qbus.children, sibling) {
+    QTAILQ_FOREACH_RCU(kid, &r->req.bus->qbus.children, sibling) {
         DeviceState *qdev = kid->child;
         SCSIDevice *dev = SCSI_DEVICE(qdev);
 
@@ -430,6 +433,7 @@  static bool scsi_target_emulate_report_luns(SCSITargetReq *r)
             i += 8;
         }
     }
+
     assert(i == n + 8);
     r->len = len;
     return true;
@@ -1572,7 +1576,8 @@  SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int id, int lun)
     BusChild *kid;
     SCSIDevice *target_dev = NULL;
 
-    QTAILQ_FOREACH(kid, &bus->qbus.children, sibling) {
+    RCU_READ_LOCK_GUARD();
+    QTAILQ_FOREACH_RCU(kid, &bus->qbus.children, sibling) {
         DeviceState *qdev = kid->child;
         SCSIDevice *dev = SCSI_DEVICE(qdev);
 
@@ -1591,6 +1596,7 @@  SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int id, int lun)
             }
         }
     }
+
     return target_dev;
 }
 
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 3a71ea7097..971afbb217 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -367,12 +367,16 @@  static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
     case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
         target = req->req.tmf.lun[1];
         s->resetting++;
-        QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
+
+        rcu_read_lock();
+        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
              d = SCSI_DEVICE(kid->child);
              if (d->channel == 0 && d->id == target) {
                 qdev_reset_all(&d->qdev);
              }
         }
+        rcu_read_unlock();
+
         s->resetting--;
         break;
 
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 14d476c587..2c6307e3ed 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -3,6 +3,8 @@ 
 
 #include "qemu/queue.h"
 #include "qemu/bitmap.h"
+#include "qemu/rcu.h"
+#include "qemu/rcu_queue.h"
 #include "qom/object.h"
 #include "hw/hotplug.h"
 #include "hw/resettable.h"
@@ -238,6 +240,7 @@  struct BusClass {
 };
 
 typedef struct BusChild {
+    struct rcu_head rcu;
     DeviceState *child;
     int index;
     QTAILQ_ENTRY(BusChild) sibling;
@@ -258,6 +261,12 @@  struct BusState {
     int max_index;
     bool realized;
     int num_children;
+
+    /*
+     * children is a RCU QTAILQ, thus readers must use RCU to access it,
+     * and writers must hold the big qemu lock
+     */
+
     QTAILQ_HEAD(, BusChild) children;
     QLIST_ENTRY(BusState) sibling;
     ResettableState reset;