diff mbox series

[PULL,03/16] ssi: cache SSIPeripheralClass to avoid GET_CLASS()

Message ID 20221025152042.278287-4-clg@kaod.org
State New
Headers show
Series None | expand

Commit Message

Cédric Le Goater Oct. 25, 2022, 3:20 p.m. UTC
From: Alex Bennée <alex.bennee@linaro.org>

Investigating why some BMC models are so slow compared to a plain ARM
virt machines I did some profiling of:

  ./qemu-system-arm -M romulus-bmc -nic user \
    -drive
    file=obmc-phosphor-image-romulus.static.mtd,format=raw,if=mtd \
    -nographic -serial mon:stdio

And saw that object_class_dynamic_cast_assert was dominating the
profile times. We have a number of cases in this model of the SSI bus.
As the class is static once the object is created we just cache it and
use it instead of the dynamic case macros.

Profiling against:

  ./tests/venv/bin/avocado run \
    tests/avocado/machine_aspeed.py:test_arm_ast2500_romulus_openbmc_v2_9_0

Before: 35.565 s ±  0.087 s
After: 15.713 s ±  0.287 s

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Cc: Cédric Le Goater <clg@kaod.org>
Tested-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Message-Id: <20220811151413.3350684-6-alex.bennee@linaro.org>
Message-Id: <20220923084803.498337-6-clg@kaod.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 include/hw/ssi/ssi.h |  3 +++
 hw/ssi/ssi.c         | 18 ++++++++----------
 2 files changed, 11 insertions(+), 10 deletions(-)

Comments

Philippe Mathieu-Daudé May 12, 2023, 4:02 a.m. UTC | #1
Hi Alex,

On 25/10/22 17:20, Cédric Le Goater wrote:
> From: Alex Bennée <alex.bennee@linaro.org>
> 
> Investigating why some BMC models are so slow compared to a plain ARM
> virt machines I did some profiling of:
> 
>    ./qemu-system-arm -M romulus-bmc -nic user \
>      -drive
>      file=obmc-phosphor-image-romulus.static.mtd,format=raw,if=mtd \
>      -nographic -serial mon:stdio
> 
> And saw that object_class_dynamic_cast_assert was dominating the
> profile times. We have a number of cases in this model of the SSI bus.
> As the class is static once the object is created we just cache it and
> use it instead of the dynamic case macros.
> 
> Profiling against:
> 
>    ./tests/venv/bin/avocado run \
>      tests/avocado/machine_aspeed.py:test_arm_ast2500_romulus_openbmc_v2_9_0
> 
> Before: 35.565 s ±  0.087 s
> After: 15.713 s ±  0.287 s

Do you remember if you were using --enable-qom-cast-debug when
profiling this?

> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Cc: Cédric Le Goater <clg@kaod.org>
> Tested-by: Cédric Le Goater <clg@kaod.org>
> Reviewed-by: Cédric Le Goater <clg@kaod.org>
> Message-Id: <20220811151413.3350684-6-alex.bennee@linaro.org>
> Message-Id: <20220923084803.498337-6-clg@kaod.org>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>   include/hw/ssi/ssi.h |  3 +++
>   hw/ssi/ssi.c         | 18 ++++++++----------
>   2 files changed, 11 insertions(+), 10 deletions(-)
> 
> diff --git a/include/hw/ssi/ssi.h b/include/hw/ssi/ssi.h
> index f411858ab083..6950f86810d3 100644
> --- a/include/hw/ssi/ssi.h
> +++ b/include/hw/ssi/ssi.h
> @@ -59,6 +59,9 @@ struct SSIPeripheralClass {
>   struct SSIPeripheral {
>       DeviceState parent_obj;
>   
> +    /* cache the class */
> +    SSIPeripheralClass *spc;
> +
>       /* Chip select state */
>       bool cs;
>   };
> diff --git a/hw/ssi/ssi.c b/hw/ssi/ssi.c
> index 003931fb509e..d54a109beeb5 100644
> --- a/hw/ssi/ssi.c
> +++ b/hw/ssi/ssi.c
> @@ -38,9 +38,8 @@ static void ssi_cs_default(void *opaque, int n, int level)
>       bool cs = !!level;
>       assert(n == 0);
>       if (s->cs != cs) {
> -        SSIPeripheralClass *ssc = SSI_PERIPHERAL_GET_CLASS(s);
> -        if (ssc->set_cs) {
> -            ssc->set_cs(s, cs);
> +        if (s->spc->set_cs) {
> +            s->spc->set_cs(s, cs);
>           }
>       }
>       s->cs = cs;
> @@ -48,11 +47,11 @@ static void ssi_cs_default(void *opaque, int n, int level)
>   
>   static uint32_t ssi_transfer_raw_default(SSIPeripheral *dev, uint32_t val)
>   {
> -    SSIPeripheralClass *ssc = SSI_PERIPHERAL_GET_CLASS(dev);
> +    SSIPeripheralClass *ssc = dev->spc;
>   
>       if ((dev->cs && ssc->cs_polarity == SSI_CS_HIGH) ||
> -            (!dev->cs && ssc->cs_polarity == SSI_CS_LOW) ||
> -            ssc->cs_polarity == SSI_CS_NONE) {
> +        (!dev->cs && ssc->cs_polarity == SSI_CS_LOW) ||
> +        ssc->cs_polarity == SSI_CS_NONE) {
>           return ssc->transfer(dev, val);
>       }
>       return 0;
> @@ -67,6 +66,7 @@ static void ssi_peripheral_realize(DeviceState *dev, Error **errp)
>               ssc->cs_polarity != SSI_CS_NONE) {
>           qdev_init_gpio_in_named(dev, ssi_cs_default, SSI_GPIO_CS, 1);
>       }
> +    s->spc = ssc;
>   
>       ssc->realize(s, errp);
>   }
> @@ -115,13 +115,11 @@ uint32_t ssi_transfer(SSIBus *bus, uint32_t val)
>   {
>       BusState *b = BUS(bus);
>       BusChild *kid;
> -    SSIPeripheralClass *ssc;
>       uint32_t r = 0;
>   
>       QTAILQ_FOREACH(kid, &b->children, sibling) {
> -        SSIPeripheral *peripheral = SSI_PERIPHERAL(kid->child);
> -        ssc = SSI_PERIPHERAL_GET_CLASS(peripheral);
> -        r |= ssc->transfer_raw(peripheral, val);
> +        SSIPeripheral *p = SSI_PERIPHERAL(kid->child);
> +        r |= p->spc->transfer_raw(p, val);
>       }
>   
>       return r;
diff mbox series

Patch

diff --git a/include/hw/ssi/ssi.h b/include/hw/ssi/ssi.h
index f411858ab083..6950f86810d3 100644
--- a/include/hw/ssi/ssi.h
+++ b/include/hw/ssi/ssi.h
@@ -59,6 +59,9 @@  struct SSIPeripheralClass {
 struct SSIPeripheral {
     DeviceState parent_obj;
 
+    /* cache the class */
+    SSIPeripheralClass *spc;
+
     /* Chip select state */
     bool cs;
 };
diff --git a/hw/ssi/ssi.c b/hw/ssi/ssi.c
index 003931fb509e..d54a109beeb5 100644
--- a/hw/ssi/ssi.c
+++ b/hw/ssi/ssi.c
@@ -38,9 +38,8 @@  static void ssi_cs_default(void *opaque, int n, int level)
     bool cs = !!level;
     assert(n == 0);
     if (s->cs != cs) {
-        SSIPeripheralClass *ssc = SSI_PERIPHERAL_GET_CLASS(s);
-        if (ssc->set_cs) {
-            ssc->set_cs(s, cs);
+        if (s->spc->set_cs) {
+            s->spc->set_cs(s, cs);
         }
     }
     s->cs = cs;
@@ -48,11 +47,11 @@  static void ssi_cs_default(void *opaque, int n, int level)
 
 static uint32_t ssi_transfer_raw_default(SSIPeripheral *dev, uint32_t val)
 {
-    SSIPeripheralClass *ssc = SSI_PERIPHERAL_GET_CLASS(dev);
+    SSIPeripheralClass *ssc = dev->spc;
 
     if ((dev->cs && ssc->cs_polarity == SSI_CS_HIGH) ||
-            (!dev->cs && ssc->cs_polarity == SSI_CS_LOW) ||
-            ssc->cs_polarity == SSI_CS_NONE) {
+        (!dev->cs && ssc->cs_polarity == SSI_CS_LOW) ||
+        ssc->cs_polarity == SSI_CS_NONE) {
         return ssc->transfer(dev, val);
     }
     return 0;
@@ -67,6 +66,7 @@  static void ssi_peripheral_realize(DeviceState *dev, Error **errp)
             ssc->cs_polarity != SSI_CS_NONE) {
         qdev_init_gpio_in_named(dev, ssi_cs_default, SSI_GPIO_CS, 1);
     }
+    s->spc = ssc;
 
     ssc->realize(s, errp);
 }
@@ -115,13 +115,11 @@  uint32_t ssi_transfer(SSIBus *bus, uint32_t val)
 {
     BusState *b = BUS(bus);
     BusChild *kid;
-    SSIPeripheralClass *ssc;
     uint32_t r = 0;
 
     QTAILQ_FOREACH(kid, &b->children, sibling) {
-        SSIPeripheral *peripheral = SSI_PERIPHERAL(kid->child);
-        ssc = SSI_PERIPHERAL_GET_CLASS(peripheral);
-        r |= ssc->transfer_raw(peripheral, val);
+        SSIPeripheral *p = SSI_PERIPHERAL(kid->child);
+        r |= p->spc->transfer_raw(p, val);
     }
 
     return r;