[v1,08/13] plugins: expand the bb plugin to be thread safe and track per-cpu

Message ID: 20200709141327.14631-9-alex.bennee@linaro.org
State: Superseded
Series: misc rc0 fixes (docs, plugins, docker)

Commit Message

Alex Bennée July 9, 2020, 2:13 p.m.
While there isn't any easy way to make the inline counts thread safe,
we can ensure the callback-based ones are. While we are at it, we
introduce a new option ("idle") to dump a report of the current bb and
insn count each time a vCPU enters the idle state.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

Cc: Dave Bort <dbort@dbort.com>

---
v2
  - fixup for non-inline linux-user case
  - minor cleanup and re-factor
---
 tests/plugin/bb.c | 96 ++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 83 insertions(+), 13 deletions(-)

-- 
2.20.1
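
For reference, with the new "idle" option the plugin emits one line per
vCPU as it enters the idle state, in the format built by
gen_one_cpu_report() in the patch below; the counts shown here are
illustrative:

    Idling CPU0: bb's: 4721, insns: 29804
    Idling CPU1: bb's: 1963, insns: 11852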

Comments

Robert Foley July 9, 2020, 5:12 p.m. | #1
Reviewed-by: Robert Foley <robert.foley@linaro.org>


Emilio G. Cota July 11, 2020, 9:56 p.m. | #2
On Thu, Jul 09, 2020 at 15:13:22 +0100, Alex Bennée wrote:
> While there isn't any easy way to make the inline counts thread safe

Why not? At least on 64-bit hosts TCG will emit a single write to
update the 64-bit counter.

> we can ensure the callback based ones are. While we are at it we can
> reduce introduce a new option ("idle") to dump a report of the current

s/reduce//

> bb and insn count each time a vCPU enters the idle state.
>
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Cc: Dave Bort <dbort@dbort.com>
>
> ---
> v2
>   - fixup for non-inline linux-user case
>   - minor cleanup and re-factor
> ---
>  tests/plugin/bb.c | 96 ++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 83 insertions(+), 13 deletions(-)
>
> diff --git a/tests/plugin/bb.c b/tests/plugin/bb.c
> index df19fd359df3..89c373e19cd8 100644
> --- a/tests/plugin/bb.c
> +++ b/tests/plugin/bb.c
> @@ -16,24 +16,67 @@
>
>  QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>
> -static uint64_t bb_count;
> -static uint64_t insn_count;
> +typedef struct {
> +    GMutex lock;
> +    int index;
> +    uint64_t bb_count;
> +    uint64_t insn_count;
> +} CPUCount;

Why use a mutex?

Just have a per-vCPU struct that each vCPU thread updates with atomic_write.
Then when we want to print a report we just have to collect the counts
with atomic_read().
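
A minimal sketch of that lock-free scheme, assuming C11 stdatomic in
place of QEMU's atomic_read()/atomic_set() macros so it stands alone
(the helper names are illustrative). Each counter has exactly one
writer, its own vCPU thread, so relaxed ordering suffices; the
reporting thread may see a slightly stale count but never a torn one:

    #include <stdatomic.h>
    #include <stdint.h>

    typedef struct {
        int index;
        _Atomic uint64_t bb_count;
        _Atomic uint64_t insn_count;
    } CPUCount;

    /* vCPU thread: single-writer update, no lock required */
    static void count_update(CPUCount *count, uint64_t n_insns)
    {
        atomic_store_explicit(&count->bb_count,
            atomic_load_explicit(&count->bb_count, memory_order_relaxed) + 1,
            memory_order_relaxed);
        atomic_store_explicit(&count->insn_count,
            atomic_load_explicit(&count->insn_count, memory_order_relaxed) + n_insns,
            memory_order_relaxed);
    }

    /* reporting thread: an atomic load cannot observe a torn 64-bit value */
    static uint64_t count_read(_Atomic uint64_t *counter)
    {
        return atomic_load_explicit(counter, memory_order_relaxed);
    }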

Also, consider just adding a comment to bb.c noting that it is not
thread-safe, and having a separate bb-threadsafe.c plugin for this
patch. The reason is that bb.c is very simple, which is useful for
understanding the interface.

Thanks,
		E.
Alex Bennée July 12, 2020, 9:48 a.m. | #3
Emilio G. Cota <cota@braap.org> writes:

> On Thu, Jul 09, 2020 at 15:13:22 +0100, Alex Bennée wrote:
>> While there isn't any easy way to make the inline counts thread safe
>
> Why not? At least on 64-bit hosts TCG will emit a single write to
> update the 64-bit counter.

Well, the operation is an add, so that is a load/add/store cycle on
non-x86. If we want to do it properly we should expose an atomic add
operation, which may be helper based, or maybe generate an atomic
operation if the backend can support it easily.
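
To make the distinction concrete, a sketch (illustrative, not QEMU
code): the first helper is what the inline ADD_U64 op effectively
lowers to on such backends, the second is the single read-modify-write
that a hypothetical atomic inline op would need to emit, here via the
GCC/Clang __atomic builtin:

    #include <stdint.h>

    /* plain load/add/store: an update made by another vCPU between
     * the load and the store is silently lost */
    static void racy_add(uint64_t *ctr, uint64_t n)
    {
        uint64_t tmp = *ctr;  /* load  */
        tmp += n;             /* add   */
        *ctr = tmp;           /* store */
    }

    /* one atomic read-modify-write: no update can be lost */
    static void atomic_add(uint64_t *ctr, uint64_t n)
    {
        __atomic_fetch_add(ctr, n, __ATOMIC_RELAXED);
    }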

-- 
Alex Bennée

Patch

diff --git a/tests/plugin/bb.c b/tests/plugin/bb.c
index df19fd359df3..89c373e19cd8 100644
--- a/tests/plugin/bb.c
+++ b/tests/plugin/bb.c
@@ -16,24 +16,67 @@ 
 
 QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
 
-static uint64_t bb_count;
-static uint64_t insn_count;
+typedef struct {
+    GMutex lock;
+    int index;
+    uint64_t bb_count;
+    uint64_t insn_count;
+} CPUCount;
+
+/* Used by the inline & linux-user counts */
 static bool do_inline;
+static CPUCount inline_count;
+
+/* Dump running CPU total on idle? */
+static bool idle_report;
+static GPtrArray *counts;
+static int max_cpus;
+
+static void gen_one_cpu_report(CPUCount *count, GString *report)
+{
+    if (count->bb_count) {
+        g_string_append_printf(report, "CPU%d: "
+                               "bb's: %" PRIu64", insns: %" PRIu64 "\n",
+                               count->index,
+                               count->bb_count, count->insn_count);
+    }
+}
 
 static void plugin_exit(qemu_plugin_id_t id, void *p)
 {
-    g_autofree gchar *out = g_strdup_printf(
-        "bb's: %" PRIu64", insns: %" PRIu64 "\n",
-        bb_count, insn_count);
-    qemu_plugin_outs(out);
+    g_autoptr(GString) report = g_string_new("");
+
+    if (do_inline || !max_cpus) {
+        g_string_printf(report, "bb's: %" PRIu64", insns: %" PRIu64 "\n",
+                        inline_count.bb_count, inline_count.insn_count);
+    } else {
+        g_ptr_array_foreach(counts, (GFunc) gen_one_cpu_report, report);
+    }
+    qemu_plugin_outs(report->str);
+}
+
+static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index)
+{
+    CPUCount *count = g_ptr_array_index(counts, cpu_index);
+    g_autoptr(GString) report = g_string_new("");
+    gen_one_cpu_report(count, report);
+
+    if (report->len > 0) {
+        g_string_prepend(report, "Idling ");
+        qemu_plugin_outs(report->str);
+    }
 }
 
 static void vcpu_tb_exec(unsigned int cpu_index, void *udata)
 {
-    unsigned long n_insns = (unsigned long)udata;
+    CPUCount *count = max_cpus ?
+        g_ptr_array_index(counts, cpu_index) : &inline_count;
 
-    insn_count += n_insns;
-    bb_count++;
+    unsigned long n_insns = (unsigned long)udata;
+    g_mutex_lock(&count->lock);
+    count->insn_count += n_insns;
+    count->bb_count++;
+    g_mutex_unlock(&count->lock);
 }
 
 static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
@@ -42,9 +85,9 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
 
     if (do_inline) {
         qemu_plugin_register_vcpu_tb_exec_inline(tb, QEMU_PLUGIN_INLINE_ADD_U64,
-                                                 &bb_count, 1);
+                                                 &inline_count.bb_count, 1);
         qemu_plugin_register_vcpu_tb_exec_inline(tb, QEMU_PLUGIN_INLINE_ADD_U64,
-                                                 &insn_count, n_insns);
+                                                 &inline_count.insn_count, n_insns);
     } else {
         qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec,
                                              QEMU_PLUGIN_CB_NO_REGS,
@@ -56,8 +99,35 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                            const qemu_info_t *info,
                                            int argc, char **argv)
 {
-    if (argc && strcmp(argv[0], "inline") == 0) {
-        do_inline = true;
+    int i;
+
+    for (i = 0; i < argc; i++) {
+        char *opt = argv[i];
+        if (g_strcmp0(opt, "inline") == 0) {
+            do_inline = true;
+        } else if (g_strcmp0(opt, "idle") == 0) {
+            idle_report = true;
+        } else {
+            fprintf(stderr, "option parsing failed: %s\n", opt);
+            return -1;
+        }
+    }
+
+    if (info->system_emulation && !do_inline) {
+        max_cpus = info->system.max_vcpus;
+        counts = g_ptr_array_new();
+        for (i = 0; i < max_cpus; i++) {
+            CPUCount *count = g_new0(CPUCount, 1);
+            g_mutex_init(&count->lock);
+            count->index = i;
+            g_ptr_array_add(counts, count);
+        }
+    } else if (!do_inline) {
+        g_mutex_init(&inline_count.lock);
+    }
+
+    if (idle_report) {
+        qemu_plugin_register_vcpu_idle_cb(id, vcpu_idle);
     }
 
     qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
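
To try the plugin (a sketch of the usual invocation, assuming it has
been built under tests/plugin; the rest of the command line is
illustrative):

    qemu-system-aarch64 ... -d plugin -plugin tests/plugin/libbb.so,arg=idle

The per-vCPU totals are then written via qemu_plugin_outs() on each
idle transition and by plugin_exit() at shutdown; note that plugin
output is only visible when the -d plugin log item is enabled.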