diff mbox series

[v1,8/9] plugins: new hwprofile plugin

Message ID 20200602154624.4460-9-alex.bennee@linaro.org
State New
Headers show
Series plugins/next (bug fixes, hwprofile, lockstep) | expand

Commit Message

Alex Bennée June 2, 2020, 3:46 p.m. UTC
This is a plugin intended to help with profiling access to various
bits of system hardware. It only really makes sense for system
emulation.

It takes advantage of the recently exposed helper API that allows us
to see the device name (memory region name) associated with a device.
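
As a rough usage sketch (the loading syntax below is just the generic
-plugin/-d plugin mechanism assumed from the existing plugin documentation,
not something this patch adds), you might run something like:

  qemu-system-aarch64 ... \
     -plugin file=tests/plugin/libhwprofile.so,arg=read,arg=detail \
     -d plugin

and read the per-device access breakdown from the plugin log when QEMU
exits.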

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
 tests/plugin/hwprofile.c | 248 +++++++++++++++++++++++++++++++++++++++
 tests/plugin/Makefile    |   1 +
 2 files changed, 249 insertions(+)
 create mode 100644 tests/plugin/hwprofile.c

-- 
2.20.1

Comments

Robert Foley June 2, 2020, 7:16 p.m. UTC | #1
Hi,

On Tue, 2 Jun 2020 at 11:46, Alex Bennée <alex.bennee@linaro.org> wrote:
<snip>
> diff --git a/tests/plugin/hwprofile.c b/tests/plugin/hwprofile.c

> new file mode 100644

> index 00000000000..f5e0639e762

> --- /dev/null

> +++ b/tests/plugin/hwprofile.c

<snip>
> +static void vcpu_haddr(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo,

> +                       uint64_t vaddr, void *udata)

> +{

> +    struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr);

> +

> +    if (!hwaddr || !qemu_plugin_hwaddr_is_io(hwaddr)) {

> +        return;

> +    } else {

> +        char *name = qemu_plugin_hwaddr_device_name(hwaddr);

> +        DeviceCounts *counts;

> +

> +        g_mutex_lock(&lock);

> +        counts = (DeviceCounts *) g_hash_table_lookup(devices, name);

> +        if (!counts) {

> +            uint64_t off = qemu_plugin_hwaddr_device_offset(hwaddr);

> +            uint64_t base = vaddr - off;

> +            counts = new_count(name, base);

> +        } else {

> +            g_free(name);

> +        }

> +

> +        if (detail) {

> +            uint64_t off = qemu_plugin_hwaddr_device_offset(hwaddr);

> +            IOLocationCounts *io_count = g_hash_table_lookup(counts->access_pattern, &off);

> +            if (!io_count) {

> +                io_count = new_location(off);

> +                g_hash_table_insert(counts->access_pattern, &off, io_count);

> +            }

> +            if (qemu_plugin_mem_is_store(meminfo)) {

> +                io_count->writes++;

> +                io_count->cpu_write |= (1 << cpu_index);

> +            } else {

> +                io_count->reads++;

> +                io_count->cpu_read |= (1 << cpu_index);

> +            }

> +        } else {

> +            if (qemu_plugin_mem_is_store(meminfo)) {

> +                counts->total_writes++;

> +                counts->cpu_write |= (1 << cpu_index);

> +            } else {

> +                counts->total_reads++;

> +                counts->cpu_read |= (1 << cpu_index);


The bitmasks cpu_read and cpu_write are ints.  Maybe to account for
larger core counts > 32, we could assert if the cpu_index is >= 32?


> +            }

> +        }

> +        g_mutex_unlock(&lock);

> +    }

> +}

> +

> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)

> +{

> +    size_t n = qemu_plugin_tb_n_insns(tb);

> +    size_t i;

> +

> +    for (i = 0; i < n; i++) {

> +        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);

> +        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_haddr,

> +                                         QEMU_PLUGIN_CB_NO_REGS,

> +                                         rw, NULL);

> +    }

> +}

> +

> +QEMU_PLUGIN_EXPORT

> +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,

> +                        int argc, char **argv)

> +{

> +    int i;

> +

> +    for (i = 0; i < argc; i++) {

> +        char *opt = argv[i];

> +        if (g_strcmp0(opt, "read") == 0) {

> +            rw = QEMU_PLUGIN_MEM_R;

> +        } else if (g_strcmp0(opt, "write") == 0) {

> +            rw = QEMU_PLUGIN_MEM_W;

> +        } else if (g_strcmp0(opt, "detail") == 0) {


When testing out the options, I noticed that
if we supply arguments of "read", and "write", then we will only get
the last one set, "write", since rw gets overwritten.
One option would be to error out if more than one of these read/write
args is supplied.

Reviewed-by: Robert Foley <robert.foley@linaro.org>

Tested-by: Robert Foley <robert.foley@linaro.org>


> +            detail = true;

> +        } else {

> +            fprintf(stderr, "option parsing failed: %s\n", opt);

> +            return -1;

> +        }

> +    }

> +

> +    plugin_init();

> +

> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);

> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);

> +    return 0;

> +}

> diff --git a/tests/plugin/Makefile b/tests/plugin/Makefile

> index b3250e2504c..d87b8d40699 100644

> --- a/tests/plugin/Makefile

> +++ b/tests/plugin/Makefile

> @@ -14,6 +14,7 @@ NAMES += hotblocks

>  NAMES += howvec

>  NAMES += hotpages

>  NAMES += lockstep

> +NAMES += hwprofile

>

>  SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))

>

> --

> 2.20.1

>
Alex Bennée June 3, 2020, 11:43 a.m. UTC | #2
Robert Foley <robert.foley@linaro.org> writes:

> Hi,

>

> On Tue, 2 Jun 2020 at 11:46, Alex Bennée <alex.bennee@linaro.org> wrote:

> <snip>

>> diff --git a/tests/plugin/hwprofile.c b/tests/plugin/hwprofile.c

>> new file mode 100644

>> index 00000000000..f5e0639e762

>> --- /dev/null

>> +++ b/tests/plugin/hwprofile.c

> <snip>

>> +static void vcpu_haddr(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo,

>> +                       uint64_t vaddr, void *udata)

>> +{

>> +    struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr);

>> +

>> +    if (!hwaddr || !qemu_plugin_hwaddr_is_io(hwaddr)) {

>> +        return;

>> +    } else {

>> +        char *name = qemu_plugin_hwaddr_device_name(hwaddr);

>> +        DeviceCounts *counts;

>> +

>> +        g_mutex_lock(&lock);

>> +        counts = (DeviceCounts *) g_hash_table_lookup(devices, name);

>> +        if (!counts) {

>> +            uint64_t off = qemu_plugin_hwaddr_device_offset(hwaddr);

>> +            uint64_t base = vaddr - off;

>> +            counts = new_count(name, base);

>> +        } else {

>> +            g_free(name);

>> +        }

>> +

>> +        if (detail) {

>> +            uint64_t off = qemu_plugin_hwaddr_device_offset(hwaddr);

>> +            IOLocationCounts *io_count = g_hash_table_lookup(counts->access_pattern, &off);

>> +            if (!io_count) {

>> +                io_count = new_location(off);

>> +                g_hash_table_insert(counts->access_pattern, &off, io_count);

>> +            }

>> +            if (qemu_plugin_mem_is_store(meminfo)) {

>> +                io_count->writes++;

>> +                io_count->cpu_write |= (1 << cpu_index);

>> +            } else {

>> +                io_count->reads++;

>> +                io_count->cpu_read |= (1 << cpu_index);

>> +            }

>> +        } else {

>> +            if (qemu_plugin_mem_is_store(meminfo)) {

>> +                counts->total_writes++;

>> +                counts->cpu_write |= (1 << cpu_index);

>> +            } else {

>> +                counts->total_reads++;

>> +                counts->cpu_read |= (1 << cpu_index);

>

> The bitmasks cpu_read and cpu_write are ints.  Maybe to account for

> larger core counts > 32, we could assert if the cpu_index is >= 32?


I could make them uint64_t and then just warn if we exceed that on start-up.
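
A minimal sketch of that direction (not part of this patch; it assumes the
max_vcpus information qemu_info_t already hands to qemu_plugin_install and
only illustrates the shape of the change):

    /* Widen the per-device masks so one bit per vCPU covers up to 64 CPUs. */
    typedef struct {
        const char *name;
        uint64_t base;
        uint64_t cpu_read;       /* was int */
        uint64_t cpu_write;      /* was int */
        uint64_t total_writes;
        uint64_t total_reads;
        GHashTable *access_pattern;
    } DeviceCounts;

    /* in vcpu_haddr(): shift with a 64-bit constant */
    counts->cpu_write |= (1ULL << cpu_index);

    /* in qemu_plugin_install(): warn once instead of asserting */
    if (info->system_emulation && info->system.max_vcpus > 64) {
        fprintf(stderr, "hwprofile: %d vCPUs configured, per-CPU masks "
                "only track the first 64\n", info->system.max_vcpus);
    }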

>

>> +            }

>> +        }

>> +        g_mutex_unlock(&lock);

>> +    }

>> +}

>> +

>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)

>> +{

>> +    size_t n = qemu_plugin_tb_n_insns(tb);

>> +    size_t i;

>> +

>> +    for (i = 0; i < n; i++) {

>> +        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);

>> +        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_haddr,

>> +                                         QEMU_PLUGIN_CB_NO_REGS,

>> +                                         rw, NULL);

>> +    }

>> +}

>> +

>> +QEMU_PLUGIN_EXPORT

>> +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,

>> +                        int argc, char **argv)

>> +{

>> +    int i;

>> +

>> +    for (i = 0; i < argc; i++) {

>> +        char *opt = argv[i];

>> +        if (g_strcmp0(opt, "read") == 0) {

>> +            rw = QEMU_PLUGIN_MEM_R;

>> +        } else if (g_strcmp0(opt, "write") == 0) {

>> +            rw = QEMU_PLUGIN_MEM_W;

>> +        } else if (g_strcmp0(opt, "detail") == 0) {

>

> When testing out the options, I noticed that

> if we supply arguments of "read", and "write", then we will only get

> the last one set, "write", since rw gets overwritten.

> One option would be to error out if more than one of these read/write

> args is supplied.


Yeah the option parsing is a little clunky although given the way you
pass them from the QEMU command line perhaps not too worth finessing.
The default is rw so you make a conscious decision to only care about one
or the other.

All you can really do is fail to initialise the plugin. Hopefully the
output should be enough clue.
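
For illustration, a minimal sketch of that "fail to initialise" route (not
part of the patch) inside qemu_plugin_install()'s option loop might be:

    bool rw_arg_seen = false;

    for (i = 0; i < argc; i++) {
        char *opt = argv[i];
        if (g_strcmp0(opt, "read") == 0 || g_strcmp0(opt, "write") == 0) {
            /* reject conflicting arguments rather than letting the
             * last one silently win */
            if (rw_arg_seen) {
                fprintf(stderr, "hwprofile: only one of read/write allowed\n");
                return -1;   /* plugin fails to load */
            }
            rw_arg_seen = true;
            rw = (g_strcmp0(opt, "read") == 0) ?
                QEMU_PLUGIN_MEM_R : QEMU_PLUGIN_MEM_W;
        } else if (g_strcmp0(opt, "detail") == 0) {
            detail = true;
        } else {
            fprintf(stderr, "option parsing failed: %s\n", opt);
            return -1;
        }
    }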

>

> Reviewed-by: Robert Foley <robert.foley@linaro.org>

> Tested-by: Robert Foley <robert.foley@linaro.org>


Thanks.

Out of interest what did you measure? Are there any useful use cases you can
think of?

>

>> +            detail = true;

>> +        } else {

>> +            fprintf(stderr, "option parsing failed: %s\n", opt);

>> +            return -1;

>> +        }

>> +    }

>> +

>> +    plugin_init();

>> +

>> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);

>> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);

>> +    return 0;

>> +}

>> diff --git a/tests/plugin/Makefile b/tests/plugin/Makefile

>> index b3250e2504c..d87b8d40699 100644

>> --- a/tests/plugin/Makefile

>> +++ b/tests/plugin/Makefile

>> @@ -14,6 +14,7 @@ NAMES += hotblocks

>>  NAMES += howvec

>>  NAMES += hotpages

>>  NAMES += lockstep

>> +NAMES += hwprofile

>>

>>  SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))

>>

>> --

>> 2.20.1

>>



-- 
Alex Bennée
Robert Foley June 3, 2020, 3:42 p.m. UTC | #3
On Wed, 3 Jun 2020 at 07:43, Alex Bennée <alex.bennee@linaro.org> wrote:
>

>

> Robert Foley <robert.foley@linaro.org> writes:

>

<snip>
> >

> > When testing out the options, I noticed that

> > if we supply arguments of "read", and "write", then we will only get

> > the last one set, "write", since rw gets overwritten.

> > One option would be to error out if more than one of these read/write

> > args is supplied.

>

> Yeah the option parsing is a little clunky although given the way you

> pass them from the QEMU command line perhaps not too worth finessing.

> The default is rw so you make a conscious decision to only care about one

> or the other.

>

> All you can really do is fail to initialise the plugin. Hopefully the

> output should be enough clue.

>

> >

> > Reviewed-by: Robert Foley <robert.foley@linaro.org>

> > Tested-by: Robert Foley <robert.foley@linaro.org>

>

> Thanks.

>

> Out of interest what did you measure? Are there any useful use cases you can

> think of?


We did some testing where we booted an aarch64 VM and an i386 VM a few times
with different core counts (up to 64), and viewed the counters. We also did
a test where we inserted another device (a virtfs mount), booted up and
checked that another device was listed (for virtio-9p).

There are a few useful use cases we are thinking of, in general for debug/perf
testing of PCI devices/drivers. For example, for debug and performance testing
of a case where we use a queue pair (maybe for something like DPDK/SPDK), this
plugin would be interesting for checking that the quantity and locations of
accesses are as expected.

Thanks & Regards,
-Rob
>

> >

> >> +            detail = true;

> >> +        } else {

> >> +            fprintf(stderr, "option parsing failed: %s\n", opt);

> >> +            return -1;

> >> +        }

> >> +    }

> >> +

> >> +    plugin_init();

> >> +

> >> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);

> >> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);

> >> +    return 0;

> >> +}

> >> diff --git a/tests/plugin/Makefile b/tests/plugin/Makefile

> >> index b3250e2504c..d87b8d40699 100644

> >> --- a/tests/plugin/Makefile

> >> +++ b/tests/plugin/Makefile

> >> @@ -14,6 +14,7 @@ NAMES += hotblocks

> >>  NAMES += howvec

> >>  NAMES += hotpages

> >>  NAMES += lockstep

> >> +NAMES += hwprofile

> >>

> >>  SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))

> >>

> >> --

> >> 2.20.1

> >>

>

>

> --

> Alex Bennée
Peter Maydell June 3, 2020, 3:48 p.m. UTC | #4
On Tue, 2 Jun 2020 at 16:54, Alex Bennée <alex.bennee@linaro.org> wrote:
>

> This is a plugin intended to help with profiling access to various

> bits of system hardware. It only really makes sense for system

> emulation.

>

> It takes advantage of the recently exposed helper API that allows us

> to see the device name (memory region name) associated with a device.


This feels like we've let the plugin API get slightly more
access to QEMU's internals than is ideal. Whether an area
of memory happens to be an IO memory region or a memory-backed
one (or whether a device is implemented with one region or
three, or what names we happened to assign them) is kind of
a QEMU internal implementation detail.

thanks
-- PMM
Alex Bennée June 3, 2020, 5:23 p.m. UTC | #5
Peter Maydell <peter.maydell@linaro.org> writes:

> On Tue, 2 Jun 2020 at 16:54, Alex Bennée <alex.bennee@linaro.org> wrote:

>>

>> This is a plugin intended to help with profiling access to various

>> bits of system hardware. It only really makes sense for system

>> emulation.

>>

>> It takes advantage of the recently exposed helper API that allows us

>> to see the device name (memory region name) associated with a device.

>

> This feels like we've let the plugin API get slightly more

> access to QEMU's internals than is ideal. Whether an area

> of memory happens to be an IO memory region or a memory-backed

> one (or whether a device is implemented with one region or

> three, or what names we happened to assign them) is kind of

> a QEMU internal implementation detail.


I'm not so sure it's that much of an implementation detail.

The distinction is between plain RAM and everything else. The details of
everything else are opaque, but the name we pass is public information
(you can get it from "info mtree -o") and you can certainly infer useful
things from it. For example the virtio-pci-notify areas are regions whose
accesses will trap on a real hypervisor, so they allow us to measure how
many vmexits some software might cause.

At the moment I do make up names for regions that get re-generated due
to "reasons" (I never quite understood what the region code was doing
under the hood). Maybe we should only export names of devices the user
has explicitly tagged with -device foo,id=bar?

What should we do about the offset? Most devices export several regions
and there is no reason why those regions should all be together in the
memory map. Does just exposing a physical address make sense here?

-- 
Alex Bennée
Alex Bennée June 3, 2020, 5:26 p.m. UTC | #6
Robert Foley <robert.foley@linaro.org> writes:

> On Wed, 3 Jun 2020 at 07:43, Alex Bennée <alex.bennee@linaro.org> wrote:

>>

>>

>> Robert Foley <robert.foley@linaro.org> writes:

>>

> <snip>

>> >

>> > When testing out the options, I noticed that

>> > if we supply arguments of "read", and "write", then we will only get

>> > the last one set, "write", since rw gets overwritten.

>> > One option would be to error out if more than one of these read/write

>> > args is supplied.

>>

>> Yeah the option parsing is a little clunky although given the way you

>> pass them from the QEMU command line perhaps not too worth finessing.

>> The default is rw so you make a conscious decision to only care about one

>> or the other.

>>

>> All you can really do is fail to initialise the plugin. Hopefully the

>> output should be enough clue.

>>

>> >

>> > Reviewed-by: Robert Foley <robert.foley@linaro.org>

>> > Tested-by: Robert Foley <robert.foley@linaro.org>

>>

>> Thanks.

>>

>> Out of interest what did you measure? Are there any useful use cases you can

>> think of?

>

> We did some testing where we booted an aarch64 VM and an i386 VM a few times

> with different core counts (up to 64), and viewed the counters. We also did

> a test where we inserted another device (a virtfs mount), booted up and

> checked that another device was listed (for virtio-9p).

>

> There are a few useful use cases we are thinking of, in general for debug/perf

> testing of PCI devices/drivers. For example, for debug and performance testing

> of a case where we use a queue pair (maybe for something like DPDK/SPDK), this

> plugin would be interesting for checking that the quantity and locations of

> accesses are as expected.


So one thing that has come up in the VIRT-366 discussion is the
potential efficiencies of the various kick models for MMIO-based
hypervisors. Each interaction with a trapped region of memory triggers a
vmexit, and one thing I wanted to understand, for example, was the
difference between "normal" IRQs and MSIs.

-- 
Alex Bennée
diff mbox series

Patch

diff --git a/tests/plugin/hwprofile.c b/tests/plugin/hwprofile.c
new file mode 100644
index 00000000000..f5e0639e762
--- /dev/null
+++ b/tests/plugin/hwprofile.c
@@ -0,0 +1,248 @@ 
+/*
+ * Copyright (C) 2020, Alex Bennée <alex.bennee@linaro.org>
+ *
+ * HW Profile - breakdown access patterns for IO to devices
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef struct {
+    uint64_t offset;
+    int size;
+    int cpu_read;
+    int cpu_write;
+    uint64_t reads;
+    uint64_t writes;
+} IOLocationCounts;
+
+typedef struct {
+    const char *name;
+    uint64_t base;
+    int cpu_read;
+    int cpu_write;
+    uint64_t total_writes;
+    uint64_t total_reads;
+    GHashTable *access_pattern;
+} DeviceCounts;
+
+static GMutex lock;
+static GHashTable *devices;
+static bool detail;
+
+static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
+
+static inline bool track_reads(void)
+{
+    return rw == QEMU_PLUGIN_MEM_RW || rw == QEMU_PLUGIN_MEM_R;
+}
+
+static inline bool track_writes(void)
+{
+    return rw == QEMU_PLUGIN_MEM_RW || rw == QEMU_PLUGIN_MEM_W;
+}
+
+static void plugin_init(void)
+{
+    devices = g_hash_table_new(g_str_hash, g_str_equal);
+}
+
+static gint sort_cmp(gconstpointer a, gconstpointer b)
+{
+    DeviceCounts *ea = (DeviceCounts *) a;
+    DeviceCounts *eb = (DeviceCounts *) b;
+    return ea->total_reads + ea->total_writes >
+        eb->total_reads + eb->total_writes ? -1 : 1;
+}
+
+static gint sort_off(gconstpointer a, gconstpointer b)
+{
+    IOLocationCounts *ea = (IOLocationCounts *) a;
+    IOLocationCounts *eb = (IOLocationCounts *) b;
+    return ea->offset > eb->offset;
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+    g_autoptr(GString) report = g_string_new("");
+    GList *counts;
+
+    if (!detail) {
+        g_string_printf(report, "Device, Address");
+        if (track_reads()) {
+            g_string_append_printf(report, ", RCPUs, Reads");
+        }
+        if (track_writes()) {
+            g_string_append_printf(report, ",  WCPUs, Writes");
+        }
+        g_string_append_c(report, '\n');
+    }
+
+    counts = g_hash_table_get_values(devices);
+    if (counts && g_list_next(counts)) {
+        GList *it;
+
+        it = g_list_sort(counts, sort_cmp);
+
+        while (it) {
+            DeviceCounts *rec = (DeviceCounts *) it->data;
+            if (detail) {
+                GList *accesses = g_hash_table_get_values(rec->access_pattern);
+                GList *io_it = g_list_sort(accesses, sort_off);
+                g_string_append_printf(report, "%s @ 0x%"PRIx64"\n", rec->name, rec->base);
+                while (io_it) {
+                    IOLocationCounts *loc = (IOLocationCounts *) io_it->data;
+                    g_string_append_printf(report, "  off:%08"PRIx64, loc->offset);
+                    if (track_reads()) {
+                        g_string_append_printf(report, ", 0x%04x, %"PRId64,
+                                               loc->cpu_read, loc->reads);
+                    }
+                    if (track_writes()) {
+                       g_string_append_printf(report, ", 0x%04x, %"PRId64,
+                                               loc->cpu_write, loc->writes);
+                    }
+                    g_string_append_c(report,'\n');
+                    io_it = io_it->next;
+                }
+            } else {
+                g_string_append_printf(report, "%s, 0x%"PRIx64,
+                                       rec->name, rec->base);
+                if (track_reads()) {
+                    g_string_append_printf(report, ", 0x%04x, %"PRId64,
+                                           rec->cpu_read, rec->total_reads);
+                }
+                if (track_writes()) {
+                    g_string_append_printf(report, ", 0x%04x, %"PRId64,
+                                           rec->cpu_write, rec->total_writes);
+                }
+                g_string_append_c(report, '\n');
+            }
+            it = it->next;
+        };
+        g_list_free(it);
+    }
+
+    qemu_plugin_outs(report->str);
+}
+
+static DeviceCounts * new_count(char *name, uint64_t base)
+{
+    DeviceCounts *count = g_new0(DeviceCounts, 1);
+    count->name = name;
+    count->base = base;
+    if (detail) {
+        count->access_pattern = g_hash_table_new(g_int64_hash, g_int64_equal);
+    }
+    g_hash_table_insert(devices, name, count);
+    return count;
+}
+
+static IOLocationCounts * new_location(uint64_t offset)
+{
+    IOLocationCounts *loc = g_new0(IOLocationCounts, 1);
+    loc->offset = offset;
+    return loc;
+}
+
+static void vcpu_haddr(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo,
+                       uint64_t vaddr, void *udata)
+{
+    struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr);
+
+    if (!hwaddr || !qemu_plugin_hwaddr_is_io(hwaddr)) {
+        return;
+    } else {
+        char *name = qemu_plugin_hwaddr_device_name(hwaddr);
+        DeviceCounts *counts;
+
+        g_mutex_lock(&lock);
+        counts = (DeviceCounts *) g_hash_table_lookup(devices, name);
+        if (!counts) {
+            uint64_t off = qemu_plugin_hwaddr_device_offset(hwaddr);
+            uint64_t base = vaddr - off;
+            counts = new_count(name, base);
+        } else {
+            g_free(name);
+        }
+
+        if (detail) {
+            uint64_t off = qemu_plugin_hwaddr_device_offset(hwaddr);
+            IOLocationCounts *io_count = g_hash_table_lookup(counts->access_pattern, &off);
+            if (!io_count) {
+                io_count = new_location(off);
+                g_hash_table_insert(counts->access_pattern, &off, io_count);
+            }
+            if (qemu_plugin_mem_is_store(meminfo)) {
+                io_count->writes++;
+                io_count->cpu_write |= (1 << cpu_index);
+            } else {
+                io_count->reads++;
+                io_count->cpu_read |= (1 << cpu_index);
+            }
+        } else {
+            if (qemu_plugin_mem_is_store(meminfo)) {
+                counts->total_writes++;
+                counts->cpu_write |= (1 << cpu_index);
+            } else {
+                counts->total_reads++;
+                counts->cpu_read |= (1 << cpu_index);
+            }
+        }
+        g_mutex_unlock(&lock);
+    }
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    size_t n = qemu_plugin_tb_n_insns(tb);
+    size_t i;
+
+    for (i = 0; i < n; i++) {
+        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_haddr,
+                                         QEMU_PLUGIN_CB_NO_REGS,
+                                         rw, NULL);
+    }
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+                        int argc, char **argv)
+{
+    int i;
+
+    for (i = 0; i < argc; i++) {
+        char *opt = argv[i];
+        if (g_strcmp0(opt, "read") == 0) {
+            rw = QEMU_PLUGIN_MEM_R;
+        } else if (g_strcmp0(opt, "write") == 0) {
+            rw = QEMU_PLUGIN_MEM_W;
+        } else if (g_strcmp0(opt, "detail") == 0) {
+            detail = true;
+        } else {
+            fprintf(stderr, "option parsing failed: %s\n", opt);
+            return -1;
+        }
+    }
+
+    plugin_init();
+
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+    return 0;
+}
diff --git a/tests/plugin/Makefile b/tests/plugin/Makefile
index b3250e2504c..d87b8d40699 100644
--- a/tests/plugin/Makefile
+++ b/tests/plugin/Makefile
@@ -14,6 +14,7 @@  NAMES += hotblocks
 NAMES += howvec
 NAMES += hotpages
 NAMES += lockstep
+NAMES += hwprofile
 
 SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))